Normalize the title field extracted by FessXpathTransformer.

FessXpathTransformer gains a normalizeData() hook that runs right before the
data map is serialized, and DocumentHelper gains getTitle(), which passes the
title through TextUtil.normalizeText().

NOTE(review): this patch was recovered from a copy whose line breaks and
generic type arguments had been lost. Indentation and the <String, Object> /
<String, String> type arguments are reconstructed - confirm them against the
target tree (or apply with --ignore-whitespace). The blob ids on the "index"
lines are those of the originally recorded change.

diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java
index 436c7ae03..293dd3644 100644
--- a/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java
+++ b/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java
@@ -175,6 +175,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
         }
 
         putAdditionalData(dataMap, responseData, document);
+        normalizeData(responseData, dataMap);
 
         try {
             resultData.setData(SerializeUtil.fromObjectToBinary(dataMap));
@@ -184,6 +185,22 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
         resultData.setEncoding(charsetName);
     }
 
+    /**
+     * Normalizes extracted values in the data map before it is serialized.
+     * Only the title field is handled here: when present, it is replaced with
+     * the result of {@code DocumentHelper#getTitle}.
+     *
+     * @param responseData response data passed through to the document helper
+     * @param dataMap extracted field values; updated in place
+     */
+    protected void normalizeData(final ResponseData responseData, final Map<String, Object> dataMap) {
+        final String titleField = fessConfig.getIndexFieldTitle();
+        final Object titleObj = dataMap.get(titleField);
+        if (titleObj != null) {
+            dataMap.put(titleField, ComponentUtil.getDocumentHelper().getTitle(responseData, titleObj.toString(), dataMap));
+        }
+    }
+
     protected void processMetaRobots(final ResponseData responseData, final ResultData resultData, final Document document) {
         final Map<String, String> configMap = getConfigPrameterMap(responseData, ConfigName.CONFIG);
         String ignore = configMap.get(IGNORE_META_ROBOTS);
diff --git a/src/main/java/org/codelibs/fess/helper/DocumentHelper.java b/src/main/java/org/codelibs/fess/helper/DocumentHelper.java
index 571043220..560818e50 100644
--- a/src/main/java/org/codelibs/fess/helper/DocumentHelper.java
+++ b/src/main/java/org/codelibs/fess/helper/DocumentHelper.java
@@ -61,6 +61,31 @@ public class DocumentHelper {
 
     private static final String SIMILAR_DOC_HASH_PREFIX = "$";
 
+    /**
+     * Returns the given title after text normalization.
+     * The title is passed through {@code TextUtil.normalizeText} with the
+     * space characters from {@code getSpaceChars()}.
+     *
+     * @param responseData not used by this implementation
+     * @param title raw title, may be {@code null}
+     * @param dataMap not used by this implementation
+     * @return the normalized title, or an empty string if {@code title} is
+     *         {@code null} or normalization fails with an {@link IOException}
+     */
+    public String getTitle(final ResponseData responseData, final String title, final Map<String, Object> dataMap) {
+        if (title == null) {
+            return StringUtil.EMPTY; // empty
+        }
+
+        final int[] spaceChars = getSpaceChars();
+        try (final Reader reader = new StringReader(title)) {
+            return TextUtil.normalizeText(reader).initialCapacity(title.length()).spaceChars(spaceChars).execute();
+        } catch (final IOException e) {
+            // best effort: a failed normalization yields an empty title
+            return StringUtil.EMPTY; // empty
+        }
+    }
+
     public String getContent(final ResponseData responseData, final String content, final Map<String, Object> dataMap) {
         if (content == null) {
             return StringUtil.EMPTY; // empty