Преглед изворног кода

fix #1398 trim spaces in title field

Shinsuke Sugaya пре 7 година
родитељ
комит
ee01e90ff3

+ 9 - 0
src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java

@@ -175,6 +175,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
         }
 
         putAdditionalData(dataMap, responseData, document);
+        normalizeData(responseData, dataMap);
 
         try {
             resultData.setData(SerializeUtil.fromObjectToBinary(dataMap));
@@ -184,6 +185,14 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
         resultData.setEncoding(charsetName);
     }
 
+    protected void normalizeData(final ResponseData responseData, Map<String, Object> dataMap) {
+        final String titleField = fessConfig.getIndexFieldTitle(); // key under which the title is stored in dataMap
+        final Object rawTitle = dataMap.get(titleField);
+        if (rawTitle != null) { // a missing or null title entry is left untouched
+            dataMap.put(titleField, ComponentUtil.getDocumentHelper().getTitle(responseData, rawTitle.toString(), dataMap));
+        }
+    }
+
     protected void processMetaRobots(final ResponseData responseData, final ResultData resultData, final Document document) {
         final Map<String, String> configMap = getConfigPrameterMap(responseData, ConfigName.CONFIG);
         String ignore = configMap.get(IGNORE_META_ROBOTS);

+ 13 - 0
src/main/java/org/codelibs/fess/helper/DocumentHelper.java

@@ -61,6 +61,19 @@ public class DocumentHelper {
 
     private static final String SIMILAR_DOC_HASH_PREFIX = "$";
 
+    public String getTitle(final ResponseData responseData, final String title, final Map<String, Object> dataMap) {
+        // Runs the title through TextUtil.normalizeText using the space characters from getSpaceChars().
+        if (title == null) {
+            return StringUtil.EMPTY; // no title supplied
+        }
+        final int[] spaceChars = getSpaceChars();
+        try (final Reader titleReader = new StringReader(title)) {
+            return TextUtil.normalizeText(titleReader).initialCapacity(title.length()).spaceChars(spaceChars).execute();
+        } catch (final IOException e) {
+            return StringUtil.EMPTY; // best effort: an I/O failure yields an empty title
+        }
+    }
+
     public String getContent(final ResponseData responseData, final String content, final Map<String, Object> dataMap) {
         if (content == null) {
             return StringUtil.EMPTY; // empty