Browse Source

fix #1506 append ellipsis

Shinsuke Sugaya 7 years ago
parent
commit
dd5ed5a4c0

+ 15 - 0
src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java

@@ -234,6 +234,9 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
     /** The key of the configuration. e.g. u0009u000Au000Bu000Cu000Du001Cu001Du001Eu001Fu0020u00A0u1680u180Eu2000u2001u2002u2003u2004u2005u2006u2007u2008u2009u200Au200Bu200Cu202Fu205Fu3000uFEFFuFFFDu00B6 */
     String CRAWLER_DOCUMENT_SPACE_CHARS = "crawler.document.space.chars";
 
+    /** The key of the configuration. e.g. u002eu06d4u2e3cu3002 */
+    String CRAWLER_DOCUMENT_FULLSTOP_CHARS = "crawler.document.fullstop.chars";
+
     /** The key of the configuration. e.g. UTF-8 */
     String CRAWLER_CRAWLING_DATA_ENCODING = "crawler.crawling.data.encoding";
 
@@ -1912,6 +1915,13 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
      */
     String getCrawlerDocumentSpaceChars();
 
+    /**
+     * Get the value for the key 'crawler.document.fullstop.chars'. <br>
+     * The value is, e.g. u002eu06d4u2e3cu3002 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getCrawlerDocumentFullstopChars();
+
     /**
      * Get the value for the key 'crawler.crawling.data.encoding'. <br>
      * The value is, e.g. UTF-8 <br>
@@ -5879,6 +5889,10 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             return get(FessConfig.CRAWLER_DOCUMENT_SPACE_CHARS);
         }
 
+        // Looks up the 'crawler.document.fullstop.chars' key through get() and returns the result unchanged.
+        public String getCrawlerDocumentFullstopChars() {
+            return get(FessConfig.CRAWLER_DOCUMENT_FULLSTOP_CHARS);
+        }
+
         public String getCrawlerCrawlingDataEncoding() {
             return get(FessConfig.CRAWLER_CRAWLING_DATA_ENCODING);
         }
@@ -7924,6 +7938,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             defaultMap
                     .put(FessConfig.CRAWLER_DOCUMENT_SPACE_CHARS,
                             "u0009u000Au000Bu000Cu000Du001Cu001Du001Eu001Fu0020u00A0u1680u180Eu2000u2001u2002u2003u2004u2005u2006u2007u2008u2009u200Au200Bu200Cu202Fu205Fu3000uFEFFuFFFDu00B6");
+            defaultMap.put(FessConfig.CRAWLER_DOCUMENT_FULLSTOP_CHARS, "u002eu06d4u2e3cu3002");
             defaultMap.put(FessConfig.CRAWLER_CRAWLING_DATA_ENCODING, "UTF-8");
             defaultMap.put(FessConfig.CRAWLER_WEB_PROTOCOLS, "http,https");
             defaultMap.put(FessConfig.CRAWLER_FILE_PROTOCOLS, "file,smb,ftp");

+ 26 - 3
src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java

@@ -96,6 +96,8 @@ public interface FessProp {
 
     public static final String CRAWLER_DOCUMENT_SPACE_CHARS = "crawlerDocumentSpaceChars";
 
+    public static final String CRAWLER_DOCUMENT_FULLSTOP_CHARS = "crawlerDocumentFullstopChars";
+
     public static final String INDEX_ADMIN_ARRAY_FIELD_SET = "indexAdminArrayFieldSet";
 
     public static final String INDEX_ADMIN_DATE_FIELD_SET = "indexAdminDateFieldSet";
@@ -1444,9 +1446,12 @@ public interface FessProp {
     String getCrawlerDocumentSpaceChars();
 
     public default int[] getCrawlerDocumentSpaceCharsAsArray() {
-        int[] spaceChars = (int[]) propMap.get(CRAWLER_DOCUMENT_SPACE_CHARS);
+        return getCrawlerDocumentCharsAsArray(CRAWLER_DOCUMENT_SPACE_CHARS, getCrawlerDocumentSpaceChars());
+    }
+
+    public default int[] getCrawlerDocumentCharsAsArray(final String key, final String spaceStr) {
+        int[] spaceChars = (int[]) propMap.get(key);
         if (spaceChars == null) {
-            final String spaceStr = getCrawlerDocumentSpaceChars();
             if (spaceStr.startsWith("u")) {
                 spaceChars =
                         split(spaceStr, "u").get(
@@ -1459,11 +1464,29 @@ public interface FessProp {
                     spaceChars[i] = spaceStr.codePointAt(i);
                 }
             }
-            propMap.put(CRAWLER_DOCUMENT_SPACE_CHARS, spaceChars);
+            propMap.put(key, spaceChars);
         }
         return spaceChars;
     }
 
+    String getCrawlerDocumentFullstopChars();
+
+    /**
+     * Tells whether the given text ends with one of the configured full-stop characters.
+     *
+     * @param s the text to inspect
+     * @return {@code true} if the last code point of {@code s} is contained in
+     *         {@link #getCrawlerDocumentFullstopCharsAsArray()}; {@code false} if {@code s}
+     *         is blank or ends with any other character
+     */
+    public default boolean endsWithFullstop(final String s) {
+        if (StringUtil.isBlank(s)) {
+            return false;
+        }
+        // The configured values are code points, so compare them with the last code point of
+        // the text. String.valueOf(int) would produce the decimal digits (e.g. "46" for '.'),
+        // which never matches the actual trailing character.
+        final int lastCodePoint = s.codePointBefore(s.length());
+        for (final int fullstop : getCrawlerDocumentFullstopCharsAsArray()) {
+            if (fullstop == lastCodePoint) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Returns the full-stop character setting as an int array.
+     * Delegates to {@link #getCrawlerDocumentCharsAsArray(String, String)} using the
+     * {@code CRAWLER_DOCUMENT_FULLSTOP_CHARS} key and the string from
+     * {@link #getCrawlerDocumentFullstopChars()}.
+     *
+     * @return the array produced by the shared conversion helper
+     */
+    public default int[] getCrawlerDocumentFullstopCharsAsArray() {
+        final String configured = getCrawlerDocumentFullstopChars();
+        return getCrawlerDocumentCharsAsArray(CRAWLER_DOCUMENT_FULLSTOP_CHARS, configured);
+    }
+
     String getQueryAdditionalResponseFields();
 
     public default String[] getQueryAdditionalResponseFields(final String... fields) {

+ 7 - 1
src/main/java/org/codelibs/fess/util/QueryResponseList.java

@@ -24,6 +24,7 @@ import java.util.ListIterator;
 import java.util.Map;
 
 import org.apache.commons.lang3.StringUtils;
+import org.codelibs.core.lang.StringUtil;
 import org.codelibs.core.stream.StreamUtil;
 import org.codelibs.fess.helper.QueryHelper;
 import org.codelibs.fess.helper.ViewHelper;
@@ -41,6 +42,8 @@ import org.slf4j.LoggerFactory;
 
 public class QueryResponseList implements List<Map<String, Object>> {
 
+    private static final String ELLIPSIS = "...";
+
     private static final String SCORE = "score";
 
     private static final Logger logger = LoggerFactory.getLogger(QueryResponseList.class);
@@ -160,7 +163,10 @@ public class QueryResponseList implements List<Map<String, Object>> {
                         for (int i = 0; i < fragments.length; i++) {
                             texts[i] = fragments[i].string();
                         }
-                        final String value = StringUtils.join(texts, "...");
+                        String value = StringUtils.join(texts, ELLIPSIS);
+                        if (StringUtil.isNotBlank(value) && !fessConfig.endsWithFullstop(value)) {
+                            value = value + ELLIPSIS;
+                        }
                         docMap.put(hlPrefix + highlightField.getName(), value);
                     }
                 }

+ 1 - 0
src/main/resources/fess_config.properties

@@ -152,6 +152,7 @@ crawler.document.max.alphanum.term.size=20
 crawler.document.max.symbol.term.size=10
 crawler.document.duplicate.term.removed=false
 crawler.document.space.chars=u0009u000Au000Bu000Cu000Du001Cu001Du001Eu001Fu0020u00A0u1680u180Eu2000u2001u2002u2003u2004u2005u2006u2007u2008u2009u200Au200Bu200Cu202Fu205Fu3000uFEFFuFFFDu00B6
+crawler.document.fullstop.chars=u002eu06d4u2e3cu3002
 crawler.crawling.data.encoding=UTF-8
 crawler.web.protocols=http,https
 crawler.file.protocols=file,smb,ftp

+ 19 - 4
src/test/java/org/codelibs/fess/mylasta/direction/FessPropTest.java

@@ -120,10 +120,25 @@ public class FessPropTest extends UnitFessTestCase {
             }
         };
 
-        int[] spaceChars = fessConfig.getCrawlerDocumentSpaceCharsAsArray();
-        assertEquals(2, spaceChars.length);
-        assertEquals(32, spaceChars[0]);
-        assertEquals(12288, spaceChars[1]);
+        int[] chars = fessConfig.getCrawlerDocumentSpaceCharsAsArray();
+        assertEquals(2, chars.length);
+        assertEquals(32, chars[0]);
+        assertEquals(12288, chars[1]);
+    }
+
+    public void test_getCrawlerDocumentFullstopCharsAsArray() {
+        // Empty the shared map first so the setting overridden below is the one that gets converted.
+        FessProp.propMap.clear();
+        final String setting = "u0020u3000";
+        final FessConfig config = new FessConfig.SimpleImpl() {
+            @Override
+            public String getCrawlerDocumentFullstopChars() {
+                return setting;
+            }
+        };
+
+        final int[] actual = config.getCrawlerDocumentFullstopCharsAsArray();
+        // Two "u"-prefixed hex entries are expected to come back as their numeric values.
+        assertEquals(2, actual.length);
+        assertEquals(0x0020, actual[0]);
+        assertEquals(0x3000, actual[1]);
     }
 
     public void test_getCrawlerDocumentHtmlPrunedTagsAsArray() throws Exception {