diff --git a/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java b/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java
index 7fee4ce65..0bfe3c9fd 100644
--- a/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java
+++ b/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java
@@ -234,6 +234,9 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
     /** The key of the configuration. e.g. u0009u000Au000Bu000Cu000Du001Cu001Du001Eu001Fu0020u00A0u1680u180Eu2000u2001u2002u2003u2004u2005u2006u2007u2008u2009u200Au200Bu200Cu202Fu205Fu3000uFEFFuFFFDu00B6 */
     String CRAWLER_DOCUMENT_SPACE_CHARS = "crawler.document.space.chars";
 
+    /** The key of the configuration. e.g. u002eu06d4u2e3cu3002 */
+    String CRAWLER_DOCUMENT_FULLSTOP_CHARS = "crawler.document.fullstop.chars";
+
     /** The key of the configuration. e.g. UTF-8 */
     String CRAWLER_CRAWLING_DATA_ENCODING = "crawler.crawling.data.encoding";
 
@@ -1912,6 +1915,13 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
      */
     String getCrawlerDocumentSpaceChars();
 
+    /**
+     * Get the value for the key 'crawler.document.fullstop.chars'. <br>
+     * The value is, e.g. u002eu06d4u2e3cu3002 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getCrawlerDocumentFullstopChars();
+
     /**
      * Get the value for the key 'crawler.crawling.data.encoding'. <br>
      * The value is, e.g. UTF-8 <br>
@@ -5879,6 +5889,10 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             return get(FessConfig.CRAWLER_DOCUMENT_SPACE_CHARS);
         }
 
+        public String getCrawlerDocumentFullstopChars() {
+            return get(FessConfig.CRAWLER_DOCUMENT_FULLSTOP_CHARS);
+        }
+
         public String getCrawlerCrawlingDataEncoding() {
             return get(FessConfig.CRAWLER_CRAWLING_DATA_ENCODING);
         }
@@ -7924,6 +7938,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             defaultMap
                     .put(FessConfig.CRAWLER_DOCUMENT_SPACE_CHARS,
                             "u0009u000Au000Bu000Cu000Du001Cu001Du001Eu001Fu0020u00A0u1680u180Eu2000u2001u2002u2003u2004u2005u2006u2007u2008u2009u200Au200Bu200Cu202Fu205Fu3000uFEFFuFFFDu00B6");
+            defaultMap.put(FessConfig.CRAWLER_DOCUMENT_FULLSTOP_CHARS, "u002eu06d4u2e3cu3002");
             defaultMap.put(FessConfig.CRAWLER_CRAWLING_DATA_ENCODING, "UTF-8");
             defaultMap.put(FessConfig.CRAWLER_WEB_PROTOCOLS, "http,https");
             defaultMap.put(FessConfig.CRAWLER_FILE_PROTOCOLS, "file,smb,ftp");
diff --git a/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java b/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java
index c11d8d4a5..b54fce9c5 100644
--- a/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java
+++ b/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java
@@ -96,6 +96,8 @@ public interface FessProp {
 
     public static final String CRAWLER_DOCUMENT_SPACE_CHARS = "crawlerDocumentSpaceChars";
 
+    public static final String CRAWLER_DOCUMENT_FULLSTOP_CHARS = "crawlerDocumentFullstopChars";
+
     public static final String INDEX_ADMIN_ARRAY_FIELD_SET = "indexAdminArrayFieldSet";
 
     public static final String INDEX_ADMIN_DATE_FIELD_SET = "indexAdminDateFieldSet";
@@ -1444,9 +1446,12 @@ public interface FessProp {
     String getCrawlerDocumentSpaceChars();
 
     public default int[] getCrawlerDocumentSpaceCharsAsArray() {
-        int[] spaceChars = (int[]) propMap.get(CRAWLER_DOCUMENT_SPACE_CHARS);
+        return getCrawlerDocumentCharsAsArray(CRAWLER_DOCUMENT_SPACE_CHARS, getCrawlerDocumentSpaceChars());
+    }
+
+    public default int[] getCrawlerDocumentCharsAsArray(final String key, final String spaceStr) {
+        int[] spaceChars = (int[]) propMap.get(key);
         if (spaceChars == null) {
-            final String spaceStr = getCrawlerDocumentSpaceChars();
             if (spaceStr.startsWith("u")) {
                 spaceChars =
                         split(spaceStr, "u").get(
@@ -1459,11 +1464,36 @@ public interface FessProp {
                     spaceChars[i] = spaceStr.codePointAt(i);
                 }
             }
-            propMap.put(CRAWLER_DOCUMENT_SPACE_CHARS, spaceChars);
+            propMap.put(key, spaceChars);
         }
         return spaceChars;
     }
 
+    String getCrawlerDocumentFullstopChars();
+
+    /**
+     * Tells whether the given text ends with one of the full-stop characters from 'crawler.document.fullstop.chars'.
+     *
+     * @param s the text to inspect
+     * @return true if s ends with one of the configured full-stop code points; false otherwise, including when s is blank
+     */
+    public default boolean endsWithFullstop(final String s) {
+        if (StringUtil.isBlank(s)) {
+            return false;
+        }
+        for (final int i : getCrawlerDocumentFullstopCharsAsArray()) {
+            // i is a Unicode code point: String.valueOf(int) would give its decimal digits (e.g. "46"), not the character.
+            if (s.endsWith(String.valueOf(Character.toChars(i)))) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    public default int[] getCrawlerDocumentFullstopCharsAsArray() {
+        return getCrawlerDocumentCharsAsArray(CRAWLER_DOCUMENT_FULLSTOP_CHARS, getCrawlerDocumentFullstopChars());
+    }
+
     String getQueryAdditionalResponseFields();
 
     public default String[] getQueryAdditionalResponseFields(final String... fields) {
diff --git a/src/main/java/org/codelibs/fess/util/QueryResponseList.java b/src/main/java/org/codelibs/fess/util/QueryResponseList.java
index ffade1f1f..f9d108508 100644
--- a/src/main/java/org/codelibs/fess/util/QueryResponseList.java
+++ b/src/main/java/org/codelibs/fess/util/QueryResponseList.java
@@ -24,6 +24,7 @@ import java.util.ListIterator;
 import java.util.Map;
 
 import org.apache.commons.lang3.StringUtils;
+import org.codelibs.core.lang.StringUtil;
 import org.codelibs.core.stream.StreamUtil;
 import org.codelibs.fess.helper.QueryHelper;
 import org.codelibs.fess.helper.ViewHelper;
@@ -41,6 +42,8 @@ import org.slf4j.LoggerFactory;
 
 public class QueryResponseList implements List<Map<String, Object>> {
 
+    private static final String ELLIPSIS = "...";
+
     private static final String SCORE = "score";
 
     private static final Logger logger = LoggerFactory.getLogger(QueryResponseList.class);
@@ -160,7 +163,10 @@ public class QueryResponseList implements List<Map<String, Object>> {
                                 for (int i = 0; i < fragments.length; i++) {
                                     texts[i] = fragments[i].string();
                                 }
-                                final String value = StringUtils.join(texts, "...");
+                                String value = StringUtils.join(texts, ELLIPSIS);
+                                if (StringUtil.isNotBlank(value) && !fessConfig.endsWithFullstop(value)) {
+                                    value = value + ELLIPSIS;
+                                }
                                 docMap.put(hlPrefix + highlightField.getName(), value);
                             }
                         }
diff --git a/src/main/resources/fess_config.properties b/src/main/resources/fess_config.properties
index a55cb7d2b..82bcc68d4 100644
--- a/src/main/resources/fess_config.properties
+++ b/src/main/resources/fess_config.properties
@@ -152,6 +152,7 @@ crawler.document.max.alphanum.term.size=20
 crawler.document.max.symbol.term.size=10
 crawler.document.duplicate.term.removed=false
 crawler.document.space.chars=u0009u000Au000Bu000Cu000Du001Cu001Du001Eu001Fu0020u00A0u1680u180Eu2000u2001u2002u2003u2004u2005u2006u2007u2008u2009u200Au200Bu200Cu202Fu205Fu3000uFEFFuFFFDu00B6
+crawler.document.fullstop.chars=u002eu06d4u2e3cu3002
 crawler.crawling.data.encoding=UTF-8
 crawler.web.protocols=http,https
 crawler.file.protocols=file,smb,ftp
diff --git a/src/test/java/org/codelibs/fess/mylasta/direction/FessPropTest.java b/src/test/java/org/codelibs/fess/mylasta/direction/FessPropTest.java
index da7d25a10..fd9e0bb4d 100644
--- a/src/test/java/org/codelibs/fess/mylasta/direction/FessPropTest.java
+++ b/src/test/java/org/codelibs/fess/mylasta/direction/FessPropTest.java
@@ -120,10 +120,25 @@ public class FessPropTest extends UnitFessTestCase {
             }
         };
 
-        int[] spaceChars = fessConfig.getCrawlerDocumentSpaceCharsAsArray();
-        assertEquals(2, spaceChars.length);
-        assertEquals(32, spaceChars[0]);
-        assertEquals(12288, spaceChars[1]);
+        int[] chars = fessConfig.getCrawlerDocumentSpaceCharsAsArray();
+        assertEquals(2, chars.length);
+        assertEquals(32, chars[0]);
+        assertEquals(12288, chars[1]);
+    }
+
+    public void test_getCrawlerDocumentFullstopCharsAsArray() {
+        FessProp.propMap.clear();
+        FessConfig fessConfig = new FessConfig.SimpleImpl() {
+            @Override
+            public String getCrawlerDocumentFullstopChars() {
+                return "u0020u3000";
+            }
+        };
+
+        int[] chars = fessConfig.getCrawlerDocumentFullstopCharsAsArray();
+        assertEquals(2, chars.length);
+        assertEquals(32, chars[0]);
+        assertEquals(12288, chars[1]);
     }
 
     public void test_getCrawlerDocumentHtmlPrunedTagsAsArray() throws Exception {