diff --git a/src/main/java/org/codelibs/fess/helper/DocumentHelper.java b/src/main/java/org/codelibs/fess/helper/DocumentHelper.java index 321147695..8093d8ef8 100644 --- a/src/main/java/org/codelibs/fess/helper/DocumentHelper.java +++ b/src/main/java/org/codelibs/fess/helper/DocumentHelper.java @@ -36,8 +36,8 @@ public class DocumentHelper implements Serializable { final int maxAlphanumTermSize = getMaxAlphanumTermSize(); final int maxSymbolTermSize = getMaxSymbolTermSize(); - // TODO removeDuplication - return TextUtil.normalizeText(content, content.length(), maxAlphanumTermSize, maxSymbolTermSize, false); + final boolean duplicateTermRemoved = isDuplicateTermRemoved(); + return TextUtil.normalizeText(content, content.length(), maxAlphanumTermSize, maxSymbolTermSize, duplicateTermRemoved); } protected int getMaxAlphanumTermSize() { @@ -50,6 +50,11 @@ public class DocumentHelper implements Serializable { return fessConfig.getCrawlerDocumentMaxSymbolTermSizeAsInteger().intValue(); } + protected boolean isDuplicateTermRemoved() { + final FessConfig fessConfig = ComponentUtil.getFessConfig(); + return fessConfig.isCrawlerDocumentDuplicateTermRemoved(); + } + public String getDigest(final ResponseData responseData, final String content, final Map dataMap, final int maxWidth) { if (content == null) { return StringUtil.EMPTY; // empty diff --git a/src/main/java/org/codelibs/fess/mylasta/action/FessLabels.java b/src/main/java/org/codelibs/fess/mylasta/action/FessLabels.java index 359e77253..371de2679 100644 --- a/src/main/java/org/codelibs/fess/mylasta/action/FessLabels.java +++ b/src/main/java/org/codelibs/fess/mylasta/action/FessLabels.java @@ -725,7 +725,7 @@ public class FessLabels extends ActionMessages { /** The key of the message: {0} results */ public static final String LABELS_searchoptions_num = "{labels.searchoptions_num}"; - /** The key of the message: Preferred Languages */ + /** The key of the message: Languages */ public static final String LABELS_searchoptions_menu_lang = "{labels.searchoptions_menu_lang}"; /** The key of the message: Labels */ diff --git a/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java b/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java index a24b89509..95ca090ad 100644 --- a/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java +++ b/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java @@ -137,6 +137,9 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction /** The key of the configuration. e.g. 10 */ String CRAWLER_DOCUMENT_MAX_SYMBOL_TERM_SIZE = "crawler.document.max.symbol.term.size"; + /** The key of the configuration. e.g. false */ + String CRAWLER_DOCUMENT_DUPLICATE_TERM_REMOVED = "crawler.document.duplicate.term.removed"; + /** The key of the configuration. e.g. UTF-8 */ String CRAWLER_CRAWLING_DATA_ENCODING = "crawler.crawling.data.encoding"; @@ -1214,6 +1217,20 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction */ Integer getCrawlerDocumentMaxSymbolTermSizeAsInteger(); + /** + * Get the value for the key 'crawler.document.duplicate.term.removed'.
+ * The value is, e.g. false
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + */ + String getCrawlerDocumentDuplicateTermRemoved(); + + /** + * Is the property for the key 'crawler.document.duplicate.term.removed' true?
+ * The value is, e.g. false
+ * @return The determination, true or false. (if not found, exception but basically no way) + */ + boolean isCrawlerDocumentDuplicateTermRemoved(); + /** * Get the value for the key 'crawler.crawling.data.encoding'.
* The value is, e.g. UTF-8
@@ -3782,6 +3799,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction return getAsInteger(FessConfig.CRAWLER_DOCUMENT_MAX_SYMBOL_TERM_SIZE); } + public String getCrawlerDocumentDuplicateTermRemoved() { + return get(FessConfig.CRAWLER_DOCUMENT_DUPLICATE_TERM_REMOVED); + } + + public boolean isCrawlerDocumentDuplicateTermRemoved() { + return is(FessConfig.CRAWLER_DOCUMENT_DUPLICATE_TERM_REMOVED); + } + public String getCrawlerCrawlingDataEncoding() { return get(FessConfig.CRAWLER_CRAWLING_DATA_ENCODING); } diff --git a/src/main/resources/fess_config.properties b/src/main/resources/fess_config.properties index efc2af79e..8b576b679 100644 --- a/src/main/resources/fess_config.properties +++ b/src/main/resources/fess_config.properties @@ -84,6 +84,7 @@ crawler.document.use.site.encoding.on.english=false crawler.document.append.data=true crawler.document.max.alphanum.term.size=20 crawler.document.max.symbol.term.size=10 +crawler.document.duplicate.term.removed=false crawler.crawling.data.encoding=UTF-8 crawler.web.protocols=http,https crawler.file.protocols=file,smb,ftp