Kaynağa Gözat

fix #516 : add crawler.document.duplicate.term.removed

Shinsuke Sugaya 9 yıl önce
ebeveyn
işleme
ef84011e80

+ 7 - 2
src/main/java/org/codelibs/fess/helper/DocumentHelper.java

@@ -36,8 +36,8 @@ public class DocumentHelper implements Serializable {
 
         final int maxAlphanumTermSize = getMaxAlphanumTermSize();
         final int maxSymbolTermSize = getMaxSymbolTermSize();
-        // TODO removeDuplication
-        return TextUtil.normalizeText(content, content.length(), maxAlphanumTermSize, maxSymbolTermSize, false);
+        final boolean duplicateTermRemoved = isDuplicateTermRemoved();
+        return TextUtil.normalizeText(content, content.length(), maxAlphanumTermSize, maxSymbolTermSize, duplicateTermRemoved);
     }
 
     protected int getMaxAlphanumTermSize() {
@@ -50,6 +50,11 @@ public class DocumentHelper implements Serializable {
         return fessConfig.getCrawlerDocumentMaxSymbolTermSizeAsInteger().intValue();
     }
 
+    protected boolean isDuplicateTermRemoved() { // overridable hook: tells normalization whether duplicate terms should be removed
+        final FessConfig fessConfig = ComponentUtil.getFessConfig(); // obtain the FessConfig instance from ComponentUtil
+        return fessConfig.isCrawlerDocumentDuplicateTermRemoved(); // boolean view of 'crawler.document.duplicate.term.removed'
+    }
+
     public String getDigest(final ResponseData responseData, final String content, final Map<String, Object> dataMap, final int maxWidth) {
         if (content == null) {
             return StringUtil.EMPTY; // empty

+ 1 - 1
src/main/java/org/codelibs/fess/mylasta/action/FessLabels.java

@@ -725,7 +725,7 @@ public class FessLabels extends ActionMessages {
     /** The key of the message: {0} results */
     public static final String LABELS_searchoptions_num = "{labels.searchoptions_num}";
 
-    /** The key of the message: Preferred Languages */
+    /** The key of the message: Languages */
     public static final String LABELS_searchoptions_menu_lang = "{labels.searchoptions_menu_lang}";
 
     /** The key of the message: Labels */

+ 25 - 0
src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java

@@ -137,6 +137,9 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
     /** The key of the configuration. e.g. 10 */
     String CRAWLER_DOCUMENT_MAX_SYMBOL_TERM_SIZE = "crawler.document.max.symbol.term.size";
 
+    /** The key of the configuration. e.g. false */
+    String CRAWLER_DOCUMENT_DUPLICATE_TERM_REMOVED = "crawler.document.duplicate.term.removed";
+
     /** The key of the configuration. e.g. UTF-8 */
     String CRAWLER_CRAWLING_DATA_ENCODING = "crawler.crawling.data.encoding";
 
@@ -1214,6 +1217,20 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
      */
     Integer getCrawlerDocumentMaxSymbolTermSizeAsInteger();
 
+    /**
+     * Get the string value for the key 'crawler.document.duplicate.term.removed'. <br>
+     * The value is, e.g. false <br>
+     * @return The value of the found property. (NotNull: an exception is thrown if the key is not found, which basically should not happen)
+     */
+    String getCrawlerDocumentDuplicateTermRemoved();
+
+    /**
+     * Determine whether the property for the key 'crawler.document.duplicate.term.removed' is true. <br>
+     * The value is, e.g. false <br>
+     * @return The determination, true or false. (An exception is thrown if the key is not found, which basically should not happen)
+     */
+    boolean isCrawlerDocumentDuplicateTermRemoved();
+
     /**
      * Get the value for the key 'crawler.crawling.data.encoding'. <br>
      * The value is, e.g. UTF-8 <br>
@@ -3782,6 +3799,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             return getAsInteger(FessConfig.CRAWLER_DOCUMENT_MAX_SYMBOL_TERM_SIZE);
         }
 
+        public String getCrawlerDocumentDuplicateTermRemoved() { // string accessor for 'crawler.document.duplicate.term.removed'
+            return get(FessConfig.CRAWLER_DOCUMENT_DUPLICATE_TERM_REMOVED); // delegates to get(key); presumably the inherited property lookup (confirm)
+        }
+
+        public boolean isCrawlerDocumentDuplicateTermRemoved() { // boolean accessor for 'crawler.document.duplicate.term.removed'
+            return is(FessConfig.CRAWLER_DOCUMENT_DUPLICATE_TERM_REMOVED); // delegates to is(key); presumably the inherited true/false check (confirm)
+        }
+
         public String getCrawlerCrawlingDataEncoding() { // string accessor for 'crawler.crawling.data.encoding'
             return get(FessConfig.CRAWLER_CRAWLING_DATA_ENCODING); // delegates to get(key); presumably the inherited property lookup (confirm)
         }

+ 1 - 0
src/main/resources/fess_config.properties

@@ -84,6 +84,7 @@ crawler.document.use.site.encoding.on.english=false
 crawler.document.append.data=true
 crawler.document.max.alphanum.term.size=20
 crawler.document.max.symbol.term.size=10
+crawler.document.duplicate.term.removed=false
 crawler.crawling.data.encoding=UTF-8
 crawler.web.protocols=http,https
 crawler.file.protocols=file,smb,ftp