Browse Source

fix #2735 add crawler.document.append.filename

Shinsuke Sugaya 2 years ago
parent
commit
e9c4a0c0b2

+ 4 - 1
src/main/java/org/codelibs/fess/crawler/transformer/AbstractFessFileTransformer.java

@@ -213,6 +213,10 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
             }
             buf.append(contentMeta);
         }
+        final String fileName = getFileName(url, urlEncoding);
+        if (StringUtil.isNotBlank(fileName) && fessConfig.isCrawlerDocumentAppendFilename()) {
+            buf.append(' ').append(fileName);
+        }
         final String bodyBase = buf.toString().trim();
         responseData.addMetaData(Extractor.class.getSimpleName(), extractor);
         final String body = documentHelper.getContent(crawlingConfig, responseData, bodyBase, dataMap);
@@ -232,7 +236,6 @@ public abstract class AbstractFessFileTransformer extends AbstractTransformer im
         putResultDataBody(dataMap, fessConfig.getIndexFieldDigest(),
                 documentHelper.getDigest(responseData, bodyBase, dataMap, fessConfig.getCrawlerDocumentFileMaxDigestLengthAsInteger()));
         // title
-        final String fileName = getFileName(url, urlEncoding);
         if (!hasTitle(dataMap)) {
             final String titleField = fessConfig.getIndexFieldTitle();
             dataMap.remove(titleField);

+ 12 - 3
src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java

@@ -392,8 +392,8 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
         // content
         final String body = getSingleNodeValue(document, getContentXpath(fessConfig, xpathConfigMap),
                 prunedContent ? node -> pruneNode(node, crawlingConfig) : node -> node);
-        putResultDataBody(dataMap, fessConfig.getIndexFieldContent(),
-                documentHelper.getContent(crawlingConfig, responseData, body, dataMap));
+        final String fileName = getFileName(url, urlEncoding);
+        putResultDataContent(dataMap, responseData, fessConfig, crawlingConfig, documentHelper, body, fileName);
         if ((Constants.TRUE.equalsIgnoreCase(fieldConfigMap.get(fessConfig.getIndexFieldCache()))
                 || fessConfig.isCrawlerDocumentCacheEnabled()) && fessConfig.isSupportedDocumentCacheMimetypes(mimeType)) {
             if (responseData.getContentLength() > 0
@@ -429,7 +429,6 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
         // site
         putResultDataBody(dataMap, fessConfig.getIndexFieldSite(), getSite(url, urlEncoding));
         // filename
-        final String fileName = getFileName(url, urlEncoding);
         if (StringUtil.isNotBlank(fileName)) {
             putResultDataBody(dataMap, fessConfig.getIndexFieldFilename(), fileName);
         }
@@ -502,6 +501,16 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
         });
     }
 
+    /**
+     * Resolves the content of the crawled document through the document helper and hands it to
+     * {@code putResultDataBody} for the index content field, optionally followed by the file name.
+     * <p>
+     * The file name is appended, separated by a single space, only when it is not blank and
+     * {@code fessConfig.isCrawlerDocumentAppendFilename()} returns true. When the resolved content
+     * itself is blank, the file name is used on its own.
+     *
+     * @param dataMap the document data being built; also passed to the document helper
+     * @param responseData the crawl response passed to the document helper
+     * @param fessConfig the configuration providing the content field name and the append-filename flag
+     * @param crawlingConfig the crawling configuration passed to the document helper
+     * @param documentHelper the helper that produces the content from {@code body}
+     * @param body the extracted body text
+     * @param fileName the file name to append; ignored when blank
+     */
+    protected void putResultDataContent(final Map<String, Object> dataMap, final ResponseData responseData, final FessConfig fessConfig,
+            final CrawlingConfig crawlingConfig, final DocumentHelper documentHelper, final String body, final String fileName) {
+        final String content = documentHelper.getContent(crawlingConfig, responseData, body, dataMap);
+        if (StringUtil.isNotBlank(fileName) && fessConfig.isCrawlerDocumentAppendFilename()) {
+            // Blank or null content must not produce a leading space or a literal "null" prefix,
+            // so the file name stands alone in that case.
+            final String merged = StringUtil.isNotBlank(content) ? content + " " + fileName : fileName;
+            putResultDataBody(dataMap, fessConfig.getIndexFieldContent(), merged);
+        } else {
+            putResultDataBody(dataMap, fessConfig.getIndexFieldContent(), content);
+        }
+    }
+
     protected CrawlingConfig getCrawlingConfig(final ResponseData responseData) {
         final CrawlingConfigHelper crawlingConfigHelper = ComponentUtil.getCrawlingConfigHelper();
         return crawlingConfigHelper.get(responseData.getSessionId());

+ 30 - 4
src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java

@@ -325,6 +325,9 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
     /** The key of the configuration. e.g. true */
     String CRAWLER_DOCUMENT_APPEND_DATA = "crawler.document.append.data";
 
+    /** The key of the configuration. e.g. false */
+    String CRAWLER_DOCUMENT_APPEND_FILENAME = "crawler.document.append.filename";
+
     /** The key of the configuration. e.g. 20 */
     String CRAWLER_DOCUMENT_MAX_ALPHANUM_TERM_SIZE = "crawler.document.max.alphanum.term.size";
 
@@ -1164,7 +1167,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
     /** The key of the configuration. e.g. org.codelibs,org.dbflute,org.lastaflute */
     String LOGGING_APP_PACKAGES = "logging.app.packages";
 
-    /** The key of the configuration. e.g. 4000 */
+    /** The key of the configuration. e.g. 10000 */
     String FORM_ADMIN_MAX_INPUT_SIZE = "form.admin.max.input.size";
 
     /** The key of the configuration. e.g. false */
@@ -2651,6 +2654,20 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
      */
     boolean isCrawlerDocumentAppendData();
 
+    /**
+     * Get the value for the key 'crawler.document.append.filename'. <br>
+     * The value is, e.g. false <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getCrawlerDocumentAppendFilename();
+
+    /**
+     * Is the property for the key 'crawler.document.append.filename' true? <br>
+     * The value is, e.g. false <br>
+     * @return The determination, true or false. (if not found, exception but basically no way)
+     */
+    boolean isCrawlerDocumentAppendFilename();
+
     /**
      * Get the value for the key 'crawler.document.max.alphanum.term.size'. <br>
      * The value is, e.g. 20 <br>
@@ -5422,14 +5439,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
 
     /**
      * Get the value for the key 'form.admin.max.input.size'. <br>
-     * The value is, e.g. 4000 <br>
+     * The value is, e.g. 10000 <br>
      * @return The value of found property. (NotNull: if not found, exception but basically no way)
      */
     String getFormAdminMaxInputSize();
 
     /**
      * Get the value for the key 'form.admin.max.input.size' as {@link Integer}. <br>
-     * The value is, e.g. 4000 <br>
+     * The value is, e.g. 10000 <br>
      * @return The value of found property. (NotNull: if not found, exception but basically no way)
      * @throws NumberFormatException When the property is not integer.
      */
@@ -8009,6 +8026,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             return is(FessConfig.CRAWLER_DOCUMENT_APPEND_DATA);
         }
 
+        // Looks up the string value stored under the key 'crawler.document.append.filename'.
+        public String getCrawlerDocumentAppendFilename() {
+            return get(FessConfig.CRAWLER_DOCUMENT_APPEND_FILENAME);
+        }
+
+        // Reports whether the property 'crawler.document.append.filename' is true, as decided by is().
+        public boolean isCrawlerDocumentAppendFilename() {
+            return is(FessConfig.CRAWLER_DOCUMENT_APPEND_FILENAME);
+        }
+
         public String getCrawlerDocumentMaxAlphanumTermSize() {
             return get(FessConfig.CRAWLER_DOCUMENT_MAX_ALPHANUM_TERM_SIZE);
         }
@@ -10727,6 +10752,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_UNKNOWN_HOSTNAME, "unknown");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_USE_SITE_ENCODING_ON_ENGLISH, "false");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_APPEND_DATA, "true");
+            defaultMap.put(FessConfig.CRAWLER_DOCUMENT_APPEND_FILENAME, "false");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_MAX_ALPHANUM_TERM_SIZE, "20");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_MAX_SYMBOL_TERM_SIZE, "10");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_DUPLICATE_TERM_REMOVED, "false");
@@ -10978,7 +11004,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
                     "filetype,created,click_count,title,doc_id,url,score,site,filename,host,digest,boost,mimetype,favorite_count,_id,lang,last_modified,content_length,timestamp");
             defaultMap.put(FessConfig.LOGGING_SEARCH_USE_LOGFILE, "true");
             defaultMap.put(FessConfig.LOGGING_APP_PACKAGES, "org.codelibs,org.dbflute,org.lastaflute");
-            defaultMap.put(FessConfig.FORM_ADMIN_MAX_INPUT_SIZE, "4000");
+            defaultMap.put(FessConfig.FORM_ADMIN_MAX_INPUT_SIZE, "10000");
             defaultMap.put(FessConfig.FORM_ADMIN_LABEL_IN_CONFIG_ENABLED, "false");
             defaultMap.put(FessConfig.FORM_ADMIN_DEFAULT_TEMPLATE_NAME, "__TEMPLATE__");
             defaultMap.put(FessConfig.OSDD_LINK_ENABLED, "true");

+ 1 - 0
src/main/resources/fess_config.properties

@@ -205,6 +205,7 @@ crawler.document.site.encoding=UTF-8
 crawler.document.unknown.hostname=unknown
 crawler.document.use.site.encoding.on.english=false
 crawler.document.append.data=true
+crawler.document.append.filename=false
 crawler.document.max.alphanum.term.size=20
 crawler.document.max.symbol.term.size=10
 crawler.document.duplicate.term.removed=false