فهرست منبع

Fix #2803: Include the aside tag in the HTML crawler's pruned tags to enhance content relevance.

Shinsuke Sugaya 1 سال پیش
والد
کامیت
f6b2ef3b06

+ 3 - 3
src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java

@@ -412,7 +412,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
     /** The key of the configuration. e.g. //LINK[@rel='canonical'][1]/@href */
     String CRAWLER_DOCUMENT_HTML_CANONICAL_XPATH = "crawler.document.html.canonical.xpath";
 
-    /** The key of the configuration. e.g. noscript,script,style,header,footer,nav,a[rel=nofollow] */
+    /** The key of the configuration. e.g. noscript,script,style,header,footer,aside,nav,a[rel=nofollow] */
     String CRAWLER_DOCUMENT_HTML_PRUNED_TAGS = "crawler.document.html.pruned.tags";
 
     /** The key of the configuration. e.g. 120 */
@@ -2972,7 +2972,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
 
     /**
      * Get the value for the key 'crawler.document.html.pruned.tags'. <br>
-     * The value is, e.g. noscript,script,style,header,footer,nav,a[rel=nofollow] <br>
+     * The value is, e.g. noscript,script,style,header,footer,aside,nav,a[rel=nofollow] <br>
      * @return The value of found property. (NotNull: if not found, exception but basically no way)
      */
     String getCrawlerDocumentHtmlPrunedTags();
@@ -10916,7 +10916,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_LANG_XPATH, "//HTML/@lang");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_DIGEST_XPATH, "//META[@name='description']/@content");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_CANONICAL_XPATH, "//LINK[@rel='canonical'][1]/@href");
-            defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_PRUNED_TAGS, "noscript,script,style,header,footer,nav,a[rel=nofollow]");
+            defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_PRUNED_TAGS, "noscript,script,style,header,footer,aside,nav,a[rel=nofollow]");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_MAX_DIGEST_LENGTH, "120");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_DEFAULT_LANG, "");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_DEFAULT_INCLUDE_INDEX_PATTERNS, "");

+ 1 - 1
src/main/resources/fess_config.properties

@@ -239,7 +239,7 @@ crawler.document.html.content.xpath=//BODY
 crawler.document.html.lang.xpath=//HTML/@lang
 crawler.document.html.digest.xpath=//META[@name='description']/@content
 crawler.document.html.canonical.xpath=//LINK[@rel='canonical'][1]/@href
-crawler.document.html.pruned.tags=noscript,script,style,header,footer,nav,a[rel=nofollow]
+crawler.document.html.pruned.tags=noscript,script,style,header,footer,aside,nav,a[rel=nofollow]
 crawler.document.html.max.digest.length=120
 crawler.document.html.default.lang=
 crawler.document.html.default.include.index.patterns=