Przeglądaj źródła

fix #1437 add crawler.document.html.default.lang

Shinsuke Sugaya 7 lat temu
rodzic
commit
db97ecba9b

+ 1 - 1
src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java

@@ -333,7 +333,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
             putResultDataBody(dataMap, fessConfig.getIndexFieldExpires(), documentExpires);
         }
         // lang
-        final String lang = systemHelper.normalizeLang(getSingleNodeValue(document, getLangXpath(fessConfig, xpathConfigMap), true));
+        final String lang = systemHelper.normalizeHtmlLang(getSingleNodeValue(document, getLangXpath(fessConfig, xpathConfigMap), true));
         if (lang != null) {
             putResultDataBody(dataMap, fessConfig.getIndexFieldLang(), lang);
         }

+ 9 - 0
src/main/java/org/codelibs/fess/helper/SystemHelper.java

@@ -263,6 +263,15 @@ public class SystemHelper {
         return StringUtils.abbreviate(str, ComponentUtil.getFessConfig().getMaxLogOutputLengthAsInteger().intValue());
     }
 
+    /**
+     * Resolves the language value to use for an HTML document.
+     * <p>
+     * When the {@code crawler.document.html.default.lang} setting is not blank, that configured
+     * value is returned as-is and {@code value} is ignored; the configured value is not passed
+     * through {@link #normalizeLang(String)}. Otherwise the result is whatever
+     * {@link #normalizeLang(String)} returns for {@code value}.
+     *
+     * @param value the raw language string supplied by the caller
+     * @return the configured language when one is set, otherwise {@code normalizeLang(value)}
+     */
+    public String normalizeHtmlLang(final String value) {
+        final String defaultLang = ComponentUtil.getFessConfig().getCrawlerDocumentHtmlDefaultLang();
+        // A configured language takes precedence over the value passed in by the caller.
+        if (StringUtil.isNotBlank(defaultLang)) {
+            return defaultLang;
+        }
+
+        return normalizeLang(value);
+    }
+
     public String normalizeLang(final String value) {
         if (StringUtil.isBlank(value)) {
             return null;

+ 27 - 0
src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java

@@ -246,6 +246,9 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
     /** The key of the configuration. e.g. 120 */
     String CRAWLER_DOCUMENT_HTML_MAX_DIGEST_LENGTH = "crawler.document.html.max.digest.length";
 
+    /** The key of the configuration. e.g. (empty string by default) */
+    String CRAWLER_DOCUMENT_HTML_DEFAULT_LANG = "crawler.document.html.default.lang";
+
     /** The key of the configuration. e.g.  */
     String CRAWLER_DOCUMENT_FILE_NAME_ENCODING = "crawler.document.file.name.encoding";
 
@@ -1948,6 +1951,21 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
      */
     Integer getCrawlerDocumentHtmlMaxDigestLengthAsInteger();
 
+    /**
+     * Get the value for the key 'crawler.document.html.default.lang'. <br>
+     * The value is, e.g. (empty string by default) <br>
+     * @return The value of the property. (NotNull: a missing property raises an exception, which is not expected in practice)
+     */
+    String getCrawlerDocumentHtmlDefaultLang();
+
+    /**
+     * Get the value for the key 'crawler.document.html.default.lang' as {@link Integer}. <br>
+     * The value is, e.g. (empty string by default) <br>
+     * NOTE(review): this key appears to hold a language rather than a number, so this accessor is presumably generated for uniformity only; confirm before relying on it. <br>
+     * @return The value of the property. (NotNull: a missing property raises an exception, which is not expected in practice)
+     * @throws NumberFormatException When the property is not an integer.
+     */
+    Integer getCrawlerDocumentHtmlDefaultLangAsInteger();
+
     /**
      * Get the value for the key 'crawler.document.file.name.encoding'. <br>
      * The value is, e.g.  <br>
@@ -5777,6 +5795,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             return getAsInteger(FessConfig.CRAWLER_DOCUMENT_HTML_MAX_DIGEST_LENGTH);
         }
 
+        /**
+         * Looks up the string stored under the {@code crawler.document.html.default.lang} key.
+         * @return whatever {@code get} yields for that key
+         */
+        public String getCrawlerDocumentHtmlDefaultLang() {
+            final String propertyKey = FessConfig.CRAWLER_DOCUMENT_HTML_DEFAULT_LANG;
+            return get(propertyKey);
+        }
+
+        /**
+         * Looks up the {@code crawler.document.html.default.lang} key through {@code getAsInteger}.
+         * @return whatever {@code getAsInteger} yields for that key
+         */
+        public Integer getCrawlerDocumentHtmlDefaultLangAsInteger() {
+            final String propertyKey = FessConfig.CRAWLER_DOCUMENT_HTML_DEFAULT_LANG;
+            return getAsInteger(propertyKey);
+        }
+
         /**
          * Looks up the string stored under the {@code crawler.document.file.name.encoding} key.
          * @return whatever {@code get} yields for that key
          */
         public String getCrawlerDocumentFileNameEncoding() {
             final String propertyKey = FessConfig.CRAWLER_DOCUMENT_FILE_NAME_ENCODING;
             return get(propertyKey);
         }
@@ -7724,6 +7750,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_CANONICAL_XPATH, "//LINK[@rel='canonical']/@href");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_PRUNED_TAGS, "noscript,script,style,header,footer,nav,a[rel=nofollow]");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_MAX_DIGEST_LENGTH, "120");
+            defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_DEFAULT_LANG, "");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_FILE_NAME_ENCODING, "");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_FILE_NO_TITLE_LABEL, "No title.");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_FILE_IGNORE_EMPTY_CONTENT, "false");

+ 1 - 0
src/main/resources/fess_config.properties

@@ -140,6 +140,7 @@ crawler.document.html.digest.xpath=//META[@name='description']/@content
 crawler.document.html.canonical.xpath=//LINK[@rel='canonical']/@href
 crawler.document.html.pruned.tags=noscript,script,style,header,footer,nav,a[rel=nofollow]
 crawler.document.html.max.digest.length=120
+crawler.document.html.default.lang=
 
 # file
 crawler.document.file.name.encoding=