diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java
index 293dd3644..e6dc9469e 100644
--- a/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java
+++ b/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java
@@ -333,7 +333,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
                 putResultDataBody(dataMap, fessConfig.getIndexFieldExpires(), documentExpires);
             }
             // lang
-            final String lang = systemHelper.normalizeLang(getSingleNodeValue(document, getLangXpath(fessConfig, xpathConfigMap), true));
+            final String lang = systemHelper.normalizeHtmlLang(getSingleNodeValue(document, getLangXpath(fessConfig, xpathConfigMap), true));
             if (lang != null) {
                 putResultDataBody(dataMap, fessConfig.getIndexFieldLang(), lang);
             }
diff --git a/src/main/java/org/codelibs/fess/helper/SystemHelper.java b/src/main/java/org/codelibs/fess/helper/SystemHelper.java
index 9f067cc49..f3ad0f864 100644
--- a/src/main/java/org/codelibs/fess/helper/SystemHelper.java
+++ b/src/main/java/org/codelibs/fess/helper/SystemHelper.java
@@ -263,6 +263,24 @@ public class SystemHelper {
         return StringUtils.abbreviate(str, ComponentUtil.getFessConfig().getMaxLogOutputLengthAsInteger().intValue());
     }
 
+    /**
+     * Resolves the language to store for an HTML document.
+     * When the crawler.document.html.default.lang setting is not blank, it is returned as-is
+     * and takes precedence over the given value (it is not only a fallback for documents
+     * without a language); otherwise the value is passed to {@link #normalizeLang(String)}.
+     *
+     * @param value the language value extracted from the document
+     * @return the configured language, or the result of normalizeLang(value) when none is configured
+     */
+    public String normalizeHtmlLang(final String value) {
+        final String defaultLang = ComponentUtil.getFessConfig().getCrawlerDocumentHtmlDefaultLang();
+        if (StringUtil.isNotBlank(defaultLang)) {
+            return defaultLang;
+        }
+
+        return normalizeLang(value);
+    }
+
     public String normalizeLang(final String value) {
         if (StringUtil.isBlank(value)) {
             return null;
diff --git a/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java b/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java
index 72cacdd3b..ce6ad93f2 100644
--- a/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java
+++ b/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java
@@ -246,6 +246,9 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
     /** The key of the configuration. e.g. 120 */
     String CRAWLER_DOCUMENT_HTML_MAX_DIGEST_LENGTH = "crawler.document.html.max.digest.length";
 
+    /** The key of the configuration. e.g. */
+    String CRAWLER_DOCUMENT_HTML_DEFAULT_LANG = "crawler.document.html.default.lang";
+
     /** The key of the configuration. e.g. */
     String CRAWLER_DOCUMENT_FILE_NAME_ENCODING = "crawler.document.file.name.encoding";
 
@@ -1948,6 +1951,21 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
      */
     Integer getCrawlerDocumentHtmlMaxDigestLengthAsInteger();
 
+    /**
+     * Get the value for the key 'crawler.document.html.default.lang'.
+     * The value is, e.g.
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getCrawlerDocumentHtmlDefaultLang();
+
+    /**
+     * Get the value for the key 'crawler.document.html.default.lang' as {@link Integer}.
+     * The value is, e.g.
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     * @throws NumberFormatException When the property is not integer.
+     */
+    Integer getCrawlerDocumentHtmlDefaultLangAsInteger();
+
     /**
      * Get the value for the key 'crawler.document.file.name.encoding'.
      * The value is, e.g.
@@ -5777,6 +5795,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             return getAsInteger(FessConfig.CRAWLER_DOCUMENT_HTML_MAX_DIGEST_LENGTH);
         }
 
+        public String getCrawlerDocumentHtmlDefaultLang() {
+            return get(FessConfig.CRAWLER_DOCUMENT_HTML_DEFAULT_LANG);
+        }
+
+        public Integer getCrawlerDocumentHtmlDefaultLangAsInteger() {
+            return getAsInteger(FessConfig.CRAWLER_DOCUMENT_HTML_DEFAULT_LANG);
+        }
+
         public String getCrawlerDocumentFileNameEncoding() {
             return get(FessConfig.CRAWLER_DOCUMENT_FILE_NAME_ENCODING);
         }
@@ -7724,6 +7750,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_CANONICAL_XPATH, "//LINK[@rel='canonical']/@href");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_PRUNED_TAGS, "noscript,script,style,header,footer,nav,a[rel=nofollow]");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_MAX_DIGEST_LENGTH, "120");
+            defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_DEFAULT_LANG, "");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_FILE_NAME_ENCODING, "");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_FILE_NO_TITLE_LABEL, "No title.");
             defaultMap.put(FessConfig.CRAWLER_DOCUMENT_FILE_IGNORE_EMPTY_CONTENT, "false");
diff --git a/src/main/resources/fess_config.properties b/src/main/resources/fess_config.properties
index 42ea2f659..96dec7675 100644
--- a/src/main/resources/fess_config.properties
+++ b/src/main/resources/fess_config.properties
@@ -140,6 +140,7 @@ crawler.document.html.digest.xpath=//META[@name='description']/@content
 crawler.document.html.canonical.xpath=//LINK[@rel='canonical']/@href
 crawler.document.html.pruned.tags=noscript,script,style,header,footer,nav,a[rel=nofollow]
 crawler.document.html.max.digest.length=120
+crawler.document.html.default.lang=
 
 # file
 crawler.document.file.name.encoding=