diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java
index 293dd3644..e6dc9469e 100644
--- a/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java
+++ b/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java
@@ -333,7 +333,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
putResultDataBody(dataMap, fessConfig.getIndexFieldExpires(), documentExpires);
}
// lang
- final String lang = systemHelper.normalizeLang(getSingleNodeValue(document, getLangXpath(fessConfig, xpathConfigMap), true));
+ final String lang = systemHelper.normalizeHtmlLang(getSingleNodeValue(document, getLangXpath(fessConfig, xpathConfigMap), true));
if (lang != null) {
putResultDataBody(dataMap, fessConfig.getIndexFieldLang(), lang);
}
diff --git a/src/main/java/org/codelibs/fess/helper/SystemHelper.java b/src/main/java/org/codelibs/fess/helper/SystemHelper.java
index 9f067cc49..f3ad0f864 100644
--- a/src/main/java/org/codelibs/fess/helper/SystemHelper.java
+++ b/src/main/java/org/codelibs/fess/helper/SystemHelper.java
@@ -263,6 +263,15 @@ public class SystemHelper {
return StringUtils.abbreviate(str, ComponentUtil.getFessConfig().getMaxLogOutputLengthAsInteger().intValue());
}
+ /**
+ * Resolves the language of an HTML document: a non-blank 'crawler.document.html.default.lang' setting is
+ * returned as-is, otherwise the given value is handed to {@link #normalizeLang(String)}.
+ */
+ public String normalizeHtmlLang(final String value) {
+ final String configuredLang = ComponentUtil.getFessConfig().getCrawlerDocumentHtmlDefaultLang();
+ return StringUtil.isNotBlank(configuredLang) ? configuredLang : normalizeLang(value);
+ }
+
public String normalizeLang(final String value) {
if (StringUtil.isBlank(value)) {
return null;
diff --git a/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java b/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java
index 72cacdd3b..ce6ad93f2 100644
--- a/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java
+++ b/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java
@@ -246,6 +246,9 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
/** The key of the configuration. e.g. 120 */
String CRAWLER_DOCUMENT_HTML_MAX_DIGEST_LENGTH = "crawler.document.html.max.digest.length";
+ /** The key of the configuration. e.g. (empty by default) */
+ String CRAWLER_DOCUMENT_HTML_DEFAULT_LANG = "crawler.document.html.default.lang";
+
/** The key of the configuration. e.g. */
String CRAWLER_DOCUMENT_FILE_NAME_ENCODING = "crawler.document.file.name.encoding";
@@ -1948,6 +1951,21 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
*/
Integer getCrawlerDocumentHtmlMaxDigestLengthAsInteger();
+ /**
+ * Get the value for the key 'crawler.document.html.default.lang'.
+ * The value is, e.g. (empty by default)
+ * @return The value of found property. (NotNull: if not found, exception but basically no way)
+ */
+ String getCrawlerDocumentHtmlDefaultLang();
+
+ /**
+ * Get the value for the key 'crawler.document.html.default.lang' as {@link Integer}.
+ * The value is, e.g. (empty by default). NOTE(review): the key holds a language code, so a configured value is presumably non-numeric - confirm this accessor is needed.
+ * @return The value of found property. (NotNull: if not found, exception but basically no way)
+ * @throws NumberFormatException When the property is not integer.
+ */
+ Integer getCrawlerDocumentHtmlDefaultLangAsInteger();
+
/**
* Get the value for the key 'crawler.document.file.name.encoding'.
* The value is, e.g.
@@ -5777,6 +5795,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
return getAsInteger(FessConfig.CRAWLER_DOCUMENT_HTML_MAX_DIGEST_LENGTH);
}
+ public String getCrawlerDocumentHtmlDefaultLang() {
+ return get(FessConfig.CRAWLER_DOCUMENT_HTML_DEFAULT_LANG); // looks up the 'crawler.document.html.default.lang' property
+ }
+
+ public Integer getCrawlerDocumentHtmlDefaultLangAsInteger() {
+ return getAsInteger(FessConfig.CRAWLER_DOCUMENT_HTML_DEFAULT_LANG); // NOTE(review): same key as the String getter; presumably fails for a non-numeric language code - confirm
+ }
+
public String getCrawlerDocumentFileNameEncoding() {
return get(FessConfig.CRAWLER_DOCUMENT_FILE_NAME_ENCODING);
}
@@ -7724,6 +7750,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_CANONICAL_XPATH, "//LINK[@rel='canonical']/@href");
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_PRUNED_TAGS, "noscript,script,style,header,footer,nav,a[rel=nofollow]");
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_MAX_DIGEST_LENGTH, "120");
+ defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_DEFAULT_LANG, "");
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_FILE_NAME_ENCODING, "");
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_FILE_NO_TITLE_LABEL, "No title.");
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_FILE_IGNORE_EMPTY_CONTENT, "false");
diff --git a/src/main/resources/fess_config.properties b/src/main/resources/fess_config.properties
index 42ea2f659..96dec7675 100644
--- a/src/main/resources/fess_config.properties
+++ b/src/main/resources/fess_config.properties
@@ -140,6 +140,7 @@ crawler.document.html.digest.xpath=//META[@name='description']/@content
crawler.document.html.canonical.xpath=//LINK[@rel='canonical']/@href
crawler.document.html.pruned.tags=noscript,script,style,header,footer,nav,a[rel=nofollow]
crawler.document.html.max.digest.length=120
+crawler.document.html.default.lang=
# file
crawler.document.file.name.encoding=