fix #1437 add crawler.document.html.default.lang
This commit is contained in:
parent
6dd9d20e6b
commit
db97ecba9b
4 changed files with 38 additions and 1 deletions
|
@ -333,7 +333,7 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
|
|||
putResultDataBody(dataMap, fessConfig.getIndexFieldExpires(), documentExpires);
|
||||
}
|
||||
// lang
|
||||
final String lang = systemHelper.normalizeLang(getSingleNodeValue(document, getLangXpath(fessConfig, xpathConfigMap), true));
|
||||
final String lang = systemHelper.normalizeHtmlLang(getSingleNodeValue(document, getLangXpath(fessConfig, xpathConfigMap), true));
|
||||
if (lang != null) {
|
||||
putResultDataBody(dataMap, fessConfig.getIndexFieldLang(), lang);
|
||||
}
|
||||
|
|
|
@ -263,6 +263,15 @@ public class SystemHelper {
|
|||
return StringUtils.abbreviate(str, ComponentUtil.getFessConfig().getMaxLogOutputLengthAsInteger().intValue());
|
||||
}
|
||||
|
||||
public String normalizeHtmlLang(final String value) {
|
||||
String defaultLang = ComponentUtil.getFessConfig().getCrawlerDocumentHtmlDefaultLang();
|
||||
if (StringUtil.isNotBlank(defaultLang)) {
|
||||
return defaultLang;
|
||||
}
|
||||
|
||||
return normalizeLang(value);
|
||||
}
|
||||
|
||||
public String normalizeLang(final String value) {
|
||||
if (StringUtil.isBlank(value)) {
|
||||
return null;
|
||||
|
|
|
@ -246,6 +246,9 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
/** The key of the configuration. e.g. 120 */
|
||||
String CRAWLER_DOCUMENT_HTML_MAX_DIGEST_LENGTH = "crawler.document.html.max.digest.length";
|
||||
|
||||
/** The key of the configuration. The default value is empty. */
|
||||
String CRAWLER_DOCUMENT_HTML_DEFAULT_LANG = "crawler.document.html.default.lang";
|
||||
|
||||
/** The key of the configuration. e.g. */
|
||||
String CRAWLER_DOCUMENT_FILE_NAME_ENCODING = "crawler.document.file.name.encoding";
|
||||
|
||||
|
@ -1948,6 +1951,21 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
*/
|
||||
Integer getCrawlerDocumentHtmlMaxDigestLengthAsInteger();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.html.default.lang'. <br>
|
||||
* The default value is empty. <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getCrawlerDocumentHtmlDefaultLang();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.html.default.lang' as {@link Integer}. <br>
|
||||
* The default value is empty. <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not integer.
|
||||
*/
|
||||
Integer getCrawlerDocumentHtmlDefaultLangAsInteger();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'crawler.document.file.name.encoding'. <br>
|
||||
* The value is, e.g. <br>
|
||||
|
@ -5777,6 +5795,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
return getAsInteger(FessConfig.CRAWLER_DOCUMENT_HTML_MAX_DIGEST_LENGTH);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentHtmlDefaultLang() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_HTML_DEFAULT_LANG);
|
||||
}
|
||||
|
||||
public Integer getCrawlerDocumentHtmlDefaultLangAsInteger() {
|
||||
return getAsInteger(FessConfig.CRAWLER_DOCUMENT_HTML_DEFAULT_LANG);
|
||||
}
|
||||
|
||||
public String getCrawlerDocumentFileNameEncoding() {
|
||||
return get(FessConfig.CRAWLER_DOCUMENT_FILE_NAME_ENCODING);
|
||||
}
|
||||
|
@ -7724,6 +7750,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_CANONICAL_XPATH, "//LINK[@rel='canonical']/@href");
|
||||
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_PRUNED_TAGS, "noscript,script,style,header,footer,nav,a[rel=nofollow]");
|
||||
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_MAX_DIGEST_LENGTH, "120");
|
||||
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_DEFAULT_LANG, "");
|
||||
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_FILE_NAME_ENCODING, "");
|
||||
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_FILE_NO_TITLE_LABEL, "No title.");
|
||||
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_FILE_IGNORE_EMPTY_CONTENT, "false");
|
||||
|
|
|
@ -140,6 +140,7 @@ crawler.document.html.digest.xpath=//META[@name='description']/@content
|
|||
crawler.document.html.canonical.xpath=//LINK[@rel='canonical']/@href
|
||||
crawler.document.html.pruned.tags=noscript,script,style,header,footer,nav,a[rel=nofollow]
|
||||
crawler.document.html.max.digest.length=120
|
||||
crawler.document.html.default.lang=
|
||||
|
||||
# file
|
||||
crawler.document.file.name.encoding=
|
||||
|
|
Loading…
Add table
Reference in a new issue