diff --git a/src/main/java/jp/sf/fess/helper/SystemHelper.java b/src/main/java/jp/sf/fess/helper/SystemHelper.java
index d256444a4..0e5ad8119 100644
--- a/src/main/java/jp/sf/fess/helper/SystemHelper.java
+++ b/src/main/java/jp/sf/fess/helper/SystemHelper.java
@@ -123,6 +123,8 @@ public class SystemHelper implements Serializable {
 
     public String idField = "id";
 
+    public String langField = "lang_s";
+
     @InitMethod
     public void init() {
         final File[] files = ResourceUtil.getJarFiles(launcherFileNamePrefix);
@@ -430,4 +432,42 @@ public class SystemHelper implements Serializable {
         return StringUtils.abbreviate(str, maxTextLength);
     }
 
+    /**
+     * Normalizes a language value (for example the value of an HTML
+     * {@code lang} attribute) to a lower-case language code.
+     *
+     * Characters are skipped until the first ASCII letter; the run of ASCII
+     * letters starting there is returned in lower case, so " ja ", "_ja",
+     * "ja-JP", "ja_JP" and "JA" all yield "ja".
+     *
+     * @param value the raw language value, may be null
+     * @return the lower-case leading letter run, or null if the value is
+     *         blank (per StringUtil.isBlank) or contains no ASCII letter
+     */
+    public String normalizeLang(final String value) {
+        if (StringUtil.isBlank(value)) {
+            return null;
+        }
+
+        final StringBuilder buf = new StringBuilder(value.length());
+        for (int i = 0; i < value.length(); i++) {
+            final char c = value.charAt(i);
+            if ('a' <= c && c <= 'z') {
+                buf.append(c);
+            } else if ('A' <= c && c <= 'Z') {
+                // Language tags are case-insensitive, so fold ASCII upper
+                // case instead of treating it as a separator ("En" -> "en",
+                // not "n").
+                buf.append((char) (c - 'A' + 'a'));
+            } else if (buf.length() > 0) {
+                // first non-letter after the code ends it ("ja-JP" -> "ja")
+                break;
+            }
+        }
+        if (buf.length() > 0) {
+            return buf.toString();
+        }
+        return null;
+    }
+
 }
diff --git a/src/main/java/jp/sf/fess/transformer/FessXpathTransformer.java b/src/main/java/jp/sf/fess/transformer/FessXpathTransformer.java
index 1e792e249..794380d95 100644
--- a/src/main/java/jp/sf/fess/transformer/FessXpathTransformer.java
+++ b/src/main/java/jp/sf/fess/transformer/FessXpathTransformer.java
@@ -80,6 +80,8 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer {
 
     public String contentXpath = "//BODY";
 
+    public String langXpath = "//HTML/@lang";
+
     public String digestXpath = "//META[@name='description']/@content";
 
     public String canonicalXpath = "//LINK[@rel='canonical']/@href";
@@ -245,6 +247,12 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer {
             putResultDataBody(dataMap, systemHelper.expiresField,
                     FessFunctions.formatDate(documentExpires));
         }
+        // lang
+        final String lang = systemHelper.normalizeLang(getSingleNodeValue(
+                document, langXpath, true));
+        if (lang != null) {
+            putResultDataBody(dataMap, systemHelper.langField, lang);
+        }
         // title
         // content
         putResultDataBody(dataMap, "content",
diff --git a/src/test/java/jp/sf/fess/helper/SystemHelperTest.java b/src/test/java/jp/sf/fess/helper/SystemHelperTest.java
index 6a1669cd5..f9b0056d6 100644
--- a/src/test/java/jp/sf/fess/helper/SystemHelperTest.java
+++ b/src/test/java/jp/sf/fess/helper/SystemHelperTest.java
@@ -40,4 +40,36 @@ public class SystemHelperTest extends S2TestCase {
         path = "[]^$.*+?,{}|%\\";
         assertEquals(path, systemHelper.encodeUrlFilter(path));
     }
+
+    public void test_normalizeLang() {
+        String value = null;
+        assertNull(systemHelper.normalizeLang(value));
+
+        value = "";
+        assertNull(systemHelper.normalizeLang(value));
+
+        value = "ja";
+        assertEquals("ja", systemHelper.normalizeLang(value));
+
+        value = " ja ";
+        assertEquals("ja", systemHelper.normalizeLang(value));
+
+        value = "_ja";
+        assertEquals("ja", systemHelper.normalizeLang(value));
+
+        value = "ja-JP";
+        assertEquals("ja", systemHelper.normalizeLang(value));
+
+        value = "ja_JP";
+        assertEquals("ja", systemHelper.normalizeLang(value));
+
+        value = "ja_JP_AAA";
+        assertEquals("ja", systemHelper.normalizeLang(value));
+
+        value = "JA";
+        assertEquals("ja", systemHelper.normalizeLang(value));
+
+        value = "En-US";
+        assertEquals("en", systemHelper.normalizeLang(value));
+    }
 }