Resolve canonical URLs that start with "/" against the crawled page URL.

FessXpathTransformer.getCanonicalUrl() now returns null when the canonical
value read from the document is blank, resolves a value that starts with "/"
against responseData.getUrl() through the new normalizeCanonicalUrl() helper,
and returns any other value unchanged.

normalizeCanonicalUrl(baseUrl, canonicalUrl) builds its result with
new URL(new URL(baseUrl), canonicalUrl).toString(); when that throws
MalformedURLException it logs a warning and returns null.

FessXpathTransformerTest.test_normalizeCanonicalUrl() pins the helper's
results for base URLs with and without a trailing slash.

diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java
index f0fb3c221..0678ce0d4 100644
--- a/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java
+++ b/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java
@@ -342,8 +342,20 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
 
     protected String getCanonicalUrl(final ResponseData responseData, final Document document) {
         final String canonicalUrl = getSingleNodeValue(document, fessConfig.getCrawlerDocumentHtmlCannonicalXpath(), false);
-        if (StringUtil.isNotBlank(canonicalUrl)) {
-            return canonicalUrl;
+        if (StringUtil.isBlank(canonicalUrl)) {
+            return null;
+        }
+        if (canonicalUrl.startsWith("/")) {
+            return normalizeCanonicalUrl(responseData.getUrl(), canonicalUrl);
+        }
+        return canonicalUrl;
+    }
+
+    protected String normalizeCanonicalUrl(final String baseUrl, final String canonicalUrl) {
+        try {
+            return new URL(new URL(baseUrl), canonicalUrl).toString();
+        } catch (MalformedURLException e) {
+            logger.warn("Invalid canonical url: " + baseUrl + " : " + canonicalUrl, e);
         }
         return null;
     }
diff --git a/src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java b/src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java
index 192b167f3..3ff2be279 100644
--- a/src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java
+++ b/src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java
@@ -297,4 +297,30 @@ public class FessXpathTransformerTest extends UnitFessTestCase {
         value = transformer.getSingleNodeValue(document, "//META[@name='keywords']/@content|//BODY", false);
         assertEquals("bbb aaa", value);
     }
+
+    public void test_normalizeCanonicalUrl() throws Exception {
+        final FessXpathTransformer transformer = new FessXpathTransformer();
+        String value;
+
+        value = transformer.normalizeCanonicalUrl("http://hoge.com/", "a");
+        assertEquals("http://hoge.com/a", value);
+
+        value = transformer.normalizeCanonicalUrl("http://hoge.com/", "aaa");
+        assertEquals("http://hoge.com/aaa", value);
+
+        value = transformer.normalizeCanonicalUrl("http://hoge.com/", "/aaa");
+        assertEquals("http://hoge.com/aaa", value);
+
+        value = transformer.normalizeCanonicalUrl("http://hoge.com/bbb", "aaa");
+        assertEquals("http://hoge.com/aaa", value);
+
+        value = transformer.normalizeCanonicalUrl("http://hoge.com/bbb/", "aaa");
+        assertEquals("http://hoge.com/bbb/aaa", value);
+
+        value = transformer.normalizeCanonicalUrl("http://hoge.com/bbb/", "/aaa");
+        assertEquals("http://hoge.com/aaa", value);
+
+        value = transformer.normalizeCanonicalUrl("http://hoge.com/bbb", "/aaa");
+        assertEquals("http://hoge.com/aaa", value);
+    }
 }