Shinsuke Sugaya пре 8 година
родитељ
комит
eb8e5864dd

+ 14 - 2
src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java

@@ -342,8 +342,20 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
 
     protected String getCanonicalUrl(final ResponseData responseData, final Document document) { // Returns the canonical URL selected from the document, or null when none is found.
         final String canonicalUrl = getSingleNodeValue(document, fessConfig.getCrawlerDocumentHtmlCannonicalXpath(), false); // value selected by the XPath taken from fessConfig
-        if (StringUtil.isNotBlank(canonicalUrl)) {
-            return canonicalUrl;
+        if (StringUtil.isBlank(canonicalUrl)) {
+            return null; // nothing usable was selected, so there is no canonical URL to report
+        }
+        if (canonicalUrl.startsWith("/")) {
+            return normalizeCanonicalUrl(responseData.getUrl(), canonicalUrl); // resolve a leading-slash value against the response URL; null if that resolution fails
+        }
+        return canonicalUrl; // any other value is returned unchanged; relative values without a leading slash are not resolved here
+    }
+
+    protected String normalizeCanonicalUrl(final String baseUrl, final String canonicalUrl) { // Resolves canonicalUrl against baseUrl and returns the result as a string, or null on failure.
+        try {
+            return new URL(new URL(baseUrl), canonicalUrl).toString(); // java.net.URL(context, spec) performs the resolution against the base
+        } catch (MalformedURLException e) {
+            logger.warn("Invalid canonical url: " + baseUrl + " : " + canonicalUrl, e); // log the failure with both inputs instead of propagating it
         }
         return null; // in the new code this is reached only after a MalformedURLException
     }

+ 26 - 0
src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java

@@ -297,4 +297,30 @@ public class FessXpathTransformerTest extends UnitFessTestCase {
         value = transformer.getSingleNodeValue(document, "//META[@name='keywords']/@content|//BODY", false);
         assertEquals("bbb aaa", value);
     }
+
+    public void test_normalizeCanonicalUrl() throws Exception { // Checks normalizeCanonicalUrl with relative and leading-slash values against several base URLs.
+        final FessXpathTransformer transformer = new FessXpathTransformer();
+        String value;
+
+        value = transformer.normalizeCanonicalUrl("http://hoge.com/", "a"); // relative value, base is the site root
+        assertEquals("http://hoge.com/a", value);
+
+        value = transformer.normalizeCanonicalUrl("http://hoge.com/", "aaa");
+        assertEquals("http://hoge.com/aaa", value);
+
+        value = transformer.normalizeCanonicalUrl("http://hoge.com/", "/aaa"); // leading-slash value, base is the site root
+        assertEquals("http://hoge.com/aaa", value);
+
+        value = transformer.normalizeCanonicalUrl("http://hoge.com/bbb", "aaa"); // base without trailing slash: its last path segment is replaced
+        assertEquals("http://hoge.com/aaa", value);
+
+        value = transformer.normalizeCanonicalUrl("http://hoge.com/bbb/", "aaa"); // base with trailing slash: the value is appended under /bbb/
+        assertEquals("http://hoge.com/bbb/aaa", value);
+
+        value = transformer.normalizeCanonicalUrl("http://hoge.com/bbb/", "/aaa"); // leading-slash value ignores the base path
+        assertEquals("http://hoge.com/aaa", value);
+
+        value = transformer.normalizeCanonicalUrl("http://hoge.com/bbb", "/aaa"); // same result when the base has no trailing slash
+        assertEquals("http://hoge.com/aaa", value);
+    }
 }