fix #723 normalize canonical url

This commit is contained in:
Shinsuke Sugaya 2016-10-01 10:21:19 +09:00
parent 1b18c63069
commit 5b33cf0e29
2 changed files with 40 additions and 2 deletions

View file

@ -347,8 +347,20 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
protected String getCanonicalUrl(final ResponseData responseData, final Document document) {
final String canonicalUrl = getSingleNodeValue(document, fessConfig.getCrawlerDocumentHtmlCannonicalXpath(), false);
if (StringUtil.isNotBlank(canonicalUrl)) {
return canonicalUrl;
if (StringUtil.isBlank(canonicalUrl)) {
return null;
}
if (canonicalUrl.startsWith("/")) {
return normalizeCanonicalUrl(responseData.getUrl(), canonicalUrl);
}
return canonicalUrl;
}
/**
 * Resolves a canonical URL reference against a base URL using
 * {@link java.net.URL#URL(URL, String)}.
 *
 * @param baseUrl the URL used as the resolution context
 * @param canonicalUrl the canonical URL value to resolve against {@code baseUrl}
 * @return the resolved URL as a string, or {@code null} if either value
 *         cannot be parsed (a warning is logged in that case)
 */
protected String normalizeCanonicalUrl(final String baseUrl, final String canonicalUrl) {
    String resolved = null;
    try {
        final URL context = new URL(baseUrl);
        resolved = new URL(context, canonicalUrl).toString();
    } catch (final MalformedURLException e) {
        // Best effort: report the offending pair and fall through to null.
        logger.warn("Invalid canonical url: " + baseUrl + " : " + canonicalUrl, e);
    }
    return resolved;
}

View file

@ -297,4 +297,30 @@ public class FessXpathTransformerTest extends UnitFessTestCase {
value = transformer.getSingleNodeValue(document, "//META[@name='keywords']/@content|//BODY", false);
assertEquals("bbb aaa", value);
}
/**
 * Checks normalizeCanonicalUrl for relative and root-relative canonical
 * values against base URLs with and without a trailing slash.
 */
public void test_normalizeCanonicalUrl() throws Exception {
    final FessXpathTransformer transformer = new FessXpathTransformer();

    // Each row: { base URL, canonical value, expected result }
    final String[][] cases = {
        { "http://hoge.com/", "a", "http://hoge.com/a" },
        { "http://hoge.com/", "aaa", "http://hoge.com/aaa" },
        { "http://hoge.com/", "/aaa", "http://hoge.com/aaa" },
        { "http://hoge.com/bbb", "aaa", "http://hoge.com/aaa" },
        { "http://hoge.com/bbb/", "aaa", "http://hoge.com/bbb/aaa" },
        { "http://hoge.com/bbb/", "/aaa", "http://hoge.com/aaa" },
        { "http://hoge.com/bbb", "/aaa", "http://hoge.com/aaa" },
    };

    for (final String[] row : cases) {
        final String actual = transformer.normalizeCanonicalUrl(row[0], row[1]);
        assertEquals(row[2], actual);
    }
}
}