fix #723 normalize canonical url
This commit is contained in:
parent
fa9436a573
commit
eb8e5864dd
2 changed files with 40 additions and 2 deletions
|
@ -342,8 +342,20 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
|
|||
|
||||
/**
 * Looks up the canonical URL declared by the crawled HTML document.
 *
 * The value is read with {@code getSingleNodeValue} using the XPath returned by
 * {@code fessConfig.getCrawlerDocumentHtmlCannonicalXpath()}.
 *
 * @param responseData crawl response; its URL is the base for resolving a value that starts with "/"
 * @param document parsed document that is queried for the canonical value
 * @return {@code null} when the value is blank; the result of
 *         {@code normalizeCanonicalUrl} when the value starts with "/";
 *         otherwise the value exactly as found in the document
 */
protected String getCanonicalUrl(final ResponseData responseData, final Document document) {
|
||||
final String canonicalUrl = getSingleNodeValue(document, fessConfig.getCrawlerDocumentHtmlCannonicalXpath(), false);
|
||||
// NOTE(review): this listing appears to be a rendered diff with the +/- markers lost; the
// isNotBlank check and the return directly below look like the pre-change lines that the
// commit removed, not part of the resulting method - confirm against the repository.
if (StringUtil.isNotBlank(canonicalUrl)) {
|
||||
return canonicalUrl;
|
||||
// Nothing usable was found in the document: report "no canonical URL".
if (StringUtil.isBlank(canonicalUrl)) {
|
||||
return null;
|
||||
}
|
||||
// Only values with a leading "/" are resolved against the crawled page's URL.
if (canonicalUrl.startsWith("/")) {
|
||||
return normalizeCanonicalUrl(responseData.getUrl(), canonicalUrl);
|
||||
}
|
||||
// Every other value (including relative ones without a leading "/") is returned unchanged.
return canonicalUrl;
|
||||
}
|
||||
|
||||
protected String normalizeCanonicalUrl(final String baseUrl, final String canonicalUrl) {
|
||||
try {
|
||||
return new URL(new URL(baseUrl), canonicalUrl).toString();
|
||||
} catch (MalformedURLException e) {
|
||||
logger.warn("Invalid canonical url: " + baseUrl + " : " + canonicalUrl, e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -297,4 +297,30 @@ public class FessXpathTransformerTest extends UnitFessTestCase {
|
|||
value = transformer.getSingleNodeValue(document, "//META[@name='keywords']/@content|//BODY", false);
|
||||
assertEquals("bbb aaa", value);
|
||||
}
|
||||
|
||||
public void test_normalizeCanonicalUrl() throws Exception {
|
||||
final FessXpathTransformer transformer = new FessXpathTransformer();
|
||||
String value;
|
||||
|
||||
value = transformer.normalizeCanonicalUrl("http://hoge.com/", "a");
|
||||
assertEquals("http://hoge.com/a", value);
|
||||
|
||||
value = transformer.normalizeCanonicalUrl("http://hoge.com/", "aaa");
|
||||
assertEquals("http://hoge.com/aaa", value);
|
||||
|
||||
value = transformer.normalizeCanonicalUrl("http://hoge.com/", "/aaa");
|
||||
assertEquals("http://hoge.com/aaa", value);
|
||||
|
||||
value = transformer.normalizeCanonicalUrl("http://hoge.com/bbb", "aaa");
|
||||
assertEquals("http://hoge.com/aaa", value);
|
||||
|
||||
value = transformer.normalizeCanonicalUrl("http://hoge.com/bbb/", "aaa");
|
||||
assertEquals("http://hoge.com/bbb/aaa", value);
|
||||
|
||||
value = transformer.normalizeCanonicalUrl("http://hoge.com/bbb/", "/aaa");
|
||||
assertEquals("http://hoge.com/aaa", value);
|
||||
|
||||
value = transformer.normalizeCanonicalUrl("http://hoge.com/bbb", "/aaa");
|
||||
assertEquals("http://hoge.com/aaa", value);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue