diff --git a/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java b/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java index ce6ad93f2..a655c572c 100644 --- a/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java +++ b/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java @@ -237,7 +237,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction /** The key of the configuration. e.g. //META[@name='description']/@content */ String CRAWLER_DOCUMENT_HTML_DIGEST_XPATH = "crawler.document.html.digest.xpath"; - /** The key of the configuration. e.g. //LINK[@rel='canonical']/@href */ + /** The key of the configuration. e.g. //LINK[@rel='canonical'][1]/@href */ String CRAWLER_DOCUMENT_HTML_CANONICAL_XPATH = "crawler.document.html.canonical.xpath"; /** The key of the configuration. e.g. noscript,script,style,header,footer,nav,a[rel=nofollow] */ @@ -1924,7 +1924,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction /** * Get the value for the key 'crawler.document.html.canonical.xpath'.
- * The value is, e.g. //LINK[@rel='canonical']/@href
+ * The value is, e.g. //LINK[@rel='canonical'][1]/@href
* @return The value of found property. (NotNull: if not found, exception but basically no way) */ String getCrawlerDocumentHtmlCanonicalXpath(); @@ -7747,7 +7747,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_CONTENT_XPATH, "//BODY"); defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_LANG_XPATH, "//HTML/@lang"); defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_DIGEST_XPATH, "//META[@name='description']/@content"); - defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_CANONICAL_XPATH, "//LINK[@rel='canonical']/@href"); + defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_CANONICAL_XPATH, "//LINK[@rel='canonical'][1]/@href"); defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_PRUNED_TAGS, "noscript,script,style,header,footer,nav,a[rel=nofollow]"); defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_MAX_DIGEST_LENGTH, "120"); defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_DEFAULT_LANG, ""); diff --git a/src/main/resources/fess_config.properties b/src/main/resources/fess_config.properties index 96dec7675..3d0e79b3b 100644 --- a/src/main/resources/fess_config.properties +++ b/src/main/resources/fess_config.properties @@ -137,7 +137,7 @@ Title=title:string\n\ crawler.document.html.content.xpath=//BODY crawler.document.html.lang.xpath=//HTML/@lang crawler.document.html.digest.xpath=//META[@name='description']/@content -crawler.document.html.canonical.xpath=//LINK[@rel='canonical']/@href +crawler.document.html.canonical.xpath=//LINK[@rel='canonical'][1]/@href crawler.document.html.pruned.tags=noscript,script,style,header,footer,nav,a[rel=nofollow] crawler.document.html.max.digest.length=120 crawler.document.html.default.lang= diff --git a/src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java b/src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java index 75759a30f..46d7b5826 100644 --- 
a/src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java
+++ b/src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java
@@ -582,6 +582,50 @@ public class FessXpathTransformerTest extends UnitFessTestCase {
         assertEquals("bbb aaa", value);
     }
 
+    /**
+     * Verifies that getCanonicalUrl yields null when the page declares no canonical link,
+     * and the href of the first canonical link when one or more are declared.
+     */
+    public void test_getCanonicalUrl() throws Exception {
+        final FessXpathTransformer transformer = new FessXpathTransformer() {
+            @Override
+            protected Map<String, String> getConfigPrameterMap(final ResponseData responseData, final ConfigName config) {
+                return Collections.emptyMap();
+            }
+        };
+        transformer.fessConfig = new FessConfig.SimpleImpl() {
+            private static final long serialVersionUID = 1L;
+
+            @Override
+            public String getCrawlerDocumentHtmlCanonicalXpath() {
+                return "//LINK[@rel='canonical'][1]/@href";
+            }
+        };
+
+        final ResponseData responseData = new ResponseData();
+        responseData.setSessionId("test");
+        responseData.setUrl("http://example.com/");
+
+        // No canonical link in the page.
+        String data = "<html><body>aaa</body></html>";
+        Document document = getDocument(data);
+        String value = transformer.getCanonicalUrl(responseData, document);
+        assertNull(value);
+
+        // A single canonical link.
+        data = "<html><head><link rel=\"canonical\" href=\"http://example.com/\"></head><body>aaa</body></html>";
+        document = getDocument(data);
+        value = transformer.getCanonicalUrl(responseData, document);
+        assertEquals("http://example.com/", value);
+
+        // Two canonical links: only the first one must be used.
+        data =
+                "<html><head><link rel=\"canonical\" href=\"http://example1.com/\"><link rel=\"canonical\" href=\"http://example2.com/\"></head><body>aaa</body></html>";
+        document = getDocument(data);
+        value = transformer.getCanonicalUrl(responseData, document);
+        assertEquals("http://example1.com/", value);
+    }
+
     public void test_normalizeCanonicalUrl() throws Exception {
         final FessXpathTransformer transformer = new FessXpathTransformer();
         String value;