diff --git a/src/main/java/org/codelibs/fess/helper/CrawlingInfoHelper.java b/src/main/java/org/codelibs/fess/helper/CrawlingInfoHelper.java
index 94d4cfb4c..9e6fc7b6e 100644
--- a/src/main/java/org/codelibs/fess/helper/CrawlingInfoHelper.java
+++ b/src/main/java/org/codelibs/fess/helper/CrawlingInfoHelper.java
@@ -15,6 +15,8 @@
  */
 package org.codelibs.fess.helper;
 
+import java.io.UnsupportedEncodingException;
+import java.net.URLEncoder;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Date;
@@ -24,6 +26,7 @@ import java.util.List;
 import java.util.Map;
 
 import org.codelibs.core.lang.StringUtil;
+import org.codelibs.core.security.MessageDigestUtil;
 import org.codelibs.fess.Constants;
 import org.codelibs.fess.app.service.CrawlingInfoService;
 import org.codelibs.fess.crawler.util.UnsafeStringBuilder;
@@ -52,7 +55,10 @@ public class CrawlingInfoHelper {
 
     protected Long documentExpires;
 
-    public int maxSessionIdsInList;
+    protected int maxSessionIdsInList;
+
+    /** Number of leading characters of an encoded ID that are kept as-is when the ID is shortened. */
+    protected int urlIdPrefixLength = 445;
 
     protected CrawlingInfoService getCrawlingInfoService() {
         return ComponentUtil.getComponent(CrawlingInfoService.class);
@@ -186,25 +192,67 @@ public class CrawlingInfoHelper {
         });
     }
 
-    private String generateId(final String url, final List roleTypeList) {
+    /**
+     * Builds the document ID for a URL and its role types.
+     *
+     * The URL, followed by ";role=" and the sorted, comma-joined role types when any are given, is
+     * trimmed and percent-encoded, except for ASCII letters, digits and common URL punctuation. An
+     * encoded ID longer than urlIdPrefixLength keeps that many leading characters and has the
+     * remainder replaced by its SHA-256 digest.
+     *
+     * @param url the document URL
+     * @param roleTypeList role types to append, may be null or empty; sorted in place
+     * @return the encoded ID
+     */
+    String generateId(final String url, final List roleTypeList) {
         final UnsafeStringBuilder buf = new UnsafeStringBuilder(1000);
         buf.append(url);
         if (roleTypeList != null && !roleTypeList.isEmpty()) {
             Collections.sort(roleTypeList);
             buf.append(";role=");
-            for (int i = 0; i < roleTypeList.size(); i++) {
-                if (i != 0) {
-                    buf.append(',');
+            buf.append(String.join(",", roleTypeList));
+        }
+        final String urlId = buf.toUnsafeString().trim();
+        final StringBuilder encodedBuf = new StringBuilder(urlId.length() + 100);
+        // Walk by code point so that a surrogate pair is encoded as one character; encoding each
+        // half on its own turns both into "%3F" and makes distinct URLs share an ID.
+        for (int i = 0; i < urlId.length();) {
+            final int codePoint = urlId.codePointAt(i);
+            final int charCount = Character.charCount(codePoint);
+            if (isUnescapedIdChar(codePoint)) {
+                encodedBuf.append((char) codePoint);
+            } else {
+                try {
+                    encodedBuf.append(URLEncoder.encode(urlId.substring(i, i + charCount), Constants.UTF_8));
+                } catch (final UnsupportedEncodingException e) {
+                    // Dropping the character silently would let distinct URLs collide on one ID.
+                    throw new IllegalStateException("Failed to encode ID: " + urlId, e);
                 }
-                buf.append(roleTypeList.get(i));
             }
+            i += charCount;
         }
 
-        return normalize(buf.toUnsafeString().trim());
+        final String id = encodedBuf.toString();
+        if (id.length() <= urlIdPrefixLength) {
+            return id;
+        }
+        return id.substring(0, urlIdPrefixLength) + MessageDigestUtil.digest("SHA-256", id.substring(urlIdPrefixLength));
     }
 
-    private String normalize(final String value) {
-        return value.replace('"', ' ');
+    /** Returns true for characters that are copied into an ID without percent-encoding. */
+    private static boolean isUnescapedIdChar(final int c) {
+        return c >= 'a' && c <= 'z' //
+                || c >= 'A' && c <= 'Z' //
+                || c >= '0' && c <= '9' //
+                || ".-*_:/+%=&?#[]@~!$'(),;".indexOf(c) >= 0;
+    }
+
+    public void setMaxSessionIdsInList(final int maxSessionIdsInList) {
+        this.maxSessionIdsInList = maxSessionIdsInList;
+    }
+
+    public void setUrlIdPrefixLength(final int urlIdPrefixLength) {
+        this.urlIdPrefixLength = urlIdPrefixLength;
     }
 
 }
diff --git a/src/test/java/org/codelibs/fess/helper/CrawlingInfoHelperTest.java b/src/test/java/org/codelibs/fess/helper/CrawlingInfoHelperTest.java
index 2ac3f85cd..972f3b060 100644
--- a/src/test/java/org/codelibs/fess/helper/CrawlingInfoHelperTest.java
+++ b/src/test/java/org/codelibs/fess/helper/CrawlingInfoHelperTest.java
@@ -63,4 +63,24 @@ public class CrawlingInfoHelperTest extends UnitFessTestCase {
 
         assertEquals("http://example.com/;role=admin,guest", crawlingInfoHelper.generateId(dataMap));
     }
+
+    public void test_generateId_long() {
+        StringBuilder buf = new StringBuilder(1000);
+        for (int i = 0; i < 550; i++) {
+            buf.append('x');
+        }
+
+        assertEquals(440, crawlingInfoHelper.generateId(buf.substring(0, 440), null).length());
+        assertEquals(509, crawlingInfoHelper.generateId(buf.substring(0, 450), null).length());
+        assertEquals(509, crawlingInfoHelper.generateId(buf.substring(0, 460), null).length());
+        assertEquals(509, crawlingInfoHelper.generateId(buf.substring(0, 470), null).length());
+        assertEquals(509, crawlingInfoHelper.generateId(buf.substring(0, 480), null).length());
+        assertEquals(509, crawlingInfoHelper.generateId(buf.substring(0, 490), null).length());
+        assertEquals(509, crawlingInfoHelper.generateId(buf.substring(0, 500), null).length());
+        assertEquals(509, crawlingInfoHelper.generateId(buf.substring(0, 510), null).length());
+        assertEquals(509, crawlingInfoHelper.generateId(buf.substring(0, 520), null).length());
+        assertEquals(509, crawlingInfoHelper.generateId(buf.toString(), null).length());
+
+    }
+
 }