瀏覽代碼

fix #842 modify _id

Shinsuke Sugaya 8 年之前
父節點
當前提交
e9e9f1ae14

+ 57 - 9
src/main/java/org/codelibs/fess/helper/CrawlingInfoHelper.java

@@ -15,6 +15,8 @@
  */
 package org.codelibs.fess.helper;
 
+import java.io.UnsupportedEncodingException;
+import java.net.URLEncoder;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Date;
@@ -24,6 +26,7 @@ import java.util.List;
 import java.util.Map;
 
 import org.codelibs.core.lang.StringUtil;
+import org.codelibs.core.security.MessageDigestUtil;
 import org.codelibs.fess.Constants;
 import org.codelibs.fess.app.service.CrawlingInfoService;
 import org.codelibs.fess.crawler.util.UnsafeStringBuilder;
@@ -52,7 +55,9 @@ public class CrawlingInfoHelper {
 
     protected Long documentExpires;
 
-    public int maxSessionIdsInList;
+    protected int maxSessionIdsInList;
+
+    /** Number of leading characters of a generated id that generateId keeps verbatim; the rest is replaced by a SHA-256 digest. */
+    protected int urlIdPrefixLength = 445;
 
     protected CrawlingInfoService getCrawlingInfoService() {
         return ComponentUtil.getComponent(CrawlingInfoService.class);
@@ -186,25 +191,68 @@ public class CrawlingInfoHelper {
                 });
     }
 
-    private String generateId(final String url, final List<String> roleTypeList) {
+    /**
+     * Builds the document id for a URL and an optional list of role types.
+     * <p>
+     * The id source is the URL followed, when roles are present, by {@code ;role=} and the
+     * sorted, comma-joined roles; the result is trimmed. Every character outside the allow-list
+     * below is percent-encoded with {@link URLEncoder} using {@code Constants.UTF_8}. If the
+     * encoded id is longer than {@code urlIdPrefixLength}, only that many leading characters
+     * are kept and the remainder is replaced by its SHA-256 digest.
+     *
+     * @param url the document URL
+     * @param roleTypeList role types to append; may be {@code null} or empty. A non-empty list
+     *        is sorted in place.
+     * @return the generated id
+     * @throws IllegalStateException if the configured encoding is not supported
+     */
+    String generateId(final String url, final List<String> roleTypeList) {
         final UnsafeStringBuilder buf = new UnsafeStringBuilder(1000);
         buf.append(url);
         if (roleTypeList != null && !roleTypeList.isEmpty()) {
             Collections.sort(roleTypeList);
             buf.append(";role=");
-            for (int i = 0; i < roleTypeList.size(); i++) {
-                if (i != 0) {
-                    buf.append(',');
+            buf.append(String.join(",", roleTypeList));
+        }
+        final String urlId = buf.toUnsafeString().trim();
+        final StringBuilder encodedBuf = new StringBuilder(urlId.length() + 100);
+        // Walk by code point, not by char: URLEncoder turns a lone surrogate half into "%3F",
+        // so encoding char-by-char would map every supplementary character to "%3F%3F".
+        for (int i = 0; i < urlId.length();) {
+            final int c = urlId.codePointAt(i);
+            i += Character.charCount(c);
+            if (c >= 'a' && c <= 'z' //
+                    || c >= 'A' && c <= 'Z' //
+                    || c >= '0' && c <= '9' //
+                    || c == '.' //
+                    || c == '-' //
+                    || c == '*' //
+                    || c == '_' //
+                    || c == ':' // added
+                    || c == '/' // added
+                    || c == '+' // added
+                    || c == '%' // added
+                    || c == '=' // added
+                    || c == '&' // added
+                    || c == '?' // added
+                    || c == '#' // added
+                    || c == '[' // added
+                    || c == ']' // added
+                    || c == '@' // added
+                    || c == '~' // added
+                    || c == '!' // added
+                    || c == '$' // added
+                    || c == '\'' // added
+                    || c == '(' // added
+                    || c == ')' // added
+                    || c == ',' // added
+                    || c == ';' // added
+            ) {
+                encodedBuf.appendCodePoint(c);
+            } else {
+                try {
+                    encodedBuf.append(URLEncoder.encode(new String(Character.toChars(c)), Constants.UTF_8));
+                } catch (final UnsupportedEncodingException e) {
+                    // Skipping the character would silently produce a wrong (possibly colliding) id.
+                    throw new IllegalStateException("Unsupported encoding: " + Constants.UTF_8, e);
                 }
-                buf.append(roleTypeList.get(i));
             }
         }
 
-        return normalize(buf.toUnsafeString().trim());
+        final String id = encodedBuf.toString();
+        if (id.length() <= urlIdPrefixLength) {
+            return id;
+        }
+        // Keep a readable prefix and hash only the overflow so the id length stays bounded.
+        return id.substring(0, urlIdPrefixLength) + MessageDigestUtil.digest("SHA-256", id.substring(urlIdPrefixLength));
+    }
+
+    /**
+     * Sets the {@code maxSessionIdsInList} field.
+     *
+     * @param maxSessionIdsInList the new value of the field
+     */
+    public void setMaxSessionIdsInList(int maxSessionIdsInList) {
+        this.maxSessionIdsInList = maxSessionIdsInList;
     }
 
-    private String normalize(final String value) {
-        return value.replace('"', ' ');
+    /**
+     * Sets how many leading characters of a generated id are kept verbatim; generateId
+     * replaces whatever follows that prefix with a SHA-256 digest.
+     *
+     * @param urlIdPrefixLength the prefix length, in characters
+     */
+    public void setUrlIdPrefixLength(int urlIdPrefixLength) {
+        this.urlIdPrefixLength = urlIdPrefixLength;
     }
 
 }

+ 20 - 0
src/test/java/org/codelibs/fess/helper/CrawlingInfoHelperTest.java

@@ -63,4 +63,24 @@ public class CrawlingInfoHelperTest extends UnitFessTestCase {
 
         assertEquals("http://example.com/;role=admin,guest", crawlingInfoHelper.generateId(dataMap));
     }
+
+    /** Checks the length of generated ids for inputs below and above the 445-character default prefix length. */
+    public void test_generateId_long() {
+        final StringBuilder filler = new StringBuilder(1000);
+        while (filler.length() < 550) {
+            filler.append('x');
+        }
+        final String longUrl = filler.toString();
+
+        // Short enough to be returned untouched.
+        assertEquals(440, crawlingInfoHelper.generateId(longUrl.substring(0, 440), null).length());
+
+        // Anything longer than the prefix length is expected to come out at exactly 509 characters.
+        for (int length = 450; length <= 520; length += 10) {
+            assertEquals(509, crawlingInfoHelper.generateId(longUrl.substring(0, length), null).length());
+        }
+        assertEquals(509, crawlingInfoHelper.generateId(longUrl, null).length());
+
+    }
+
 }