Fix #842: modify _id generation

This commit is contained in:
Shinsuke Sugaya 2017-01-21 23:36:28 +09:00
parent f11de033fd
commit e9e9f1ae14
2 changed files with 77 additions and 9 deletions

View file

@ -15,6 +15,8 @@
*/
package org.codelibs.fess.helper;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
@ -24,6 +26,7 @@ import java.util.List;
import java.util.Map;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.core.security.MessageDigestUtil;
import org.codelibs.fess.Constants;
import org.codelibs.fess.app.service.CrawlingInfoService;
import org.codelibs.fess.crawler.util.UnsafeStringBuilder;
@ -52,7 +55,9 @@ public class CrawlingInfoHelper {
protected Long documentExpires;
public int maxSessionIdsInList;
protected int maxSessionIdsInList;
/**
 * Maximum number of leading characters of an encoded id that generateId keeps verbatim.
 * Ids longer than this keep only this many leading characters; the remainder is replaced
 * by its SHA-256 digest. With the default of 445 the accompanying unit test expects
 * shortened ids to be 509 characters long.
 * NOTE(review): 445 was presumably chosen to keep ids under the index's _id size limit - confirm.
 */
protected int urlIdPrefixLength = 445;
protected CrawlingInfoService getCrawlingInfoService() {
return ComponentUtil.getComponent(CrawlingInfoService.class);
@ -186,25 +191,68 @@ public class CrawlingInfoHelper {
});
}
private String generateId(final String url, final List<String> roleTypeList) {
String generateId(final String url, final List<String> roleTypeList) {
final UnsafeStringBuilder buf = new UnsafeStringBuilder(1000);
buf.append(url);
if (roleTypeList != null && !roleTypeList.isEmpty()) {
Collections.sort(roleTypeList);
buf.append(";role=");
for (int i = 0; i < roleTypeList.size(); i++) {
if (i != 0) {
buf.append(',');
buf.append(String.join(",", roleTypeList));
}
final String urlId = buf.toUnsafeString().trim();
StringBuilder encodedBuf = new StringBuilder(urlId.length() + 100);
for (int i = 0; i < urlId.length(); i++) {
char c = urlId.charAt(i);
if (c >= 'a' && c <= 'z' //
|| c >= 'A' && c <= 'Z' //
|| c >= '0' && c <= '9' //
|| c == '.' //
|| c == '-' //
|| c == '*' //
|| c == '_' //
|| c == ':' // added
|| c == '/' // added
|| c == '+' // added
|| c == '%' // added
|| c == '=' // added
|| c == '&' // added
|| c == '?' // added
|| c == '#' // added
|| c == '[' // added
|| c == ']' // added
|| c == '@' // added
|| c == '~' // added
|| c == '!' // added
|| c == '$' // added
|| c == '\'' // added
|| c == '(' // added
|| c == ')' // added
|| c == ',' // added
|| c == ';' // added
) {
encodedBuf.append(c);
} else {
try {
encodedBuf.append(URLEncoder.encode(String.valueOf(c), Constants.UTF_8));
} catch (final UnsupportedEncodingException e) {
// NOP
}
buf.append(roleTypeList.get(i));
}
}
return normalize(buf.toUnsafeString().trim());
final String id = encodedBuf.toString();
if (id.length() <= urlIdPrefixLength) {
return id;
}
return id.substring(0, urlIdPrefixLength) + MessageDigestUtil.digest("SHA-256", id.substring(urlIdPrefixLength));
}
private String normalize(final String value) {
return value.replace('"', ' ');
/**
 * Assigns the {@code maxSessionIdsInList} field.
 *
 * @param maxSessionIdsInList the new value for the field
 */
public void setMaxSessionIdsInList(final int maxSessionIdsInList) {
    this.maxSessionIdsInList = maxSessionIdsInList;
}
/**
 * Assigns the {@code urlIdPrefixLength} field, i.e. the number of leading id characters
 * that generateId keeps verbatim before appending a SHA-256 digest of the remainder.
 *
 * @param urlIdPrefixLength the new prefix length
 */
public void setUrlIdPrefixLength(final int urlIdPrefixLength) {
    this.urlIdPrefixLength = urlIdPrefixLength;
}
}

View file

@ -63,4 +63,24 @@ public class CrawlingInfoHelperTest extends UnitFessTestCase {
assertEquals("http://example.com/;role=admin,guest", crawlingInfoHelper.generateId(dataMap));
}
/**
 * Checks the length of ids generated from URLs of increasing length: a 440-character
 * URL is expected to yield a 440-character id, while every longer input tried here is
 * expected to yield a 509-character id (presumably the 445-character prefix plus a
 * 64-character digest - confirm against MessageDigestUtil).
 */
public void test_generateId_long() {
    final StringBuilder filler = new StringBuilder(1000);
    while (filler.length() < 550) {
        filler.append('x');
    }
    final String longUrl = filler.toString();

    // Short enough to be returned as-is.
    assertEquals(440, crawlingInfoHelper.generateId(longUrl.substring(0, 440), null).length());

    // Anything past the prefix length collapses to the same fixed size.
    for (int urlLength = 450; urlLength <= 520; urlLength += 10) {
        assertEquals(509, crawlingInfoHelper.generateId(longUrl.substring(0, urlLength), null).length());
    }
    assertEquals(509, crawlingInfoHelper.generateId(longUrl, null).length());
}
}