fix #986 compress sdh

This commit is contained in:
Shinsuke Sugaya 2017-04-06 15:40:48 +09:00
parent c4cc7bcb23
commit df3d66c57f
7 changed files with 138 additions and 22 deletions

View file

@ -104,7 +104,7 @@ public class SearchService {
return SearchConditionBuilder.builder(searchRequestBuilder)
.query(StringUtil.isBlank(sortField) ? query : query + " sort:" + sortField).offset(pageStart)
.size(pageSize).facetInfo(params.getFacetInfo()).geoInfo(params.getGeoInfo())
.similarHash(params.getSimilarDocHash()).responseFields(queryHelper.getResponseFields())
.similarDocHash(params.getSimilarDocHash()).responseFields(queryHelper.getResponseFields())
.searchRequestType(params.getType()).build();
}, (searchRequestBuilder, execTime, searchResponse) -> {
final QueryResponseList queryResponseList = ComponentUtil.getQueryResponseList();

View file

@ -56,6 +56,7 @@ import org.codelibs.fess.exception.FessSystemException;
import org.codelibs.fess.exception.InvalidQueryException;
import org.codelibs.fess.exception.ResultOffsetExceededException;
import org.codelibs.fess.exception.SearchQueryException;
import org.codelibs.fess.helper.DocumentHelper;
import org.codelibs.fess.helper.QueryHelper;
import org.codelibs.fess.mylasta.direction.FessConfig;
import org.codelibs.fess.util.ComponentUtil;
@ -803,7 +804,7 @@ public class FessEsClient implements Client {
private int size = Constants.DEFAULT_PAGE_SIZE;
private GeoInfo geoInfo;
private FacetInfo facetInfo;
private String similarHash;
private String similarDocHash;
private SearchRequestType searchRequestType = SearchRequestType.SEARCH;
public static SearchConditionBuilder builder(final SearchRequestBuilder searchRequestBuilder) {
@ -844,9 +845,9 @@ public class FessEsClient implements Client {
return this;
}
public SearchConditionBuilder similarHash(final String similarHash) {
if (StringUtil.isNotBlank(similarHash)) {
this.similarHash = similarHash;
/**
 * Sets the similar-document hash used to filter results to documents sharing the
 * same content MinHash (see the contentMinhashBits term filter in build()).
 * Blank or null values are ignored, leaving any previously set hash in place.
 *
 * @param similarDocHash encoded similar-document hash (may be null or blank)
 * @return this builder, for chaining
 */
public SearchConditionBuilder similarDocHash(final String similarDocHash) {
if (StringUtil.isNotBlank(similarDocHash)) {
this.similarDocHash = similarDocHash;
}
return this;
}
@ -868,21 +869,24 @@ public class FessEsClient implements Client {
throw new ResultOffsetExceededException("The number of result size is exceeded.");
}
final QueryContext queryContext = queryHelper.build(searchRequestType, query, context -> {
if (SearchRequestType.ADMIN_SEARCH.equals(searchRequestType)) {
context.skipRoleQuery();
} else if (similarHash != null) {
context.addQuery(boolQuery -> {
boolQuery.filter(QueryBuilders.termQuery(fessConfig.getIndexFieldContentMinhashBits(), similarHash));
});
}
final QueryContext queryContext =
queryHelper.build(searchRequestType, query, context -> {
if (SearchRequestType.ADMIN_SEARCH.equals(searchRequestType)) {
context.skipRoleQuery();
} else if (similarDocHash != null) {
final DocumentHelper documentHelper = ComponentUtil.getDocumentHelper();
context.addQuery(boolQuery -> {
boolQuery.filter(QueryBuilders.termQuery(fessConfig.getIndexFieldContentMinhashBits(),
documentHelper.decodeSimilarDocHash(similarDocHash)));
});
}
if (geoInfo != null && geoInfo.toQueryBuilder() != null) {
context.addQuery(boolQuery -> {
boolQuery.filter(geoInfo.toQueryBuilder());
if (geoInfo != null && geoInfo.toQueryBuilder() != null) {
context.addQuery(boolQuery -> {
boolQuery.filter(geoInfo.toQueryBuilder());
});
}
});
}
});
searchRequestBuilder.setFrom(offset).setSize(size);
@ -939,7 +943,7 @@ public class FessEsClient implements Client {
}));
}
if (!SearchRequestType.ADMIN_SEARCH.equals(searchRequestType) && fessConfig.isResultCollapsed() && similarHash == null) {
if (!SearchRequestType.ADMIN_SEARCH.equals(searchRequestType) && fessConfig.isResultCollapsed() && similarDocHash == null) {
searchRequestBuilder.setCollapse(getCollapseBuilder(fessConfig));
}

View file

@ -15,16 +15,26 @@
*/
package org.codelibs.fess.helper;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.util.Base64;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.apache.commons.lang3.StringUtils;
import org.codelibs.core.io.ReaderUtil;
import org.codelibs.core.io.SerializeUtil;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.fess.Constants;
import org.codelibs.fess.crawler.builder.RequestDataBuilder;
import org.codelibs.fess.crawler.client.CrawlerClient;
import org.codelibs.fess.crawler.client.CrawlerClientFactory;
@ -44,8 +54,14 @@ import org.codelibs.fess.es.config.exentity.CrawlingConfig;
import org.codelibs.fess.mylasta.direction.FessConfig;
import org.codelibs.fess.util.ComponentUtil;
import org.lastaflute.di.core.SingletonLaContainer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class DocumentHelper {
private static final Logger logger = LoggerFactory.getLogger(DocumentHelper.class);
private static final String SIMILAR_DOC_HASH_PREFIX = "$";
public String getContent(final ResponseData responseData, final String content, final Map<String, Object> dataMap) {
if (content == null) {
return StringUtil.EMPTY; // empty
@ -157,4 +173,31 @@ public class DocumentHelper {
}
}
/**
 * Decodes a similar-document hash produced by {@link #encodeSimilarDocHash(String)}.
 * An encoded value is the "$" prefix followed by URL-safe Base64 of the gzipped
 * hash text. Null values, values without the prefix, and a bare "$" are returned
 * unchanged. On any decoding failure the raw input is returned and a warning is
 * logged, so a malformed request parameter never aborts the search.
 *
 * @param hash possibly-encoded similar-document hash (may be null)
 * @return the decoded hash text, or the input itself when it is not decodable
 */
public String decodeSimilarDocHash(final String hash) {
    if (hash != null && hash.startsWith(SIMILAR_DOC_HASH_PREFIX) && hash.length() > SIMILAR_DOC_HASH_PREFIX.length()) {
        try {
            // Decode inside the try: Base64.getUrlDecoder().decode throws an unchecked
            // IllegalArgumentException on malformed input (e.g. a tampered URL parameter),
            // which the original code let propagate past the fallback handling.
            final byte[] decoded = Base64.getUrlDecoder().decode(hash.substring(SIMILAR_DOC_HASH_PREFIX.length()));
            try (BufferedReader reader =
                    new BufferedReader(new InputStreamReader(new GZIPInputStream(new ByteArrayInputStream(decoded)), Constants.UTF_8))) {
                return ReaderUtil.readText(reader);
            }
        } catch (final IOException | IllegalArgumentException e) {
            logger.warn("Failed to decode " + hash, e);
        }
    }
    return hash;
}
/**
 * Encodes a similar-document hash for compact, URL-safe transport: the hash text
 * is gzipped, then URL-safe Base64 encoded without padding, and prefixed with "$".
 * Null values and values already carrying the "$" prefix are returned unchanged
 * (making the operation idempotent). On failure the raw input is returned and a
 * warning is logged.
 *
 * @param hash plain similar-document hash text (may be null)
 * @return the "$"-prefixed encoded form, or the input itself when not encodable
 */
public String encodeSimilarDocHash(final String hash) {
    if (hash != null && !hash.startsWith(SIMILAR_DOC_HASH_PREFIX)) {
        try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
            // Close the gzip stream before reading baos so the gzip trailer is flushed.
            try (GZIPOutputStream gos = new GZIPOutputStream(baos)) {
                gos.write(hash.getBytes(Constants.UTF_8));
            }
            return SIMILAR_DOC_HASH_PREFIX + Base64.getUrlEncoder().withoutPadding().encodeToString(baos.toByteArray());
        } catch (final IOException e) {
            logger.warn("Failed to encode " + hash, e);
        }
    }
    return hash;
}
}

View file

@ -287,4 +287,11 @@ public class FessFunctions {
}
return LaResponseUtil.getResponse().encodeURL(sb.toString());
}
/**
 * JSP EL function {@code fe:sdh(...)}: encodes a similar-document hash via
 * DocumentHelper.encodeSimilarDocHash for use as a URL parameter.
 * Blank or null input is passed through untouched.
 *
 * @param input plain similar-document hash (may be null or blank)
 * @return the encoded hash, or the input itself when blank
 */
public static String sdh(final String input) {
    if (StringUtil.isNotBlank(input)) {
        return ComponentUtil.getDocumentHelper().encodeSimilarDocHash(input);
    }
    return input;
}
}

View file

@ -210,4 +210,12 @@
<function-signature>java.lang.String url(java.lang.String)</function-signature>
<example>&lt;a href="${fe:url(param:info)}" ...</example>
</function>
<function>
<description>Encode Similar Document Hash.</description>
<name>sdh</name>
<function-class>org.codelibs.fess.taglib.FessFunctions</function-class>
<function-signature>java.lang.String sdh(java.lang.String)</function-signature>
<example>${fe:sdh(doc.similar_docs_hash)}</example>
</function>
</taglib>

View file

@ -58,7 +58,7 @@
</c:if>
<c:if test="${doc.similar_docs_count!=null&&doc.similar_docs_count>1}">
<small class="hidden-md-down"> <la:link
href="/search?q=${f:u(q)}&ex_q=${f:u(queryEntry.value)}&sdh=${f:u(doc.similar_docs_hash)}${fe:facetQuery()}${fe:geoQuery()}">
href="/search?q=${f:u(q)}&ex_q=${f:u(queryEntry.value)}&sdh=${f:u(fe:sdh(doc.similar_docs_hash))}${fe:facetQuery()}${fe:geoQuery()}">
<la:message key="labels.search_result_similar"
arg0="${fe:formatNumber(doc.similar_docs_count-1)}" />
</la:link>
@ -138,7 +138,7 @@
<c:if
test="${countEntry.value != 0 && fe:labelexists(countEntry.key)}">
<li class="list-group-item"><la:link
href="/search?q=${f:u(q)}&ex_q=label%3a${f:u(countEntry.key)}&sdh=${f:u(sh)}${fe:pagingQuery(null)}${fe:facetQuery()}${fe:geoQuery()}">
href="/search?q=${f:u(q)}&ex_q=label%3a${f:u(countEntry.key)}&sdh=${f:u(fe:sdh(sh))}${fe:pagingQuery(null)}${fe:facetQuery()}${fe:geoQuery()}">
${f:h(fe:label(countEntry.key))}
<span class="label label-default label-pill pull-right">${f:h(countEntry.value)}</span>
</la:link></li>
@ -155,7 +155,7 @@
<c:forEach var="queryEntry" items="${facetQueryView.queryMap}">
<c:if test="${facetResponse.queryCountMap[queryEntry.value] != 0}">
<li class="list-group-item p-l-md"><la:link
href="/search?q=${f:u(q)}&ex_q=${f:u(queryEntry.value)}&sdh=${f:u(sh)}${fe:pagingQuery(queryEntry.value)}${fe:facetQuery()}${fe:geoQuery()}">
href="/search?q=${f:u(q)}&ex_q=${f:u(queryEntry.value)}&sdh=${f:u(fe:sdh(sh))}${fe:pagingQuery(queryEntry.value)}${fe:facetQuery()}${fe:geoQuery()}">
<la:message key="${queryEntry.key}" />
<span class="label label-default label-pill pull-right">${f:h(facetResponse.queryCountMap[queryEntry.value])}</span>
</la:link></li>

View file

@ -120,4 +120,58 @@ public class DocumentHelperTest extends UnitFessTestCase {
assertEquals("1234567...", documentHelper.getDigest(responseData, " 1234567890 1234567890 1234567890 ", dataMap, 10));
assertEquals("...", documentHelper.getDigest(responseData, "", dataMap, 10));
}
/**
 * encodeSimilarDocHash: plain hashes become "$" + gzip + URL-safe Base64,
 * already-encoded values pass through unchanged, and null stays null.
 */
public void test_encodeSimilarDocHash() {
    final DocumentHelper documentHelper = new DocumentHelper();
    final String[][] cases = { // { plain hash, expected encoded value }
            { "01010101010101010101010101010101", "$H4sIAAAAAAAAADMwNMALAXC7sg0gAAAA" },
            { "00101010010010100100101010001010", "$H4sIAAAAAAAAADMwMARDCELQQApMAgAi5-3LIAAAAA" },
            { "0001010100100101001001010100010100101010010010100100101011000100",
                    "$H4sIAAAAAAAAADMwMDAEQwhC0EAKxscqDZE3AABZOHx2QAAAAA" },
            { "00100101010001010100100101001001010100010100010101010010010101010010101100010101001000010101001001010001000001010010101001001010",
                    "$H4sIAAAAAAAAADMwMDQAIhCE01ARdBkYD1kGxkDVjWESRBBJPVwKAHL5QrqAAAAA" },
            { "", "$H4sIAAAAAAAAAAMAAAAAAAAAAAA" },
            // input already carrying the "$" prefix is returned as-is
            { "$H4sIAAAAAAAAAAMAAAAAAAAAAAA", "$H4sIAAAAAAAAAAMAAAAAAAAAAAA" }, };
    for (final String[] c : cases) {
        assertEquals(c[1], documentHelper.encodeSimilarDocHash(c[0]));
    }
    assertNull(documentHelper.encodeSimilarDocHash(null));
}
/**
 * decodeSimilarDocHash: "$"-prefixed values are Base64-decoded and gunzipped
 * back to the original hash, unprefixed values pass through, and null stays null.
 */
public void test_decodeSimilarDocHash() {
    final DocumentHelper documentHelper = new DocumentHelper();
    final String[][] cases = { // { encoded input, expected decoded hash }
            { "$H4sIAAAAAAAAADMwNMALAXC7sg0gAAAA", "01010101010101010101010101010101" },
            { "$H4sIAAAAAAAAADMwMARDCELQQApMAgAi5-3LIAAAAA", "00101010010010100100101010001010" },
            { "$H4sIAAAAAAAAADMwMDAEQwhC0EAKxscqDZE3AABZOHx2QAAAAA",
                    "0001010100100101001001010100010100101010010010100100101011000100" },
            { "$H4sIAAAAAAAAADMwMDQAIhCE01ARdBkYD1kGxkDVjWESRBBJPVwKAHL5QrqAAAAA",
                    "00100101010001010100100101001001010100010100010101010010010101010010101100010101001000010101001001010001000001010010101001001010" },
            // values without the "$" prefix are returned unchanged
            { "01010101010101010101010101010101", "01010101010101010101010101010101" },
            { "", "" }, };
    for (final String[] c : cases) {
        assertEquals(c[1], documentHelper.decodeSimilarDocHash(c[0]));
    }
    assertNull(documentHelper.decodeSimilarDocHash(null));
}
}