fix #986 compress sdh
This commit is contained in:
parent
c4cc7bcb23
commit
df3d66c57f
7 changed files with 138 additions and 22 deletions
|
@ -104,7 +104,7 @@ public class SearchService {
|
|||
return SearchConditionBuilder.builder(searchRequestBuilder)
|
||||
.query(StringUtil.isBlank(sortField) ? query : query + " sort:" + sortField).offset(pageStart)
|
||||
.size(pageSize).facetInfo(params.getFacetInfo()).geoInfo(params.getGeoInfo())
|
||||
.similarHash(params.getSimilarDocHash()).responseFields(queryHelper.getResponseFields())
|
||||
.similarDocHash(params.getSimilarDocHash()).responseFields(queryHelper.getResponseFields())
|
||||
.searchRequestType(params.getType()).build();
|
||||
}, (searchRequestBuilder, execTime, searchResponse) -> {
|
||||
final QueryResponseList queryResponseList = ComponentUtil.getQueryResponseList();
|
||||
|
|
|
@ -56,6 +56,7 @@ import org.codelibs.fess.exception.FessSystemException;
|
|||
import org.codelibs.fess.exception.InvalidQueryException;
|
||||
import org.codelibs.fess.exception.ResultOffsetExceededException;
|
||||
import org.codelibs.fess.exception.SearchQueryException;
|
||||
import org.codelibs.fess.helper.DocumentHelper;
|
||||
import org.codelibs.fess.helper.QueryHelper;
|
||||
import org.codelibs.fess.mylasta.direction.FessConfig;
|
||||
import org.codelibs.fess.util.ComponentUtil;
|
||||
|
@ -803,7 +804,7 @@ public class FessEsClient implements Client {
|
|||
private int size = Constants.DEFAULT_PAGE_SIZE;
|
||||
private GeoInfo geoInfo;
|
||||
private FacetInfo facetInfo;
|
||||
private String similarHash;
|
||||
private String similarDocHash;
|
||||
private SearchRequestType searchRequestType = SearchRequestType.SEARCH;
|
||||
|
||||
public static SearchConditionBuilder builder(final SearchRequestBuilder searchRequestBuilder) {
|
||||
|
@ -844,9 +845,9 @@ public class FessEsClient implements Client {
|
|||
return this;
|
||||
}
|
||||
|
||||
public SearchConditionBuilder similarHash(final String similarHash) {
|
||||
if (StringUtil.isNotBlank(similarHash)) {
|
||||
this.similarHash = similarHash;
|
||||
public SearchConditionBuilder similarDocHash(final String similarDocHash) {
|
||||
if (StringUtil.isNotBlank(similarDocHash)) {
|
||||
this.similarDocHash = similarDocHash;
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
@ -868,21 +869,24 @@ public class FessEsClient implements Client {
|
|||
throw new ResultOffsetExceededException("The number of result size is exceeded.");
|
||||
}
|
||||
|
||||
final QueryContext queryContext = queryHelper.build(searchRequestType, query, context -> {
|
||||
if (SearchRequestType.ADMIN_SEARCH.equals(searchRequestType)) {
|
||||
context.skipRoleQuery();
|
||||
} else if (similarHash != null) {
|
||||
context.addQuery(boolQuery -> {
|
||||
boolQuery.filter(QueryBuilders.termQuery(fessConfig.getIndexFieldContentMinhashBits(), similarHash));
|
||||
});
|
||||
}
|
||||
final QueryContext queryContext =
|
||||
queryHelper.build(searchRequestType, query, context -> {
|
||||
if (SearchRequestType.ADMIN_SEARCH.equals(searchRequestType)) {
|
||||
context.skipRoleQuery();
|
||||
} else if (similarDocHash != null) {
|
||||
final DocumentHelper documentHelper = ComponentUtil.getDocumentHelper();
|
||||
context.addQuery(boolQuery -> {
|
||||
boolQuery.filter(QueryBuilders.termQuery(fessConfig.getIndexFieldContentMinhashBits(),
|
||||
documentHelper.decodeSimilarDocHash(similarDocHash)));
|
||||
});
|
||||
}
|
||||
|
||||
if (geoInfo != null && geoInfo.toQueryBuilder() != null) {
|
||||
context.addQuery(boolQuery -> {
|
||||
boolQuery.filter(geoInfo.toQueryBuilder());
|
||||
if (geoInfo != null && geoInfo.toQueryBuilder() != null) {
|
||||
context.addQuery(boolQuery -> {
|
||||
boolQuery.filter(geoInfo.toQueryBuilder());
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
searchRequestBuilder.setFrom(offset).setSize(size);
|
||||
|
||||
|
@ -939,7 +943,7 @@ public class FessEsClient implements Client {
|
|||
}));
|
||||
}
|
||||
|
||||
if (!SearchRequestType.ADMIN_SEARCH.equals(searchRequestType) && fessConfig.isResultCollapsed() && similarHash == null) {
|
||||
if (!SearchRequestType.ADMIN_SEARCH.equals(searchRequestType) && fessConfig.isResultCollapsed() && similarDocHash == null) {
|
||||
searchRequestBuilder.setCollapse(getCollapseBuilder(fessConfig));
|
||||
}
|
||||
|
||||
|
|
|
@ -15,16 +15,26 @@
|
|||
*/
|
||||
package org.codelibs.fess.helper;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.Base64;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
import java.util.zip.GZIPOutputStream;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.codelibs.core.io.ReaderUtil;
|
||||
import org.codelibs.core.io.SerializeUtil;
|
||||
import org.codelibs.core.lang.StringUtil;
|
||||
import org.codelibs.fess.Constants;
|
||||
import org.codelibs.fess.crawler.builder.RequestDataBuilder;
|
||||
import org.codelibs.fess.crawler.client.CrawlerClient;
|
||||
import org.codelibs.fess.crawler.client.CrawlerClientFactory;
|
||||
|
@ -44,8 +54,14 @@ import org.codelibs.fess.es.config.exentity.CrawlingConfig;
|
|||
import org.codelibs.fess.mylasta.direction.FessConfig;
|
||||
import org.codelibs.fess.util.ComponentUtil;
|
||||
import org.lastaflute.di.core.SingletonLaContainer;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class DocumentHelper {
|
||||
private static final Logger logger = LoggerFactory.getLogger(DocumentHelper.class);
|
||||
|
||||
private static final String SIMILAR_DOC_HASH_PREFIX = "$";
|
||||
|
||||
public String getContent(final ResponseData responseData, final String content, final Map<String, Object> dataMap) {
|
||||
if (content == null) {
|
||||
return StringUtil.EMPTY; // empty
|
||||
|
@ -157,4 +173,31 @@ public class DocumentHelper {
|
|||
}
|
||||
}
|
||||
|
||||
public String decodeSimilarDocHash(String hash) {
|
||||
if (hash != null && hash.startsWith(SIMILAR_DOC_HASH_PREFIX) && hash.length() > SIMILAR_DOC_HASH_PREFIX.length()) {
|
||||
byte[] decode = Base64.getUrlDecoder().decode(hash.substring(SIMILAR_DOC_HASH_PREFIX.length()));
|
||||
try (BufferedReader reader =
|
||||
new BufferedReader(new InputStreamReader(new GZIPInputStream(new ByteArrayInputStream(decode)), Constants.UTF_8))) {
|
||||
return ReaderUtil.readText(reader);
|
||||
} catch (IOException e) {
|
||||
logger.warn("Failed to decode " + hash, e);
|
||||
}
|
||||
}
|
||||
return hash;
|
||||
}
|
||||
|
||||
public String encodeSimilarDocHash(String hash) {
|
||||
if (hash != null && !hash.startsWith(SIMILAR_DOC_HASH_PREFIX)) {
|
||||
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||
try (GZIPOutputStream gos = new GZIPOutputStream(baos)) {
|
||||
gos.write(hash.getBytes(Constants.UTF_8));
|
||||
}
|
||||
return SIMILAR_DOC_HASH_PREFIX + Base64.getUrlEncoder().withoutPadding().encodeToString(baos.toByteArray());
|
||||
} catch (IOException e) {
|
||||
logger.warn("Failed to encode " + hash, e);
|
||||
}
|
||||
}
|
||||
return hash;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -287,4 +287,11 @@ public class FessFunctions {
|
|||
}
|
||||
return LaResponseUtil.getResponse().encodeURL(sb.toString());
|
||||
}
|
||||
|
||||
public static String sdh(final String input) {
|
||||
if (StringUtil.isBlank(input)) {
|
||||
return input;
|
||||
}
|
||||
return ComponentUtil.getDocumentHelper().encodeSimilarDocHash(input);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -210,4 +210,12 @@
|
|||
<function-signature>java.lang.String url(java.lang.String)</function-signature>
|
||||
<example><a href="${fe:url(param:info)}" ...</example>
|
||||
</function>
|
||||
|
||||
<function>
|
||||
<description>Encode Similar Document Hash.</description>
|
||||
<name>sdh</name>
|
||||
<function-class>org.codelibs.fess.taglib.FessFunctions</function-class>
|
||||
<function-signature>java.lang.String sdh(java.lang.String)</function-signature>
|
||||
<example>${fe:sdh(doc.similar_docs_hash)}</example>
|
||||
</function>
|
||||
</taglib>
|
||||
|
|
|
@ -58,7 +58,7 @@
|
|||
</c:if>
|
||||
<c:if test="${doc.similar_docs_count!=null&&doc.similar_docs_count>1}">
|
||||
<small class="hidden-md-down"> <la:link
|
||||
href="/search?q=${f:u(q)}&ex_q=${f:u(queryEntry.value)}&sdh=${f:u(doc.similar_docs_hash)}${fe:facetQuery()}${fe:geoQuery()}">
|
||||
href="/search?q=${f:u(q)}&ex_q=${f:u(queryEntry.value)}&sdh=${f:u(fe:sdh(doc.similar_docs_hash))}${fe:facetQuery()}${fe:geoQuery()}">
|
||||
<la:message key="labels.search_result_similar"
|
||||
arg0="${fe:formatNumber(doc.similar_docs_count-1)}" />
|
||||
</la:link>
|
||||
|
@ -138,7 +138,7 @@
|
|||
<c:if
|
||||
test="${countEntry.value != 0 && fe:labelexists(countEntry.key)}">
|
||||
<li class="list-group-item"><la:link
|
||||
href="/search?q=${f:u(q)}&ex_q=label%3a${f:u(countEntry.key)}&sdh=${f:u(sh)}${fe:pagingQuery(null)}${fe:facetQuery()}${fe:geoQuery()}">
|
||||
href="/search?q=${f:u(q)}&ex_q=label%3a${f:u(countEntry.key)}&sdh=${f:u(fe:sdh(sh))}${fe:pagingQuery(null)}${fe:facetQuery()}${fe:geoQuery()}">
|
||||
${f:h(fe:label(countEntry.key))}
|
||||
<span class="label label-default label-pill pull-right">${f:h(countEntry.value)}</span>
|
||||
</la:link></li>
|
||||
|
@ -155,7 +155,7 @@
|
|||
<c:forEach var="queryEntry" items="${facetQueryView.queryMap}">
|
||||
<c:if test="${facetResponse.queryCountMap[queryEntry.value] != 0}">
|
||||
<li class="list-group-item p-l-md"><la:link
|
||||
href="/search?q=${f:u(q)}&ex_q=${f:u(queryEntry.value)}&sdh=${f:u(sh)}${fe:pagingQuery(queryEntry.value)}${fe:facetQuery()}${fe:geoQuery()}">
|
||||
href="/search?q=${f:u(q)}&ex_q=${f:u(queryEntry.value)}&sdh=${f:u(fe:sdh(sdh))}${fe:pagingQuery(queryEntry.value)}${fe:facetQuery()}${fe:geoQuery()}">
|
||||
<la:message key="${queryEntry.key}" />
|
||||
<span class="label label-default label-pill pull-right">${f:h(facetResponse.queryCountMap[queryEntry.value])}</span>
|
||||
</la:link></li>
|
||||
|
|
|
@ -120,4 +120,58 @@ public class DocumentHelperTest extends UnitFessTestCase {
|
|||
assertEquals("1234567...", documentHelper.getDigest(responseData, " 1234567890 1234567890 1234567890 ", dataMap, 10));
|
||||
assertEquals("1234567...", documentHelper.getDigest(responseData, "12345678901234567890", dataMap, 10));
|
||||
}
|
||||
|
||||
public void test_encodeSimilarDocHash() {
|
||||
DocumentHelper documentHelper = new DocumentHelper();
|
||||
|
||||
String hash = "01010101010101010101010101010101";
|
||||
String value = "$H4sIAAAAAAAAADMwNMALAXC7sg0gAAAA";
|
||||
assertEquals(value, documentHelper.encodeSimilarDocHash(hash));
|
||||
hash = "00101010010010100100101010001010";
|
||||
value = "$H4sIAAAAAAAAADMwMARDCELQQApMAgAi5-3LIAAAAA";
|
||||
assertEquals(value, documentHelper.encodeSimilarDocHash(hash));
|
||||
hash = "0001010100100101001001010100010100101010010010100100101011000100";
|
||||
value = "$H4sIAAAAAAAAADMwMDAEQwhC0EAKxscqDZE3AABZOHx2QAAAAA";
|
||||
assertEquals(value, documentHelper.encodeSimilarDocHash(hash));
|
||||
hash =
|
||||
"00100101010001010100100101001001010100010100010101010010010101010010101100010101001000010101001001010001000001010010101001001010";
|
||||
value = "$H4sIAAAAAAAAADMwMDQAIhCE01ARdBkYD1kGxkDVjWESRBBJPVwKAHL5QrqAAAAA";
|
||||
assertEquals(value, documentHelper.encodeSimilarDocHash(hash));
|
||||
|
||||
hash = "";
|
||||
value = "$H4sIAAAAAAAAAAMAAAAAAAAAAAA";
|
||||
assertEquals(value, documentHelper.encodeSimilarDocHash(hash));
|
||||
hash = "$H4sIAAAAAAAAAAMAAAAAAAAAAAA";
|
||||
value = "$H4sIAAAAAAAAAAMAAAAAAAAAAAA";
|
||||
assertEquals(value, documentHelper.encodeSimilarDocHash(hash));
|
||||
|
||||
assertNull(documentHelper.encodeSimilarDocHash(null));
|
||||
}
|
||||
|
||||
public void test_decodeSimilarDocHash() {
|
||||
DocumentHelper documentHelper = new DocumentHelper();
|
||||
|
||||
String hash = "01010101010101010101010101010101";
|
||||
String value = "$H4sIAAAAAAAAADMwNMALAXC7sg0gAAAA";
|
||||
assertEquals(hash, documentHelper.decodeSimilarDocHash(value));
|
||||
hash = "00101010010010100100101010001010";
|
||||
value = "$H4sIAAAAAAAAADMwMARDCELQQApMAgAi5-3LIAAAAA";
|
||||
assertEquals(hash, documentHelper.decodeSimilarDocHash(value));
|
||||
hash = "0001010100100101001001010100010100101010010010100100101011000100";
|
||||
value = "$H4sIAAAAAAAAADMwMDAEQwhC0EAKxscqDZE3AABZOHx2QAAAAA";
|
||||
assertEquals(hash, documentHelper.decodeSimilarDocHash(value));
|
||||
hash =
|
||||
"00100101010001010100100101001001010100010100010101010010010101010010101100010101001000010101001001010001000001010010101001001010";
|
||||
value = "$H4sIAAAAAAAAADMwMDQAIhCE01ARdBkYD1kGxkDVjWESRBBJPVwKAHL5QrqAAAAA";
|
||||
assertEquals(hash, documentHelper.decodeSimilarDocHash(value));
|
||||
|
||||
hash = "01010101010101010101010101010101";
|
||||
value = "01010101010101010101010101010101";
|
||||
assertEquals(hash, documentHelper.decodeSimilarDocHash(value));
|
||||
hash = "";
|
||||
value = "";
|
||||
assertEquals(hash, documentHelper.decodeSimilarDocHash(value));
|
||||
|
||||
assertNull(documentHelper.decodeSimilarDocHash(null));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue