diff --git a/src/main/java/org/codelibs/fess/app/service/SearchService.java b/src/main/java/org/codelibs/fess/app/service/SearchService.java
index 7b858b0c5..9856e7f7e 100644
--- a/src/main/java/org/codelibs/fess/app/service/SearchService.java
+++ b/src/main/java/org/codelibs/fess/app/service/SearchService.java
@@ -104,7 +104,7 @@ public class SearchService {
                             return SearchConditionBuilder.builder(searchRequestBuilder)
                                     .query(StringUtil.isBlank(sortField) ? query : query + " sort:" + sortField).offset(pageStart)
                                     .size(pageSize).facetInfo(params.getFacetInfo()).geoInfo(params.getGeoInfo())
-                                    .similarHash(params.getSimilarDocHash()).responseFields(queryHelper.getResponseFields())
+                                    .similarDocHash(params.getSimilarDocHash()).responseFields(queryHelper.getResponseFields())
                                     .searchRequestType(params.getType()).build();
                         }, (searchRequestBuilder, execTime, searchResponse) -> {
                             final QueryResponseList queryResponseList = ComponentUtil.getQueryResponseList();
diff --git a/src/main/java/org/codelibs/fess/es/client/FessEsClient.java b/src/main/java/org/codelibs/fess/es/client/FessEsClient.java
index 73bd1a33f..662d27d2c 100644
--- a/src/main/java/org/codelibs/fess/es/client/FessEsClient.java
+++ b/src/main/java/org/codelibs/fess/es/client/FessEsClient.java
@@ -56,6 +56,7 @@ import org.codelibs.fess.exception.FessSystemException;
 import org.codelibs.fess.exception.InvalidQueryException;
 import org.codelibs.fess.exception.ResultOffsetExceededException;
 import org.codelibs.fess.exception.SearchQueryException;
+import org.codelibs.fess.helper.DocumentHelper;
 import org.codelibs.fess.helper.QueryHelper;
 import org.codelibs.fess.mylasta.direction.FessConfig;
 import org.codelibs.fess.util.ComponentUtil;
@@ -803,7 +804,7 @@ public class FessEsClient implements Client {
         private int size = Constants.DEFAULT_PAGE_SIZE;
         private GeoInfo geoInfo;
         private FacetInfo facetInfo;
-        private String similarHash;
+        private String similarDocHash;
         private SearchRequestType searchRequestType = SearchRequestType.SEARCH;

         public static SearchConditionBuilder builder(final SearchRequestBuilder searchRequestBuilder) {
@@ -844,9 +845,9 @@ public class FessEsClient implements Client {
             return this;
         }

-        public SearchConditionBuilder similarHash(final String similarHash) {
-            if (StringUtil.isNotBlank(similarHash)) {
-                this.similarHash = similarHash;
+        public SearchConditionBuilder similarDocHash(final String similarDocHash) {
+            if (StringUtil.isNotBlank(similarDocHash)) {
+                this.similarDocHash = similarDocHash;
             }
             return this;
         }
@@ -868,21 +869,24 @@ public class FessEsClient implements Client {
                 throw new ResultOffsetExceededException("The number of result size is exceeded.");
             }

-            final QueryContext queryContext = queryHelper.build(searchRequestType, query, context -> {
-                if (SearchRequestType.ADMIN_SEARCH.equals(searchRequestType)) {
-                    context.skipRoleQuery();
-                } else if (similarHash != null) {
-                    context.addQuery(boolQuery -> {
-                        boolQuery.filter(QueryBuilders.termQuery(fessConfig.getIndexFieldContentMinhashBits(), similarHash));
-                    });
-                }
+            final QueryContext queryContext =
+                    queryHelper.build(searchRequestType, query, context -> {
+                        if (SearchRequestType.ADMIN_SEARCH.equals(searchRequestType)) {
+                            context.skipRoleQuery();
+                        } else if (similarDocHash != null) {
+                            final DocumentHelper documentHelper = ComponentUtil.getDocumentHelper();
+                            context.addQuery(boolQuery -> {
+                                boolQuery.filter(QueryBuilders.termQuery(fessConfig.getIndexFieldContentMinhashBits(),
+                                        documentHelper.decodeSimilarDocHash(similarDocHash)));
+                            });
+                        }

-                if (geoInfo != null && geoInfo.toQueryBuilder() != null) {
-                    context.addQuery(boolQuery -> {
-                        boolQuery.filter(geoInfo.toQueryBuilder());
+                        if (geoInfo != null && geoInfo.toQueryBuilder() != null) {
+                            context.addQuery(boolQuery -> {
+                                boolQuery.filter(geoInfo.toQueryBuilder());
+                            });
+                        }
                     });
-                }
-            });

             searchRequestBuilder.setFrom(offset).setSize(size);

@@ -939,7 +943,7 @@ public class FessEsClient implements Client {
                 }));
             }

-            if (!SearchRequestType.ADMIN_SEARCH.equals(searchRequestType) && fessConfig.isResultCollapsed() && similarHash == null) {
+            if (!SearchRequestType.ADMIN_SEARCH.equals(searchRequestType) && fessConfig.isResultCollapsed() && similarDocHash == null) {
                 searchRequestBuilder.setCollapse(getCollapseBuilder(fessConfig));
             }

diff --git a/src/main/java/org/codelibs/fess/helper/DocumentHelper.java b/src/main/java/org/codelibs/fess/helper/DocumentHelper.java
index b8c8ffb86..3b0fc1222 100644
--- a/src/main/java/org/codelibs/fess/helper/DocumentHelper.java
+++ b/src/main/java/org/codelibs/fess/helper/DocumentHelper.java
@@ -15,16 +15,26 @@
  */
 package org.codelibs.fess.helper;

+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.io.Reader;
 import java.io.StringReader;
+import java.io.UnsupportedEncodingException;
+import java.util.Base64;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
+import java.util.zip.GZIPInputStream;
+import java.util.zip.GZIPOutputStream;

 import org.apache.commons.lang3.StringUtils;
+import org.codelibs.core.io.ReaderUtil;
 import org.codelibs.core.io.SerializeUtil;
 import org.codelibs.core.lang.StringUtil;
+import org.codelibs.fess.Constants;
 import org.codelibs.fess.crawler.builder.RequestDataBuilder;
 import org.codelibs.fess.crawler.client.CrawlerClient;
 import org.codelibs.fess.crawler.client.CrawlerClientFactory;
@@ -44,8 +54,14 @@ import org.codelibs.fess.es.config.exentity.CrawlingConfig;
 import org.codelibs.fess.mylasta.direction.FessConfig;
 import org.codelibs.fess.util.ComponentUtil;
 import org.lastaflute.di.core.SingletonLaContainer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;

 public class DocumentHelper {
+    private static final Logger logger = LoggerFactory.getLogger(DocumentHelper.class);
+
+    private static final String SIMILAR_DOC_HASH_PREFIX = "$";
+
     public String 
getContent(final ResponseData responseData, final String content, final Map dataMap) {
         if (content == null) {
             return StringUtil.EMPTY; // empty
@@ -157,4 +173,62 @@ public class DocumentHelper {
         }
     }

+    /**
+     * Decodes a value produced by {@link #encodeSimilarDocHash(String)}.
+     * <p>
+     * Only values that start with the {@code $} prefix and have at least one more
+     * character are treated as encoded: the remainder is decoded as URL-safe Base64,
+     * gunzipped and read as text. Anything else, including {@code null}, is returned
+     * unchanged.
+     * <p>
+     * The value can originate from the {@code sdh} request parameter, so malformed
+     * input must not escape as an exception: on any decoding failure a warning is
+     * logged and the input is returned as-is.
+     *
+     * @param hash encoded or plain hash, may be {@code null}
+     * @return the decoded hash, or {@code hash} itself if it is not encoded or cannot be decoded
+     */
+    public String decodeSimilarDocHash(String hash) {
+        if (hash != null && hash.startsWith(SIMILAR_DOC_HASH_PREFIX) && hash.length() > SIMILAR_DOC_HASH_PREFIX.length()) {
+            try {
+                // Decode inside the try: Base64.Decoder#decode throws IllegalArgumentException on malformed input.
+                byte[] decode = Base64.getUrlDecoder().decode(hash.substring(SIMILAR_DOC_HASH_PREFIX.length()));
+                try (BufferedReader reader =
+                        new BufferedReader(new InputStreamReader(new GZIPInputStream(new ByteArrayInputStream(decode)), Constants.UTF_8))) {
+                    return ReaderUtil.readText(reader);
+                }
+            } catch (IOException | RuntimeException e) {
+                // RuntimeException also covers read failures on corrupt gzip data should the
+                // reader helper report them unchecked (not visible here); fall back to the input.
+                logger.warn("Failed to decode " + hash, e);
+            }
+        }
+        return hash;
+    }
+
+    /**
+     * Encodes a similar-document hash for use in a URL.
+     * <p>
+     * The hash is gzip-compressed and written as unpadded URL-safe Base64 behind the
+     * {@code $} prefix. {@code null} and values that already start with the prefix are
+     * returned unchanged; if compression fails, a warning is logged and the input is
+     * returned as-is.
+     *
+     * @param hash plain hash, may be {@code null}
+     * @return the encoded hash, or {@code hash} itself if it was not encoded
+     */
+    public String encodeSimilarDocHash(String hash) {
+        if (hash != null && !hash.startsWith(SIMILAR_DOC_HASH_PREFIX)) {
+            try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
+                try (GZIPOutputStream gos = new GZIPOutputStream(baos)) {
+                    gos.write(hash.getBytes(Constants.UTF_8));
+                }
+                return SIMILAR_DOC_HASH_PREFIX + Base64.getUrlEncoder().withoutPadding().encodeToString(baos.toByteArray());
+            } catch (IOException e) {
+                logger.warn("Failed to encode " + hash, e);
+            }
+        }
+        return hash;
+    }
+
 }
diff --git a/src/main/java/org/codelibs/fess/taglib/FessFunctions.java b/src/main/java/org/codelibs/fess/taglib/FessFunctions.java
index fea4cb1c7..b93e9fe22 100644
--- a/src/main/java/org/codelibs/fess/taglib/FessFunctions.java
+++ b/src/main/java/org/codelibs/fess/taglib/FessFunctions.java
@@ -287,4 +287,18 @@ public class FessFunctions {
         }
         return LaResponseUtil.getResponse().encodeURL(sb.toString());
     }
+
+    /**
+     * Encodes a similar-document hash for use as the {@code sdh} link parameter.
+     *
+     * @param input plain hash, may be blank
+     * @return {@code input} unchanged when blank, otherwise the result of
+     *         {@code DocumentHelper#encodeSimilarDocHash(String)}
+     */
+    public static String sdh(final String input) {
+        if (StringUtil.isBlank(input)) {
+            return input;
+        }
+        return ComponentUtil.getDocumentHelper().encodeSimilarDocHash(input);
+    }
 }
diff --git a/src/main/webapp/WEB-INF/fe.tld b/src/main/webapp/WEB-INF/fe.tld
index 86aef0b40..40fa9aa2f 100644
--- 
a/src/main/webapp/WEB-INF/fe.tld
+++ b/src/main/webapp/WEB-INF/fe.tld
@@ -210,4 +210,12 @@
     java.lang.String url(java.lang.String)
     <a href="${fe:url(param:info)}" ...
+
+
+    Encode Similar Document Hash.
+    sdh
+    org.codelibs.fess.taglib.FessFunctions
+    java.lang.String sdh(java.lang.String)
+    ${fe:sdh(doc.similar_docs_hash)}
+
diff --git a/src/main/webapp/WEB-INF/view/searchResults.jsp b/src/main/webapp/WEB-INF/view/searchResults.jsp
index 773dbde22..9eed77d29 100644
--- a/src/main/webapp/WEB-INF/view/searchResults.jsp
+++ b/src/main/webapp/WEB-INF/view/searchResults.jsp
@@ -58,7 +58,7 @@
+    href="/search?q=${f:u(q)}&ex_q=${f:u(queryEntry.value)}&sdh=${f:u(fe:sdh(doc.similar_docs_hash))}${fe:facetQuery()}${fe:geoQuery()}">
@@ -138,7 +138,7 @@
  •
+    href="/search?q=${f:u(q)}&ex_q=label%3a${f:u(countEntry.key)}&sdh=${f:u(fe:sdh(sdh))}${fe:pagingQuery(null)}${fe:facetQuery()}${fe:geoQuery()}">
     ${f:h(fe:label(countEntry.key))}
     ${f:h(countEntry.value)}
  •
@@ -155,7 +155,7 @@
  •
+    href="/search?q=${f:u(q)}&ex_q=${f:u(queryEntry.value)}&sdh=${f:u(fe:sdh(sdh))}${fe:pagingQuery(queryEntry.value)}${fe:facetQuery()}${fe:geoQuery()}">
     ${f:h(facetResponse.queryCountMap[queryEntry.value])}
  •
diff --git a/src/test/java/org/codelibs/fess/helper/DocumentHelperTest.java b/src/test/java/org/codelibs/fess/helper/DocumentHelperTest.java
index da6a1e30a..761e4415b 100644
--- a/src/test/java/org/codelibs/fess/helper/DocumentHelperTest.java
+++ b/src/test/java/org/codelibs/fess/helper/DocumentHelperTest.java
@@ -120,4 +120,58 @@ public class DocumentHelperTest extends UnitFessTestCase {
         assertEquals("1234567...", documentHelper.getDigest(responseData, " 1234567890 1234567890 1234567890 ", dataMap, 10));
         assertEquals("1234567...", documentHelper.getDigest(responseData, "12345678901234567890", dataMap, 10));
     }
+
+    public void test_encodeSimilarDocHash() {
+        DocumentHelper documentHelper = new DocumentHelper();
+
+        String hash = "01010101010101010101010101010101";
+        String value = "$H4sIAAAAAAAAADMwNMALAXC7sg0gAAAA";
+        assertEquals(value, documentHelper.encodeSimilarDocHash(hash));
+        hash = "00101010010010100100101010001010";
+        value = "$H4sIAAAAAAAAADMwMARDCELQQApMAgAi5-3LIAAAAA";
+        assertEquals(value, documentHelper.encodeSimilarDocHash(hash));
+        hash = "0001010100100101001001010100010100101010010010100100101011000100";
+        value = "$H4sIAAAAAAAAADMwMDAEQwhC0EAKxscqDZE3AABZOHx2QAAAAA";
+        assertEquals(value, documentHelper.encodeSimilarDocHash(hash));
+        hash =
+                "00100101010001010100100101001001010100010100010101010010010101010010101100010101001000010101001001010001000001010010101001001010";
+        value = "$H4sIAAAAAAAAADMwMDQAIhCE01ARdBkYD1kGxkDVjWESRBBJPVwKAHL5QrqAAAAA";
+        assertEquals(value, documentHelper.encodeSimilarDocHash(hash));
+
+        hash = "";
+        value = "$H4sIAAAAAAAAAAMAAAAAAAAAAAA";
+        assertEquals(value, documentHelper.encodeSimilarDocHash(hash));
+        hash = "$H4sIAAAAAAAAAAMAAAAAAAAAAAA";
+        value = "$H4sIAAAAAAAAAAMAAAAAAAAAAAA";
+        assertEquals(value, documentHelper.encodeSimilarDocHash(hash));
+
+        assertNull(documentHelper.encodeSimilarDocHash(null));
+    }
+
+    public void test_decodeSimilarDocHash() {
+        DocumentHelper documentHelper = new DocumentHelper();
+
+        String hash = "01010101010101010101010101010101";
+        String value = "$H4sIAAAAAAAAADMwNMALAXC7sg0gAAAA";
+        assertEquals(hash, documentHelper.decodeSimilarDocHash(value));
+        hash = "00101010010010100100101010001010";
+        value = "$H4sIAAAAAAAAADMwMARDCELQQApMAgAi5-3LIAAAAA";
+        assertEquals(hash, documentHelper.decodeSimilarDocHash(value));
+        hash = "0001010100100101001001010100010100101010010010100100101011000100";
+        value = "$H4sIAAAAAAAAADMwMDAEQwhC0EAKxscqDZE3AABZOHx2QAAAAA";
+        assertEquals(hash, documentHelper.decodeSimilarDocHash(value));
+        hash =
+                "00100101010001010100100101001001010100010100010101010010010101010010101100010101001000010101001001010001000001010010101001001010";
+        value = "$H4sIAAAAAAAAADMwMDQAIhCE01ARdBkYD1kGxkDVjWESRBBJPVwKAHL5QrqAAAAA";
+        assertEquals(hash, documentHelper.decodeSimilarDocHash(value));
+
+        hash = "01010101010101010101010101010101";
+        value = "01010101010101010101010101010101";
+        assertEquals(hash, documentHelper.decodeSimilarDocHash(value));
+        hash = "";
+        value = "";
+        assertEquals(hash, documentHelper.decodeSimilarDocHash(value));
+
+        assertNull(documentHelper.decodeSimilarDocHash(null));
+    }
 }