Browse Source

fix #986 compress sdh

Shinsuke Sugaya 8 years ago
parent
commit
df3d66c57f

+ 1 - 1
src/main/java/org/codelibs/fess/app/service/SearchService.java

@@ -104,7 +104,7 @@ public class SearchService {
                             return SearchConditionBuilder.builder(searchRequestBuilder)
                                     .query(StringUtil.isBlank(sortField) ? query : query + " sort:" + sortField).offset(pageStart)
                                     .size(pageSize).facetInfo(params.getFacetInfo()).geoInfo(params.getGeoInfo())
-                                    .similarHash(params.getSimilarDocHash()).responseFields(queryHelper.getResponseFields())
+                                    .similarDocHash(params.getSimilarDocHash()).responseFields(queryHelper.getResponseFields())
                                     .searchRequestType(params.getType()).build();
                         }, (searchRequestBuilder, execTime, searchResponse) -> {
                             final QueryResponseList queryResponseList = ComponentUtil.getQueryResponseList();

+ 22 - 18
src/main/java/org/codelibs/fess/es/client/FessEsClient.java

@@ -56,6 +56,7 @@ import org.codelibs.fess.exception.FessSystemException;
 import org.codelibs.fess.exception.InvalidQueryException;
 import org.codelibs.fess.exception.ResultOffsetExceededException;
 import org.codelibs.fess.exception.SearchQueryException;
+import org.codelibs.fess.helper.DocumentHelper;
 import org.codelibs.fess.helper.QueryHelper;
 import org.codelibs.fess.mylasta.direction.FessConfig;
 import org.codelibs.fess.util.ComponentUtil;
@@ -803,7 +804,7 @@ public class FessEsClient implements Client {
         private int size = Constants.DEFAULT_PAGE_SIZE;
         private GeoInfo geoInfo;
         private FacetInfo facetInfo;
-        private String similarHash;
+        private String similarDocHash;
         private SearchRequestType searchRequestType = SearchRequestType.SEARCH;
 
         public static SearchConditionBuilder builder(final SearchRequestBuilder searchRequestBuilder) {
@@ -844,9 +845,9 @@ public class FessEsClient implements Client {
             return this;
         }
 
-        public SearchConditionBuilder similarHash(final String similarHash) {
-            if (StringUtil.isNotBlank(similarHash)) {
-                this.similarHash = similarHash;
+        public SearchConditionBuilder similarDocHash(final String similarDocHash) {
+            if (StringUtil.isNotBlank(similarDocHash)) {
+                this.similarDocHash = similarDocHash;
             }
             return this;
         }
@@ -868,21 +869,24 @@ public class FessEsClient implements Client {
                 throw new ResultOffsetExceededException("The number of result size is exceeded.");
             }
 
-            final QueryContext queryContext = queryHelper.build(searchRequestType, query, context -> {
-                if (SearchRequestType.ADMIN_SEARCH.equals(searchRequestType)) {
-                    context.skipRoleQuery();
-                } else if (similarHash != null) {
-                    context.addQuery(boolQuery -> {
-                        boolQuery.filter(QueryBuilders.termQuery(fessConfig.getIndexFieldContentMinhashBits(), similarHash));
-                    });
-                }
+            final QueryContext queryContext =
+                    queryHelper.build(searchRequestType, query, context -> {
+                        if (SearchRequestType.ADMIN_SEARCH.equals(searchRequestType)) {
+                            context.skipRoleQuery();
+                        } else if (similarDocHash != null) {
+                            final DocumentHelper documentHelper = ComponentUtil.getDocumentHelper();
+                            context.addQuery(boolQuery -> {
+                                boolQuery.filter(QueryBuilders.termQuery(fessConfig.getIndexFieldContentMinhashBits(),
+                                        documentHelper.decodeSimilarDocHash(similarDocHash)));
+                            });
+                        }
 
-                if (geoInfo != null && geoInfo.toQueryBuilder() != null) {
-                    context.addQuery(boolQuery -> {
-                        boolQuery.filter(geoInfo.toQueryBuilder());
+                        if (geoInfo != null && geoInfo.toQueryBuilder() != null) {
+                            context.addQuery(boolQuery -> {
+                                boolQuery.filter(geoInfo.toQueryBuilder());
+                            });
+                        }
                     });
-                }
-            });
 
             searchRequestBuilder.setFrom(offset).setSize(size);
 
@@ -939,7 +943,7 @@ public class FessEsClient implements Client {
                         }));
             }
 
-            if (!SearchRequestType.ADMIN_SEARCH.equals(searchRequestType) && fessConfig.isResultCollapsed() && similarHash == null) {
+            if (!SearchRequestType.ADMIN_SEARCH.equals(searchRequestType) && fessConfig.isResultCollapsed() && similarDocHash == null) {
                 searchRequestBuilder.setCollapse(getCollapseBuilder(fessConfig));
             }
 

+ 43 - 0
src/main/java/org/codelibs/fess/helper/DocumentHelper.java

@@ -15,16 +15,26 @@
  */
 package org.codelibs.fess.helper;
 
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.io.Reader;
 import java.io.StringReader;
+import java.io.UnsupportedEncodingException;
+import java.util.Base64;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
+import java.util.zip.GZIPInputStream;
+import java.util.zip.GZIPOutputStream;
 
 import org.apache.commons.lang3.StringUtils;
+import org.codelibs.core.io.ReaderUtil;
 import org.codelibs.core.io.SerializeUtil;
 import org.codelibs.core.lang.StringUtil;
+import org.codelibs.fess.Constants;
 import org.codelibs.fess.crawler.builder.RequestDataBuilder;
 import org.codelibs.fess.crawler.client.CrawlerClient;
 import org.codelibs.fess.crawler.client.CrawlerClientFactory;
@@ -44,8 +54,14 @@ import org.codelibs.fess.es.config.exentity.CrawlingConfig;
 import org.codelibs.fess.mylasta.direction.FessConfig;
 import org.codelibs.fess.util.ComponentUtil;
 import org.lastaflute.di.core.SingletonLaContainer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class DocumentHelper {
+    private static final Logger logger = LoggerFactory.getLogger(DocumentHelper.class);
+
+    private static final String SIMILAR_DOC_HASH_PREFIX = "$";
+
     public String getContent(final ResponseData responseData, final String content, final Map<String, Object> dataMap) {
         if (content == null) {
             return StringUtil.EMPTY; // empty
@@ -157,4 +173,31 @@ public class DocumentHelper {
         }
     }
 
+    /**
+     * Restores a similar-document hash that was produced by {@link #encodeSimilarDocHash(String)}.
+     *
+     * <p>A value is treated as encoded only when it starts with {@code SIMILAR_DOC_HASH_PREFIX} and has at
+     * least one character after it; the remainder is URL-safe Base64 decoded and gunzipped as UTF-8 text.
+     * Any other value ({@code null}, empty, or without the prefix) is returned unchanged.</p>
+     *
+     * <p>If the payload is not valid Base64 or cannot be opened as gzip data, a warning is logged and the
+     * given value is returned as-is instead of propagating the failure.</p>
+     *
+     * @param hash the possibly encoded hash; may be {@code null}
+     * @return the decoded hash, or {@code hash} itself when it is not encoded or cannot be decoded
+     */
+    public String decodeSimilarDocHash(String hash) {
+        if (hash != null && hash.startsWith(SIMILAR_DOC_HASH_PREFIX) && hash.length() > SIMILAR_DOC_HASH_PREFIX.length()) {
+            try {
+                // NOTE(review): the value appears to arrive via the "sdh" link parameter, so it may be malformed.
+                // Base64 decoding is kept inside the try because it throws IllegalArgumentException on bad input.
+                final byte[] decode = Base64.getUrlDecoder().decode(hash.substring(SIMILAR_DOC_HASH_PREFIX.length()));
+                try (BufferedReader reader =
+                        new BufferedReader(new InputStreamReader(new GZIPInputStream(new ByteArrayInputStream(decode)), Constants.UTF_8))) {
+                    return ReaderUtil.readText(reader);
+                }
+            } catch (IOException | IllegalArgumentException e) {
+                logger.warn("Failed to decode " + hash, e);
+            }
+        }
+        return hash;
+    }
+
+    /**
+     * Produces the compact form of a similar-document hash: the UTF-8 bytes of the hash are gzipped,
+     * encoded with URL-safe Base64 without padding, and prefixed with {@code SIMILAR_DOC_HASH_PREFIX}.
+     *
+     * <p>{@code null} and values that already start with the prefix are returned unchanged. If compression
+     * fails with an {@link IOException}, a warning is logged and the given value is returned as-is.</p>
+     *
+     * @param hash the plain hash; may be {@code null}
+     * @return the prefixed, encoded hash, or {@code hash} itself when no encoding was applied
+     */
+    public String encodeSimilarDocHash(String hash) {
+        if (hash == null || hash.startsWith(SIMILAR_DOC_HASH_PREFIX)) {
+            return hash;
+        }
+        try (ByteArrayOutputStream compressed = new ByteArrayOutputStream()) {
+            // The gzip stream must be closed before the buffer is read so the trailer is flushed.
+            try (GZIPOutputStream gzip = new GZIPOutputStream(compressed)) {
+                gzip.write(hash.getBytes(Constants.UTF_8));
+            }
+            final String payload = Base64.getUrlEncoder().withoutPadding().encodeToString(compressed.toByteArray());
+            return SIMILAR_DOC_HASH_PREFIX + payload;
+        } catch (IOException e) {
+            logger.warn("Failed to encode " + hash, e);
+            return hash;
+        }
+    }
+
 }

+ 7 - 0
src/main/java/org/codelibs/fess/taglib/FessFunctions.java

@@ -287,4 +287,11 @@ public class FessFunctions {
         }
         return LaResponseUtil.getResponse().encodeURL(sb.toString());
     }
+
+    /**
+     * EL function that encodes a similar-document hash for use in a link.
+     *
+     * @param input the hash to encode
+     * @return {@code input} unchanged when {@code StringUtil.isBlank(input)} holds; otherwise the result of
+     *         {@code encodeSimilarDocHash(input)} on the helper returned by {@code ComponentUtil.getDocumentHelper()}
+     */
+    public static String sdh(final String input) {
+        return StringUtil.isBlank(input) ? input : ComponentUtil.getDocumentHelper().encodeSimilarDocHash(input);
+    }
 }

+ 8 - 0
src/main/webapp/WEB-INF/fe.tld

@@ -210,4 +210,12 @@
     <function-signature>java.lang.String url(java.lang.String)</function-signature>
     <example>&lt;a href="${fe:url(param:info)}" ...</example>
   </function>
+
+  <function>
+    <description>Encode Similar Document Hash.</description>
+    <name>sdh</name>
+    <function-class>org.codelibs.fess.taglib.FessFunctions</function-class>
+    <function-signature>java.lang.String sdh(java.lang.String)</function-signature>
+    <example>${fe:sdh(doc.similar_docs_hash)}</example>
+  </function>
 </taglib>

+ 3 - 3
src/main/webapp/WEB-INF/view/searchResults.jsp

@@ -58,7 +58,7 @@
 									</c:if>
 									<c:if test="${doc.similar_docs_count!=null&&doc.similar_docs_count>1}">
 										<small class="hidden-md-down"> <la:link
-												href="/search?q=${f:u(q)}&ex_q=${f:u(queryEntry.value)}&sdh=${f:u(doc.similar_docs_hash)}${fe:facetQuery()}${fe:geoQuery()}">
+												href="/search?q=${f:u(q)}&ex_q=${f:u(queryEntry.value)}&sdh=${f:u(fe:sdh(doc.similar_docs_hash))}${fe:facetQuery()}${fe:geoQuery()}">
 												<la:message key="labels.search_result_similar"
 															arg0="${fe:formatNumber(doc.similar_docs_count-1)}" />
 											</la:link>
@@ -138,7 +138,7 @@
 							<c:if
 								test="${countEntry.value != 0 && fe:labelexists(countEntry.key)}">
 								<li class="list-group-item"><la:link
-										href="/search?q=${f:u(q)}&ex_q=label%3a${f:u(countEntry.key)}&sdh=${f:u(sh)}${fe:pagingQuery(null)}${fe:facetQuery()}${fe:geoQuery()}">
+										href="/search?q=${f:u(q)}&ex_q=label%3a${f:u(countEntry.key)}&sdh=${f:u(fe:sdh(sh))}${fe:pagingQuery(null)}${fe:facetQuery()}${fe:geoQuery()}">
 											${f:h(fe:label(countEntry.key))} 
 											<span class="label label-default label-pill pull-right">${f:h(countEntry.value)}</span>
 									</la:link></li>
@@ -155,7 +155,7 @@
 					<c:forEach var="queryEntry" items="${facetQueryView.queryMap}">
 						<c:if test="${facetResponse.queryCountMap[queryEntry.value] != 0}">
 							<li class="list-group-item p-l-md"><la:link
-									href="/search?q=${f:u(q)}&ex_q=${f:u(queryEntry.value)}&sdh=${f:u(sh)}${fe:pagingQuery(queryEntry.value)}${fe:facetQuery()}${fe:geoQuery()}">
+									href="/search?q=${f:u(q)}&ex_q=${f:u(queryEntry.value)}&sdh=${f:u(fe:sdh(sdh))}${fe:pagingQuery(queryEntry.value)}${fe:facetQuery()}${fe:geoQuery()}">
 									<la:message key="${queryEntry.key}" />
 									<span class="label label-default label-pill pull-right">${f:h(facetResponse.queryCountMap[queryEntry.value])}</span>
 								</la:link></li>

+ 54 - 0
src/test/java/org/codelibs/fess/helper/DocumentHelperTest.java

@@ -120,4 +120,58 @@ public class DocumentHelperTest extends UnitFessTestCase {
         assertEquals("1234567...", documentHelper.getDigest(responseData, " 1234567890  1234567890  1234567890 ", dataMap, 10));
         assertEquals("1234567...", documentHelper.getDigest(responseData, "12345678901234567890", dataMap, 10));
     }
+
+    /**
+     * Checks {@code encodeSimilarDocHash} against fixed input/output pairs, plus the {@code null} case.
+     */
+    public void test_encodeSimilarDocHash() {
+        final DocumentHelper helper = new DocumentHelper();
+
+        // Each row is { plain hash, expected encoded value }.
+        // NOTE(review): the expected values pin the exact gzip byte stream; presumably stable for the
+        // JDK in use -- confirm when upgrading the runtime.
+        final String[][] cases = {
+                { "01010101010101010101010101010101", "$H4sIAAAAAAAAADMwNMALAXC7sg0gAAAA" },
+                { "00101010010010100100101010001010", "$H4sIAAAAAAAAADMwMARDCELQQApMAgAi5-3LIAAAAA" },
+                { "0001010100100101001001010100010100101010010010100100101011000100",
+                        "$H4sIAAAAAAAAADMwMDAEQwhC0EAKxscqDZE3AABZOHx2QAAAAA" },
+                {
+                        "00100101010001010100100101001001010100010100010101010010010101010010101100010101001000010101001001010001000001010010101001001010",
+                        "$H4sIAAAAAAAAADMwMDQAIhCE01ARdBkYD1kGxkDVjWESRBBJPVwKAHL5QrqAAAAA" },
+                // An empty hash is still compressed and prefixed.
+                { "", "$H4sIAAAAAAAAAAMAAAAAAAAAAAA" },
+                // A value that already carries the prefix is returned untouched.
+                { "$H4sIAAAAAAAAAAMAAAAAAAAAAAA", "$H4sIAAAAAAAAAAMAAAAAAAAAAAA" } };
+
+        for (final String[] pair : cases) {
+            assertEquals(pair[1], helper.encodeSimilarDocHash(pair[0]));
+        }
+
+        assertNull(helper.encodeSimilarDocHash(null));
+    }
+
+    /**
+     * Checks {@code decodeSimilarDocHash} against fixed input/output pairs, plus the {@code null} case.
+     */
+    public void test_decodeSimilarDocHash() {
+        final DocumentHelper helper = new DocumentHelper();
+
+        // Each row is { expected plain hash, value passed to the decoder }.
+        final String[][] cases = {
+                { "01010101010101010101010101010101", "$H4sIAAAAAAAAADMwNMALAXC7sg0gAAAA" },
+                { "00101010010010100100101010001010", "$H4sIAAAAAAAAADMwMARDCELQQApMAgAi5-3LIAAAAA" },
+                { "0001010100100101001001010100010100101010010010100100101011000100",
+                        "$H4sIAAAAAAAAADMwMDAEQwhC0EAKxscqDZE3AABZOHx2QAAAAA" },
+                {
+                        "00100101010001010100100101001001010100010100010101010010010101010010101100010101001000010101001001010001000001010010101001001010",
+                        "$H4sIAAAAAAAAADMwMDQAIhCE01ARdBkYD1kGxkDVjWESRBBJPVwKAHL5QrqAAAAA" },
+                // Values without the prefix pass through unchanged.
+                { "01010101010101010101010101010101", "01010101010101010101010101010101" },
+                { "", "" } };
+
+        for (final String[] pair : cases) {
+            assertEquals(pair[0], helper.decodeSimilarDocHash(pair[1]));
+        }
+
+        assertNull(helper.decodeSimilarDocHash(null));
+    }
 }