Преглед изворног кода

fix #2675 add indexer.max.search.doc.size

Shinsuke Sugaya пре 2 година
родитељ
комит
5fd09ec733

+ 11 - 2
src/main/java/org/codelibs/fess/es/client/SearchEngineClient.java

@@ -919,6 +919,11 @@ public class SearchEngineClient implements Client {
         return searchResult.build(searchRequestBuilder, execTime, OptionalEntity.ofNullable(searchResponse, () -> {}));
         return searchResult.build(searchRequestBuilder, execTime, OptionalEntity.ofNullable(searchResponse, () -> {}));
     }
     }
 
 
+    public long scrollSearch(final String index, final SearchCondition<SearchRequestBuilder> condition, // convenience overload: caller supplies no EntityCreator
+            final BooleanFunction<Map<String, Object>> cursor) { // cursor is handed each entity as a Map<String, Object>
+        return scrollSearch(index, condition, getDefaultEntityCreator(), cursor); // delegates to the generic overload, passing the default creator and returning its long result unchanged
+    }
+
     public <T> long scrollSearch(final String index, final SearchCondition<SearchRequestBuilder> condition,
     public <T> long scrollSearch(final String index, final SearchCondition<SearchRequestBuilder> condition,
             final EntityCreator<T, SearchResponse, SearchHit> creator, final BooleanFunction<T> cursor) {
             final EntityCreator<T, SearchResponse, SearchHit> creator, final BooleanFunction<T> cursor) {
         long count = 0;
         long count = 0;
@@ -1008,7 +1013,11 @@ public class SearchEngineClient implements Client {
     }
     }
 
 
     public List<Map<String, Object>> getDocumentList(final String index, final SearchCondition<SearchRequestBuilder> condition) {
     public List<Map<String, Object>> getDocumentList(final String index, final SearchCondition<SearchRequestBuilder> condition) {
-        return getDocumentList(index, condition, (response, hit) -> {
+        return getDocumentList(index, condition, getDefaultEntityCreator());
+    }
+
+    protected EntityCreator<Map<String, Object>, SearchResponse, SearchHit> getDefaultEntityCreator() {
+        return (response, hit) -> {
             final FessConfig fessConfig = ComponentUtil.getFessConfig();
             final FessConfig fessConfig = ComponentUtil.getFessConfig();
             final Map<String, Object> source = hit.getSourceAsMap();
             final Map<String, Object> source = hit.getSourceAsMap();
             if (source != null) {
             if (source != null) {
@@ -1024,7 +1033,7 @@ public class SearchEngineClient implements Client {
                 return docMap;
                 return docMap;
             }
             }
             return null;
             return null;
-        });
+        };
     }
     }
 
 
     protected <T> List<T> getDocumentList(final String index, final SearchCondition<SearchRequestBuilder> condition,
     protected <T> List<T> getDocumentList(final String index, final SearchCondition<SearchRequestBuilder> condition,

+ 27 - 9
src/main/java/org/codelibs/fess/helper/IndexingHelper.java

@@ -189,16 +189,34 @@ public class IndexingHelper {
         final SearchResponse countResponse = searchEngineClient.prepareSearch(fessConfig.getIndexDocumentUpdateIndex())
         final SearchResponse countResponse = searchEngineClient.prepareSearch(fessConfig.getIndexDocumentUpdateIndex())
                 .setQuery(queryBuilder).setSize(0).execute().actionGet(fessConfig.getIndexSearchTimeout());
                 .setQuery(queryBuilder).setSize(0).execute().actionGet(fessConfig.getIndexSearchTimeout());
         final long numFound = countResponse.getHits().getTotalHits().value;
         final long numFound = countResponse.getHits().getTotalHits().value;
-        // TODO max threshold
-
-        return searchEngineClient.getDocumentList(fessConfig.getIndexDocumentUpdateIndex(), requestBuilder -> {
-            requestBuilder.setQuery(queryBuilder).setSize((int) numFound);
-            if (fields != null) {
-                requestBuilder.setFetchSource(fields, null);
-            }
-            return true;
-        });
+        final long maxSearchDocSize = fessConfig.getIndexerMaxSearchDocSizeAsInteger().longValue();
+        final boolean exceeded = numFound > maxSearchDocSize;
+        if (exceeded) {
+            logger.warn("Max document size is exceeded({}>{}): {}", numFound, fessConfig.getIndexerMaxSearchDocSize(), queryBuilder);
+        }
 
 
+        if (numFound > fessConfig.getIndexerMaxResultWindowSizeAsInteger().longValue()) {
+            final List<Map<String, Object>> entityList = new ArrayList<>(Long.valueOf(numFound).intValue());
+            searchEngineClient.scrollSearch(fessConfig.getIndexDocumentUpdateIndex(), requestBuilder -> {
+                requestBuilder.setQuery(queryBuilder).setSize((int) numFound);
+                if (fields != null) {
+                    requestBuilder.setFetchSource(fields, null);
+                }
+                return true;
+            }, entity -> {
+                entityList.add(entity);
+                return entityList.size() <= (exceeded ? maxSearchDocSize : numFound);
+            });
+            return entityList;
+        } else {
+            return searchEngineClient.getDocumentList(fessConfig.getIndexDocumentUpdateIndex(), requestBuilder -> {
+                requestBuilder.setQuery(queryBuilder).setSize((int) numFound);
+                if (fields != null) {
+                    requestBuilder.setFetchSource(fields, null);
+                }
+                return true;
+            });
+        }
     }
     }
 
 
     public long deleteBySessionId(final String sessionId) {
     public long deleteBySessionId(final String sessionId) {

+ 54 - 0
src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java

@@ -486,6 +486,12 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
     /** The key of the configuration. e.g. 1000 */
     /** The key of the configuration. e.g. 1000 */
     String INDEXER_LANGUAGE_DETECT_LENGTH = "indexer.language.detect.length";
     String INDEXER_LANGUAGE_DETECT_LENGTH = "indexer.language.detect.length";
 
 
+    /** The key of the configuration. e.g. 10000 */
+    String INDEXER_MAX_RESULT_WINDOW_SIZE = "indexer.max.result.window.size";
+
+    /** The key of the configuration. e.g. 50000 */
+    String INDEXER_MAX_SEARCH_DOC_SIZE = "indexer.max.search.doc.size";
+
     /** The key of the configuration. e.g. default */
     /** The key of the configuration. e.g. default */
     String INDEX_CODEC = "index.codec";
     String INDEX_CODEC = "index.codec";
 
 
@@ -3251,6 +3257,36 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
      */
      */
     Integer getIndexerLanguageDetectLengthAsInteger();
     Integer getIndexerLanguageDetectLengthAsInteger();
 
 
+    /**
+     * Get the value for the key 'indexer.max.result.window.size'. <br>
+     * The value is, e.g. 10000 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getIndexerMaxResultWindowSize();
+
+    /**
+     * Get the value for the key 'indexer.max.result.window.size' as {@link Integer}. <br>
+     * The value is, e.g. 10000 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     * @throws NumberFormatException When the property is not integer.
+     */
+    Integer getIndexerMaxResultWindowSizeAsInteger();
+
+    /**
+     * Get the value for the key 'indexer.max.search.doc.size'. <br>
+     * The value is, e.g. 50000 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getIndexerMaxSearchDocSize();
+
+    /**
+     * Get the value for the key 'indexer.max.search.doc.size' as {@link Integer}. <br>
+     * The value is, e.g. 50000 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     * @throws NumberFormatException When the property is not integer.
+     */
+    Integer getIndexerMaxSearchDocSizeAsInteger();
+
     /**
     /**
      * Get the value for the key 'index.codec'. <br>
      * Get the value for the key 'index.codec'. <br>
      * The value is, e.g. default <br>
      * The value is, e.g. default <br>
@@ -8129,6 +8165,22 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             return getAsInteger(FessConfig.INDEXER_LANGUAGE_DETECT_LENGTH);
             return getAsInteger(FessConfig.INDEXER_LANGUAGE_DETECT_LENGTH);
         }
         }
 
 
+        public String getIndexerMaxResultWindowSize() { // String form of the 'indexer.max.result.window.size' property
+            return get(FessConfig.INDEXER_MAX_RESULT_WINDOW_SIZE); // the default registered for this key in defaultMap is "10000"
+        }
+
+        public Integer getIndexerMaxResultWindowSizeAsInteger() { // Integer form of the 'indexer.max.result.window.size' property
+            return getAsInteger(FessConfig.INDEXER_MAX_RESULT_WINDOW_SIZE); // per the interface Javadoc, a non-integer value results in NumberFormatException
+        }
+
+        public String getIndexerMaxSearchDocSize() { // String form of the 'indexer.max.search.doc.size' property
+            return get(FessConfig.INDEXER_MAX_SEARCH_DOC_SIZE); // the default registered for this key in defaultMap is "50000"
+        }
+
+        public Integer getIndexerMaxSearchDocSizeAsInteger() { // Integer form of the 'indexer.max.search.doc.size' property
+            return getAsInteger(FessConfig.INDEXER_MAX_SEARCH_DOC_SIZE); // per the interface Javadoc, a non-integer value results in NumberFormatException
+        }
+
         public String getIndexCodec() {
         public String getIndexCodec() {
             return get(FessConfig.INDEX_CODEC);
             return get(FessConfig.INDEX_CODEC);
         }
         }
@@ -10454,6 +10506,8 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             defaultMap.put(FessConfig.INDEXER_DATA_MAX_REDIRECT_COUNT, "10");
             defaultMap.put(FessConfig.INDEXER_DATA_MAX_REDIRECT_COUNT, "10");
             defaultMap.put(FessConfig.INDEXER_LANGUAGE_FIELDS, "content,important_content,title");
             defaultMap.put(FessConfig.INDEXER_LANGUAGE_FIELDS, "content,important_content,title");
             defaultMap.put(FessConfig.INDEXER_LANGUAGE_DETECT_LENGTH, "1000");
             defaultMap.put(FessConfig.INDEXER_LANGUAGE_DETECT_LENGTH, "1000");
+            defaultMap.put(FessConfig.INDEXER_MAX_RESULT_WINDOW_SIZE, "10000");
+            defaultMap.put(FessConfig.INDEXER_MAX_SEARCH_DOC_SIZE, "50000");
             defaultMap.put(FessConfig.INDEX_CODEC, "default");
             defaultMap.put(FessConfig.INDEX_CODEC, "default");
             defaultMap.put(FessConfig.INDEX_number_of_shards, "5");
             defaultMap.put(FessConfig.INDEX_number_of_shards, "5");
             defaultMap.put(FessConfig.INDEX_auto_expand_replicas, "0-1");
             defaultMap.put(FessConfig.INDEX_auto_expand_replicas, "0-1");

+ 2 - 0
src/main/resources/fess_config.properties

@@ -269,6 +269,8 @@ indexer.data.max.delete.cache.size=100
 indexer.data.max.redirect.count=10
 indexer.data.max.redirect.count=10
 indexer.language.fields=content,important_content,title
 indexer.language.fields=content,important_content,title
 indexer.language.detect.length=1000
 indexer.language.detect.length=1000
+indexer.max.result.window.size=10000
+indexer.max.search.doc.size=50000
 
 
 # index setting
 # index setting
 index.codec=default
 index.codec=default