diff --git a/src/main/java/org/codelibs/fess/es/client/SearchEngineClient.java b/src/main/java/org/codelibs/fess/es/client/SearchEngineClient.java
index 06524b415..34c5756bb 100644
--- a/src/main/java/org/codelibs/fess/es/client/SearchEngineClient.java
+++ b/src/main/java/org/codelibs/fess/es/client/SearchEngineClient.java
@@ -919,6 +919,20 @@ public class SearchEngineClient implements Client {
         return searchResult.build(searchRequestBuilder, execTime, OptionalEntity.ofNullable(searchResponse, () -> {}));
     }
 
+    /**
+     * Scroll-searches {@code index}, converting every hit with {@link #getDefaultEntityCreator()}
+     * before handing it to {@code cursor}.
+     *
+     * @param index the index to search
+     * @param condition callback that configures the search request
+     * @param cursor callback invoked with each converted document
+     * @return the value returned by the delegated {@code scrollSearch} overload
+     */
+    public long scrollSearch(final String index, final SearchCondition<SearchRequestBuilder> condition,
+            final BooleanFunction<Map<String, Object>> cursor) {
+        return scrollSearch(index, condition, getDefaultEntityCreator(), cursor);
+    }
+
     public <T> long scrollSearch(final String index, final SearchCondition<SearchRequestBuilder> condition,
             final EntityCreator<T, SearchResponse, SearchHit> creator, final BooleanFunction<T> cursor) {
         long count = 0;
@@ -1008,7 +1022,16 @@ public class SearchEngineClient implements Client {
     }
 
     public List<Map<String, Object>> getDocumentList(final String index, final SearchCondition<SearchRequestBuilder> condition) {
-        return getDocumentList(index, condition, (response, hit) -> {
+        return getDocumentList(index, condition, getDefaultEntityCreator());
+    }
+
+    /**
+     * Returns the entity creator shared by the map-based search methods of this client.
+     *
+     * @return the default entity creator that turns a search hit into a document map
+     */
+    protected EntityCreator<Map<String, Object>, SearchResponse, SearchHit> getDefaultEntityCreator() {
+        return (response, hit) -> {
             final FessConfig fessConfig = ComponentUtil.getFessConfig();
             final Map<String, Object> source = hit.getSourceAsMap();
             if (source != null) {
@@ -1024,7 +1047,7 @@ public class SearchEngineClient implements Client {
                 return docMap;
             }
             return null;
-        });
+        };
     }
 
     protected <T> List<T> getDocumentList(final String index, final SearchCondition<SearchRequestBuilder> condition,
diff --git a/src/main/java/org/codelibs/fess/helper/IndexingHelper.java b/src/main/java/org/codelibs/fess/helper/IndexingHelper.java
index 3c9bddde4..5ecd8d5e3 100644
--- a/src/main/java/org/codelibs/fess/helper/IndexingHelper.java
+++ b/src/main/java/org/codelibs/fess/helper/IndexingHelper.java
@@ -189,16 +189,40 @@ public class IndexingHelper {
         final SearchResponse countResponse = searchEngineClient.prepareSearch(fessConfig.getIndexDocumentUpdateIndex())
                 .setQuery(queryBuilder).setSize(0).execute().actionGet(fessConfig.getIndexSearchTimeout());
         final long numFound = countResponse.getHits().getTotalHits().value;
-        // TODO max threshold
-
-        return searchEngineClient.getDocumentList(fessConfig.getIndexDocumentUpdateIndex(), requestBuilder -> {
-            requestBuilder.setQuery(queryBuilder).setSize((int) numFound);
-            if (fields != null) {
-                requestBuilder.setFetchSource(fields, null);
-            }
-            return true;
-        });
+        final long maxSearchDocSize = fessConfig.getIndexerMaxSearchDocSizeAsInteger().longValue();
+        final boolean exceeded = numFound > maxSearchDocSize;
+        if (exceeded) {
+            logger.warn("Max document size is exceeded({}>{}): {}", numFound, fessConfig.getIndexerMaxSearchDocSize(), queryBuilder);
+        }
+        // Cap applied on both paths; it fits in an int because maxSearchDocSize comes from an Integer.
+        final int limit = (int) Math.min(numFound, maxSearchDocSize);
+        final int maxResultWindowSize = fessConfig.getIndexerMaxResultWindowSizeAsInteger();
+        if (limit > maxResultWindowSize) {
+            final List<Map<String, Object>> entityList = new ArrayList<>(limit);
+            searchEngineClient.scrollSearch(fessConfig.getIndexDocumentUpdateIndex(), requestBuilder -> {
+                // Page through the hits one window at a time: a scroll batch larger than the result window
+                // is rejected (assumes indexer.max.result.window.size mirrors the index's max_result_window).
+                requestBuilder.setQuery(queryBuilder).setSize(maxResultWindowSize);
+                if (fields != null) {
+                    requestBuilder.setFetchSource(fields, null);
+                }
+                return true;
+            }, entity -> {
+                entityList.add(entity);
+                // Checked after the add, so "<" ends at exactly limit entries (assumes false stops the scroll).
+                return entityList.size() < limit;
+            });
+            return entityList;
+        } else {
+            return searchEngineClient.getDocumentList(fessConfig.getIndexDocumentUpdateIndex(), requestBuilder -> {
+                requestBuilder.setQuery(queryBuilder).setSize(limit);
+                if (fields != null) {
+                    requestBuilder.setFetchSource(fields, null);
+                }
+                return true;
+            });
+        }
     }
 
     public long deleteBySessionId(final String sessionId) {
diff --git a/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java b/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java
index a0665da3f..405eaad5b 100644
--- a/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java
+++ b/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java
@@ -486,6 +486,12 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
     /** The key of the configuration. e.g. 1000 */
     String INDEXER_LANGUAGE_DETECT_LENGTH = "indexer.language.detect.length";
 
+    /** The key of the configuration. e.g. 10000 */
+    String INDEXER_MAX_RESULT_WINDOW_SIZE = "indexer.max.result.window.size";
+
+    /** The key of the configuration. e.g. 50000 */
+    String INDEXER_MAX_SEARCH_DOC_SIZE = "indexer.max.search.doc.size";
+
     /** The key of the configuration. e.g. default */
     String INDEX_CODEC = "index.codec";
 
@@ -3251,6 +3257,36 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
      */
     Integer getIndexerLanguageDetectLengthAsInteger();
 
+    /**
+     * Get the value for the key 'indexer.max.result.window.size'. <br>
+     * The value is, e.g. 10000 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getIndexerMaxResultWindowSize();
+
+    /**
+     * Get the value for the key 'indexer.max.result.window.size' as {@link Integer}. <br>
+     * The value is, e.g. 10000 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     * @throws NumberFormatException When the property is not integer.
+     */
+    Integer getIndexerMaxResultWindowSizeAsInteger();
+
+    /**
+     * Get the value for the key 'indexer.max.search.doc.size'. <br>
+     * The value is, e.g. 50000 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getIndexerMaxSearchDocSize();
+
+    /**
+     * Get the value for the key 'indexer.max.search.doc.size' as {@link Integer}. <br>
+     * The value is, e.g. 50000 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     * @throws NumberFormatException When the property is not integer.
+     */
+    Integer getIndexerMaxSearchDocSizeAsInteger();
+
     /**
      * Get the value for the key 'index.codec'. <br>
      * The value is, e.g. default <br>
@@ -8129,6 +8165,22 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             return getAsInteger(FessConfig.INDEXER_LANGUAGE_DETECT_LENGTH);
         }
 
+        public String getIndexerMaxResultWindowSize() {
+            return get(FessConfig.INDEXER_MAX_RESULT_WINDOW_SIZE);
+        }
+
+        public Integer getIndexerMaxResultWindowSizeAsInteger() {
+            return getAsInteger(FessConfig.INDEXER_MAX_RESULT_WINDOW_SIZE);
+        }
+
+        public String getIndexerMaxSearchDocSize() {
+            return get(FessConfig.INDEXER_MAX_SEARCH_DOC_SIZE);
+        }
+
+        public Integer getIndexerMaxSearchDocSizeAsInteger() {
+            return getAsInteger(FessConfig.INDEXER_MAX_SEARCH_DOC_SIZE);
+        }
+
         public String getIndexCodec() {
             return get(FessConfig.INDEX_CODEC);
         }
@@ -10454,6 +10506,8 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             defaultMap.put(FessConfig.INDEXER_DATA_MAX_REDIRECT_COUNT, "10");
             defaultMap.put(FessConfig.INDEXER_LANGUAGE_FIELDS, "content,important_content,title");
             defaultMap.put(FessConfig.INDEXER_LANGUAGE_DETECT_LENGTH, "1000");
+            defaultMap.put(FessConfig.INDEXER_MAX_RESULT_WINDOW_SIZE, "10000");
+            defaultMap.put(FessConfig.INDEXER_MAX_SEARCH_DOC_SIZE, "50000");
             defaultMap.put(FessConfig.INDEX_CODEC, "default");
             defaultMap.put(FessConfig.INDEX_number_of_shards, "5");
             defaultMap.put(FessConfig.INDEX_auto_expand_replicas, "0-1");
diff --git a/src/main/resources/fess_config.properties b/src/main/resources/fess_config.properties
index b550acac9..8a94afad1 100644
--- a/src/main/resources/fess_config.properties
+++ b/src/main/resources/fess_config.properties
@@ -269,6 +269,8 @@ indexer.data.max.delete.cache.size=100
 indexer.data.max.redirect.count=10
 indexer.language.fields=content,important_content,title
 indexer.language.detect.length=1000
+indexer.max.result.window.size=10000
+indexer.max.search.doc.size=50000
 
 # index setting
 index.codec=default