fix #2675 add indexer.max.search.doc.size

This commit is contained in:
Shinsuke Sugaya 2022-08-16 22:33:17 +09:00
parent 5a97261ce6
commit 5fd09ec733
4 changed files with 94 additions and 11 deletions

View file

@ -919,6 +919,11 @@ public class SearchEngineClient implements Client {
return searchResult.build(searchRequestBuilder, execTime, OptionalEntity.ofNullable(searchResponse, () -> {}));
}
/**
 * Scroll-search variant that builds each result with the default entity creator.
 *
 * @param index the index name handed to the four-argument overload
 * @param condition the search condition handed to the four-argument overload
 * @param cursor the callback handed to the four-argument overload
 * @return whatever the four-argument {@code scrollSearch} returns
 */
public long scrollSearch(final String index, final SearchCondition<SearchRequestBuilder> condition,
        final BooleanFunction<Map<String, Object>> cursor) {
    final EntityCreator<Map<String, Object>, SearchResponse, SearchHit> defaultCreator = getDefaultEntityCreator();
    return scrollSearch(index, condition, defaultCreator, cursor);
}
public <T> long scrollSearch(final String index, final SearchCondition<SearchRequestBuilder> condition,
final EntityCreator<T, SearchResponse, SearchHit> creator, final BooleanFunction<T> cursor) {
long count = 0;
@ -1008,7 +1013,11 @@ public class SearchEngineClient implements Client {
}
public List<Map<String, Object>> getDocumentList(final String index, final SearchCondition<SearchRequestBuilder> condition) {
return getDocumentList(index, condition, (response, hit) -> {
return getDocumentList(index, condition, getDefaultEntityCreator());
}
protected EntityCreator<Map<String, Object>, SearchResponse, SearchHit> getDefaultEntityCreator() {
return (response, hit) -> {
final FessConfig fessConfig = ComponentUtil.getFessConfig();
final Map<String, Object> source = hit.getSourceAsMap();
if (source != null) {
@ -1024,7 +1033,7 @@ public class SearchEngineClient implements Client {
return docMap;
}
return null;
});
};
}
protected <T> List<T> getDocumentList(final String index, final SearchCondition<SearchRequestBuilder> condition,

View file

@ -189,16 +189,34 @@ public class IndexingHelper {
final SearchResponse countResponse = searchEngineClient.prepareSearch(fessConfig.getIndexDocumentUpdateIndex())
.setQuery(queryBuilder).setSize(0).execute().actionGet(fessConfig.getIndexSearchTimeout());
final long numFound = countResponse.getHits().getTotalHits().value;
// TODO max threshold
return searchEngineClient.getDocumentList(fessConfig.getIndexDocumentUpdateIndex(), requestBuilder -> {
requestBuilder.setQuery(queryBuilder).setSize((int) numFound);
if (fields != null) {
requestBuilder.setFetchSource(fields, null);
}
return true;
});
final long maxSearchDocSize = fessConfig.getIndexerMaxSearchDocSizeAsInteger().longValue();
final boolean exceeded = numFound > maxSearchDocSize;
if (exceeded) {
logger.warn("Max document size is exceeded({}>{}): {}", numFound, fessConfig.getIndexerMaxSearchDocSize(), queryBuilder);
}
// Threshold above which results are paged with scroll search instead of one search request.
final int windowSize = fessConfig.getIndexerMaxResultWindowSizeAsInteger().intValue();
if (numFound > windowSize) {
    // Hard cap on collected documents. Fits in int because maxSearchDocSize originates from an
    // Integer; clamped at 0 so a negative setting cannot produce a negative list capacity.
    final int limit = (int) Math.max(0L, Math.min(numFound, maxSearchDocSize));
    // Page size never exceeds the configured window: in this branch numFound is larger than
    // the window by construction, so requesting numFound hits per page would always exceed it.
    final int batchSize = Math.max(1, Math.min(windowSize, limit));
    // Pre-size for what will actually be kept, not for every matching document.
    final List<Map<String, Object>> entityList = new ArrayList<>(limit);
    searchEngineClient.scrollSearch(fessConfig.getIndexDocumentUpdateIndex(), requestBuilder -> {
        requestBuilder.setQuery(queryBuilder).setSize(batchSize);
        if (fields != null) {
            requestBuilder.setFetchSource(fields, null);
        }
        return true;
    }, entity -> {
        // Keep at most `limit` documents; the previous add-then-test with `<=` kept limit + 1.
        if (entityList.size() < limit) {
            entityList.add(entity);
        }
        // Report false once the cap is reached so the scroll is not continued needlessly
        // (assumes a false result ends the scroll, as the original add-then-test implied).
        return entityList.size() < limit;
    });
    return entityList;
}
// Small result set: numFound is within the window (and therefore within int range),
// so a single search request can return everything.
return searchEngineClient.getDocumentList(fessConfig.getIndexDocumentUpdateIndex(), requestBuilder -> {
    requestBuilder.setQuery(queryBuilder).setSize((int) numFound);
    if (fields != null) {
        requestBuilder.setFetchSource(fields, null);
    }
    return true;
});
}
public long deleteBySessionId(final String sessionId) {

View file

@ -486,6 +486,12 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
/** The key of the configuration. e.g. 1000 */
String INDEXER_LANGUAGE_DETECT_LENGTH = "indexer.language.detect.length";
/**
 * The key of the configuration. e.g. 10000 <br>
 * Result-count threshold: IndexingHelper fetches documents with scroll search when a query matches more than this many.
 */
String INDEXER_MAX_RESULT_WINDOW_SIZE = "indexer.max.result.window.size";
/**
 * The key of the configuration. e.g. 50000 <br>
 * Document-count limit: IndexingHelper logs a warning when a query matches more than this many and uses it to bound scroll collection.
 */
String INDEXER_MAX_SEARCH_DOC_SIZE = "indexer.max.search.doc.size";
/** The key of the configuration. e.g. default */
String INDEX_CODEC = "index.codec";
@ -3251,6 +3257,36 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
*/
Integer getIndexerLanguageDetectLengthAsInteger();
/**
* Get the value for the key 'indexer.max.result.window.size'. <br>
* The value is, e.g. 10000 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getIndexerMaxResultWindowSize();
/**
* Get the value for the key 'indexer.max.result.window.size' as {@link Integer}. <br>
* The value is, e.g. 10000 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
* @throws NumberFormatException When the property is not integer.
*/
Integer getIndexerMaxResultWindowSizeAsInteger();
/**
* Get the value for the key 'indexer.max.search.doc.size'. <br>
* The value is, e.g. 50000 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getIndexerMaxSearchDocSize();
/**
* Get the value for the key 'indexer.max.search.doc.size' as {@link Integer}. <br>
* The value is, e.g. 50000 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
* @throws NumberFormatException When the property is not integer.
*/
Integer getIndexerMaxSearchDocSizeAsInteger();
/**
* Get the value for the key 'index.codec'. <br>
* The value is, e.g. default <br>
@ -8129,6 +8165,22 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
return getAsInteger(FessConfig.INDEXER_LANGUAGE_DETECT_LENGTH);
}
/** Returns the string value stored under the key 'indexer.max.result.window.size'. */
public String getIndexerMaxResultWindowSize() {
return get(FessConfig.INDEXER_MAX_RESULT_WINDOW_SIZE);
}
/** Returns the value stored under the key 'indexer.max.result.window.size', obtained through getAsInteger. */
public Integer getIndexerMaxResultWindowSizeAsInteger() {
return getAsInteger(FessConfig.INDEXER_MAX_RESULT_WINDOW_SIZE);
}
/** Returns the string value stored under the key 'indexer.max.search.doc.size'. */
public String getIndexerMaxSearchDocSize() {
return get(FessConfig.INDEXER_MAX_SEARCH_DOC_SIZE);
}
/** Returns the value stored under the key 'indexer.max.search.doc.size', obtained through getAsInteger. */
public Integer getIndexerMaxSearchDocSizeAsInteger() {
return getAsInteger(FessConfig.INDEXER_MAX_SEARCH_DOC_SIZE);
}
/** Returns the string value stored under the key 'index.codec'. */
public String getIndexCodec() {
return get(FessConfig.INDEX_CODEC);
}
@ -10454,6 +10506,8 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
defaultMap.put(FessConfig.INDEXER_DATA_MAX_REDIRECT_COUNT, "10");
defaultMap.put(FessConfig.INDEXER_LANGUAGE_FIELDS, "content,important_content,title");
defaultMap.put(FessConfig.INDEXER_LANGUAGE_DETECT_LENGTH, "1000");
defaultMap.put(FessConfig.INDEXER_MAX_RESULT_WINDOW_SIZE, "10000");
defaultMap.put(FessConfig.INDEXER_MAX_SEARCH_DOC_SIZE, "50000");
defaultMap.put(FessConfig.INDEX_CODEC, "default");
defaultMap.put(FessConfig.INDEX_number_of_shards, "5");
defaultMap.put(FessConfig.INDEX_auto_expand_replicas, "0-1");

View file

@ -269,6 +269,8 @@ indexer.data.max.delete.cache.size=100
indexer.data.max.redirect.count=10
indexer.language.fields=content,important_content,title
indexer.language.detect.length=1000
# Result-count threshold above which the indexer's document lookup uses scroll search instead of a single search request.
indexer.max.result.window.size=10000
# Document-count limit for the indexer's document lookup; a warning is logged when a query matches more than this.
indexer.max.search.doc.size=50000
# index setting
index.codec=default