fix #2675 add indexer.max.search.doc.size
This commit is contained in:
parent
5a97261ce6
commit
5fd09ec733
4 changed files with 94 additions and 11 deletions
|
@ -919,6 +919,11 @@ public class SearchEngineClient implements Client {
|
|||
return searchResult.build(searchRequestBuilder, execTime, OptionalEntity.ofNullable(searchResponse, () -> {}));
|
||||
}
|
||||
|
||||
/**
 * Convenience overload of the generic {@code scrollSearch} that uses the creator
 * returned by {@code getDefaultEntityCreator()}, so each entity is handed to the
 * cursor as a {@code Map<String, Object>}.
 *
 * @param index the index name, passed through unchanged to the generic overload
 * @param condition callback that receives the {@code SearchRequestBuilder} for the search
 * @param cursor callback invoked with each entity; NOTE(review): the meaning of its
 *        boolean result is defined by the generic overload, whose body is not shown here
 * @return whatever the generic overload returns (presumably the number of processed hits - confirm)
 */
public long scrollSearch(final String index, final SearchCondition<SearchRequestBuilder> condition,
|
||||
final BooleanFunction<Map<String, Object>> cursor) {
|
||||
// Delegate to the generic overload with the default map-based entity creator.
return scrollSearch(index, condition, getDefaultEntityCreator(), cursor);
|
||||
}
|
||||
|
||||
public <T> long scrollSearch(final String index, final SearchCondition<SearchRequestBuilder> condition,
|
||||
final EntityCreator<T, SearchResponse, SearchHit> creator, final BooleanFunction<T> cursor) {
|
||||
long count = 0;
|
||||
|
@ -1008,7 +1013,11 @@ public class SearchEngineClient implements Client {
|
|||
}
|
||||
|
||||
public List<Map<String, Object>> getDocumentList(final String index, final SearchCondition<SearchRequestBuilder> condition) {
|
||||
return getDocumentList(index, condition, (response, hit) -> {
|
||||
return getDocumentList(index, condition, getDefaultEntityCreator());
|
||||
}
|
||||
|
||||
protected EntityCreator<Map<String, Object>, SearchResponse, SearchHit> getDefaultEntityCreator() {
|
||||
return (response, hit) -> {
|
||||
final FessConfig fessConfig = ComponentUtil.getFessConfig();
|
||||
final Map<String, Object> source = hit.getSourceAsMap();
|
||||
if (source != null) {
|
||||
|
@ -1024,7 +1033,7 @@ public class SearchEngineClient implements Client {
|
|||
return docMap;
|
||||
}
|
||||
return null;
|
||||
});
|
||||
};
|
||||
}
|
||||
|
||||
protected <T> List<T> getDocumentList(final String index, final SearchCondition<SearchRequestBuilder> condition,
|
||||
|
|
|
@ -189,16 +189,34 @@ public class IndexingHelper {
|
|||
final SearchResponse countResponse = searchEngineClient.prepareSearch(fessConfig.getIndexDocumentUpdateIndex())
|
||||
.setQuery(queryBuilder).setSize(0).execute().actionGet(fessConfig.getIndexSearchTimeout());
|
||||
final long numFound = countResponse.getHits().getTotalHits().value;
|
||||
// TODO max threshold
|
||||
|
||||
return searchEngineClient.getDocumentList(fessConfig.getIndexDocumentUpdateIndex(), requestBuilder -> {
|
||||
requestBuilder.setQuery(queryBuilder).setSize((int) numFound);
|
||||
if (fields != null) {
|
||||
requestBuilder.setFetchSource(fields, null);
|
||||
}
|
||||
return true;
|
||||
});
|
||||
final long maxSearchDocSize = fessConfig.getIndexerMaxSearchDocSizeAsInteger().longValue();
|
||||
final boolean exceeded = numFound > maxSearchDocSize;
|
||||
if (exceeded) {
|
||||
logger.warn("Max document size is exceeded({}>{}): {}", numFound, fessConfig.getIndexerMaxSearchDocSize(), queryBuilder);
|
||||
}
|
||||
|
||||
if (numFound > fessConfig.getIndexerMaxResultWindowSizeAsInteger().longValue()) {
|
||||
final List<Map<String, Object>> entityList = new ArrayList<>(Long.valueOf(numFound).intValue());
|
||||
searchEngineClient.scrollSearch(fessConfig.getIndexDocumentUpdateIndex(), requestBuilder -> {
|
||||
requestBuilder.setQuery(queryBuilder).setSize((int) numFound);
|
||||
if (fields != null) {
|
||||
requestBuilder.setFetchSource(fields, null);
|
||||
}
|
||||
return true;
|
||||
}, entity -> {
|
||||
entityList.add(entity);
|
||||
return entityList.size() <= (exceeded ? maxSearchDocSize : numFound);
|
||||
});
|
||||
return entityList;
|
||||
} else {
|
||||
return searchEngineClient.getDocumentList(fessConfig.getIndexDocumentUpdateIndex(), requestBuilder -> {
|
||||
requestBuilder.setQuery(queryBuilder).setSize((int) numFound);
|
||||
if (fields != null) {
|
||||
requestBuilder.setFetchSource(fields, null);
|
||||
}
|
||||
return true;
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
public long deleteBySessionId(final String sessionId) {
|
||||
|
|
|
@ -486,6 +486,12 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
/** The key of the configuration. e.g. 1000 */
|
||||
String INDEXER_LANGUAGE_DETECT_LENGTH = "indexer.language.detect.length";
|
||||
|
||||
/** The key of the configuration. e.g. 10000 */
|
||||
String INDEXER_MAX_RESULT_WINDOW_SIZE = "indexer.max.result.window.size";
|
||||
|
||||
/** The key of the configuration. e.g. 50000 */
|
||||
String INDEXER_MAX_SEARCH_DOC_SIZE = "indexer.max.search.doc.size";
|
||||
|
||||
/** The key of the configuration. e.g. default */
|
||||
String INDEX_CODEC = "index.codec";
|
||||
|
||||
|
@ -3251,6 +3257,36 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
*/
|
||||
Integer getIndexerLanguageDetectLengthAsInteger();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'indexer.max.result.window.size'. <br>
|
||||
* The value is, e.g. 10000 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getIndexerMaxResultWindowSize();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'indexer.max.result.window.size' as {@link Integer}. <br>
|
||||
* The value is, e.g. 10000 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not integer.
|
||||
*/
|
||||
Integer getIndexerMaxResultWindowSizeAsInteger();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'indexer.max.search.doc.size'. <br>
|
||||
* The value is, e.g. 50000 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getIndexerMaxSearchDocSize();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'indexer.max.search.doc.size' as {@link Integer}. <br>
|
||||
* The value is, e.g. 50000 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not integer.
|
||||
*/
|
||||
Integer getIndexerMaxSearchDocSizeAsInteger();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'index.codec'. <br>
|
||||
* The value is, e.g. default <br>
|
||||
|
@ -8129,6 +8165,22 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
return getAsInteger(FessConfig.INDEXER_LANGUAGE_DETECT_LENGTH);
|
||||
}
|
||||
|
||||
/**
 * Looks up the property stored under {@code FessConfig.INDEXER_MAX_RESULT_WINDOW_SIZE}
 * (key 'indexer.max.result.window.size') and returns it as a string.
 *
 * @return the value obtained from {@code get(...)} for that key
 */
public String getIndexerMaxResultWindowSize() {
|
||||
return get(FessConfig.INDEXER_MAX_RESULT_WINDOW_SIZE);
|
||||
}
|
||||
|
||||
/**
 * Looks up the property stored under {@code FessConfig.INDEXER_MAX_RESULT_WINDOW_SIZE}
 * (key 'indexer.max.result.window.size') and returns it as an {@link Integer}.
 *
 * @return the value obtained from {@code getAsInteger(...)} for that key
 * @throws NumberFormatException when the property is not an integer (per the interface contract)
 */
public Integer getIndexerMaxResultWindowSizeAsInteger() {
|
||||
return getAsInteger(FessConfig.INDEXER_MAX_RESULT_WINDOW_SIZE);
|
||||
}
|
||||
|
||||
/**
 * Looks up the property stored under {@code FessConfig.INDEXER_MAX_SEARCH_DOC_SIZE}
 * (key 'indexer.max.search.doc.size') and returns it as a string.
 *
 * @return the value obtained from {@code get(...)} for that key
 */
public String getIndexerMaxSearchDocSize() {
|
||||
return get(FessConfig.INDEXER_MAX_SEARCH_DOC_SIZE);
|
||||
}
|
||||
|
||||
/**
 * Looks up the property stored under {@code FessConfig.INDEXER_MAX_SEARCH_DOC_SIZE}
 * (key 'indexer.max.search.doc.size') and returns it as an {@link Integer}.
 *
 * @return the value obtained from {@code getAsInteger(...)} for that key
 * @throws NumberFormatException when the property is not an integer (per the interface contract)
 */
public Integer getIndexerMaxSearchDocSizeAsInteger() {
|
||||
return getAsInteger(FessConfig.INDEXER_MAX_SEARCH_DOC_SIZE);
|
||||
}
|
||||
|
||||
/**
 * Looks up the property stored under {@code FessConfig.INDEX_CODEC}
 * (key 'index.codec') and returns it as a string.
 *
 * @return the value obtained from {@code get(...)} for that key
 */
public String getIndexCodec() {
|
||||
return get(FessConfig.INDEX_CODEC);
|
||||
}
|
||||
|
@ -10454,6 +10506,8 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
defaultMap.put(FessConfig.INDEXER_DATA_MAX_REDIRECT_COUNT, "10");
|
||||
defaultMap.put(FessConfig.INDEXER_LANGUAGE_FIELDS, "content,important_content,title");
|
||||
defaultMap.put(FessConfig.INDEXER_LANGUAGE_DETECT_LENGTH, "1000");
|
||||
defaultMap.put(FessConfig.INDEXER_MAX_RESULT_WINDOW_SIZE, "10000");
|
||||
defaultMap.put(FessConfig.INDEXER_MAX_SEARCH_DOC_SIZE, "50000");
|
||||
defaultMap.put(FessConfig.INDEX_CODEC, "default");
|
||||
defaultMap.put(FessConfig.INDEX_number_of_shards, "5");
|
||||
defaultMap.put(FessConfig.INDEX_auto_expand_replicas, "0-1");
|
||||
|
|
|
@ -269,6 +269,8 @@ indexer.data.max.delete.cache.size=100
|
|||
indexer.data.max.redirect.count=10
|
||||
indexer.language.fields=content,important_content,title
|
||||
indexer.language.detect.length=1000
|
||||
indexer.max.result.window.size=10000
|
||||
indexer.max.search.doc.size=50000
|
||||
|
||||
# index setting
|
||||
index.codec=default
|
||||
|
|
Loading…
Add table
Reference in a new issue