fix #2627 add crawler.http.thread_pool.size

This commit is contained in:
Shinsuke Sugaya 2022-02-17 22:29:39 +09:00
parent ee0cb68c52
commit c3662ff905
4 changed files with 39 additions and 1 deletion

View file

@ -28,8 +28,8 @@ import org.opensearch.common.settings.Settings.Builder;
public class CrawlerEngineClient extends FesenClient {
@Override
protected Client createClient() {
final Builder builder = Settings.builder().putList("http.hosts", address);
final FessConfig fessConfig = ComponentUtil.getFessConfig();
final Builder builder = Settings.builder().putList("http.hosts", address).put("processors", fessConfig.getCrawlerHttpProcessors());
final String username = fessConfig.getOpenSearchUsername();
final String password = fessConfig.getOpenSearchPassword();
if (StringUtil.isNotBlank(username) && StringUtil.isNotBlank(password)) {

View file

@ -295,6 +295,9 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
/** The configuration key 'crawler.default.script'. e.g. groovy */
String CRAWLER_DEFAULT_SCRIPT = "crawler.default.script";
/**
 * The configuration key 'crawler.http.thread_pool.size'. e.g. 0 <br>
 * NOTE(review): the lower-case "thread_pool" segment in the constant name mirrors the property key;
 * presumably this name is generated from the key, so confirm before renaming it by hand.
 */
String CRAWLER_HTTP_thread_pool_SIZE = "crawler.http.thread_pool.size";
/** The configuration key 'crawler.document.max.site.length'. e.g. 50 */
String CRAWLER_DOCUMENT_MAX_SITE_LENGTH = "crawler.document.max.site.length";
@ -2453,6 +2456,21 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
*/
String getCrawlerDefaultScript();
/**
 * Get the raw string value for the key 'crawler.http.thread_pool.size'. <br>
 * The value is, e.g. 0 <br>
 * @return The value of the found property. (NotNull: an exception is thrown if the key is not found, which basically never happens)
 */
String getCrawlerHttpThreadPoolSize();
/**
 * Get the value for the key 'crawler.http.thread_pool.size' converted to {@link Integer}. <br>
 * The value is, e.g. 0 <br>
 * @return The value of the found property. (NotNull: an exception is thrown if the key is not found, which basically never happens)
 * @throws NumberFormatException When the property value is not an integer.
 */
Integer getCrawlerHttpThreadPoolSizeAsInteger();
/**
* Get the value for the key 'crawler.document.max.site.length'. <br>
* The value is, e.g. 50 <br>
@ -7542,6 +7560,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
return get(FessConfig.CRAWLER_DEFAULT_SCRIPT);
}
/**
 * Looks up the property stored under {@link FessConfig#CRAWLER_HTTP_thread_pool_SIZE}.
 * @return The string returned by {@code get} for that key.
 */
public String getCrawlerHttpThreadPoolSize() {
    final String threadPoolSize = get(FessConfig.CRAWLER_HTTP_thread_pool_SIZE);
    return threadPoolSize;
}
/**
 * Looks up the property stored under {@link FessConfig#CRAWLER_HTTP_thread_pool_SIZE} as an integer.
 * @return The {@link Integer} returned by {@code getAsInteger} for that key.
 */
public Integer getCrawlerHttpThreadPoolSizeAsInteger() {
    final Integer threadPoolSize = getAsInteger(FessConfig.CRAWLER_HTTP_thread_pool_SIZE);
    return threadPoolSize;
}
/**
 * Looks up the property stored under {@link FessConfig#CRAWLER_DOCUMENT_MAX_SITE_LENGTH}.
 * @return The string returned by {@code get} for that key.
 */
public String getCrawlerDocumentMaxSiteLength() {
    final String maxSiteLength = get(FessConfig.CRAWLER_DOCUMENT_MAX_SITE_LENGTH);
    return maxSiteLength;
}
@ -10142,6 +10168,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
defaultMap.put(FessConfig.HTTP_FILEUPLOAD_MAX_SIZE, "262144000");
defaultMap.put(FessConfig.HTTP_FILEUPLOAD_THRESHOLD_SIZE, "262144");
defaultMap.put(FessConfig.CRAWLER_DEFAULT_SCRIPT, "groovy");
defaultMap.put(FessConfig.CRAWLER_HTTP_thread_pool_SIZE, "0");
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_MAX_SITE_LENGTH, "50");
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_SITE_ENCODING, "UTF-8");
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_UNKNOWN_HOSTNAME, "unknown");

View file

@ -2041,6 +2041,16 @@ public interface FessProp {
return Runtime.getRuntime().availableProcessors();
}
/**
 * Supplies the configured crawler HTTP thread pool size; read by {@link #getCrawlerHttpProcessors()}.
 * NOTE(review): presumably backed by the 'crawler.http.thread_pool.size' property — confirm against the implementing config class.
 * @return The configured size as an {@link Integer}.
 */
Integer getCrawlerHttpThreadPoolSizeAsInteger();
/**
 * Resolves the processor count derived from the crawler HTTP thread pool size setting.
 * A positive configured size is returned unchanged; zero or a negative value falls back
 * to the number of processors available to the JVM.
 * @return The configured size when it is positive, otherwise {@code Runtime.getRuntime().availableProcessors()}.
 */
default int getCrawlerHttpProcessors() {
    final int configuredSize = getCrawlerHttpThreadPoolSizeAsInteger();
    return configuredSize > 0 ? configuredSize : Runtime.getRuntime().availableProcessors();
}
String getPluginVersionFilter();
default boolean isTargetPluginVersion(final String version) {

View file

@ -195,6 +195,7 @@ http.fileupload.threshold.size=262144
# common
crawler.default.script=groovy
# thread pool size for crawler http access (0 or less: the number of available processors is used instead)
crawler.http.thread_pool.size=0
crawler.document.max.site.length=50
crawler.document.site.encoding=UTF-8
crawler.document.unknown.hostname=unknown