fix #568 : Add crawler.ignore.robots.txt=false to fess_config.properties

This commit is contained in:
yfujita 2016-07-15 14:58:02 +09:00
parent 6532321e0c
commit 1697328b26

View file

@@ -200,6 +200,7 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
public void initializeClientFactory(final CrawlerClientFactory clientFactory) {
final WebAuthenticationService webAuthenticationService = ComponentUtil.getComponent(WebAuthenticationService.class);
final RequestHeaderService requestHeaderService = ComponentUtil.getComponent(RequestHeaderService.class);
final FessConfig fessConfig = ComponentUtil.getFessConfig();
// HttpClient Parameters
final Map<String, Object> paramMap = new HashMap<>();
@@ -210,6 +211,11 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
paramMap.putAll(clientConfigMap);
}
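// robots.txt handling: if the property is not already in paramMap, set it to the negation of fessConfig.isCrawlerIgnoreRobotsTxt()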
if (paramMap.get(HcHttpClient.ROBOTS_TXT_ENABLED_PROPERTY) == null) {
paramMap.put(HcHttpClient.ROBOTS_TXT_ENABLED_PROPERTY, !fessConfig.isCrawlerIgnoreRobotsTxt());
}
final String userAgent = getUserAgent();
if (StringUtil.isNotBlank(userAgent)) {
paramMap.put(HcHttpClient.USER_AGENT_PROPERTY, userAgent);