Fix #568: Add crawler.ignore.robots.txt=false to fess_config.properties
This commit is contained in:
parent
6532321e0c
commit
1697328b26
1 changed file with 6 additions and 0 deletions
|
@@ -200,6 +200,7 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
|
|||
public void initializeClientFactory(final CrawlerClientFactory clientFactory) {
|
||||
final WebAuthenticationService webAuthenticationService = ComponentUtil.getComponent(WebAuthenticationService.class);
|
||||
final RequestHeaderService requestHeaderService = ComponentUtil.getComponent(RequestHeaderService.class);
|
||||
final FessConfig fessConfig = ComponentUtil.getFessConfig();
|
||||
|
||||
// HttpClient Parameters
|
||||
final Map<String, Object> paramMap = new HashMap<>();
|
||||
|
@@ -210,6 +211,11 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
|
|||
paramMap.putAll(clientConfigMap);
|
||||
}
|
||||
|
||||
// robots txt enabled
|
||||
if (paramMap.get(HcHttpClient.ROBOTS_TXT_ENABLED_PROPERTY) == null) {
|
||||
paramMap.put(HcHttpClient.ROBOTS_TXT_ENABLED_PROPERTY, !fessConfig.isCrawlerIgnoreRobotsTxt());
|
||||
}
|
||||
|
||||
final String userAgent = getUserAgent();
|
||||
if (StringUtil.isNotBlank(userAgent)) {
|
||||
paramMap.put(HcHttpClient.USER_AGENT_PROPERTY, userAgent);
|
||||
|
|
Loading…
Add table
Reference in a new issue