瀏覽代碼

fix #568: Add crawler.ignore.robots.txt=false to fess_config.properties

yfujita 9 年之前
父節點
當前提交
1697328b26
共有 1 個文件被更改,包括 6 次插入、0 次刪除
  1. 6 0
      src/main/java/org/codelibs/fess/es/config/exentity/WebConfig.java

+ 6 - 0
src/main/java/org/codelibs/fess/es/config/exentity/WebConfig.java

@@ -200,6 +200,7 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
     public void initializeClientFactory(final CrawlerClientFactory clientFactory) {
         final WebAuthenticationService webAuthenticationService = ComponentUtil.getComponent(WebAuthenticationService.class);
         final RequestHeaderService requestHeaderService = ComponentUtil.getComponent(RequestHeaderService.class);
+        final FessConfig fessConfig = ComponentUtil.getFessConfig();
 
         // HttpClient Parameters
         final Map<String, Object> paramMap = new HashMap<>();
@@ -210,6 +211,11 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
             paramMap.putAll(clientConfigMap);
         }
 
+        // robots txt enabled
+        if (paramMap.get(HcHttpClient.ROBOTS_TXT_ENABLED_PROPERTY) == null) {
+            paramMap.put(HcHttpClient.ROBOTS_TXT_ENABLED_PROPERTY, !fessConfig.isCrawlerIgnoreRobotsTxt());
+        }
+
         final String userAgent = getUserAgent();
         if (StringUtil.isNotBlank(userAgent)) {
             paramMap.put(HcHttpClient.USER_AGENT_PROPERTY, userAgent);