Ver código fonte

fix #446 : add config parameters

Shinsuke Sugaya 9 anos atrás
pai
commit
3f3f3c5d4f

+ 6 - 0
src/main/java/org/codelibs/fess/Constants.java

@@ -376,4 +376,10 @@ public class Constants extends CoreLibConstants {
     public static final String REQUEST_LANGUAGES = "requestLanguages";
 
     public static final String SEARCH_PREFERENCE_PRIMARY = "_primary";
+
+    public static final String CONFIG_IGNORE_FAILURE_URLS = "ignore.failureUrls";
+
+    public static final String CONFIG_CLEANUP_FILTERS = "cleanup.urlFilters";
+
+    public static final String CONFIG_CLEANUP_ALL = "cleanup.all";
 }

+ 1 - 1
src/main/java/org/codelibs/fess/es/config/exentity/CrawlingConfig.java

@@ -63,6 +63,6 @@ public interface CrawlingConfig {
     }
 
     public enum ConfigName {
-        CLIENT, XPATH, META, VALUE, SCRIPT, FIELD;
+        CLIENT, XPATH, META, VALUE, SCRIPT, FIELD, CONFIG;
     }
 }

+ 71 - 37
src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java

@@ -19,6 +19,7 @@ import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
+import java.util.Map;
 
 import javax.annotation.Resource;
 
@@ -36,6 +37,7 @@ import org.codelibs.fess.crawler.interval.FessIntervalController;
 import org.codelibs.fess.crawler.service.impl.EsDataService;
 import org.codelibs.fess.crawler.service.impl.EsUrlFilterService;
 import org.codelibs.fess.crawler.service.impl.EsUrlQueueService;
+import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigName;
 import org.codelibs.fess.es.config.exentity.FileConfig;
 import org.codelibs.fess.es.config.exentity.WebConfig;
 import org.codelibs.fess.indexer.IndexUpdater;
@@ -179,6 +181,18 @@ public class WebFsIndexHelper implements Serializable {
             crawlerContext.setMaxAccessCount(maxCount);
 
             webConfig.initializeClientFactory(crawler.getClientFactory());
+            final Map<String, String> configParamMap = webConfig.getConfigParameterMap(ConfigName.CONFIG);
+
+            if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_ALL))) {
+                deleteCrawlData(sid);
+            } else if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_FILTERS))) {
+                final EsUrlFilterService urlFilterService = ComponentUtil.getComponent(EsUrlFilterService.class);
+                try {
+                    urlFilterService.delete(sid);
+                } catch (Exception e) { // best-effort cleanup: record the cause and continue
+                    logger.warn("Failed to delete url filters for " + sid, e);
+                }
+            }
 
             // set urls
             final String[] urls = urlsStr.split("[\r\n]");
@@ -223,13 +237,15 @@ public class WebFsIndexHelper implements Serializable {
             }
 
             // failure url
-            final List<String> excludedUrlList = failureUrlService.getExcludedUrlList(webConfig.getConfigId());
-            for (final String u : excludedUrlList) {
-                if (StringUtil.isNotBlank(u)) {
-                    final String urlValue = u.trim();
-                    crawler.addExcludeFilter(urlValue);
-                    if (logger.isInfoEnabled()) {
-                        logger.info("Excluded URL from failures: " + urlValue);
+            if (!Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_IGNORE_FAILURE_URLS))) {
+                final List<String> excludedUrlList = failureUrlService.getExcludedUrlList(webConfig.getConfigId());
+                for (final String u : excludedUrlList) {
+                    if (StringUtil.isNotBlank(u)) {
+                        final String urlValue = u.trim();
+                        crawler.addExcludeFilter(urlValue);
+                        if (logger.isInfoEnabled()) {
+                            logger.info("Excluded URL from failures: " + urlValue);
+                        }
                     }
                 }
             }
@@ -282,6 +298,18 @@ public class WebFsIndexHelper implements Serializable {
             crawlerContext.setMaxAccessCount(maxCount);
 
             fileConfig.initializeClientFactory(crawler.getClientFactory());
+            final Map<String, String> configParamMap = fileConfig.getConfigParameterMap(ConfigName.CONFIG);
+
+            if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_ALL))) {
+                deleteCrawlData(sid);
+            } else if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_FILTERS))) {
+                final EsUrlFilterService urlFilterService = ComponentUtil.getComponent(EsUrlFilterService.class);
+                try {
+                    urlFilterService.delete(sid);
+                } catch (Exception e) { // best-effort cleanup: record the cause and continue
+                    logger.warn("Failed to delete url filters for " + sid, e);
+                }
+            }
 
             // set paths
             final String[] paths = pathsStr.split("[\r\n]");
@@ -353,14 +381,16 @@ public class WebFsIndexHelper implements Serializable {
             }
 
             // failure url
-            final List<String> excludedUrlList = failureUrlService.getExcludedUrlList(fileConfig.getConfigId());
-            if (excludedUrlList != null) {
-                for (final String u : excludedUrlList) {
-                    if (StringUtil.isNotBlank(u)) {
-                        final String urlValue = u.trim();
-                        crawler.addExcludeFilter(urlValue);
-                        if (logger.isInfoEnabled()) {
-                            logger.info("Excluded Path from failures: " + urlValue);
+            if (!Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_IGNORE_FAILURE_URLS))) {
+                final List<String> excludedUrlList = failureUrlService.getExcludedUrlList(fileConfig.getConfigId());
+                if (excludedUrlList != null) {
+                    for (final String u : excludedUrlList) {
+                        if (StringUtil.isNotBlank(u)) {
+                            final String urlValue = u.trim();
+                            crawler.addExcludeFilter(urlValue);
+                            if (logger.isInfoEnabled()) {
+                                logger.info("Excluded Path from failures: " + urlValue);
+                            }
                         }
                     }
                 }
@@ -472,34 +502,38 @@ public class WebFsIndexHelper implements Serializable {
             return;
         }
 
-        final EsUrlFilterService urlFilterService = ComponentUtil.getComponent(EsUrlFilterService.class);
-        final EsUrlQueueService urlQueueService = ComponentUtil.getComponent(EsUrlQueueService.class);
-        final EsDataService dataService = ComponentUtil.getComponent(EsDataService.class);
         for (final String sid : sessionIdList) {
             // remove config
             crawlingConfigHelper.remove(sid);
+            deleteCrawlData(sid);
+        }
+    }
 
-            try {
-                // clear url filter
-                urlFilterService.delete(sid);
-            } catch (Exception e) {
-                logger.warn("Failed to delete UrlFilter for " + sid, e);
-            }
+    protected void deleteCrawlData(final String sid) {
+        final EsUrlFilterService urlFilterService = ComponentUtil.getComponent(EsUrlFilterService.class);
+        final EsUrlQueueService urlQueueService = ComponentUtil.getComponent(EsUrlQueueService.class);
+        final EsDataService dataService = ComponentUtil.getComponent(EsDataService.class);
 
-            try {
-                // clear queue
-                urlQueueService.clearCache();
-                urlQueueService.delete(sid);
-            } catch (Exception e) {
-                logger.warn("Failed to delete UrlQueue for " + sid, e);
-            }
+        try {
+            // clear url filter
+            urlFilterService.delete(sid);
+        } catch (Exception e) {
+            logger.warn("Failed to delete UrlFilter for " + sid, e);
+        }
 
-            try {
-                // clear
-                dataService.delete(sid);
-            } catch (Exception e) {
-                logger.warn("Failed to delete AccessResult for " + sid, e);
-            }
+        try {
+            // clear queue
+            urlQueueService.clearCache();
+            urlQueueService.delete(sid);
+        } catch (Exception e) {
+            logger.warn("Failed to delete UrlQueue for " + sid, e);
+        }
+
+        try {
+            // clear
+            dataService.delete(sid);
+        } catch (Exception e) {
+            logger.warn("Failed to delete AccessResult for " + sid, e);
         }
     }
 

+ 7 - 1
src/main/java/org/codelibs/fess/util/ParameterUtil.java

@@ -33,6 +33,8 @@ public class ParameterUtil {
 
     protected static final String CLIENT_PREFIX = "client.";
 
+    protected static final String CONFIG_PREFIX = "config.";
+
     protected static final String FIELD_PREFIX = "field.config.";
 
     protected ParameterUtil() {
@@ -72,12 +74,14 @@ public class ParameterUtil {
 
     public static Map<ConfigName, Map<String, String>> createConfigParameterMap(final String configParameters) {
         final Map<ConfigName, Map<String, String>> map = new HashMap<>();
+        final Map<String, String> configConfigMap = new HashMap<>();
         final Map<String, String> clientConfigMap = new HashMap<>();
         final Map<String, String> xpathConfigMap = new HashMap<>();
         final Map<String, String> metaConfigMap = new HashMap<>();
         final Map<String, String> valueConfigMap = new HashMap<>();
         final Map<String, String> scriptConfigMap = new HashMap<>();
         final Map<String, String> fieldConfigMap = new HashMap<>();
+        map.put(ConfigName.CONFIG, configConfigMap);
         map.put(ConfigName.CLIENT, clientConfigMap);
         map.put(ConfigName.XPATH, xpathConfigMap);
         map.put(ConfigName.META, metaConfigMap);
@@ -86,7 +90,9 @@ public class ParameterUtil {
         map.put(ConfigName.FIELD, fieldConfigMap);
         for (final Map.Entry<String, String> entry : ParameterUtil.parse(configParameters).entrySet()) {
             final String key = entry.getKey();
-            if (key.startsWith(CLIENT_PREFIX)) {
+            if (key.startsWith(CONFIG_PREFIX)) {
+                configConfigMap.put(key.substring(CONFIG_PREFIX.length()), entry.getValue());
+            } else if (key.startsWith(CLIENT_PREFIX)) {
                 clientConfigMap.put(key.substring(CLIENT_PREFIX.length()), entry.getValue());
             } else if (key.startsWith(XPATH_PREFIX)) {
                 xpathConfigMap.put(key.substring(XPATH_PREFIX.length()), entry.getValue());