Browse Source

fix #2161 remove duplicate settings

Shinsuke Sugaya 6 years ago
parent
commit
b9194e5f14
1 changed file with 86 additions and 102 deletions
  1. 86 102
      src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java

+ 86 - 102
src/main/java/org/codelibs/fess/helper/WebFsIndexHelper.java

@@ -15,10 +15,13 @@
  */
 package org.codelibs.fess.helper;
 
+import static org.codelibs.core.stream.StreamUtil.split;
+
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.regex.Pattern;
 
 import org.codelibs.core.lang.StringUtil;
@@ -144,57 +147,47 @@ public class WebFsIndexHelper {
             }
 
             // set urls
-            final String[] urls = urlsStr.split("[\r\n]");
-            for (final String u : urls) {
-                if (StringUtil.isNotBlank(u)) {
-                    final String urlValue = u.trim();
-                    if (!urlValue.startsWith("#") && fessConfig.isValidCrawlerWebProtocol(u)) {
-                        crawler.addUrl(urlValue);
-                        if (logger.isInfoEnabled()) {
-                            logger.info("Target URL: " + urlValue);
-                        }
+            split(urlsStr, "[\r\n]").of(stream -> stream.filter(StringUtil::isNotBlank).map(String::trim).distinct().forEach(urlValue -> {
+                if (!urlValue.startsWith("#") && fessConfig.isValidCrawlerWebProtocol(urlValue)) {
+                    crawler.addUrl(urlValue);
+                    if (logger.isInfoEnabled()) {
+                        logger.info("Target URL: " + urlValue);
                     }
                 }
-            }
+            }));
 
             // set included urls
-            final String[] includedUrls = includedUrlsStr.split("[\r\n]");
-            for (final String u : includedUrls) {
-                if (StringUtil.isNotBlank(u)) {
-                    final String urlValue = u.trim();
-                    if (!urlValue.startsWith("#")) {
-                        crawler.addIncludeFilter(urlValue);
-                        if (logger.isInfoEnabled()) {
-                            logger.info("Included URL: " + urlValue);
+            split(includedUrlsStr, "[\r\n]").of(
+                    stream -> stream.filter(StringUtil::isNotBlank).map(String::trim).distinct().forEach(urlValue -> {
+                        if (!urlValue.startsWith("#")) {
+                            crawler.addIncludeFilter(urlValue);
+                            if (logger.isInfoEnabled()) {
+                                logger.info("Included URL: " + urlValue);
+                            }
                         }
-                    }
-                }
-            }
+                    }));
 
             // set excluded urls
-            final String[] excludedUrls = excludedUrlsStr.split("[\r\n]");
-            for (final String u : excludedUrls) {
-                if (StringUtil.isNotBlank(u)) {
-                    final String urlValue = u.trim();
-                    if (!urlValue.startsWith("#")) {
-                        crawler.addExcludeFilter(urlValue);
-                        if (logger.isInfoEnabled()) {
-                            logger.info("Excluded URL: " + urlValue);
+            split(excludedUrlsStr, "[\r\n]").of(
+                    stream -> stream.filter(StringUtil::isNotBlank).map(String::trim).distinct().forEach(urlValue -> {
+                        if (!urlValue.startsWith("#")) {
+                            crawler.addExcludeFilter(urlValue);
+                            if (logger.isInfoEnabled()) {
+                                logger.info("Excluded URL: " + urlValue);
+                            }
                         }
-                    }
-                }
-            }
+                    }));
 
             // failure url
             final List<String> excludedUrlList = ComponentUtil.getCrawlingConfigHelper().getExcludedUrlList(webConfig.getConfigId());
-            for (final String u : excludedUrlList) {
-                if (StringUtil.isNotBlank(u)) {
-                    final String urlValue = Pattern.quote(u.trim());
+            if (excludedUrlList != null) {
+                excludedUrlList.stream().filter(StringUtil::isNotBlank).map(String::trim).distinct().forEach(u -> {
+                    final String urlValue = Pattern.quote(u);
                     crawler.addExcludeFilter(urlValue);
                     if (logger.isInfoEnabled()) {
                         logger.info("Excluded URL from failures: " + urlValue);
                     }
-                }
+                });
             }
 
             if (logger.isDebugEnabled()) {
@@ -259,86 +252,77 @@ public class WebFsIndexHelper {
             }
 
             // set paths
-            final String[] paths = pathsStr.split("[\r\n]");
-            for (String u : paths) {
-                if (StringUtil.isNotBlank(u)) {
-                    u = u.trim();
-                    if (!u.startsWith("#")) {
-                        if (!fessConfig.isValidCrawlerFileProtocol(u)) {
-                            if (u.startsWith("/")) {
-                                u = "file:" + u;
-                            } else {
-                                u = "file:/" + u;
-                            }
-                        }
-                        crawler.addUrl(u);
-                        if (logger.isInfoEnabled()) {
-                            logger.info("Target Path: " + u);
+            split(pathsStr, "[\r\n]").of(stream -> stream.filter(StringUtil::isNotBlank).map(String::trim).distinct().forEach(urlValue -> {
+                if (!urlValue.startsWith("#")) {
+                    final String u;
+                    if (!fessConfig.isValidCrawlerFileProtocol(urlValue)) {
+                        if (urlValue.startsWith("/")) {
+                            u = "file:" + urlValue;
+                        } else {
+                            u = "file:/" + urlValue;
                         }
+                    } else {
+                        u = urlValue;
+                    }
+                    crawler.addUrl(u);
+                    if (logger.isInfoEnabled()) {
+                        logger.info("Target Path: " + u);
                     }
                 }
-            }
+            }));
 
             // set included paths
-            boolean urlEncodeDisabled = false;
-            final String[] includedPaths = includedPathsStr.split("[\r\n]");
-            for (final String u : includedPaths) {
-                if (StringUtil.isNotBlank(u)) {
-                    final String line = u.trim();
-                    if (!line.startsWith("#")) {
-                        final String urlValue;
-                        if (urlEncodeDisabled) {
-                            urlValue = line;
-                            urlEncodeDisabled = false;
-                        } else {
-                            urlValue = systemHelper.encodeUrlFilter(line);
-                        }
-                        crawler.addIncludeFilter(urlValue);
-                        if (logger.isInfoEnabled()) {
-                            logger.info("Included Path: " + urlValue);
+            final AtomicBoolean urlEncodeDisabled = new AtomicBoolean(false);
+            split(includedPathsStr, "[\r\n]").of(
+                    stream -> stream.filter(StringUtil::isNotBlank).map(String::trim).distinct().forEach(line -> {
+                        if (!line.startsWith("#")) {
+                            final String urlValue;
+                            if (urlEncodeDisabled.get()) {
+                                urlValue = line;
+                                urlEncodeDisabled.set(false);
+                            } else {
+                                urlValue = systemHelper.encodeUrlFilter(line);
+                            }
+                            crawler.addIncludeFilter(urlValue);
+                            if (logger.isInfoEnabled()) {
+                                logger.info("Included Path: " + urlValue);
+                            }
+                        } else if (line.startsWith("#DISABLE_URL_ENCODE")) {
+                            urlEncodeDisabled.set(true);
                         }
-                    } else if (line.startsWith("#DISABLE_URL_ENCODE")) {
-                        urlEncodeDisabled = true;
-                    }
-                }
-            }
+                    }));
 
             // set excluded paths
-            urlEncodeDisabled = false;
-            final String[] excludedPaths = excludedPathsStr.split("[\r\n]");
-            for (final String u : excludedPaths) {
-                if (StringUtil.isNotBlank(u)) {
-                    final String line = u.trim();
-                    if (!line.startsWith("#")) {
-                        final String urlValue;
-                        if (urlEncodeDisabled) {
-                            urlValue = line;
-                            urlEncodeDisabled = false;
-                        } else {
-                            urlValue = systemHelper.encodeUrlFilter(line);
-                        }
-                        crawler.addExcludeFilter(urlValue);
-                        if (logger.isInfoEnabled()) {
-                            logger.info("Excluded Path: " + urlValue);
+            urlEncodeDisabled.set(false);
+            split(excludedPathsStr, "[\r\n]").of(
+                    stream -> stream.filter(StringUtil::isNotBlank).map(String::trim).distinct().forEach(line -> {
+                        if (!line.startsWith("#")) {
+                            final String urlValue;
+                            if (urlEncodeDisabled.get()) {
+                                urlValue = line;
+                                urlEncodeDisabled.set(false);
+                            } else {
+                                urlValue = systemHelper.encodeUrlFilter(line);
+                            }
+                            crawler.addExcludeFilter(urlValue);
+                            if (logger.isInfoEnabled()) {
+                                logger.info("Excluded Path: " + urlValue);
+                            }
+                        } else if (line.startsWith("#DISABLE_URL_ENCODE")) {
+                            urlEncodeDisabled.set(true);
                         }
-                    } else if (line.startsWith("#DISABLE_URL_ENCODE")) {
-                        urlEncodeDisabled = true;
-                    }
-                }
-            }
+                    }));
 
             // failure url
             final List<String> excludedUrlList = ComponentUtil.getCrawlingConfigHelper().getExcludedUrlList(fileConfig.getConfigId());
             if (excludedUrlList != null) {
-                for (final String u : excludedUrlList) {
-                    if (StringUtil.isNotBlank(u)) {
-                        final String urlValue = Pattern.quote(u.trim());
-                        crawler.addExcludeFilter(urlValue);
-                        if (logger.isInfoEnabled()) {
-                            logger.info("Excluded Path from failures: " + urlValue);
-                        }
+                excludedUrlList.stream().filter(StringUtil::isNotBlank).map(String::trim).distinct().forEach(u -> {
+                    final String urlValue = Pattern.quote(u);
+                    crawler.addExcludeFilter(urlValue);
+                    if (logger.isInfoEnabled()) {
+                        logger.info("Excluded Path from failures: " + urlValue);
                     }
-                }
+                });
             }
 
             if (logger.isDebugEnabled()) {