Browse Source

Enhancement/feature to allow the use of "contains:" as an alternative to using RegEx. This is useful when you have very many URLs to maintain and the paths are predefined. In addition, not everyone is familiar with RegEx, and non-programmers should be able to maintain the list of URLs to include/exclude and index. Lastly, it makes porting from GSA a lot simpler, as you can just copy and paste collections to labels.

Eyad 7 years ago
parent
commit
2ba4ee85b5

+ 8 - 4
src/main/java/org/codelibs/fess/es/config/exentity/WebConfig.java

@@ -149,9 +149,11 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
                 final List<Pattern> urlPatterList = new ArrayList<>();
                 final List<Pattern> urlPatterList = new ArrayList<>();
                 final String[] urls = getIncludedDocUrls().split("[\r\n]");
                 final String[] urls = getIncludedDocUrls().split("[\r\n]");
                 for (final String u : urls) {
                 for (final String u : urls) {
-                    if (StringUtil.isNotBlank(u) && !u.trim().startsWith("#")) {
-                        urlPatterList.add(Pattern.compile(u.trim()));
+                    final String v = ComponentUtil.getSystemHelper().normalizePath(u);
+                    if (v.isEmpty()) {
+                        continue; // blank or "#" comment line: skip it, but keep reading the remaining lines
                     }
                     }
+                    urlPatterList.add(Pattern.compile(v));
                 }
                 }
                 includedDocUrlPatterns = urlPatterList.toArray(new Pattern[urlPatterList.size()]);
                 includedDocUrlPatterns = urlPatterList.toArray(new Pattern[urlPatterList.size()]);
             } else {
             } else {
@@ -164,9 +166,11 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
                 final List<Pattern> urlPatterList = new ArrayList<>();
                 final List<Pattern> urlPatterList = new ArrayList<>();
                 final String[] urls = getExcludedDocUrls().split("[\r\n]");
                 final String[] urls = getExcludedDocUrls().split("[\r\n]");
                 for (final String u : urls) {
                 for (final String u : urls) {
-                    if (StringUtil.isNotBlank(u) && !u.trim().startsWith("#")) {
-                        urlPatterList.add(Pattern.compile(u.trim()));
+                    final String v = ComponentUtil.getSystemHelper().normalizePath(u);
+                    if (v.isEmpty()) {
+                        continue; // blank or "#" comment line: skip it, but keep reading the remaining lines
                     }
                     }
+                    urlPatterList.add(Pattern.compile(v));
                 }
                 }
                 excludedDocUrlPatterns = urlPatterList.toArray(new Pattern[urlPatterList.size()]);
                 excludedDocUrlPatterns = urlPatterList.toArray(new Pattern[urlPatterList.size()]);
             } else if (includedDocUrlPatterns.length > 0) {
             } else if (includedDocUrlPatterns.length > 0) {

+ 1 - 1
src/main/java/org/codelibs/fess/exec/Crawler.java

@@ -126,7 +126,7 @@ public class Crawler {
         public String expires;
         public String expires;
 
 
         protected Options() {
         protected Options() {
-            // noghing
+            // nothing
         }
         }
 
 
         protected List<String> getWebConfigIdList() {
         protected List<String> getWebConfigIdList() {

+ 13 - 3
src/main/java/org/codelibs/fess/helper/LabelTypeHelper.java

@@ -214,7 +214,7 @@ public class LabelTypeHelper {
                     } else {
                     } else {
                         buf.append(split);
                         buf.append(split);
                     }
                     }
-                    buf.append(path.trim());
+                    buf.append(ComponentUtil.getSystemHelper().normalizePath(path));
                 }
                 }
                 this.includedPaths = Pattern.compile(buf.toString());
                 this.includedPaths = Pattern.compile(buf.toString());
             }
             }
@@ -228,7 +228,7 @@ public class LabelTypeHelper {
                     } else {
                     } else {
                         buf.append(split);
                         buf.append(split);
                     }
                     }
-                    buf.append(path.trim());
+                    buf.append(ComponentUtil.getSystemHelper().normalizePath(path));
                 }
                 }
                 this.excludedPaths = Pattern.compile(buf.toString());
                 this.excludedPaths = Pattern.compile(buf.toString());
             }
             }
@@ -242,13 +242,23 @@ public class LabelTypeHelper {
             if (includedPaths != null) {
             if (includedPaths != null) {
                 if (includedPaths.matcher(path).matches()) {
                 if (includedPaths.matcher(path).matches()) {
                     if (excludedPaths != null && excludedPaths.matcher(path).matches()) {
                     if (excludedPaths != null && excludedPaths.matcher(path).matches()) {
+                        if (logger.isDebugEnabled()) {
+                            logger.debug("Path " + path + " matched against the excludes paths expression " + excludedPaths.toString());
+                        }
                         return false;
                         return false;
                     }
                     }
                     return true;
                     return true;
                 }
                 }
+                if (logger.isDebugEnabled()) {
+                    logger.debug("Path " + path + " wasn't matched against the include paths expression " + includedPaths.toString());
+                }
                 return false;
                 return false;
             } else {
             } else {
-                return !excludedPaths.matcher(path).matches();
+                boolean match = !excludedPaths.matcher(path).matches();
+                if (!match && logger.isDebugEnabled()) {
+                    logger.debug("Path " + path + " matched against the excludes paths expression " + excludedPaths.toString()); // includedPaths is null in this branch; log the pattern that actually matched
+                }
+                return match;
             }
             }
         }
         }
 
 

+ 14 - 0
src/main/java/org/codelibs/fess/helper/SystemHelper.java

@@ -40,6 +40,7 @@ import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicInteger;
+import java.util.regex.Pattern;
 
 
 import javax.annotation.PostConstruct;
 import javax.annotation.PostConstruct;
 import javax.annotation.PreDestroy;
 import javax.annotation.PreDestroy;
@@ -213,6 +214,19 @@ public class SystemHelper {
         }
         }
     }
     }
 
 
+    /**
+     * Converts one configured path/URL line into regular-expression text.
+     *
+     * <p>Blank lines and lines whose first non-whitespace character is "#" yield an
+     * empty string. A line that starts with "contains:" (ignoring surrounding
+     * whitespace) becomes a pattern that matches any input containing the remainder
+     * of the line literally. Any other line is returned trimmed, unchanged otherwise.</p>
+     *
+     * @param path the raw configuration line
+     * @return the regular-expression text, or an empty string when the line carries no pattern
+     */
+    public String normalizePath(final String path) {
+        if (StringUtil.isBlank(path)) {
+            return StringUtils.EMPTY;
+        }
+
+        // Trim once so the comment check, the prefix check and the substring all see
+        // the same text; leading whitespace must not hide the "contains:" prefix.
+        final String trimmed = path.trim();
+        if (trimmed.startsWith("#")) {
+            return StringUtils.EMPTY;
+        }
+
+        final String containsPrefix = "contains:";
+        if (trimmed.startsWith(containsPrefix)) {
+            // Pattern.quote keeps regex metacharacters in the remainder literal.
+            return ".*" + Pattern.quote(trimmed.substring(containsPrefix.length())) + ".*";
+        }
+
+        return trimmed;
+    }
+
     public String getHelpLink(final String name) {
     public String getHelpLink(final String name) {
         final String url = ComponentUtil.getFessConfig().getOnlineHelpBaseLink() + name + "-guide.html";
         final String url = ComponentUtil.getFessConfig().getOnlineHelpBaseLink() + name + "-guide.html";
         return getHelpUrl(url);
         return getHelpUrl(url);

+ 7 - 0
src/test/java/org/codelibs/fess/helper/SystemHelperTest.java

@@ -104,4 +104,11 @@ public class SystemHelperTest extends UnitFessTestCase {
         assertEquals("bbb\\ccc", systemHelper.createSearchRole("", "aaa\\bbb\\ccc"));
         assertEquals("bbb\\ccc", systemHelper.createSearchRole("", "aaa\\bbb\\ccc"));
     }
     }
 
 
+    /**
+     * Checks normalizePath for blank and comment lines, the "contains:" prefix and
+     * plain regular expressions. The expected "contains:" results use the
+     * \Q...\E form produced by java.util.regex.Pattern.quote.
+     */
+    // NOTE(review): renamed with a "test" prefix so a name-based runner picks it up — confirm against UnitFessTestCase.
+    public void test_normalizePath() {
+        assertEquals("", systemHelper.normalizePath(""));
+        assertEquals("", systemHelper.normalizePath("   "));
+        assertEquals("", systemHelper.normalizePath("# comment"));
+        assertEquals(".*\\Qwww.domain.com/test\\E.*", systemHelper.normalizePath("contains:www.domain.com/test"));
+        assertEquals(".*\\Q/test/\\E.*", systemHelper.normalizePath("contains:/test/"));
+        assertEquals("www.domain.com/test", systemHelper.normalizePath("www.domain.com/test"));
+        assertEquals(".*domain.com/.*", systemHelper.normalizePath(".*domain.com/.*"));
+    }
 }
 }