Browse Source

fix #1740 add regexp: and regexpIgnoreCase:

Shinsuke Sugaya 7 years ago
parent
commit
f0e2046cc5

+ 6 - 4
src/main/java/org/codelibs/fess/es/config/exentity/FileConfig.java

@@ -149,8 +149,9 @@ public class FileConfig extends BsFileConfig implements CrawlingConfig {
                 final List<Pattern> pathPatterList = new ArrayList<>();
                 final String[] paths = getIncludedDocPaths().split("[\r\n]");
                 for (final String u : paths) {
-                    if (StringUtil.isNotBlank(u) && !u.trim().startsWith("#")) {
-                        pathPatterList.add(Pattern.compile(systemHelper.encodeUrlFilter(u.trim())));
+                    final String v = systemHelper.normalizeConfigPath(u);
+                    if (StringUtil.isNotBlank(v)) {
+                        pathPatterList.add(Pattern.compile(systemHelper.encodeUrlFilter(v)));
                     }
                 }
                 includedDocPathPatterns = pathPatterList.toArray(new Pattern[pathPatterList.size()]);
@@ -164,8 +165,9 @@ public class FileConfig extends BsFileConfig implements CrawlingConfig {
                 final List<Pattern> pathPatterList = new ArrayList<>();
                 final String[] paths = getExcludedDocPaths().split("[\r\n]");
                 for (final String u : paths) {
-                    if (StringUtil.isNotBlank(u) && !u.trim().startsWith("#")) {
-                        pathPatterList.add(Pattern.compile(systemHelper.encodeUrlFilter(u.trim())));
+                    final String v = systemHelper.normalizeConfigPath(u);
+                    if (StringUtil.isNotBlank(v)) {
+                        pathPatterList.add(Pattern.compile(systemHelper.encodeUrlFilter(v)));
                     }
                 }
                 excludedDocPathPatterns = pathPatterList.toArray(new Pattern[pathPatterList.size()]);

+ 8 - 8
src/main/java/org/codelibs/fess/es/config/exentity/WebConfig.java

@@ -34,6 +34,7 @@ import org.codelibs.fess.crawler.client.http.HcHttpClient;
 import org.codelibs.fess.es.config.bsentity.BsWebConfig;
 import org.codelibs.fess.es.config.exbhv.LabelTypeBhv;
 import org.codelibs.fess.es.config.exbhv.WebConfigToLabelBhv;
+import org.codelibs.fess.helper.SystemHelper;
 import org.codelibs.fess.mylasta.direction.FessConfig;
 import org.codelibs.fess.util.ComponentUtil;
 import org.codelibs.fess.util.ParameterUtil;
@@ -144,16 +145,16 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
 
     protected synchronized void initDocUrlPattern() {
 
+        final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
         if (includedDocUrlPatterns == null) {
             if (StringUtil.isNotBlank(getIncludedDocUrls())) {
                 final List<Pattern> urlPatterList = new ArrayList<>();
                 final String[] urls = getIncludedDocUrls().split("[\r\n]");
                 for (final String u : urls) {
-                    final String v = ComponentUtil.getSystemHelper().normalizePath(u);
-                    if (v.isEmpty()) {
-                        break;
+                    final String v = systemHelper.normalizeConfigPath(u);
+                    if (StringUtil.isNotBlank(v)) {
+                        urlPatterList.add(Pattern.compile(v));
                     }
-                    urlPatterList.add(Pattern.compile(v));
                 }
                 includedDocUrlPatterns = urlPatterList.toArray(new Pattern[urlPatterList.size()]);
             } else {
@@ -166,11 +167,10 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
                 final List<Pattern> urlPatterList = new ArrayList<>();
                 final String[] urls = getExcludedDocUrls().split("[\r\n]");
                 for (final String u : urls) {
-                    final String v = ComponentUtil.getSystemHelper().normalizePath(u);
-                    if (v.isEmpty()) {
-                        break;
+                    final String v = systemHelper.normalizeConfigPath(u);
+                    if (StringUtil.isNotBlank(v)) {
+                        urlPatterList.add(Pattern.compile(v));
                     }
-                    urlPatterList.add(Pattern.compile(v));
                 }
                 excludedDocUrlPatterns = urlPatterList.toArray(new Pattern[urlPatterList.size()]);
             } else if (includedDocUrlPatterns.length > 0) {

+ 9 - 2
src/main/java/org/codelibs/fess/helper/LabelTypeHelper.java

@@ -205,6 +205,7 @@ public class LabelTypeHelper {
         public LabelTypePattern(final String value, final String includedPaths, final String excludedPaths) {
             this.value = value;
 
+            final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
             if (StringUtil.isNotBlank(includedPaths)) {
                 final StringBuilder buf = new StringBuilder(100);
                 char split = 0;
@@ -214,7 +215,10 @@ public class LabelTypeHelper {
                     } else {
                         buf.append(split);
                     }
-                    buf.append(ComponentUtil.getSystemHelper().normalizePath(path));
+                    final String normalizePath = systemHelper.normalizeConfigPath(path);
+                    if (StringUtil.isNotBlank(normalizePath)) {
+                        buf.append(normalizePath);
+                    }
                 }
                 this.includedPaths = Pattern.compile(buf.toString());
             }
@@ -228,7 +232,10 @@ public class LabelTypeHelper {
                     } else {
                         buf.append(split);
                     }
-                    buf.append(ComponentUtil.getSystemHelper().normalizePath(path));
+                    final String normalizePath = systemHelper.normalizeConfigPath(path);
+                    if (StringUtil.isNotBlank(normalizePath)) {
+                        buf.append(normalizePath);
+                    }
                 }
                 this.excludedPaths = Pattern.compile(buf.toString());
             }

+ 24 - 5
src/main/java/org/codelibs/fess/helper/SystemHelper.java

@@ -57,6 +57,7 @@ import org.codelibs.fess.mylasta.action.FessMessages;
 import org.codelibs.fess.mylasta.action.FessUserBean;
 import org.codelibs.fess.mylasta.direction.FessConfig;
 import org.codelibs.fess.util.ComponentUtil;
+import org.codelibs.fess.util.GsaConfigParser;
 import org.codelibs.fess.util.ResourceUtil;
 import org.codelibs.fess.validation.FessActionValidator;
 import org.lastaflute.core.message.supplier.UserMessagesCreator;
@@ -75,6 +76,7 @@ import com.google.common.cache.LoadingCache;
 import com.ibm.icu.util.ULocale;
 
 public class SystemHelper {
+
     private static final Logger logger = LoggerFactory.getLogger(SystemHelper.class);
 
     protected final Map<String, String> designJspFileNameMap = new LinkedHashMap<>();
@@ -214,17 +216,46 @@ public class SystemHelper {
         }
     }
 
-    public String normalizePath(final String path) {
+    /**
+     * Converts one line of a crawling-config path/URL list into a regular expression string.
+     *
+     * <p>The line is trimmed before anything else is checked. Blank lines and lines starting
+     * with {@code #} yield an empty string. A {@code contains:} prefix is replaced by the
+     * literally quoted remainder wrapped in {@code .*} on both sides; {@code regexp:} and
+     * {@code regexpCase:} are stripped and the remainder is returned unchanged;
+     * {@code regexpIgnoreCase:} is replaced by the {@code (?i)} flag. Any other line is
+     * returned trimmed, as is.</p>
+     *
+     * @param path a single configuration line (may be blank)
+     * @return the regular expression source, or an empty string when the line is to be ignored
+     */
+    public String normalizeConfigPath(final String path) {
 
-        if (StringUtil.isBlank(path) || path.trim().startsWith("#")) {
+        if (StringUtil.isBlank(path)) {
             return StringUtils.EMPTY;
         }
 
-        if (path.startsWith("contains:")) {
-            return (".*" + Pattern.quote(path.trim().substring("contains:".length())) + ".*");
+        final String p = path.trim();
+        if (p.startsWith("#")) {
+            return StringUtils.EMPTY;
+        }
+
+        if (p.startsWith(GsaConfigParser.CONTAINS)) {
+            return ".*" + Pattern.quote(p.substring(GsaConfigParser.CONTAINS.length())) + ".*";
+        }
+
+        if (p.startsWith(GsaConfigParser.REGEXP)) {
+            return p.substring(GsaConfigParser.REGEXP.length());
+        }
+
+        if (p.startsWith(GsaConfigParser.REGEXP_CASE)) {
+            return p.substring(GsaConfigParser.REGEXP_CASE.length());
+        }
+
+        if (p.startsWith(GsaConfigParser.REGEXP_IGNORE_CASE)) {
+            return "(?i)" + p.substring(GsaConfigParser.REGEXP_IGNORE_CASE.length());
         }
 
-        return path.trim();
+        return p;
     }
 
     public String getHelpLink(final String name) {

+ 9 - 3
src/main/java/org/codelibs/fess/util/GsaConfigParser.java

@@ -47,11 +47,13 @@ public class GsaConfigParser extends DefaultHandler {
 
     private static final Logger logger = LoggerFactory.getLogger(GsaConfigParser.class);
 
-    protected static final String REGEXP = "regexp:";
+    public static final String REGEXP = "regexp:";
 
-    protected static final String REGEXP_IGNORE_CASE = "regexpIgnoreCase:";
+    public static final String REGEXP_CASE = "regexpCase:";
 
-    protected static final String CONTAINS = "contains:";
+    public static final String REGEXP_IGNORE_CASE = "regexpIgnoreCase:";
+
+    public static final String CONTAINS = "contains:";
 
     protected static final String COLLECTIONS = "collections";
 
@@ -254,6 +256,10 @@ public class GsaConfigParser extends DefaultHandler {
             final StringBuilder buf = new StringBuilder(100);
             buf.append("(?i)");
             return appendFileterPath(buf, unescape(v));
+        } else if (s.startsWith(REGEXP_CASE)) {
+            final String v = s.substring(REGEXP_CASE.length());
+            final StringBuilder buf = new StringBuilder(100);
+            return appendFileterPath(buf, unescape(v));
         } else if (s.startsWith(REGEXP)) {
             final String v = s.substring(REGEXP.length());
             final StringBuilder buf = new StringBuilder(100);

+ 9 - 6
src/test/java/org/codelibs/fess/helper/SystemHelperTest.java

@@ -104,11 +104,18 @@ public class SystemHelperTest extends UnitFessTestCase {
         assertEquals("bbb\\ccc", systemHelper.createSearchRole("", "aaa\\bbb\\ccc"));
     }
 
-    public void normalizePaths() {
-        assertEquals("", systemHelper.normalizePath(""));
-        assertEquals(".*Q\\www.domain.com/test\\E.*", systemHelper.normalizePath("Contains:www.domain.com/test"));
-        assertEquals(".*Q\\/test\\E.*", systemHelper.normalizePath("Contains:/test/"));
-        assertEquals("www.domain.com/test", systemHelper.normalizePath("www.domain.com/test"));
-        assertEquals(".*domain.com/.*", systemHelper.normalizePath(".*domain.com/.*"));
+    public void test_normalizeConfigPath() {
+        assertEquals("", systemHelper.normalizeConfigPath(""));
+        assertEquals(".*\\Qwww.domain.com/test\\E.*", systemHelper.normalizeConfigPath("contains:www.domain.com/test"));
+        assertEquals(".*\\Q/test/\\E.*", systemHelper.normalizeConfigPath("contains:/test/"));
+        assertEquals("www.domain.com/test", systemHelper.normalizeConfigPath("www.domain.com/test"));
+        assertEquals(".*domain.com/.*", systemHelper.normalizeConfigPath(".*domain.com/.*"));
+        assertEquals("aaa", systemHelper.normalizeConfigPath("regexp:aaa"));
+        assertEquals("aaa", systemHelper.normalizeConfigPath("regexpCase:aaa"));
+        assertEquals("(?i)aaa", systemHelper.normalizeConfigPath("regexpIgnoreCase:aaa"));
+        // surrounding whitespace is trimmed before comment and prefix detection
+        assertEquals("", systemHelper.normalizeConfigPath("   "));
+        assertEquals("", systemHelper.normalizeConfigPath(" # comment"));
+        assertEquals(".*\\Q/test/\\E.*", systemHelper.normalizeConfigPath(" contains:/test/ "));
     }
 }