diff --git a/src/main/java/org/codelibs/fess/es/config/exentity/FileConfig.java b/src/main/java/org/codelibs/fess/es/config/exentity/FileConfig.java index 2287a3406..742a66cb9 100644 --- a/src/main/java/org/codelibs/fess/es/config/exentity/FileConfig.java +++ b/src/main/java/org/codelibs/fess/es/config/exentity/FileConfig.java @@ -149,8 +149,9 @@ public class FileConfig extends BsFileConfig implements CrawlingConfig { final List pathPatterList = new ArrayList<>(); final String[] paths = getIncludedDocPaths().split("[\r\n]"); for (final String u : paths) { - if (StringUtil.isNotBlank(u) && !u.trim().startsWith("#")) { - pathPatterList.add(Pattern.compile(systemHelper.encodeUrlFilter(u.trim()))); + final String v = systemHelper.normalizeConfigPath(u); + if (StringUtil.isNotBlank(v)) { + pathPatterList.add(Pattern.compile(systemHelper.encodeUrlFilter(v))); } } includedDocPathPatterns = pathPatterList.toArray(new Pattern[pathPatterList.size()]); @@ -164,8 +165,9 @@ public class FileConfig extends BsFileConfig implements CrawlingConfig { final List pathPatterList = new ArrayList<>(); final String[] paths = getExcludedDocPaths().split("[\r\n]"); for (final String u : paths) { - if (StringUtil.isNotBlank(u) && !u.trim().startsWith("#")) { - pathPatterList.add(Pattern.compile(systemHelper.encodeUrlFilter(u.trim()))); + final String v = systemHelper.normalizeConfigPath(u); + if (StringUtil.isNotBlank(v)) { + pathPatterList.add(Pattern.compile(systemHelper.encodeUrlFilter(v))); } } excludedDocPathPatterns = pathPatterList.toArray(new Pattern[pathPatterList.size()]); diff --git a/src/main/java/org/codelibs/fess/es/config/exentity/WebConfig.java b/src/main/java/org/codelibs/fess/es/config/exentity/WebConfig.java index a6665f77b..9a18c4bed 100644 --- a/src/main/java/org/codelibs/fess/es/config/exentity/WebConfig.java +++ b/src/main/java/org/codelibs/fess/es/config/exentity/WebConfig.java @@ -34,6 +34,7 @@ import org.codelibs.fess.crawler.client.http.HcHttpClient; import org.codelibs.fess.es.config.bsentity.BsWebConfig; import org.codelibs.fess.es.config.exbhv.LabelTypeBhv; import org.codelibs.fess.es.config.exbhv.WebConfigToLabelBhv; +import org.codelibs.fess.helper.SystemHelper; import org.codelibs.fess.mylasta.direction.FessConfig; import org.codelibs.fess.util.ComponentUtil; import org.codelibs.fess.util.ParameterUtil; @@ -144,16 +145,16 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig { protected synchronized void initDocUrlPattern() { + final SystemHelper systemHelper = ComponentUtil.getSystemHelper(); if (includedDocUrlPatterns == null) { if (StringUtil.isNotBlank(getIncludedDocUrls())) { final List urlPatterList = new ArrayList<>(); final String[] urls = getIncludedDocUrls().split("[\r\n]"); for (final String u : urls) { - final String v = ComponentUtil.getSystemHelper().normalizePath(u); - if (v.isEmpty()) { - break; + final String v = systemHelper.normalizeConfigPath(u); + if (StringUtil.isNotBlank(v)) { + urlPatterList.add(Pattern.compile(v)); } - urlPatterList.add(Pattern.compile(v)); } includedDocUrlPatterns = urlPatterList.toArray(new Pattern[urlPatterList.size()]); } else { @@ -166,11 +167,10 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig { final List urlPatterList = new ArrayList<>(); final String[] urls = getExcludedDocUrls().split("[\r\n]"); for (final String u : urls) { - final String v = ComponentUtil.getSystemHelper().normalizePath(u); - if (v.isEmpty()) { - break; + final String v = systemHelper.normalizeConfigPath(u); + if (StringUtil.isNotBlank(v)) { + urlPatterList.add(Pattern.compile(v)); } - urlPatterList.add(Pattern.compile(v)); } excludedDocUrlPatterns = urlPatterList.toArray(new Pattern[urlPatterList.size()]); } else if (includedDocUrlPatterns.length > 0) { diff --git a/src/main/java/org/codelibs/fess/helper/LabelTypeHelper.java b/src/main/java/org/codelibs/fess/helper/LabelTypeHelper.java index b0f35f8ff..1c1928fbb 100644 --- a/src/main/java/org/codelibs/fess/helper/LabelTypeHelper.java +++ b/src/main/java/org/codelibs/fess/helper/LabelTypeHelper.java @@ -205,6 +205,7 @@ public class LabelTypeHelper { public LabelTypePattern(final String value, final String includedPaths, final String excludedPaths) { this.value = value; + final SystemHelper systemHelper = ComponentUtil.getSystemHelper(); if (StringUtil.isNotBlank(includedPaths)) { final StringBuilder buf = new StringBuilder(100); char split = 0; @@ -214,7 +215,10 @@ public class LabelTypeHelper { } else { buf.append(split); } - buf.append(ComponentUtil.getSystemHelper().normalizePath(path)); + final String normalizePath = systemHelper.normalizeConfigPath(path); + if (StringUtil.isNotBlank(normalizePath)) { + buf.append(normalizePath); + } } this.includedPaths = Pattern.compile(buf.toString()); } @@ -228,7 +232,10 @@ public class LabelTypeHelper { } else { buf.append(split); } - buf.append(ComponentUtil.getSystemHelper().normalizePath(path)); + final String normalizePath = systemHelper.normalizeConfigPath(path); + if (StringUtil.isNotBlank(normalizePath)) { + buf.append(normalizePath); + } } this.excludedPaths = Pattern.compile(buf.toString()); } diff --git a/src/main/java/org/codelibs/fess/helper/SystemHelper.java b/src/main/java/org/codelibs/fess/helper/SystemHelper.java index 442ec9f33..6767af01e 100644 --- a/src/main/java/org/codelibs/fess/helper/SystemHelper.java +++ b/src/main/java/org/codelibs/fess/helper/SystemHelper.java @@ -57,6 +57,7 @@ import org.codelibs.fess.mylasta.action.FessMessages; import org.codelibs.fess.mylasta.action.FessUserBean; import org.codelibs.fess.mylasta.direction.FessConfig; import org.codelibs.fess.util.ComponentUtil; +import org.codelibs.fess.util.GsaConfigParser; import org.codelibs.fess.util.ResourceUtil; import org.codelibs.fess.validation.FessActionValidator; import org.lastaflute.core.message.supplier.UserMessagesCreator; @@ -75,6 +76,7 @@ import com.google.common.cache.LoadingCache; import com.ibm.icu.util.ULocale; public class SystemHelper { + private static final Logger logger = LoggerFactory.getLogger(SystemHelper.class); protected final Map designJspFileNameMap = new LinkedHashMap<>(); @@ -214,17 +216,34 @@ public class SystemHelper { } } - public String normalizePath(final String path) { + public String normalizeConfigPath(final String path) { - if (StringUtil.isBlank(path) || path.trim().startsWith("#")) { + if (StringUtil.isBlank(path)) { return StringUtils.EMPTY; } - if (path.startsWith("contains:")) { - return (".*" + Pattern.quote(path.trim().substring("contains:".length())) + ".*"); + String p = path.trim(); + if (p.startsWith("#")) { + return StringUtils.EMPTY; } - return path.trim(); + if (p.startsWith(GsaConfigParser.CONTAINS)) { + return ".*" + Pattern.quote(p.substring(GsaConfigParser.CONTAINS.length())) + ".*"; + } + + if (p.startsWith(GsaConfigParser.REGEXP)) { + return p.substring(GsaConfigParser.REGEXP.length()); + } + + if (p.startsWith(GsaConfigParser.REGEXP_CASE)) { + return p.substring(GsaConfigParser.REGEXP_CASE.length()); + } + + if (p.startsWith(GsaConfigParser.REGEXP_IGNORE_CASE)) { + return "(?i)" + p.substring(GsaConfigParser.REGEXP_IGNORE_CASE.length()); + } + + return p; } public String getHelpLink(final String name) { diff --git a/src/main/java/org/codelibs/fess/util/GsaConfigParser.java b/src/main/java/org/codelibs/fess/util/GsaConfigParser.java index f88cb78c2..293212ace 100644 --- a/src/main/java/org/codelibs/fess/util/GsaConfigParser.java +++ b/src/main/java/org/codelibs/fess/util/GsaConfigParser.java @@ -47,11 +47,13 @@ public class GsaConfigParser extends DefaultHandler { private static final Logger logger = LoggerFactory.getLogger(GsaConfigParser.class); - protected static final String REGEXP = "regexp:"; + public static final String REGEXP = "regexp:"; - protected static final String REGEXP_IGNORE_CASE = "regexpIgnoreCase:"; + public static final String REGEXP_CASE = "regexpCase:"; - protected static final String CONTAINS = "contains:"; + public static final String REGEXP_IGNORE_CASE = "regexpIgnoreCase:"; + + public static final String CONTAINS = "contains:"; protected static final String COLLECTIONS = "collections"; @@ -254,6 +256,10 @@ public class GsaConfigParser extends DefaultHandler { final StringBuilder buf = new StringBuilder(100); buf.append("(?i)"); return appendFileterPath(buf, unescape(v)); + } else if (s.startsWith(REGEXP_CASE)) { + final String v = s.substring(REGEXP_CASE.length()); + final StringBuilder buf = new StringBuilder(100); + return appendFileterPath(buf, unescape(v)); } else if (s.startsWith(REGEXP)) { final String v = s.substring(REGEXP.length()); final StringBuilder buf = new StringBuilder(100); diff --git a/src/test/java/org/codelibs/fess/helper/SystemHelperTest.java b/src/test/java/org/codelibs/fess/helper/SystemHelperTest.java index b64008ad4..8c0c4f9df 100644 --- a/src/test/java/org/codelibs/fess/helper/SystemHelperTest.java +++ b/src/test/java/org/codelibs/fess/helper/SystemHelperTest.java @@ -104,11 +104,14 @@ public class SystemHelperTest extends UnitFessTestCase { assertEquals("bbb\\ccc", systemHelper.createSearchRole("", "aaa\\bbb\\ccc")); } - public void normalizePaths() { - assertEquals("", systemHelper.normalizePath("")); - assertEquals(".*Q\\www.domain.com/test\\E.*", systemHelper.normalizePath("Contains:www.domain.com/test")); - assertEquals(".*Q\\/test\\E.*", systemHelper.normalizePath("Contains:/test/")); - assertEquals("www.domain.com/test", systemHelper.normalizePath("www.domain.com/test")); - assertEquals(".*domain.com/.*", systemHelper.normalizePath(".*domain.com/.*")); + public void test_normalizeConfigPath() { + assertEquals("", systemHelper.normalizeConfigPath("")); + assertEquals(".*\\Qwww.domain.com/test\\E.*", systemHelper.normalizeConfigPath("contains:www.domain.com/test")); + assertEquals(".*\\Q/test/\\E.*", systemHelper.normalizeConfigPath("contains:/test/")); + assertEquals("www.domain.com/test", systemHelper.normalizeConfigPath("www.domain.com/test")); + assertEquals(".*domain.com/.*", systemHelper.normalizeConfigPath(".*domain.com/.*")); + assertEquals("aaa", systemHelper.normalizeConfigPath("regexp:aaa")); + assertEquals("aaa", systemHelper.normalizeConfigPath("regexpCase:aaa")); + assertEquals("(?i)aaa", systemHelper.normalizeConfigPath("regexpIgnoreCase:aaa")); } }