fix #1740 add regexp: and regexpIgnoreCase:

This commit is contained in:
Shinsuke Sugaya 2018-07-05 08:56:28 +09:00
parent 1df3c77742
commit f0e2046cc5
6 changed files with 65 additions and 28 deletions

View file

@ -149,8 +149,9 @@ public class FileConfig extends BsFileConfig implements CrawlingConfig {
final List<Pattern> pathPatterList = new ArrayList<>();
final String[] paths = getIncludedDocPaths().split("[\r\n]");
for (final String u : paths) {
if (StringUtil.isNotBlank(u) && !u.trim().startsWith("#")) {
pathPatterList.add(Pattern.compile(systemHelper.encodeUrlFilter(u.trim())));
final String v = systemHelper.normalizeConfigPath(u);
if (StringUtil.isNotBlank(v)) {
pathPatterList.add(Pattern.compile(systemHelper.encodeUrlFilter(v)));
}
}
includedDocPathPatterns = pathPatterList.toArray(new Pattern[pathPatterList.size()]);
@ -164,8 +165,9 @@ public class FileConfig extends BsFileConfig implements CrawlingConfig {
final List<Pattern> pathPatterList = new ArrayList<>();
final String[] paths = getExcludedDocPaths().split("[\r\n]");
for (final String u : paths) {
if (StringUtil.isNotBlank(u) && !u.trim().startsWith("#")) {
pathPatterList.add(Pattern.compile(systemHelper.encodeUrlFilter(u.trim())));
final String v = systemHelper.normalizeConfigPath(u);
if (StringUtil.isNotBlank(v)) {
pathPatterList.add(Pattern.compile(systemHelper.encodeUrlFilter(v)));
}
}
excludedDocPathPatterns = pathPatterList.toArray(new Pattern[pathPatterList.size()]);

View file

@ -34,6 +34,7 @@ import org.codelibs.fess.crawler.client.http.HcHttpClient;
import org.codelibs.fess.es.config.bsentity.BsWebConfig;
import org.codelibs.fess.es.config.exbhv.LabelTypeBhv;
import org.codelibs.fess.es.config.exbhv.WebConfigToLabelBhv;
import org.codelibs.fess.helper.SystemHelper;
import org.codelibs.fess.mylasta.direction.FessConfig;
import org.codelibs.fess.util.ComponentUtil;
import org.codelibs.fess.util.ParameterUtil;
@ -144,16 +145,16 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
protected synchronized void initDocUrlPattern() {
final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
if (includedDocUrlPatterns == null) {
if (StringUtil.isNotBlank(getIncludedDocUrls())) {
final List<Pattern> urlPatterList = new ArrayList<>();
final String[] urls = getIncludedDocUrls().split("[\r\n]");
for (final String u : urls) {
final String v = ComponentUtil.getSystemHelper().normalizePath(u);
if (v.isEmpty()) {
break;
final String v = systemHelper.normalizeConfigPath(u);
if (StringUtil.isNotBlank(v)) {
urlPatterList.add(Pattern.compile(v));
}
urlPatterList.add(Pattern.compile(v));
}
includedDocUrlPatterns = urlPatterList.toArray(new Pattern[urlPatterList.size()]);
} else {
@ -166,11 +167,10 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
final List<Pattern> urlPatterList = new ArrayList<>();
final String[] urls = getExcludedDocUrls().split("[\r\n]");
for (final String u : urls) {
final String v = ComponentUtil.getSystemHelper().normalizePath(u);
if (v.isEmpty()) {
break;
final String v = systemHelper.normalizeConfigPath(u);
if (StringUtil.isNotBlank(v)) {
urlPatterList.add(Pattern.compile(v));
}
urlPatterList.add(Pattern.compile(v));
}
excludedDocUrlPatterns = urlPatterList.toArray(new Pattern[urlPatterList.size()]);
} else if (includedDocUrlPatterns.length > 0) {

View file

@ -205,6 +205,7 @@ public class LabelTypeHelper {
public LabelTypePattern(final String value, final String includedPaths, final String excludedPaths) {
this.value = value;
final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
if (StringUtil.isNotBlank(includedPaths)) {
final StringBuilder buf = new StringBuilder(100);
char split = 0;
@ -214,7 +215,10 @@ public class LabelTypeHelper {
} else {
buf.append(split);
}
buf.append(ComponentUtil.getSystemHelper().normalizePath(path));
final String normalizePath = systemHelper.normalizeConfigPath(path);
if (StringUtil.isNotBlank(normalizePath)) {
buf.append(normalizePath);
}
}
this.includedPaths = Pattern.compile(buf.toString());
}
@ -228,7 +232,10 @@ public class LabelTypeHelper {
} else {
buf.append(split);
}
buf.append(ComponentUtil.getSystemHelper().normalizePath(path));
final String normalizePath = systemHelper.normalizeConfigPath(path);
if (StringUtil.isNotBlank(normalizePath)) {
buf.append(normalizePath);
}
}
this.excludedPaths = Pattern.compile(buf.toString());
}

View file

@ -57,6 +57,7 @@ import org.codelibs.fess.mylasta.action.FessMessages;
import org.codelibs.fess.mylasta.action.FessUserBean;
import org.codelibs.fess.mylasta.direction.FessConfig;
import org.codelibs.fess.util.ComponentUtil;
import org.codelibs.fess.util.GsaConfigParser;
import org.codelibs.fess.util.ResourceUtil;
import org.codelibs.fess.validation.FessActionValidator;
import org.lastaflute.core.message.supplier.UserMessagesCreator;
@ -75,6 +76,7 @@ import com.google.common.cache.LoadingCache;
import com.ibm.icu.util.ULocale;
public class SystemHelper {
private static final Logger logger = LoggerFactory.getLogger(SystemHelper.class);
protected final Map<String, String> designJspFileNameMap = new LinkedHashMap<>();
@ -214,17 +216,34 @@ public class SystemHelper {
}
}
// NOTE(review): this span reads like a rendered diff whose +/- markers were lost, so lines of
// the former normalizePath appear interleaved with its replacement, normalizeConfigPath.
// Confirm against the repository before treating the span as compilable code.
/**
 * Converts one line of a path/URL filter setting into a regular-expression string
 * (description of normalizeConfigPath, the variant that handles the regexp prefixes).
 *
 * Blank input yields an empty string, as does input whose trimmed text starts with "#"
 * (presumably a comment line). Otherwise the trimmed text is checked for a leading prefix:
 * GsaConfigParser.CONTAINS: the remainder is passed through Pattern.quote and wrapped
 * in ".*" on both sides;
 * GsaConfigParser.REGEXP or GsaConfigParser.REGEXP_CASE: the prefix is stripped and the
 * remainder is returned unchanged;
 * GsaConfigParser.REGEXP_IGNORE_CASE: the prefix is stripped and "(?i)" is prepended.
 * Text without any of these prefixes is returned trimmed.
 *
 * @param path a single line of the setting; null handling depends on StringUtil.isBlank
 *             (presumably null-safe; verify)
 * @return the regular-expression string, or an empty string when the line is blank or
 *         starts with "#"
 */
public String normalizePath(final String path) {
public String normalizeConfigPath(final String path) {
// The next line looks like the pre-change guard (blank and "#" checks combined).
if (StringUtil.isBlank(path) || path.trim().startsWith("#")) {
if (StringUtil.isBlank(path)) {
return StringUtils.EMPTY;
}
// The next two lines look like the pre-change "contains:" handling; it tests the prefix on the untrimmed input.
if (path.startsWith("contains:")) {
return (".*" + Pattern.quote(path.trim().substring("contains:".length())) + ".*");
// All prefix checks below operate on the trimmed text.
String p = path.trim();
if (p.startsWith("#")) {
return StringUtils.EMPTY;
}
return path.trim();
// Literal substring match: quote the remainder so regex metacharacters are not interpreted.
if (p.startsWith(GsaConfigParser.CONTAINS)) {
return ".*" + Pattern.quote(p.substring(GsaConfigParser.CONTAINS.length())) + ".*";
}
// Explicit regular expression: only the prefix is removed.
if (p.startsWith(GsaConfigParser.REGEXP)) {
return p.substring(GsaConfigParser.REGEXP.length());
}
// Same result as the plain regexp prefix: only the prefix is removed.
if (p.startsWith(GsaConfigParser.REGEXP_CASE)) {
return p.substring(GsaConfigParser.REGEXP_CASE.length());
}
// Case-insensitive regular expression, expressed with the embedded (?i) flag.
if (p.startsWith(GsaConfigParser.REGEXP_IGNORE_CASE)) {
return "(?i)" + p.substring(GsaConfigParser.REGEXP_IGNORE_CASE.length());
}
// No recognised prefix: return the trimmed text as is.
return p;
}
public String getHelpLink(final String name) {

View file

@ -47,11 +47,13 @@ public class GsaConfigParser extends DefaultHandler {
private static final Logger logger = LoggerFactory.getLogger(GsaConfigParser.class);
// NOTE(review): old (protected) and new (public) declarations of the same constants appear
// interleaved here, apparently a diff rendered without +/- markers; confirm which lines
// belong to the current file.
// Prefix strings recognised at the start of a filter line.
protected static final String REGEXP = "regexp:";
// Marks the remainder of the line as a regular expression.
public static final String REGEXP = "regexp:";
protected static final String REGEXP_IGNORE_CASE = "regexpIgnoreCase:";
// Like "regexp:", with case sensitivity stated explicitly in the prefix name.
public static final String REGEXP_CASE = "regexpCase:";
protected static final String CONTAINS = "contains:";
// Marks the remainder as a regular expression to be matched case-insensitively.
public static final String REGEXP_IGNORE_CASE = "regexpIgnoreCase:";
// Marks the remainder as literal text to be found anywhere in the value (presumably; verify against the parser).
public static final String CONTAINS = "contains:";
protected static final String COLLECTIONS = "collections";
@ -254,6 +256,10 @@ public class GsaConfigParser extends DefaultHandler {
final StringBuilder buf = new StringBuilder(100);
buf.append("(?i)");
return appendFileterPath(buf, unescape(v));
} else if (s.startsWith(REGEXP_CASE)) {
final String v = s.substring(REGEXP_CASE.length());
final StringBuilder buf = new StringBuilder(100);
return appendFileterPath(buf, unescape(v));
} else if (s.startsWith(REGEXP)) {
final String v = s.substring(REGEXP.length());
final StringBuilder buf = new StringBuilder(100);

View file

@ -104,11 +104,14 @@ public class SystemHelperTest extends UnitFessTestCase {
assertEquals("bbb\\ccc", systemHelper.createSearchRole("", "aaa\\bbb\\ccc"));
}
// NOTE(review): the former normalizePaths test and its replacement test_normalizeConfigPath
// appear interleaved here (diff rendered without +/- markers); confirm before compiling.
public void normalizePaths() {
// The next five assertions call normalizePath and look like the pre-change expectations.
assertEquals("", systemHelper.normalizePath(""));
assertEquals(".*Q\\www.domain.com/test\\E.*", systemHelper.normalizePath("Contains:www.domain.com/test"));
assertEquals(".*Q\\/test\\E.*", systemHelper.normalizePath("Contains:/test/"));
assertEquals("www.domain.com/test", systemHelper.normalizePath("www.domain.com/test"));
assertEquals(".*domain.com/.*", systemHelper.normalizePath(".*domain.com/.*"));
// Checks normalizeConfigPath for empty input, the "contains:" prefix, unprefixed patterns,
// and the "regexp:", "regexpCase:" and "regexpIgnoreCase:" prefixes.
public void test_normalizeConfigPath() {
assertEquals("", systemHelper.normalizeConfigPath(""));
// "contains:" values are expected to be wrapped as .*\Q...\E.* (literal match anywhere).
assertEquals(".*\\Qwww.domain.com/test\\E.*", systemHelper.normalizeConfigPath("contains:www.domain.com/test"));
assertEquals(".*\\Q/test/\\E.*", systemHelper.normalizeConfigPath("contains:/test/"));
// Values without a recognised prefix are expected to come back unchanged.
assertEquals("www.domain.com/test", systemHelper.normalizeConfigPath("www.domain.com/test"));
assertEquals(".*domain.com/.*", systemHelper.normalizeConfigPath(".*domain.com/.*"));
// Regexp prefixes are stripped; the ignore-case variant gains a leading (?i).
assertEquals("aaa", systemHelper.normalizeConfigPath("regexp:aaa"));
assertEquals("aaa", systemHelper.normalizeConfigPath("regexpCase:aaa"));
assertEquals("(?i)aaa", systemHelper.normalizeConfigPath("regexpIgnoreCase:aaa"));
}
}