fix #1740 add regexp:, regexpCase: and regexpIgnoreCase:
This commit is contained in:
parent
1df3c77742
commit
f0e2046cc5
6 changed files with 65 additions and 28 deletions
|
@ -149,8 +149,9 @@ public class FileConfig extends BsFileConfig implements CrawlingConfig {
|
|||
final List<Pattern> pathPatterList = new ArrayList<>();
|
||||
final String[] paths = getIncludedDocPaths().split("[\r\n]");
|
||||
for (final String u : paths) {
|
||||
if (StringUtil.isNotBlank(u) && !u.trim().startsWith("#")) {
|
||||
pathPatterList.add(Pattern.compile(systemHelper.encodeUrlFilter(u.trim())));
|
||||
final String v = systemHelper.normalizeConfigPath(u);
|
||||
if (StringUtil.isNotBlank(v)) {
|
||||
pathPatterList.add(Pattern.compile(systemHelper.encodeUrlFilter(v)));
|
||||
}
|
||||
}
|
||||
includedDocPathPatterns = pathPatterList.toArray(new Pattern[pathPatterList.size()]);
|
||||
|
@ -164,8 +165,9 @@ public class FileConfig extends BsFileConfig implements CrawlingConfig {
|
|||
final List<Pattern> pathPatterList = new ArrayList<>();
|
||||
final String[] paths = getExcludedDocPaths().split("[\r\n]");
|
||||
for (final String u : paths) {
|
||||
if (StringUtil.isNotBlank(u) && !u.trim().startsWith("#")) {
|
||||
pathPatterList.add(Pattern.compile(systemHelper.encodeUrlFilter(u.trim())));
|
||||
final String v = systemHelper.normalizeConfigPath(u);
|
||||
if (StringUtil.isNotBlank(v)) {
|
||||
pathPatterList.add(Pattern.compile(systemHelper.encodeUrlFilter(v)));
|
||||
}
|
||||
}
|
||||
excludedDocPathPatterns = pathPatterList.toArray(new Pattern[pathPatterList.size()]);
|
||||
|
|
|
@ -34,6 +34,7 @@ import org.codelibs.fess.crawler.client.http.HcHttpClient;
|
|||
import org.codelibs.fess.es.config.bsentity.BsWebConfig;
|
||||
import org.codelibs.fess.es.config.exbhv.LabelTypeBhv;
|
||||
import org.codelibs.fess.es.config.exbhv.WebConfigToLabelBhv;
|
||||
import org.codelibs.fess.helper.SystemHelper;
|
||||
import org.codelibs.fess.mylasta.direction.FessConfig;
|
||||
import org.codelibs.fess.util.ComponentUtil;
|
||||
import org.codelibs.fess.util.ParameterUtil;
|
||||
|
@ -144,16 +145,16 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
|
|||
|
||||
protected synchronized void initDocUrlPattern() {
|
||||
|
||||
final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
|
||||
if (includedDocUrlPatterns == null) {
|
||||
if (StringUtil.isNotBlank(getIncludedDocUrls())) {
|
||||
final List<Pattern> urlPatterList = new ArrayList<>();
|
||||
final String[] urls = getIncludedDocUrls().split("[\r\n]");
|
||||
for (final String u : urls) {
|
||||
final String v = ComponentUtil.getSystemHelper().normalizePath(u);
|
||||
if (v.isEmpty()) {
|
||||
break;
|
||||
final String v = systemHelper.normalizeConfigPath(u);
|
||||
if (StringUtil.isNotBlank(v)) {
|
||||
urlPatterList.add(Pattern.compile(v));
|
||||
}
|
||||
urlPatterList.add(Pattern.compile(v));
|
||||
}
|
||||
includedDocUrlPatterns = urlPatterList.toArray(new Pattern[urlPatterList.size()]);
|
||||
} else {
|
||||
|
@ -166,11 +167,10 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
|
|||
final List<Pattern> urlPatterList = new ArrayList<>();
|
||||
final String[] urls = getExcludedDocUrls().split("[\r\n]");
|
||||
for (final String u : urls) {
|
||||
final String v = ComponentUtil.getSystemHelper().normalizePath(u);
|
||||
if (v.isEmpty()) {
|
||||
break;
|
||||
final String v = systemHelper.normalizeConfigPath(u);
|
||||
if (StringUtil.isNotBlank(v)) {
|
||||
urlPatterList.add(Pattern.compile(v));
|
||||
}
|
||||
urlPatterList.add(Pattern.compile(v));
|
||||
}
|
||||
excludedDocUrlPatterns = urlPatterList.toArray(new Pattern[urlPatterList.size()]);
|
||||
} else if (includedDocUrlPatterns.length > 0) {
|
||||
|
|
|
@ -205,6 +205,7 @@ public class LabelTypeHelper {
|
|||
public LabelTypePattern(final String value, final String includedPaths, final String excludedPaths) {
|
||||
this.value = value;
|
||||
|
||||
final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
|
||||
if (StringUtil.isNotBlank(includedPaths)) {
|
||||
final StringBuilder buf = new StringBuilder(100);
|
||||
char split = 0;
|
||||
|
@ -214,7 +215,10 @@ public class LabelTypeHelper {
|
|||
} else {
|
||||
buf.append(split);
|
||||
}
|
||||
buf.append(ComponentUtil.getSystemHelper().normalizePath(path));
|
||||
final String normalizePath = systemHelper.normalizeConfigPath(path);
|
||||
if (StringUtil.isNotBlank(normalizePath)) {
|
||||
buf.append(normalizePath);
|
||||
}
|
||||
}
|
||||
this.includedPaths = Pattern.compile(buf.toString());
|
||||
}
|
||||
|
@ -228,7 +232,10 @@ public class LabelTypeHelper {
|
|||
} else {
|
||||
buf.append(split);
|
||||
}
|
||||
buf.append(ComponentUtil.getSystemHelper().normalizePath(path));
|
||||
final String normalizePath = systemHelper.normalizeConfigPath(path);
|
||||
if (StringUtil.isNotBlank(normalizePath)) {
|
||||
buf.append(normalizePath);
|
||||
}
|
||||
}
|
||||
this.excludedPaths = Pattern.compile(buf.toString());
|
||||
}
|
||||
|
|
|
@ -57,6 +57,7 @@ import org.codelibs.fess.mylasta.action.FessMessages;
|
|||
import org.codelibs.fess.mylasta.action.FessUserBean;
|
||||
import org.codelibs.fess.mylasta.direction.FessConfig;
|
||||
import org.codelibs.fess.util.ComponentUtil;
|
||||
import org.codelibs.fess.util.GsaConfigParser;
|
||||
import org.codelibs.fess.util.ResourceUtil;
|
||||
import org.codelibs.fess.validation.FessActionValidator;
|
||||
import org.lastaflute.core.message.supplier.UserMessagesCreator;
|
||||
|
@ -75,6 +76,7 @@ import com.google.common.cache.LoadingCache;
|
|||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
public class SystemHelper {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(SystemHelper.class);
|
||||
|
||||
protected final Map<String, String> designJspFileNameMap = new LinkedHashMap<>();
|
||||
|
@ -214,17 +216,34 @@ public class SystemHelper {
|
|||
}
|
||||
}
|
||||
|
||||
public String normalizePath(final String path) {
|
||||
public String normalizeConfigPath(final String path) {
|
||||
|
||||
if (StringUtil.isBlank(path) || path.trim().startsWith("#")) {
|
||||
if (StringUtil.isBlank(path)) {
|
||||
return StringUtils.EMPTY;
|
||||
}
|
||||
|
||||
if (path.startsWith("contains:")) {
|
||||
return (".*" + Pattern.quote(path.trim().substring("contains:".length())) + ".*");
|
||||
String p = path.trim();
|
||||
if (p.startsWith("#")) {
|
||||
return StringUtils.EMPTY;
|
||||
}
|
||||
|
||||
return path.trim();
|
||||
if (p.startsWith(GsaConfigParser.CONTAINS)) {
|
||||
return ".*" + Pattern.quote(p.substring(GsaConfigParser.CONTAINS.length())) + ".*";
|
||||
}
|
||||
|
||||
if (p.startsWith(GsaConfigParser.REGEXP)) {
|
||||
return p.substring(GsaConfigParser.REGEXP.length());
|
||||
}
|
||||
|
||||
if (p.startsWith(GsaConfigParser.REGEXP_CASE)) {
|
||||
return p.substring(GsaConfigParser.REGEXP_CASE.length());
|
||||
}
|
||||
|
||||
if (p.startsWith(GsaConfigParser.REGEXP_IGNORE_CASE)) {
|
||||
return "(?i)" + p.substring(GsaConfigParser.REGEXP_IGNORE_CASE.length());
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
public String getHelpLink(final String name) {
|
||||
|
|
|
@ -47,11 +47,13 @@ public class GsaConfigParser extends DefaultHandler {
|
|||
|
||||
private static final Logger logger = LoggerFactory.getLogger(GsaConfigParser.class);
|
||||
|
||||
protected static final String REGEXP = "regexp:";
|
||||
public static final String REGEXP = "regexp:";
|
||||
|
||||
protected static final String REGEXP_IGNORE_CASE = "regexpIgnoreCase:";
|
||||
public static final String REGEXP_CASE = "regexpCase:";
|
||||
|
||||
protected static final String CONTAINS = "contains:";
|
||||
public static final String REGEXP_IGNORE_CASE = "regexpIgnoreCase:";
|
||||
|
||||
public static final String CONTAINS = "contains:";
|
||||
|
||||
protected static final String COLLECTIONS = "collections";
|
||||
|
||||
|
@ -254,6 +256,10 @@ public class GsaConfigParser extends DefaultHandler {
|
|||
final StringBuilder buf = new StringBuilder(100);
|
||||
buf.append("(?i)");
|
||||
return appendFileterPath(buf, unescape(v));
|
||||
} else if (s.startsWith(REGEXP_CASE)) {
|
||||
final String v = s.substring(REGEXP_CASE.length());
|
||||
final StringBuilder buf = new StringBuilder(100);
|
||||
return appendFileterPath(buf, unescape(v));
|
||||
} else if (s.startsWith(REGEXP)) {
|
||||
final String v = s.substring(REGEXP.length());
|
||||
final StringBuilder buf = new StringBuilder(100);
|
||||
|
|
|
@ -104,11 +104,14 @@ public class SystemHelperTest extends UnitFessTestCase {
|
|||
assertEquals("bbb\\ccc", systemHelper.createSearchRole("", "aaa\\bbb\\ccc"));
|
||||
}
|
||||
|
||||
public void normalizePaths() {
|
||||
assertEquals("", systemHelper.normalizePath(""));
|
||||
assertEquals(".*Q\\www.domain.com/test\\E.*", systemHelper.normalizePath("Contains:www.domain.com/test"));
|
||||
assertEquals(".*Q\\/test\\E.*", systemHelper.normalizePath("Contains:/test/"));
|
||||
assertEquals("www.domain.com/test", systemHelper.normalizePath("www.domain.com/test"));
|
||||
assertEquals(".*domain.com/.*", systemHelper.normalizePath(".*domain.com/.*"));
|
||||
public void test_normalizeConfigPath() {
|
||||
assertEquals("", systemHelper.normalizeConfigPath(""));
|
||||
assertEquals(".*\\Qwww.domain.com/test\\E.*", systemHelper.normalizeConfigPath("contains:www.domain.com/test"));
|
||||
assertEquals(".*\\Q/test/\\E.*", systemHelper.normalizeConfigPath("contains:/test/"));
|
||||
assertEquals("www.domain.com/test", systemHelper.normalizeConfigPath("www.domain.com/test"));
|
||||
assertEquals(".*domain.com/.*", systemHelper.normalizeConfigPath(".*domain.com/.*"));
|
||||
assertEquals("aaa", systemHelper.normalizeConfigPath("regexp:aaa"));
|
||||
assertEquals("aaa", systemHelper.normalizeConfigPath("regexpCase:aaa"));
|
||||
assertEquals("(?i)aaa", systemHelper.normalizeConfigPath("regexpIgnoreCase:aaa"));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue