Merge pull request #1739 from EyadA/master

'Contains' in addition to RegEx for path inclusion/exclusion
This commit is contained in:
Shinsuke Sugaya 2018-07-05 08:09:05 +09:00 committed by GitHub
commit 1df3c77742
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 43 additions and 8 deletions

View file

@ -149,9 +149,11 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
final List<Pattern> urlPatterList = new ArrayList<>();
final String[] urls = getIncludedDocUrls().split("[\r\n]");
for (final String u : urls) {
if (StringUtil.isNotBlank(u) && !u.trim().startsWith("#")) {
urlPatterList.add(Pattern.compile(u.trim()));
final String v = ComponentUtil.getSystemHelper().normalizePath(u);
if (v.isEmpty()) {
break;
}
urlPatterList.add(Pattern.compile(v));
}
includedDocUrlPatterns = urlPatterList.toArray(new Pattern[urlPatterList.size()]);
} else {
@ -164,9 +166,11 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
final List<Pattern> urlPatterList = new ArrayList<>();
final String[] urls = getExcludedDocUrls().split("[\r\n]");
for (final String u : urls) {
if (StringUtil.isNotBlank(u) && !u.trim().startsWith("#")) {
urlPatterList.add(Pattern.compile(u.trim()));
final String v = ComponentUtil.getSystemHelper().normalizePath(u);
if (v.isEmpty()) {
break;
}
urlPatterList.add(Pattern.compile(v));
}
excludedDocUrlPatterns = urlPatterList.toArray(new Pattern[urlPatterList.size()]);
} else if (includedDocUrlPatterns.length > 0) {

View file

@ -126,7 +126,7 @@ public class Crawler {
public String expires;
protected Options() {
    // Intentionally empty: nothing to initialize here.
}
protected List<String> getWebConfigIdList() {

View file

@ -214,7 +214,7 @@ public class LabelTypeHelper {
} else {
buf.append(split);
}
buf.append(path.trim());
buf.append(ComponentUtil.getSystemHelper().normalizePath(path));
}
this.includedPaths = Pattern.compile(buf.toString());
}
@ -228,7 +228,7 @@ public class LabelTypeHelper {
} else {
buf.append(split);
}
buf.append(path.trim());
buf.append(ComponentUtil.getSystemHelper().normalizePath(path));
}
this.excludedPaths = Pattern.compile(buf.toString());
}
@ -242,13 +242,23 @@ public class LabelTypeHelper {
if (includedPaths != null) {
if (includedPaths.matcher(path).matches()) {
if (excludedPaths != null && excludedPaths.matcher(path).matches()) {
if (logger.isDebugEnabled()) {
logger.debug("Path " + path + " matched against the excludes paths expression " + excludedPaths.toString());
}
return false;
}
return true;
}
if (logger.isDebugEnabled()) {
logger.debug("Path " + path + " wasn't matched against the include paths expression " + includedPaths.toString());
}
return false;
} else {
return !excludedPaths.matcher(path).matches();
boolean match = !excludedPaths.matcher(path).matches();
if (!match && logger.isDebugEnabled()) {
logger.debug("Path " + path + " matched against the excludes paths expression " + includedPaths.toString());
}
return match;
}
}

View file

@ -40,6 +40,7 @@ import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import javax.annotation.PostConstruct;
import javax.annotation.PreDestroy;
@ -213,6 +214,19 @@ public class SystemHelper {
}
}
/**
 * Normalizes one line of a path include/exclude setting into a regular expression string.
 *
 * <p>Surrounding whitespace is ignored. A blank line, or a line whose first
 * non-whitespace character is {@code #}, yields an empty string. A line that starts
 * with {@code contains:} is converted into a pattern matching any input that contains
 * the remainder of the line literally. Any other line is returned trimmed and is
 * otherwise left untouched.</p>
 *
 * @param path a single line taken from a path setting
 * @return the regular expression string for the line, or an empty string when the line
 *         is blank or a comment
 */
public String normalizePath(final String path) {
    if (StringUtil.isBlank(path)) {
        return StringUtils.EMPTY;
    }

    // Trim once so that the comment check, the prefix check and the returned value all
    // look at the same text; a line with leading whitespace before "contains:" must be
    // handled exactly like one without it.
    final String trimmed = path.trim();
    if (trimmed.startsWith("#")) {
        return StringUtils.EMPTY;
    }

    final String containsPrefix = "contains:";
    if (trimmed.startsWith(containsPrefix)) {
        // Quote the remainder so regex metacharacters in it are matched literally.
        return ".*" + Pattern.quote(trimmed.substring(containsPrefix.length())) + ".*";
    }
    return trimmed;
}
public String getHelpLink(final String name) {
final String url = ComponentUtil.getFessConfig().getOnlineHelpBaseLink() + name + "-guide.html";
return getHelpUrl(url);

View file

@ -104,4 +104,11 @@ public class SystemHelperTest extends UnitFessTestCase {
assertEquals("bbb\\ccc", systemHelper.createSearchRole("", "aaa\\bbb\\ccc"));
}
/**
 * Checks how normalizePath treats empty lines, "contains:" lines and plain regex lines.
 *
 * NOTE(review): if the test base class discovers tests by a "test" name prefix, this
 * method is never executed under its current name - confirm and rename if so.
 */
public void normalizePaths() {
    assertEquals("", systemHelper.normalizePath(""));
    // The prefix is the lowercase "contains:"; the remainder is wrapped by Pattern.quote,
    // i.e. \Q...\E, and is kept verbatim (including a trailing slash).
    assertEquals(".*\\Qwww.domain.com/test\\E.*", systemHelper.normalizePath("contains:www.domain.com/test"));
    assertEquals(".*\\Q/test/\\E.*", systemHelper.normalizePath("contains:/test/"));
    // Lines without the prefix are passed through as regular expressions.
    assertEquals("www.domain.com/test", systemHelper.normalizePath("www.domain.com/test"));
    assertEquals(".*domain.com/.*", systemHelper.normalizePath(".*domain.com/.*"));
}
}