Merge pull request #1739 from EyadA/master
'Contains' in addition to RegEx for path inclusion/exclusion
This commit is contained in:
commit
1df3c77742
5 changed files with 43 additions and 8 deletions
|
@ -149,9 +149,11 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
|
|||
final List<Pattern> urlPatterList = new ArrayList<>();
|
||||
final String[] urls = getIncludedDocUrls().split("[\r\n]");
|
||||
for (final String u : urls) {
|
||||
if (StringUtil.isNotBlank(u) && !u.trim().startsWith("#")) {
|
||||
urlPatterList.add(Pattern.compile(u.trim()));
|
||||
final String v = ComponentUtil.getSystemHelper().normalizePath(u);
|
||||
if (v.isEmpty()) {
|
||||
break;
|
||||
}
|
||||
urlPatterList.add(Pattern.compile(v));
|
||||
}
|
||||
includedDocUrlPatterns = urlPatterList.toArray(new Pattern[urlPatterList.size()]);
|
||||
} else {
|
||||
|
@ -164,9 +166,11 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
|
|||
final List<Pattern> urlPatterList = new ArrayList<>();
|
||||
final String[] urls = getExcludedDocUrls().split("[\r\n]");
|
||||
for (final String u : urls) {
|
||||
if (StringUtil.isNotBlank(u) && !u.trim().startsWith("#")) {
|
||||
urlPatterList.add(Pattern.compile(u.trim()));
|
||||
final String v = ComponentUtil.getSystemHelper().normalizePath(u);
|
||||
if (v.isEmpty()) {
|
||||
break;
|
||||
}
|
||||
urlPatterList.add(Pattern.compile(v));
|
||||
}
|
||||
excludedDocUrlPatterns = urlPatterList.toArray(new Pattern[urlPatterList.size()]);
|
||||
} else if (includedDocUrlPatterns.length > 0) {
|
||||
|
|
|
@ -126,7 +126,7 @@ public class Crawler {
|
|||
public String expires;
|
||||
|
||||
protected Options() {
|
||||
// noghing
|
||||
// nothing
|
||||
}
|
||||
|
||||
protected List<String> getWebConfigIdList() {
|
||||
|
|
|
@ -214,7 +214,7 @@ public class LabelTypeHelper {
|
|||
} else {
|
||||
buf.append(split);
|
||||
}
|
||||
buf.append(path.trim());
|
||||
buf.append(ComponentUtil.getSystemHelper().normalizePath(path));
|
||||
}
|
||||
this.includedPaths = Pattern.compile(buf.toString());
|
||||
}
|
||||
|
@ -228,7 +228,7 @@ public class LabelTypeHelper {
|
|||
} else {
|
||||
buf.append(split);
|
||||
}
|
||||
buf.append(path.trim());
|
||||
buf.append(ComponentUtil.getSystemHelper().normalizePath(path));
|
||||
}
|
||||
this.excludedPaths = Pattern.compile(buf.toString());
|
||||
}
|
||||
|
@ -242,13 +242,23 @@ public class LabelTypeHelper {
|
|||
if (includedPaths != null) {
|
||||
if (includedPaths.matcher(path).matches()) {
|
||||
if (excludedPaths != null && excludedPaths.matcher(path).matches()) {
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("Path " + path + " matched against the excludes paths expression " + excludedPaths.toString());
|
||||
}
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("Path " + path + " wasn't matched against the include paths expression " + includedPaths.toString());
|
||||
}
|
||||
return false;
|
||||
} else {
|
||||
return !excludedPaths.matcher(path).matches();
|
||||
boolean match = !excludedPaths.matcher(path).matches();
|
||||
if (!match && logger.isDebugEnabled()) {
|
||||
logger.debug("Path " + path + " matched against the excludes paths expression " + includedPaths.toString());
|
||||
}
|
||||
return match;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -40,6 +40,7 @@ import java.util.concurrent.ExecutionException;
|
|||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import javax.annotation.PostConstruct;
|
||||
import javax.annotation.PreDestroy;
|
||||
|
@ -213,6 +214,19 @@ public class SystemHelper {
|
|||
}
|
||||
}
|
||||
|
||||
public String normalizePath(final String path) {
|
||||
|
||||
if (StringUtil.isBlank(path) || path.trim().startsWith("#")) {
|
||||
return StringUtils.EMPTY;
|
||||
}
|
||||
|
||||
if (path.startsWith("contains:")) {
|
||||
return (".*" + Pattern.quote(path.trim().substring("contains:".length())) + ".*");
|
||||
}
|
||||
|
||||
return path.trim();
|
||||
}
|
||||
|
||||
public String getHelpLink(final String name) {
|
||||
final String url = ComponentUtil.getFessConfig().getOnlineHelpBaseLink() + name + "-guide.html";
|
||||
return getHelpUrl(url);
|
||||
|
|
|
@ -104,4 +104,11 @@ public class SystemHelperTest extends UnitFessTestCase {
|
|||
assertEquals("bbb\\ccc", systemHelper.createSearchRole("", "aaa\\bbb\\ccc"));
|
||||
}
|
||||
|
||||
public void normalizePaths() {
|
||||
assertEquals("", systemHelper.normalizePath(""));
|
||||
assertEquals(".*Q\\www.domain.com/test\\E.*", systemHelper.normalizePath("Contains:www.domain.com/test"));
|
||||
assertEquals(".*Q\\/test\\E.*", systemHelper.normalizePath("Contains:/test/"));
|
||||
assertEquals("www.domain.com/test", systemHelper.normalizePath("www.domain.com/test"));
|
||||
assertEquals(".*domain.com/.*", systemHelper.normalizePath(".*domain.com/.*"));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue