Enhancement/feature to allow the use of "contains:" as an alternative to using RegEx. This is useful when you have very many URLs to maintain and the paths are predefined. In addition, not everyone is familiar with RegEx, and non-programmers should be able to maintain the list of URLs to include/exclude and index. Lastly, it makes porting from GSA a lot simpler, since you can just copy and paste collections to labels.
This commit is contained in:
parent
e28b72f0ab
commit
2ba4ee85b5
5 changed files with 43 additions and 8 deletions
|
@ -149,9 +149,11 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
|
|||
final List<Pattern> urlPatterList = new ArrayList<>();
|
||||
final String[] urls = getIncludedDocUrls().split("[\r\n]");
|
||||
for (final String u : urls) {
|
||||
if (StringUtil.isNotBlank(u) && !u.trim().startsWith("#")) {
|
||||
urlPatterList.add(Pattern.compile(u.trim()));
|
||||
final String v = ComponentUtil.getSystemHelper().normalizePath(u);
|
||||
if (v.isEmpty()) {
|
||||
break;
|
||||
}
|
||||
urlPatterList.add(Pattern.compile(v));
|
||||
}
|
||||
includedDocUrlPatterns = urlPatterList.toArray(new Pattern[urlPatterList.size()]);
|
||||
} else {
|
||||
|
@ -164,9 +166,11 @@ public class WebConfig extends BsWebConfig implements CrawlingConfig {
|
|||
final List<Pattern> urlPatterList = new ArrayList<>();
|
||||
final String[] urls = getExcludedDocUrls().split("[\r\n]");
|
||||
for (final String u : urls) {
|
||||
if (StringUtil.isNotBlank(u) && !u.trim().startsWith("#")) {
|
||||
urlPatterList.add(Pattern.compile(u.trim()));
|
||||
final String v = ComponentUtil.getSystemHelper().normalizePath(u);
|
||||
if (v.isEmpty()) {
|
||||
break;
|
||||
}
|
||||
urlPatterList.add(Pattern.compile(v));
|
||||
}
|
||||
excludedDocUrlPatterns = urlPatterList.toArray(new Pattern[urlPatterList.size()]);
|
||||
} else if (includedDocUrlPatterns.length > 0) {
|
||||
|
|
|
@ -126,7 +126,7 @@ public class Crawler {
|
|||
public String expires;
|
||||
|
||||
protected Options() {
|
||||
// noghing
|
||||
// nothing
|
||||
}
|
||||
|
||||
protected List<String> getWebConfigIdList() {
|
||||
|
|
|
@ -214,7 +214,7 @@ public class LabelTypeHelper {
|
|||
} else {
|
||||
buf.append(split);
|
||||
}
|
||||
buf.append(path.trim());
|
||||
buf.append(ComponentUtil.getSystemHelper().normalizePath(path));
|
||||
}
|
||||
this.includedPaths = Pattern.compile(buf.toString());
|
||||
}
|
||||
|
@ -228,7 +228,7 @@ public class LabelTypeHelper {
|
|||
} else {
|
||||
buf.append(split);
|
||||
}
|
||||
buf.append(path.trim());
|
||||
buf.append(ComponentUtil.getSystemHelper().normalizePath(path));
|
||||
}
|
||||
this.excludedPaths = Pattern.compile(buf.toString());
|
||||
}
|
||||
|
@ -242,13 +242,23 @@ public class LabelTypeHelper {
|
|||
if (includedPaths != null) {
|
||||
if (includedPaths.matcher(path).matches()) {
|
||||
if (excludedPaths != null && excludedPaths.matcher(path).matches()) {
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("Path " + path + " matched against the excludes paths expression " + excludedPaths.toString());
|
||||
}
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("Path " + path + " wasn't matched against the include paths expression " + includedPaths.toString());
|
||||
}
|
||||
return false;
|
||||
} else {
|
||||
return !excludedPaths.matcher(path).matches();
|
||||
boolean match = !excludedPaths.matcher(path).matches();
|
||||
if (!match && logger.isDebugEnabled()) {
|
||||
logger.debug("Path " + path + " matched against the excludes paths expression " + includedPaths.toString());
|
||||
}
|
||||
return match;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -40,6 +40,7 @@ import java.util.concurrent.ExecutionException;
|
|||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import javax.annotation.PostConstruct;
|
||||
import javax.annotation.PreDestroy;
|
||||
|
@ -213,6 +214,19 @@ public class SystemHelper {
|
|||
}
|
||||
}
|
||||
|
||||
public String normalizePath(final String path) {
|
||||
|
||||
if (StringUtil.isBlank(path) || path.trim().startsWith("#")) {
|
||||
return StringUtils.EMPTY;
|
||||
}
|
||||
|
||||
if (path.startsWith("contains:")) {
|
||||
return (".*" + Pattern.quote(path.trim().substring("contains:".length())) + ".*");
|
||||
}
|
||||
|
||||
return path.trim();
|
||||
}
|
||||
|
||||
public String getHelpLink(final String name) {
|
||||
final String url = ComponentUtil.getFessConfig().getOnlineHelpBaseLink() + name + "-guide.html";
|
||||
return getHelpUrl(url);
|
||||
|
|
|
@ -104,4 +104,11 @@ public class SystemHelperTest extends UnitFessTestCase {
|
|||
assertEquals("bbb\\ccc", systemHelper.createSearchRole("", "aaa\\bbb\\ccc"));
|
||||
}
|
||||
|
||||
public void normalizePaths() {
|
||||
assertEquals("", systemHelper.normalizePath(""));
|
||||
assertEquals(".*Q\\www.domain.com/test\\E.*", systemHelper.normalizePath("Contains:www.domain.com/test"));
|
||||
assertEquals(".*Q\\/test\\E.*", systemHelper.normalizePath("Contains:/test/"));
|
||||
assertEquals("www.domain.com/test", systemHelper.normalizePath("www.domain.com/test"));
|
||||
assertEquals(".*domain.com/.*", systemHelper.normalizePath(".*domain.com/.*"));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue