fix #1653 improve filtering paths
This commit is contained in:
parent
ee4d85d594
commit
ce72889f36
2 changed files with 77 additions and 35 deletions
|
@ -23,6 +23,7 @@ import java.util.HashMap;
|
|||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import javax.xml.parsers.SAXParser;
|
||||
|
@ -223,29 +224,7 @@ public class GsaConfigParser extends DefaultHandler {
|
|||
|
||||
protected String parseFilterPaths(final String text, final boolean web, final boolean file) {
|
||||
return split(text, "\n").get(stream -> stream.map(String::trim).filter(StringUtil::isNotBlank).map(s -> {
|
||||
if (s.startsWith("#")) {
|
||||
return null;
|
||||
} else if (s.startsWith(CONTAINS)) {
|
||||
final String v = s.substring(CONTAINS.length());
|
||||
final StringBuilder buf = new StringBuilder(100);
|
||||
return appendFileterPath(buf, escape(v));
|
||||
} else if (s.startsWith(REGEXP_IGNORE_CASE)) {
|
||||
final String v = s.substring(REGEXP_IGNORE_CASE.length());
|
||||
final StringBuilder buf = new StringBuilder(100);
|
||||
buf.append("(?i)");
|
||||
return appendFileterPath(buf, unescape(v));
|
||||
} else if (s.startsWith(REGEXP)) {
|
||||
final String v = s.substring(REGEXP.length());
|
||||
final StringBuilder buf = new StringBuilder(100);
|
||||
return appendFileterPath(buf, unescape(v));
|
||||
} else if (Arrays.stream(webProtocols).anyMatch(p -> s.startsWith(p))) {
|
||||
return escape(s) + ".*";
|
||||
} else if (Arrays.stream(fileProtocols).anyMatch(p -> s.startsWith(p))) {
|
||||
return escape(s) + ".*";
|
||||
} else {
|
||||
final StringBuilder buf = new StringBuilder(100);
|
||||
return appendFileterPath(buf, escape(s));
|
||||
}
|
||||
return getFilterPath(s);
|
||||
}).filter(s -> {
|
||||
if (StringUtil.isBlank(s)) {
|
||||
return false;
|
||||
|
@ -260,15 +239,43 @@ public class GsaConfigParser extends DefaultHandler {
|
|||
}).collect(Collectors.joining("\n")));
|
||||
}
|
||||
|
||||
protected String getFilterPath(String s) {
|
||||
if (s.startsWith("#")) {
|
||||
return StringUtil.EMPTY;
|
||||
} else if (s.startsWith(CONTAINS)) {
|
||||
final String v = s.substring(CONTAINS.length());
|
||||
final StringBuilder buf = new StringBuilder(100);
|
||||
return ".*" + appendFileterPath(buf, escape(v)) + ".*";
|
||||
} else if (s.startsWith(REGEXP_IGNORE_CASE)) {
|
||||
final String v = s.substring(REGEXP_IGNORE_CASE.length());
|
||||
final StringBuilder buf = new StringBuilder(100);
|
||||
buf.append("(?i)");
|
||||
return appendFileterPath(buf, unescape(v));
|
||||
} else if (s.startsWith(REGEXP)) {
|
||||
final String v = s.substring(REGEXP.length());
|
||||
final StringBuilder buf = new StringBuilder(100);
|
||||
return appendFileterPath(buf, unescape(v));
|
||||
} else if (Arrays.stream(webProtocols).anyMatch(p -> s.startsWith(p))) {
|
||||
return escape(s) + ".*";
|
||||
} else if (Arrays.stream(fileProtocols).anyMatch(p -> s.startsWith(p))) {
|
||||
return escape(s) + ".*";
|
||||
} else {
|
||||
final StringBuilder buf = new StringBuilder(100);
|
||||
return appendFileterPath(buf, escape(s));
|
||||
}
|
||||
}
|
||||
|
||||
protected String escape(final String s) {
|
||||
return s.replace(".", "\\.")//
|
||||
.replace("+", "\\+")//
|
||||
.replace("*", "\\*")//
|
||||
.replace("[", "\\[")//
|
||||
.replace("]", "\\]")//
|
||||
.replace("(", "\\(")//
|
||||
.replace("(", "\\)")//
|
||||
.replace("?", "\\?");
|
||||
if (s.startsWith("#")) {
|
||||
return StringUtil.EMPTY;
|
||||
} else if (s.startsWith("^") && s.endsWith("$")) {
|
||||
return "^" + Pattern.quote(s.substring(1, s.length() - 1)) + "$";
|
||||
} else if (s.startsWith("^")) {
|
||||
return "^" + Pattern.quote(s.substring(1));
|
||||
} else if (s.endsWith("$")) {
|
||||
return Pattern.quote(s.substring(0, s.length() - 1)) + "$";
|
||||
}
|
||||
return Pattern.quote(s);
|
||||
}
|
||||
|
||||
protected String unescape(final String s) {
|
||||
|
@ -276,12 +283,24 @@ public class GsaConfigParser extends DefaultHandler {
|
|||
}
|
||||
|
||||
protected String appendFileterPath(final StringBuilder buf, final String v) {
|
||||
if (!v.startsWith("^")) {
|
||||
buf.append(".*");
|
||||
if (StringUtil.isBlank(v)) {
|
||||
return StringUtil.EMPTY;
|
||||
}
|
||||
buf.append(v);
|
||||
if (!v.endsWith("$")) {
|
||||
|
||||
if (v.startsWith("^")) {
|
||||
buf.append(v);
|
||||
if (!v.endsWith("$")) {
|
||||
buf.append(".*");
|
||||
}
|
||||
} else if (v.endsWith("$")) {
|
||||
buf.append(".*");
|
||||
buf.append(v);
|
||||
} else if (v.endsWith("/\\E")) {
|
||||
buf.append(".*");
|
||||
buf.append(v);
|
||||
buf.append(".*");
|
||||
} else {
|
||||
buf.append(v);
|
||||
}
|
||||
return buf.toString();
|
||||
}
|
||||
|
|
|
@ -40,4 +40,27 @@ public class GsaConfigParserTest extends UnitFessTestCase {
|
|||
assertEquals(3, labelTypes.length);
|
||||
}
|
||||
|
||||
public void test_escape() {
|
||||
// https://www.google.com/support/enterprise/static/gsa/docs/admin/70/gsa_doc_set/admin_crawl/url_patterns.html#1076127
|
||||
assertEscapePattern("", "# Test");
|
||||
assertEscapePattern(".*\\Q!/\\E.*", "!/");
|
||||
assertEscapePattern("\\Qindex.html\\E", "index.html");
|
||||
assertEscapePattern("^\\Qhttp://\\E.*", "^http://");
|
||||
assertEscapePattern(".*\\Qindex.html\\E$", "index.html$");
|
||||
assertEscapePattern("^\\Qhttp://www.codelibs.org/page.html\\E$", "^http://www.codelibs.org/page.html$");
|
||||
assertEscapePattern("\\Qhttp://www.codelibs.org/\\E.*", "http://www.codelibs.org/");
|
||||
assertEscapePattern("\\Qsmb://server/test/\\E.*", "smb://server/test/");
|
||||
assertEscapePattern(".*\\Q?\\E.*", "contains:?");
|
||||
assertEscapePattern(".*\\Q\001\\E.*", "contains:\001");
|
||||
assertEscapePattern("(?i).*\\.exe$", "regexpIgnoreCase:\\.exe$");
|
||||
assertEscapePattern("(?i)index.html", "regexpIgnoreCase:index.html");
|
||||
assertEscapePattern(".*\\.exe$", "regexp:\\.exe$");
|
||||
assertEscapePattern("index.html", "regexp:index.html");
|
||||
}
|
||||
|
||||
private void assertEscapePattern(String expect, String value) {
|
||||
GsaConfigParser parser = new GsaConfigParser();
|
||||
assertEquals(expect, parser.getFilterPath(value));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue