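fix #1534: support '-' in attribute names and any non-whitespace characters (e.g. '-', '.', ':', '_') in attribute values of pruned tags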
This commit is contained in:
parent
25b96c5978
commit
00f8e23a06
2 changed files with 5 additions and 2 deletions
|
@ -659,7 +659,7 @@ public interface FessProp {
|
|||
final PrunedTag tag = new PrunedTag(matcher.group(1));
|
||||
if (matcher.group(2) != null) {
|
||||
final String attrPair = matcher.group(2).substring(1, matcher.group(2).length() - 1);
|
||||
final Matcher equalMatcher = Pattern.compile("(\\w+)=(\\w+)").matcher(attrPair);
|
||||
final Matcher equalMatcher = Pattern.compile("([\\w\\-]+)=(\\S+)").matcher(attrPair);
|
||||
if (equalMatcher.matches()) {
|
||||
tag.setAttr(equalMatcher.group(1), equalMatcher.group(2));
|
||||
}
|
||||
|
|
|
@ -146,7 +146,7 @@ public class FessPropTest extends UnitFessTestCase {
|
|||
FessConfig fessConfig = new FessConfig.SimpleImpl() {
|
||||
@Override
|
||||
public String getCrawlerDocumentHtmlPrunedTags() {
|
||||
return "script,div#main,p.image,a[rel=nofollow]";
|
||||
return "script,div#main,p.image,a[rel=nofollow],div[x-y=a-.:_0]";
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -163,6 +163,9 @@ public class FessPropTest extends UnitFessTestCase {
|
|||
|
||||
assertTrue(matchesTag(tags[3], "<a rel=\"nofollow\"></a>"));
|
||||
assertFalse(matchesTag(tags[3], "<a></a>"));
|
||||
|
||||
assertTrue(matchesTag(tags[4], "<div x-y=\"a-.:_0\"></div>"));
|
||||
assertFalse(matchesTag(tags[4], "<div x-y=\"a 0\"></div>"));
|
||||
}
|
||||
|
||||
private boolean matchesTag(final PrunedTag tag, final String text) throws Exception {
|
||||
|
|
Loading…
Add table
Reference in a new issue