fix #2553 add hyphen
This commit is contained in:
parent
2c8d33bc6b
commit
af41d1fdd6
3 changed files with 64 additions and 24 deletions
|
@ -36,7 +36,6 @@ import java.util.Locale;
|
|||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
@ -53,7 +52,6 @@ import org.codelibs.fesen.search.sort.SortBuilder;
|
|||
import org.codelibs.fesen.search.sort.SortBuilders;
|
||||
import org.codelibs.fesen.search.sort.SortOrder;
|
||||
import org.codelibs.fess.Constants;
|
||||
import org.codelibs.fess.exception.FessSystemException;
|
||||
import org.codelibs.fess.helper.PermissionHelper;
|
||||
import org.codelibs.fess.mylasta.action.FessUserBean;
|
||||
import org.codelibs.fess.taglib.FessFunctions;
|
||||
|
@ -759,28 +757,7 @@ public interface FessProp {
|
|||
default PrunedTag[] getCrawlerDocumentHtmlPrunedTagsAsArray() {
|
||||
PrunedTag[] tags = (PrunedTag[]) propMap.get("crawlerDocumentHtmlPrunedTags");
|
||||
if (tags == null) {
|
||||
tags = split(getCrawlerDocumentHtmlPrunedTags(), ",").get(stream -> stream.filter(StringUtil::isNotBlank).map(v -> {
|
||||
final Pattern pattern = Pattern.compile("(\\w+)(\\[[^\\]]+\\])?(\\.\\w+)?(#\\w+)?");
|
||||
final Matcher matcher = pattern.matcher(v.trim());
|
||||
if (matcher.matches()) {
|
||||
final PrunedTag tag = new PrunedTag(matcher.group(1));
|
||||
if (matcher.group(2) != null) {
|
||||
final String attrPair = matcher.group(2).substring(1, matcher.group(2).length() - 1);
|
||||
final Matcher equalMatcher = Pattern.compile("([\\w\\-]+)=(\\S+)").matcher(attrPair);
|
||||
if (equalMatcher.matches()) {
|
||||
tag.setAttr(equalMatcher.group(1), equalMatcher.group(2));
|
||||
}
|
||||
}
|
||||
if (matcher.group(3) != null) {
|
||||
tag.setCss(matcher.group(3).substring(1));
|
||||
}
|
||||
if (matcher.group(4) != null) {
|
||||
tag.setId(matcher.group(4).substring(1));
|
||||
}
|
||||
return tag;
|
||||
}
|
||||
throw new FessSystemException("Invalid pruned tag: " + v);
|
||||
}).toArray(n -> new PrunedTag[n]));
|
||||
tags = PrunedTag.parse(getCrawlerDocumentHtmlPrunedTags());
|
||||
propMap.put("crawlerDocumentHtmlPrunedTags", tags);
|
||||
}
|
||||
return tags;
|
||||
|
|
|
@ -15,11 +15,16 @@
|
|||
*/
|
||||
package org.codelibs.fess.util;
|
||||
|
||||
import static org.codelibs.core.stream.StreamUtil.split;
|
||||
|
||||
import java.util.Objects;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.codelibs.core.lang.StringUtil;
|
||||
import org.codelibs.core.stream.StreamUtil;
|
||||
import org.codelibs.fess.exception.FessSystemException;
|
||||
import org.w3c.dom.Node;
|
||||
|
||||
public class PrunedTag {
|
||||
|
@ -105,4 +110,29 @@ public class PrunedTag {
|
|||
public String toString() {
|
||||
return "PrunedTag [tag=" + tag + ", id=" + id + ", css=" + css + ", attrName=" + attrName + ", attrValue=" + attrValue + "]";
|
||||
}
|
||||
|
||||
public static PrunedTag[] parse(final String value) {
|
||||
return split(value, ",").get(stream -> stream.filter(StringUtil::isNotBlank).map(v -> {
|
||||
final Pattern pattern = Pattern.compile("(\\w+)(\\[[^\\]]+\\])?(\\.[\\w\\-]+)?(#[\\w\\-]+)?");
|
||||
final Matcher matcher = pattern.matcher(v.trim());
|
||||
if (matcher.matches()) {
|
||||
final PrunedTag tag = new PrunedTag(matcher.group(1));
|
||||
if (matcher.group(2) != null) {
|
||||
final String attrPair = matcher.group(2).substring(1, matcher.group(2).length() - 1);
|
||||
final Matcher equalMatcher = Pattern.compile("([\\w\\-]+)=(\\S+)").matcher(attrPair);
|
||||
if (equalMatcher.matches()) {
|
||||
tag.setAttr(equalMatcher.group(1), equalMatcher.group(2));
|
||||
}
|
||||
}
|
||||
if (matcher.group(3) != null) {
|
||||
tag.setCss(matcher.group(3).substring(1));
|
||||
}
|
||||
if (matcher.group(4) != null) {
|
||||
tag.setId(matcher.group(4).substring(1));
|
||||
}
|
||||
return tag;
|
||||
}
|
||||
throw new FessSystemException("Invalid pruned tag: " + v);
|
||||
}).toArray(n -> new PrunedTag[n]));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -44,6 +44,39 @@ public class PrunedTagTest extends UnitFessTestCase {
|
|||
prunedtag.setCss(css);
|
||||
assertEquals("PrunedTag [tag=" + tag + ", id=" + id + ", css=" + css + ", attrName=" + attrName + ", attrValue=" + attrValue + "]",
|
||||
prunedtag.toString());
|
||||
}
|
||||
|
||||
public void test_parse() {
|
||||
PrunedTag[] tags = PrunedTag.parse("");
|
||||
assertEquals(0, tags.length);
|
||||
|
||||
tags = PrunedTag.parse("a");
|
||||
assertEquals(1, tags.length);
|
||||
assertEquals("PrunedTag [tag=a, id=null, css=null, attrName=null, attrValue=null]", tags[0].toString());
|
||||
|
||||
tags = PrunedTag.parse("a#test");
|
||||
assertEquals(1, tags.length);
|
||||
assertEquals("PrunedTag [tag=a, id=test, css=null, attrName=null, attrValue=null]", tags[0].toString());
|
||||
|
||||
tags = PrunedTag.parse("a.test");
|
||||
assertEquals(1, tags.length);
|
||||
assertEquals("PrunedTag [tag=a, id=null, css=test, attrName=null, attrValue=null]", tags[0].toString());
|
||||
|
||||
tags = PrunedTag.parse("a[target=_blank]");
|
||||
assertEquals(1, tags.length);
|
||||
assertEquals("PrunedTag [tag=a, id=null, css=null, attrName=target, attrValue=_blank]", tags[0].toString());
|
||||
|
||||
tags = PrunedTag.parse("a.link,div#123");
|
||||
assertEquals(2, tags.length);
|
||||
assertEquals("PrunedTag [tag=a, id=null, css=link, attrName=null, attrValue=null]", tags[0].toString());
|
||||
assertEquals("PrunedTag [tag=div, id=123, css=null, attrName=null, attrValue=null]", tags[1].toString());
|
||||
|
||||
tags = PrunedTag.parse("a#test-a");
|
||||
assertEquals(1, tags.length);
|
||||
assertEquals("PrunedTag [tag=a, id=test-a, css=null, attrName=null, attrValue=null]", tags[0].toString());
|
||||
|
||||
tags = PrunedTag.parse("a.test-a");
|
||||
assertEquals(1, tags.length);
|
||||
assertEquals("PrunedTag [tag=a, id=null, css=test-a, attrName=null, attrValue=null]", tags[0].toString());
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue