fix #2553 add hyphen

This commit is contained in:
Shinsuke Sugaya 2021-04-01 18:12:25 +09:00
parent 2c8d33bc6b
commit af41d1fdd6
3 changed files with 64 additions and 24 deletions

View file

@ -36,7 +36,6 @@ import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@ -53,7 +52,6 @@ import org.codelibs.fesen.search.sort.SortBuilder;
import org.codelibs.fesen.search.sort.SortBuilders;
import org.codelibs.fesen.search.sort.SortOrder;
import org.codelibs.fess.Constants;
import org.codelibs.fess.exception.FessSystemException;
import org.codelibs.fess.helper.PermissionHelper;
import org.codelibs.fess.mylasta.action.FessUserBean;
import org.codelibs.fess.taglib.FessFunctions;
@ -759,28 +757,7 @@ public interface FessProp {
default PrunedTag[] getCrawlerDocumentHtmlPrunedTagsAsArray() {
PrunedTag[] tags = (PrunedTag[]) propMap.get("crawlerDocumentHtmlPrunedTags");
if (tags == null) {
tags = split(getCrawlerDocumentHtmlPrunedTags(), ",").get(stream -> stream.filter(StringUtil::isNotBlank).map(v -> {
final Pattern pattern = Pattern.compile("(\\w+)(\\[[^\\]]+\\])?(\\.\\w+)?(#\\w+)?");
final Matcher matcher = pattern.matcher(v.trim());
if (matcher.matches()) {
final PrunedTag tag = new PrunedTag(matcher.group(1));
if (matcher.group(2) != null) {
final String attrPair = matcher.group(2).substring(1, matcher.group(2).length() - 1);
final Matcher equalMatcher = Pattern.compile("([\\w\\-]+)=(\\S+)").matcher(attrPair);
if (equalMatcher.matches()) {
tag.setAttr(equalMatcher.group(1), equalMatcher.group(2));
}
}
if (matcher.group(3) != null) {
tag.setCss(matcher.group(3).substring(1));
}
if (matcher.group(4) != null) {
tag.setId(matcher.group(4).substring(1));
}
return tag;
}
throw new FessSystemException("Invalid pruned tag: " + v);
}).toArray(n -> new PrunedTag[n]));
tags = PrunedTag.parse(getCrawlerDocumentHtmlPrunedTags());
propMap.put("crawlerDocumentHtmlPrunedTags", tags);
}
return tags;

View file

@ -15,11 +15,16 @@
*/
package org.codelibs.fess.util;
import static org.codelibs.core.stream.StreamUtil.split;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.core.stream.StreamUtil;
import org.codelibs.fess.exception.FessSystemException;
import org.w3c.dom.Node;
public class PrunedTag {
@ -105,4 +110,29 @@ public class PrunedTag {
public String toString() {
return "PrunedTag [tag=" + tag + ", id=" + id + ", css=" + css + ", attrName=" + attrName + ", attrValue=" + attrValue + "]";
}
/**
 * Parses a comma-separated list of pruned-tag expressions.
 *
 * <p>Blank entries are skipped. Every other entry is trimmed and must have the form
 * {@code tag[attrName=attrValue].css#id}, where the bracketed attribute part, the
 * {@code .css} part and the {@code #id} part are each optional. The css and id names
 * may contain word characters and hyphens. A bracketed part that is not of the form
 * {@code name=value} is accepted but sets no attribute on the resulting tag.</p>
 *
 * @param value the comma-separated expressions, e.g. {@code "a.link,div#main-menu"}
 * @return one {@link PrunedTag} per non-blank entry
 * @throws FessSystemException if an entry does not match the expected form
 */
public static PrunedTag[] parse(final String value) {
    // Both patterns are invariant across entries: compile them once per call
    // instead of once per entry inside the stream lambda.
    final Pattern tagPattern = Pattern.compile("(\\w+)(\\[[^\\]]+\\])?(\\.[\\w\\-]+)?(#[\\w\\-]+)?");
    final Pattern attrPattern = Pattern.compile("([\\w\\-]+)=(\\S+)");
    return split(value, ",").get(stream -> stream.filter(StringUtil::isNotBlank).map(v -> {
        final Matcher matcher = tagPattern.matcher(v.trim());
        if (!matcher.matches()) {
            throw new FessSystemException("Invalid pruned tag: " + v);
        }
        final PrunedTag tag = new PrunedTag(matcher.group(1));
        final String attrGroup = matcher.group(2);
        if (attrGroup != null) {
            // Drop the surrounding '[' and ']' before splitting into name=value.
            final Matcher attrMatcher = attrPattern.matcher(attrGroup.substring(1, attrGroup.length() - 1));
            if (attrMatcher.matches()) {
                tag.setAttr(attrMatcher.group(1), attrMatcher.group(2));
            }
        }
        final String cssGroup = matcher.group(3);
        if (cssGroup != null) {
            // Skip the leading '.'.
            tag.setCss(cssGroup.substring(1));
        }
        final String idGroup = matcher.group(4);
        if (idGroup != null) {
            // Skip the leading '#'.
            tag.setId(idGroup.substring(1));
        }
        return tag;
    }).toArray(PrunedTag[]::new));
}
}

View file

@ -44,6 +44,39 @@ public class PrunedTagTest extends UnitFessTestCase {
prunedtag.setCss(css);
assertEquals("PrunedTag [tag=" + tag + ", id=" + id + ", css=" + css + ", attrName=" + attrName + ", attrValue=" + attrValue + "]",
prunedtag.toString());
}
public void test_parse() {
    // Each row: the input expression followed by the expected toString() of every parsed tag, in order.
    final String[][] cases = { //
            { "" }, //
            { "a", "PrunedTag [tag=a, id=null, css=null, attrName=null, attrValue=null]" }, //
            { "a#test", "PrunedTag [tag=a, id=test, css=null, attrName=null, attrValue=null]" }, //
            { "a.test", "PrunedTag [tag=a, id=null, css=test, attrName=null, attrValue=null]" }, //
            { "a[target=_blank]", "PrunedTag [tag=a, id=null, css=null, attrName=target, attrValue=_blank]" }, //
            { "a.link,div#123", "PrunedTag [tag=a, id=null, css=link, attrName=null, attrValue=null]",
                    "PrunedTag [tag=div, id=123, css=null, attrName=null, attrValue=null]" }, //
            // Hyphenated id and css names must be accepted.
            { "a#test-a", "PrunedTag [tag=a, id=test-a, css=null, attrName=null, attrValue=null]" }, //
            { "a.test-a", "PrunedTag [tag=a, id=null, css=test-a, attrName=null, attrValue=null]" } //
    };

    for (final String[] row : cases) {
        final PrunedTag[] parsed = PrunedTag.parse(row[0]);
        assertEquals(row.length - 1, parsed.length);
        for (int i = 0; i < parsed.length; i++) {
            assertEquals(row[i + 1], parsed[i].toString());
        }
    }
}
}