Merge branch '10.3.x'

This commit is contained in:
Shinsuke Sugaya 2016-11-05 14:33:15 +09:00
commit 17a393521f
2 changed files with 58 additions and 6 deletions

View file

@ -36,6 +36,7 @@ import org.apache.xpath.objects.XObject;
import org.codelibs.core.io.InputStreamUtil;
import org.codelibs.core.io.SerializeUtil;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.core.misc.ValueHolder;
import org.codelibs.fess.Constants;
import org.codelibs.fess.crawler.builder.RequestDataBuilder;
import org.codelibs.fess.crawler.entity.AccessResultData;
@ -80,6 +81,8 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
protected FessConfig fessConfig;
protected boolean useGoogleOffOn = true;
@PostConstruct
public void init() {
fessConfig = ComponentUtil.getFessConfig();
@ -398,13 +401,14 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
} else {
buf.append(' ');
}
final Node node = list.item(i);
if (pruned) {
final Node n = pruneNode(node.cloneNode(true));
buf.append(n.getTextContent());
} else {
buf.append(node.getTextContent());
Node node = list.item(i).cloneNode(true);
if (useGoogleOffOn) {
node = processGoogleOffOn(node, new ValueHolder<Boolean>(true));
}
if (pruned) {
node = pruneNode(node);
}
buf.append(node.getTextContent());
}
} catch (final Exception e) {
logger.warn("Could not parse a value of " + xpath);
@ -415,6 +419,37 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
return buf.toUnsafeString().trim();
}
/**
 * Strips text that lies inside a {@code googleoff:} ... {@code googleon:} comment region.
 *
 * <p>The direct children of {@code node} are visited in document order. A comment whose
 * trimmed value starts with {@code googleoff:} sets {@code flag} to {@code false}; one that
 * starts with {@code googleon:} sets it back to {@code true}. While the flag is
 * {@code false}, every direct child of type {@link Node#TEXT_NODE} is collected and, once
 * the scan of this level is finished, removed from {@code node}. Every other child is
 * processed recursively with the same {@code flag} instance, so the on/off state carries
 * across element boundaries.</p>
 *
 * @param node the node whose subtree is filtered; it is modified in place
 * @param flag shared mutable state: {@code true} while text is kept, {@code false} while
 *             text is being dropped
 * @return the same {@code node} instance that was passed in
 */
protected Node processGoogleOffOn(final Node node, final ValueHolder<Boolean> flag) {
    final NodeList children = node.getChildNodes();
    final List<Node> suppressedTextNodes = new ArrayList<>();

    for (int idx = 0; idx < children.getLength(); idx++) {
        final Node child = children.item(idx);
        final short type = child.getNodeType();

        // A googleoff/googleon directive takes effect before the node itself is classified.
        if (type == Node.COMMENT_NODE) {
            final String directive = child.getNodeValue().trim();
            if (directive.startsWith("googleoff:")) {
                flag.setValue(false);
            } else if (directive.startsWith("googleon:")) {
                flag.setValue(true);
            }
        }

        if (flag.getValue() || type != Node.TEXT_NODE) {
            // Kept node: descend so nested text is filtered with the current state.
            processGoogleOffOn(child, flag);
            continue;
        }

        // Removal is deferred so the child list stays stable while it is being indexed.
        suppressedTextNodes.add(child);
    }

    for (final Node textNode : suppressedTextNodes) {
        node.removeChild(textNode);
    }
    return node;
}
protected Node pruneNode(final Node node) {
final NodeList nodeList = node.getChildNodes();
final List<Node> childNodeList = new ArrayList<>();
@ -573,4 +608,8 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
return fessConfig.getCrawlerDocumentHtmlPrunedTagsAsArray();
}
/**
 * Sets the {@code useGoogleOffOn} flag of this transformer.
 *
 * @param useGoogleOffOn the new value stored in the {@code useGoogleOffOn} field
 */
public void setUseGoogleOffOn(final boolean useGoogleOffOn) {
    this.useGoogleOffOn = useGoogleOffOn;
}
}

View file

@ -28,6 +28,7 @@ import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.codelibs.core.misc.ValueHolder;
import org.codelibs.fess.crawler.builder.RequestDataBuilder;
import org.codelibs.fess.crawler.entity.RequestData;
import org.codelibs.fess.crawler.entity.ResponseData;
@ -110,6 +111,18 @@ public class FessXpathTransformerTest extends UnitFessTestCase {
assertFalse(pnString.contains("bar"));
}
/**
 * Checks that text between a {@code googleoff:} and a {@code googleon:} comment is removed,
 * including text nested inside an element, while the comments, the element itself and the
 * text outside the region are kept.
 */
public void test_processGoogleOffOn() throws Exception {
    final String html =
            "<html><body>foo1<!--googleoff: index-->foo2<a href=\"index.html\">foo3</a>foo4<!--googleon: index-->foo5</body></html>";
    final Document parsed = getDocument(html);
    final FessXpathTransformer xpathTransformer = new FessXpathTransformer();

    // Start with the flag set to true, i.e. text is kept until a googleoff comment is seen.
    final ValueHolder<Boolean> keepText = new ValueHolder<>(true);
    final Node processed = xpathTransformer.processGoogleOffOn(parsed, keepText);

    // Reduce the serialized document to what sits between the BODY tags.
    final String serialized = getXmlString(processed);
    final String withoutHead = serialized.replaceAll(".*<BODY>", "");
    final String bodyContent = withoutHead.replaceAll("</BODY>.*", "");

    assertEquals("foo1<!--googleoff: index--><A href=\"index.html\"></A><!--googleon: index-->foo5", bodyContent);
}
private Document getDocument(final String data) throws Exception {
final DOMParser parser = new DOMParser();
final ByteArrayInputStream is = new ByteArrayInputStream(data.getBytes("UTF-8"));