diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java
index 69abd9bc1..04c4f5d9d 100644
--- a/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java
+++ b/src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java
@@ -36,6 +36,7 @@ import org.apache.xpath.objects.XObject;
 import org.codelibs.core.io.InputStreamUtil;
 import org.codelibs.core.io.SerializeUtil;
 import org.codelibs.core.lang.StringUtil;
+import org.codelibs.core.misc.ValueHolder;
 import org.codelibs.fess.Constants;
 import org.codelibs.fess.crawler.builder.RequestDataBuilder;
 import org.codelibs.fess.crawler.entity.AccessResultData;
@@ -80,6 +81,9 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
 
     protected FessConfig fessConfig;
 
+    /** Whether text between googleoff:/googleon: comment markers is stripped when node values are extracted. */
+    protected boolean useGoogleOffOn = true;
+
     @PostConstruct
     public void init() {
         fessConfig = ComponentUtil.getFessConfig();
@@ -398,13 +402,14 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
                 } else {
                     buf.append(' ');
                 }
-                final Node node = list.item(i);
-                if (pruned) {
-                    final Node n = pruneNode(node.cloneNode(true));
-                    buf.append(n.getTextContent());
-                } else {
-                    buf.append(node.getTextContent());
+                Node node = list.item(i).cloneNode(true);
+                if (useGoogleOffOn) {
+                    node = processGoogleOffOn(node, new ValueHolder<>(true));
                 }
+                if (pruned) {
+                    node = pruneNode(node);
+                }
+                buf.append(node.getTextContent());
             }
         } catch (final Exception e) {
             logger.warn("Could not parse a value of " + xpath);
@@ -415,6 +420,46 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
         return buf.toUnsafeString().trim();
     }
 
+    /**
+     * Removes the text that lies between a {@code googleoff:} comment and the
+     * next {@code googleon:} comment from the given subtree.
+     * <p>
+     * Children are visited in document order and the on/off state is carried in
+     * {@code flag}, so a marker also affects every node visited after it, even
+     * outside the element that contains the marker. Only text nodes are removed;
+     * elements and the marker comments themselves stay in place.
+     *
+     * @param node root of the subtree to process; it is modified in place
+     * @param flag current state: {@code true} keeps text, {@code false} drops
+     *        it; updated whenever a marker comment is encountered
+     * @return the same {@code node} instance that was passed in
+     */
+    protected Node processGoogleOffOn(final Node node, final ValueHolder<Boolean> flag) {
+        Node childNode = node.getFirstChild();
+        while (childNode != null) {
+            // Remember the successor first: removing childNode detaches it from its siblings.
+            final Node nextNode = childNode.getNextSibling();
+            final short nodeType = childNode.getNodeType();
+            if (nodeType == Node.COMMENT_NODE) {
+                final String comment = childNode.getNodeValue().trim();
+                if (comment.startsWith("googleoff:")) {
+                    flag.setValue(false);
+                } else if (comment.startsWith("googleon:")) {
+                    flag.setValue(true);
+                }
+            }
+
+            if (nodeType == Node.TEXT_NODE && Boolean.FALSE.equals(flag.getValue())) {
+                node.removeChild(childNode);
+            } else {
+                processGoogleOffOn(childNode, flag);
+            }
+            childNode = nextNode;
+        }
+
+        return node;
+    }
+
     protected Node pruneNode(final Node node) {
         final NodeList nodeList = node.getChildNodes();
         final List childNodeList = new ArrayList<>();
@@ -573,4 +618,13 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
         return fessConfig.getCrawlerDocumentHtmlPrunedTagsAsArray();
     }
 
+    /**
+     * Enables or disables the removal of text enclosed by googleoff:/googleon: comments.
+     *
+     * @param useGoogleOffOn {@code true} to drop such text, {@code false} to keep it
+     */
+    public void setUseGoogleOffOn(final boolean useGoogleOffOn) {
+        this.useGoogleOffOn = useGoogleOffOn;
+    }
+
 }
diff --git a/src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java b/src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java
index 3ff2be279..c306acd0c 100644
--- a/src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java
+++ b/src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java
@@ -28,6 +28,7 @@ import javax.xml.transform.TransformerFactory;
 import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.stream.StreamResult;
 
+import org.codelibs.core.misc.ValueHolder;
 import org.codelibs.fess.crawler.builder.RequestDataBuilder;
 import org.codelibs.fess.crawler.entity.RequestData;
 import org.codelibs.fess.crawler.entity.ResponseData;
@@ -110,6 +111,19 @@ public class FessXpathTransformerTest extends UnitFessTestCase {
         assertFalse(pnString.contains("bar"));
     }
 
+    public void test_processGoogleOffOn() throws Exception {
+        final String data =
+                "<html><body>foo1<!--googleoff: index-->foo2<a href=\"index.html\">foo3</a>foo4<!--googleon: index-->foo5</body></html>";
+        final Document document = getDocument(data);
+
+        final FessXpathTransformer transformer = new FessXpathTransformer();
+
+        final Node processedNode = transformer.processGoogleOffOn(document, new ValueHolder<>(true));
+        assertSame(document, processedNode);
+        // Compare text content only, so the result does not depend on how the parser serializes tags.
+        assertEquals("foo1foo5", document.getDocumentElement().getTextContent());
+    }
+
     private Document getDocument(final String data) throws Exception {
         final DOMParser parser = new DOMParser();
         final ByteArrayInputStream is = new ByteArrayInputStream(data.getBytes("UTF-8"));