Shinsuke Sugaya 8 лет назад
Родитель
Commit
17a393521f

+ 44 - 5
src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java

@@ -36,6 +36,7 @@ import org.apache.xpath.objects.XObject;
 import org.codelibs.core.io.InputStreamUtil;
 import org.codelibs.core.io.SerializeUtil;
 import org.codelibs.core.lang.StringUtil;
+import org.codelibs.core.misc.ValueHolder;
 import org.codelibs.fess.Constants;
 import org.codelibs.fess.crawler.builder.RequestDataBuilder;
 import org.codelibs.fess.crawler.entity.AccessResultData;
@@ -80,6 +81,8 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
 
     protected FessConfig fessConfig;
 
+    protected boolean useGoogleOffOn = true;
+
     @PostConstruct
     public void init() {
         fessConfig = ComponentUtil.getFessConfig();
@@ -398,13 +401,14 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
                 } else {
                     buf.append(' ');
                 }
-                final Node node = list.item(i);
+                Node node = list.item(i).cloneNode(true);
+                if (useGoogleOffOn) {
+                    node = processGoogleOffOn(node, new ValueHolder<Boolean>(true));
+                }
                 if (pruned) {
-                    final Node n = pruneNode(node.cloneNode(true));
-                    buf.append(n.getTextContent());
-                } else {
-                    buf.append(node.getTextContent());
+                    node = pruneNode(node);
                 }
+                buf.append(node.getTextContent());
             }
         } catch (final Exception e) {
             logger.warn("Could not parse a value of " + xpath);
@@ -415,6 +419,37 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
         return buf.toUnsafeString().trim();
     }
 
+    /**
+     * Strips text that lies between {@code googleoff:} and {@code googleon:}
+     * comments from the subtree rooted at the given node.
+     * <p>
+     * The subtree is walked depth-first in document order. A comment whose trimmed
+     * value starts with {@code googleoff:} switches the flag to {@code false}; one
+     * starting with {@code googleon:} switches it back to {@code true}. While the
+     * flag is {@code false}, text and CDATA children are removed. Elements and the
+     * marker comments themselves are kept. The same flag instance is passed to every
+     * recursive call, so an "off" region may start inside one element and end in
+     * another.
+     * <p>
+     * The node is modified in place; callers that must keep the original tree
+     * should pass a clone.
+     *
+     * @param node root of the subtree to process
+     * @param flag current on/off state ({@code true} = keep text), updated as
+     *        marker comments are encountered
+     * @return the same {@code node} instance that was passed in
+     */
+    protected Node processGoogleOffOn(final Node node, final ValueHolder<Boolean> flag) {
+        final NodeList nodeList = node.getChildNodes();
+        List<Node> removedNodeList = null;
+        for (int i = 0; i < nodeList.getLength(); i++) {
+            final Node childNode = nodeList.item(i);
+            final short nodeType = childNode.getNodeType();
+            if (nodeType == Node.COMMENT_NODE) {
+                final String comment = childNode.getNodeValue().trim();
+                if (comment.startsWith("googleoff:")) {
+                    flag.setValue(false);
+                } else if (comment.startsWith("googleon:")) {
+                    flag.setValue(true);
+                }
+            }
+
+            // CDATA sections contribute to getTextContent() exactly like plain text
+            // nodes, so they must be dropped as well while indexing is switched off.
+            final boolean textual = nodeType == Node.TEXT_NODE || nodeType == Node.CDATA_SECTION_NODE;
+            if (!flag.getValue() && textual) {
+                if (removedNodeList == null) {
+                    removedNodeList = new ArrayList<>();
+                }
+                removedNodeList.add(childNode);
+            } else {
+                processGoogleOffOn(childNode, flag);
+            }
+        }
+
+        // Removal is deferred until the loop has finished so that the indices into
+        // the child list stay valid during iteration.
+        if (removedNodeList != null) {
+            removedNodeList.forEach(node::removeChild);
+        }
+
+        return node;
+    }
+
     protected Node pruneNode(final Node node) {
         final NodeList nodeList = node.getChildNodes();
         final List<Node> childNodeList = new ArrayList<>();
@@ -573,4 +608,8 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
         return fessConfig.getCrawlerDocumentHtmlPrunedTagsAsArray();
     }
 
+    /**
+     * Enables or disables handling of {@code googleoff:}/{@code googleon:} comments.
+     *
+     * @param useGoogleOffOn {@code true} to honour the marker comments,
+     *        {@code false} to ignore them
+     */
+    public void setUseGoogleOffOn(final boolean useGoogleOffOn) {
+        this.useGoogleOffOn = useGoogleOffOn;
+    }
+
 }

+ 13 - 0
src/test/java/org/codelibs/fess/crawler/transformer/FessXpathTransformerTest.java

@@ -28,6 +28,7 @@ import javax.xml.transform.TransformerFactory;
 import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.stream.StreamResult;
 
+import org.codelibs.core.misc.ValueHolder;
 import org.codelibs.fess.crawler.builder.RequestDataBuilder;
 import org.codelibs.fess.crawler.entity.RequestData;
 import org.codelibs.fess.crawler.entity.ResponseData;
@@ -110,6 +111,18 @@ public class FessXpathTransformerTest extends UnitFessTestCase {
         assertFalse(pnString.contains("bar"));
     }
 
+    /**
+     * Text between the googleoff and googleon comments must be removed, including
+     * text nested inside an element, while the element and the marker comments
+     * themselves are kept.
+     */
+    public void test_processGoogleOffOn() throws Exception {
+        final String html =
+                "<html><body>foo1<!--googleoff: index-->foo2<a href=\"index.html\">foo3</a>foo4<!--googleon: index-->foo5</body></html>";
+        final Document parsed = getDocument(html);
+        final FessXpathTransformer target = new FessXpathTransformer();
+
+        final Node processed = target.processGoogleOffOn(parsed, new ValueHolder<>(true));
+
+        // Reduce the serialized document to the content of its BODY element.
+        final String serialized = getXmlString(processed);
+        final String withoutHead = serialized.replaceAll(".*<BODY>", "");
+        final String bodyContent = withoutHead.replaceAll("</BODY>.*", "");
+        assertEquals("foo1<!--googleoff: index--><A href=\"index.html\"></A><!--googleon: index-->foo5", bodyContent);
+    }
+
     private Document getDocument(final String data) throws Exception {
         final DOMParser parser = new DOMParser();
         final ByteArrayInputStream is = new ByteArrayInputStream(data.getBytes("UTF-8"));