Merge branch '10.3.x'
This commit is contained in:
commit
17a393521f
2 changed files with 58 additions and 6 deletions
|
@ -36,6 +36,7 @@ import org.apache.xpath.objects.XObject;
|
|||
import org.codelibs.core.io.InputStreamUtil;
|
||||
import org.codelibs.core.io.SerializeUtil;
|
||||
import org.codelibs.core.lang.StringUtil;
|
||||
import org.codelibs.core.misc.ValueHolder;
|
||||
import org.codelibs.fess.Constants;
|
||||
import org.codelibs.fess.crawler.builder.RequestDataBuilder;
|
||||
import org.codelibs.fess.crawler.entity.AccessResultData;
|
||||
|
@ -80,6 +81,8 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
|
|||
|
||||
protected FessConfig fessConfig;
|
||||
|
||||
/**
 * Switch for googleoff/googleon handling. When {@code true} (the default),
 * each node selected for text extraction is passed through
 * {@code processGoogleOffOn} before its text content is read.
 */
protected boolean useGoogleOffOn = true;
|
||||
|
||||
@PostConstruct
|
||||
public void init() {
|
||||
fessConfig = ComponentUtil.getFessConfig();
|
||||
|
@ -398,13 +401,14 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
|
|||
} else {
|
||||
buf.append(' ');
|
||||
}
|
||||
final Node node = list.item(i);
|
||||
if (pruned) {
|
||||
final Node n = pruneNode(node.cloneNode(true));
|
||||
buf.append(n.getTextContent());
|
||||
} else {
|
||||
buf.append(node.getTextContent());
|
||||
Node node = list.item(i).cloneNode(true);
|
||||
if (useGoogleOffOn) {
|
||||
node = processGoogleOffOn(node, new ValueHolder<Boolean>(true));
|
||||
}
|
||||
if (pruned) {
|
||||
node = pruneNode(node);
|
||||
}
|
||||
buf.append(node.getTextContent());
|
||||
}
|
||||
} catch (final Exception e) {
|
||||
logger.warn("Could not parse a value of " + xpath);
|
||||
|
@ -415,6 +419,37 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
|
|||
return buf.toUnsafeString().trim();
|
||||
}
|
||||
|
||||
protected Node processGoogleOffOn(final Node node, final ValueHolder<Boolean> flag) {
|
||||
final NodeList nodeList = node.getChildNodes();
|
||||
List<Node> removedNodeList = null;
|
||||
for (int i = 0; i < nodeList.getLength(); i++) {
|
||||
final Node childNode = nodeList.item(i);
|
||||
if (childNode.getNodeType() == Node.COMMENT_NODE) {
|
||||
String comment = childNode.getNodeValue().trim();
|
||||
if (comment.startsWith("googleoff:")) {
|
||||
flag.setValue(false);
|
||||
} else if (comment.startsWith("googleon:")) {
|
||||
flag.setValue(true);
|
||||
}
|
||||
}
|
||||
|
||||
if (!flag.getValue() && childNode.getNodeType() == Node.TEXT_NODE) {
|
||||
if (removedNodeList == null) {
|
||||
removedNodeList = new ArrayList<>();
|
||||
}
|
||||
removedNodeList.add(childNode);
|
||||
} else {
|
||||
processGoogleOffOn(childNode, flag);
|
||||
}
|
||||
}
|
||||
|
||||
if (removedNodeList != null) {
|
||||
removedNodeList.stream().forEach(n -> node.removeChild(n));
|
||||
}
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
protected Node pruneNode(final Node node) {
|
||||
final NodeList nodeList = node.getChildNodes();
|
||||
final List<Node> childNodeList = new ArrayList<>();
|
||||
|
@ -573,4 +608,8 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
|
|||
return fessConfig.getCrawlerDocumentHtmlPrunedTagsAsArray();
|
||||
}
|
||||
|
||||
public void setUseGoogleOffOn(boolean useGoogleOffOn) {
|
||||
this.useGoogleOffOn = useGoogleOffOn;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -28,6 +28,7 @@ import javax.xml.transform.TransformerFactory;
|
|||
import javax.xml.transform.dom.DOMSource;
|
||||
import javax.xml.transform.stream.StreamResult;
|
||||
|
||||
import org.codelibs.core.misc.ValueHolder;
|
||||
import org.codelibs.fess.crawler.builder.RequestDataBuilder;
|
||||
import org.codelibs.fess.crawler.entity.RequestData;
|
||||
import org.codelibs.fess.crawler.entity.ResponseData;
|
||||
|
@ -110,6 +111,18 @@ public class FessXpathTransformerTest extends UnitFessTestCase {
|
|||
assertFalse(pnString.contains("bar"));
|
||||
}
|
||||
|
||||
public void test_processGoogleOffOn() throws Exception {
|
||||
final String data =
|
||||
"<html><body>foo1<!--googleoff: index-->foo2<a href=\"index.html\">foo3</a>foo4<!--googleon: index-->foo5</body></html>";
|
||||
final Document document = getDocument(data);
|
||||
|
||||
final FessXpathTransformer transformer = new FessXpathTransformer();
|
||||
|
||||
final Node pruneNode = transformer.processGoogleOffOn(document, new ValueHolder<>(true));
|
||||
final String output = getXmlString(pruneNode).replaceAll(".*<BODY>", "").replaceAll("</BODY>.*", "");
|
||||
assertEquals("foo1<!--googleoff: index--><A href=\"index.html\"></A><!--googleon: index-->foo5", output);
|
||||
}
|
||||
|
||||
private Document getDocument(final String data) throws Exception {
|
||||
final DOMParser parser = new DOMParser();
|
||||
final ByteArrayInputStream is = new ByteArrayInputStream(data.getBytes("UTF-8"));
|
||||
|
|
Loading…
Add table
Reference in a new issue