This commit is contained in:
Shinsuke Sugaya 2014-07-05 07:41:01 +09:00
parent c7cd4b5025
commit db85947fe5
3 changed files with 41 additions and 12 deletions

View file

@ -66,6 +66,7 @@ import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.traversal.NodeIterator;
import org.xml.sax.InputSource;
public class FessXpathTransformer extends AbstractFessXpathTransformer {
@ -409,21 +410,31 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer {
protected String getSingleNodeValue(final Document document,
final String xpath, final boolean pruned) {
Node value = null;
StringBuilder buf = null;
NodeList list = null;
try {
value = getXPathAPI().selectSingleNode(document, xpath);
list = getXPathAPI().selectNodeList(document, xpath);
for (int i = 0; i < list.getLength(); i++) {
if (buf == null) {
buf = new StringBuilder(1000);
} else {
buf.append(' ');
}
Node node = list.item(i);
if (pruned) {
final Node n = pruneNode(node.cloneNode(true));
buf.append(n.getTextContent());
} else {
buf.append(node.getTextContent());
}
}
} catch (final Exception e) {
logger.warn("Could not parse a value of " + xpath);
}
if (value == null) {
if (buf == null) {
return null;
}
if (pruned) {
final Node node = pruneNode(value.cloneNode(true));
return node.getTextContent();
} else {
return value.getTextContent();
}
return buf.toString();
}
protected Node pruneNode(final Node node) {

View file

@ -16,10 +16,10 @@
#{"feed:" : "http:"}
</property>
<!--
<property name="cacheXpath">"/HTML/BODY"</property>
<property name="contentXpath">"/HTML/BODY"</property>
<property name="cacheXpath">"//BODY"</property>
<property name="contentXpath">"//BODY"</property>
<property name="anchorXpath">"//A/@href"</property>
<property name="digestXpath">"/HTML/HEAD/META[@name='description']/@content"</property>
<property name="digestXpath">"//META[@name='description']/@content"</property>
-->
<property name="replaceSiteEncodingWhenEnglish">true</property>
<property name="siteEncoding">"UTF-8"</property>

View file

@ -295,4 +295,22 @@ public class FessXpathTransformerTest extends S2TestCase {
.next());
}
}
/**
 * Checks getSingleNodeValue (unpruned) against a small HTML document:
 * an element path yields the element text, an attribute path yields the
 * attribute value, and a union of both yields the two values joined by a
 * single space ("bbb aaa").
 */
public void test_contentXpath() throws Exception {
    final FessXpathTransformer xpathTransformer = new FessXpathTransformer();
    final String html = "<html><head><meta name=\"keywords\" content=\"bbb\"></head><body>aaa</body></html>";
    final Document dom = getDocument(html);

    // Element text.
    assertEquals("aaa",
            xpathTransformer.getSingleNodeValue(dom, "//BODY", false));

    // Attribute value.
    final String keywordsXpath = "//META[@name='keywords']/@content";
    assertEquals("bbb",
            xpathTransformer.getSingleNodeValue(dom, keywordsXpath, false));

    // Union of both expressions: every match contributes to the result.
    final String unionXpath = "//META[@name='keywords']/@content|//BODY";
    assertEquals("bbb aaa",
            xpathTransformer.getSingleNodeValue(dom, unionXpath, false));
}
}