fix #179
This commit is contained in:
parent
c7cd4b5025
commit
db85947fe5
3 changed files with 41 additions and 12 deletions
|
@ -66,6 +66,7 @@ import org.slf4j.LoggerFactory;
|
|||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Node;
|
||||
import org.w3c.dom.NodeList;
|
||||
import org.w3c.dom.traversal.NodeIterator;
|
||||
import org.xml.sax.InputSource;
|
||||
|
||||
public class FessXpathTransformer extends AbstractFessXpathTransformer {
|
||||
|
@ -409,21 +410,31 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer {
|
|||
|
||||
protected String getSingleNodeValue(final Document document,
|
||||
final String xpath, final boolean pruned) {
|
||||
Node value = null;
|
||||
StringBuilder buf = null;
|
||||
NodeList list = null;
|
||||
try {
|
||||
value = getXPathAPI().selectSingleNode(document, xpath);
|
||||
list = getXPathAPI().selectNodeList(document, xpath);
|
||||
for (int i = 0; i < list.getLength(); i++) {
|
||||
if (buf == null) {
|
||||
buf = new StringBuilder(1000);
|
||||
} else {
|
||||
buf.append(' ');
|
||||
}
|
||||
Node node = list.item(i);
|
||||
if (pruned) {
|
||||
final Node n = pruneNode(node.cloneNode(true));
|
||||
buf.append(n.getTextContent());
|
||||
} else {
|
||||
buf.append(node.getTextContent());
|
||||
}
|
||||
}
|
||||
} catch (final Exception e) {
|
||||
logger.warn("Could not parse a value of " + xpath);
|
||||
}
|
||||
if (value == null) {
|
||||
if (buf == null) {
|
||||
return null;
|
||||
}
|
||||
if (pruned) {
|
||||
final Node node = pruneNode(value.cloneNode(true));
|
||||
return node.getTextContent();
|
||||
} else {
|
||||
return value.getTextContent();
|
||||
}
|
||||
return buf.toString();
|
||||
}
|
||||
|
||||
protected Node pruneNode(final Node node) {
|
||||
|
|
|
@ -16,10 +16,10 @@
|
|||
#{"feed:" : "http:"}
|
||||
</property>
|
||||
<!--
|
||||
<property name="cacheXpath">"/HTML/BODY"</property>
|
||||
<property name="contentXpath">"/HTML/BODY"</property>
|
||||
<property name="cacheXpath">"//BODY"</property>
|
||||
<property name="contentXpath">"//BODY"</property>
|
||||
<property name="anchorXpath">"//A/@href"</property>
|
||||
<property name="digestXpath">"/HTML/HEAD/META[@name='description']/@content"</property>
|
||||
<property name="digestXpath">"//META[@name='description']/@content"</property>
|
||||
-->
|
||||
<property name="replaceSiteEncodingWhenEnglish">true</property>
|
||||
<property name="siteEncoding">"UTF-8"</property>
|
||||
|
|
|
@ -295,4 +295,22 @@ public class FessXpathTransformerTest extends S2TestCase {
|
|||
.next());
|
||||
}
|
||||
}
|
||||
|
||||
public void test_contentXpath() throws Exception {
|
||||
final FessXpathTransformer transformer = new FessXpathTransformer();
|
||||
|
||||
String data = "<html><head><meta name=\"keywords\" content=\"bbb\"></head><body>aaa</body></html>";
|
||||
Document document = getDocument(data);
|
||||
String value = transformer
|
||||
.getSingleNodeValue(document, "//BODY", false);
|
||||
assertEquals("aaa", value);
|
||||
|
||||
value = transformer.getSingleNodeValue(document,
|
||||
"//META[@name='keywords']/@content", false);
|
||||
assertEquals("bbb", value);
|
||||
|
||||
value = transformer.getSingleNodeValue(document,
|
||||
"//META[@name='keywords']/@content|//BODY", false);
|
||||
assertEquals("bbb aaa", value);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue