瀏覽代碼

fix #1959 add isPruned option

Shinsuke Sugaya 6 年之前
父節點
當前提交
8c3c76c22a

+ 12 - 1
src/main/java/org/codelibs/fess/crawler/transformer/FessXpathTransformer.java

@@ -106,6 +106,8 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
 
     protected boolean useGoogleOffOn = true;
 
+    protected Map<String, Boolean> fieldPrunedRuleMap = new HashMap<>();
+
     @PostConstruct
     public void init() {
         fessConfig = ComponentUtil.getFessConfig();
@@ -170,7 +172,11 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
                 case XObject.CLASS_RTREEFRAG:
                 case XObject.CLASS_UNRESOLVEDVARIABLE:
                 default:
-                    final Node value = getXPathAPI().selectSingleNode(document, entry.getValue());
+                    final Boolean isPruned = fieldPrunedRuleMap.get(entry.getKey());
+                    Node value = getXPathAPI().selectSingleNode(document, entry.getValue());
+                    if (isPruned != null && isPruned.booleanValue()) {
+                        value = pruneNode(value);
+                    }
                     putResultDataBody(dataMap, entry.getKey(), value != null ? value.getTextContent() : null);
                     break;
                 }
@@ -913,4 +919,9 @@ public class FessXpathTransformer extends XpathTransformer implements FessTransf
         }
         return null;
     }
+
+    public void addFieldRule(final String name, final String xpath, final boolean isPruned) { // three-argument variant: registers the rule and also records a per-field prune flag
+        addFieldRule(name, xpath); // delegate the rule registration itself to the two-argument overload
+        fieldPrunedRuleMap.put(name, isPruned); // flag is looked up again by field name; when true, the selected node is passed through pruneNode() before its text is read
+    }
 }

+ 2 - 0
src/main/resources/crawler/transformer.xml

@@ -19,10 +19,12 @@
 		<postConstruct name="addFieldRule">
 			<arg>"title"</arg>
 			<arg>"//TITLE"</arg>
+			<arg>true</arg>
 		</postConstruct>
 		<postConstruct name="addFieldRule">
 			<arg>"important_content"</arg>
 			<arg>"//*[self::H1 or self::H2 or self::H3]"</arg>
+			<arg>true</arg>
 		</postConstruct>
 	</component>