Shinsuke Sugaya 11 lat temu
rodzic
commit
29e7c2a34c

+ 12 - 0
src/main/java/jp/sf/fess/db/exentity/CrawlingConfig.java

@@ -16,9 +16,16 @@
 
 package jp.sf.fess.db.exentity;
 
+import java.util.Map;
+
 import org.seasar.robot.client.S2RobotClientFactory;
 
 public interface CrawlingConfig {
+    public static final String XPATH_PREFIX = "field.xpath.";
+
+    public static final String SCRIPT_PREFIX = "field.script.";
+
+    public static final String CLIENT_PREFIX = "client.";
 
     Long getId();
 
@@ -38,6 +45,8 @@ public interface CrawlingConfig {
 
     void initializeClientFactory(S2RobotClientFactory s2RobotClientFactory);
 
+    Map<String, String> getConfigParameterMap(ConfigName name);
+
     public enum ConfigType {
         WEB("W"), FILE("F"), DATA("D");
 
@@ -59,4 +68,7 @@ public interface CrawlingConfig {
         }
     }
 
+    /**
+     * Names the groups of configuration parameters that can be requested
+     * through {@link #getConfigParameterMap(ConfigName)}.
+     */
+    public enum ConfigName {
+        CLIENT, XPATH, SCRIPT;
+    }
 }

+ 6 - 0
src/main/java/jp/sf/fess/db/exentity/DataCrawlingConfig.java

@@ -18,6 +18,7 @@ package jp.sf.fess.db.exentity;
 
 import java.math.BigDecimal;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -383,4 +384,9 @@ public class DataCrawlingConfig extends BsDataCrawlingConfig implements
         }
 
     }
+
+    /**
+     * Returns the configuration parameters for the given group.
+     *
+     * This implementation has no grouped parameters and always answers with
+     * an empty map, whatever group is requested.
+     *
+     * @param name the parameter group being requested (not consulted here)
+     * @return an empty map
+     */
+    @Override
+    public Map<String, String> getConfigParameterMap(final ConfigName name) {
+        final Map<String, String> noParameters = Collections.emptyMap();
+        return noParameters;
+    }
 }

+ 42 - 3
src/main/java/jp/sf/fess/db/exentity/FileCrawlingConfig.java

@@ -18,6 +18,7 @@ package jp.sf.fess.db.exentity;
 
 import java.math.BigDecimal;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -60,6 +61,8 @@ public class FileCrawlingConfig extends BsFileCrawlingConfig implements
 
     protected volatile Pattern[] excludedDocPathPatterns;
 
+    protected volatile Map<ConfigName, Map<String, String>> configParameterMap;
+
     public FileCrawlingConfig() {
         super();
         setBoost(BigDecimal.ONE);
@@ -235,9 +238,10 @@ public class FileCrawlingConfig extends BsFileCrawlingConfig implements
         //  Parameters
         final Map<String, Object> paramMap = new HashMap<String, Object>();
         clientFactory.setInitParameterMap(paramMap);
-        final String configParam = getConfigParameter();
-        if (StringUtil.isNotBlank(configParam)) {
-            ParameterUtil.loadConfigParams(paramMap, configParam);
+
+        final Map<String, String> clientConfigMap = getConfigParameterMap(ConfigName.CLIENT);
+        if (clientConfigMap != null) {
+            paramMap.putAll(clientConfigMap);
         }
 
         // auth params
@@ -262,4 +266,39 @@ public class FileCrawlingConfig extends BsFileCrawlingConfig implements
                 smbAuthList.toArray(new SmbAuthentication[smbAuthList.size()]));
 
     }
+
+    @Override
+    public Map<String, String> getConfigParameterMap(final ConfigName name) {
+        if (configParameterMap == null) {
+            final Map<ConfigName, Map<String, String>> map = new HashMap<>();
+            final Map<String, String> clientConfigMap = new HashMap<>();
+            final Map<String, String> xpathConfigMap = new HashMap<>();
+            final Map<String, String> scriptConfigMap = new HashMap<>();
+            map.put(ConfigName.CLIENT, clientConfigMap);
+            map.put(ConfigName.XPATH, xpathConfigMap);
+            map.put(ConfigName.SCRIPT, scriptConfigMap);
+            for (final Map.Entry<String, String> entry : ParameterUtil.parse(
+                    getConfigParameter()).entrySet()) {
+                final String key = entry.getKey();
+                if (key.startsWith(CLIENT_PREFIX)) {
+                    clientConfigMap.put(key.substring(CLIENT_PREFIX.length()),
+                            entry.getValue());
+                } else if (key.startsWith(XPATH_PREFIX)) {
+                    xpathConfigMap.put(key.substring(XPATH_PREFIX.length()),
+                            entry.getValue());
+                } else if (key.startsWith(SCRIPT_PREFIX)) {
+                    scriptConfigMap.put(key.substring(SCRIPT_PREFIX.length()),
+                            entry.getValue());
+                }
+            }
+
+            configParameterMap = map;
+        }
+
+        final Map<String, String> configMap = configParameterMap.get(name);
+        if (configMap == null) {
+            return Collections.emptyMap();
+        }
+        return configMap;
+    }
 }

+ 40 - 3
src/main/java/jp/sf/fess/db/exentity/WebCrawlingConfig.java

@@ -18,6 +18,7 @@ package jp.sf.fess.db.exentity;
 
 import java.math.BigDecimal;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -59,6 +60,8 @@ public class WebCrawlingConfig extends BsWebCrawlingConfig implements
 
     protected volatile Pattern[] excludedDocUrlPatterns;
 
+    protected volatile Map<ConfigName, Map<String, String>> configParameterMap;
+
     public WebCrawlingConfig() {
         super();
         setBoost(BigDecimal.ONE);
@@ -242,9 +245,9 @@ public class WebCrawlingConfig extends BsWebCrawlingConfig implements
         final Map<String, Object> paramMap = new HashMap<String, Object>();
         clientFactory.setInitParameterMap(paramMap);
 
-        final String configParam = getConfigParameter();
-        if (StringUtil.isNotBlank(configParam)) {
-            ParameterUtil.loadConfigParams(paramMap, configParam);
+        final Map<String, String> clientConfigMap = getConfigParameterMap(ConfigName.CLIENT);
+        if (clientConfigMap != null) {
+            paramMap.putAll(clientConfigMap);
         }
 
         final String userAgent = getUserAgent();
@@ -274,4 +277,38 @@ public class WebCrawlingConfig extends BsWebCrawlingConfig implements
 
     }
 
+    @Override
+    public Map<String, String> getConfigParameterMap(final ConfigName name) {
+        if (configParameterMap == null) {
+            final Map<ConfigName, Map<String, String>> map = new HashMap<>();
+            final Map<String, String> clientConfigMap = new HashMap<>();
+            final Map<String, String> xpathConfigMap = new HashMap<>();
+            final Map<String, String> scriptConfigMap = new HashMap<>();
+            map.put(ConfigName.CLIENT, clientConfigMap);
+            map.put(ConfigName.XPATH, xpathConfigMap);
+            map.put(ConfigName.SCRIPT, scriptConfigMap);
+            for (final Map.Entry<String, String> entry : ParameterUtil.parse(
+                    getConfigParameter()).entrySet()) {
+                final String key = entry.getKey();
+                if (key.startsWith(CLIENT_PREFIX)) {
+                    clientConfigMap.put(key.substring(CLIENT_PREFIX.length()),
+                            entry.getValue());
+                } else if (key.startsWith(XPATH_PREFIX)) {
+                    xpathConfigMap.put(key.substring(XPATH_PREFIX.length()),
+                            entry.getValue());
+                } else if (key.startsWith(SCRIPT_PREFIX)) {
+                    scriptConfigMap.put(key.substring(SCRIPT_PREFIX.length()),
+                            entry.getValue());
+                }
+            }
+
+            configParameterMap = map;
+        }
+
+        final Map<String, String> configMap = configParameterMap.get(name);
+        if (configMap == null) {
+            return Collections.emptyMap();
+        }
+        return configMap;
+    }
 }

+ 41 - 0
src/main/java/jp/sf/fess/transformer/FessXpathTransformer.java

@@ -38,6 +38,7 @@ import javax.xml.transform.TransformerException;
 
 import jp.sf.fess.Constants;
 import jp.sf.fess.db.exentity.CrawlingConfig;
+import jp.sf.fess.db.exentity.CrawlingConfig.ConfigName;
 import jp.sf.fess.helper.CrawlingConfigHelper;
 import jp.sf.fess.helper.CrawlingSessionHelper;
 import jp.sf.fess.helper.FileTypeHelper;
@@ -52,6 +53,7 @@ import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang.StringUtils;
 import org.apache.xpath.objects.XObject;
 import org.cyberneko.html.parsers.DOMParser;
+import org.seasar.framework.util.OgnlUtil;
 import org.seasar.framework.util.SerializeUtil;
 import org.seasar.framework.util.StringUtil;
 import org.seasar.robot.RobotCrawlAccessException;
@@ -331,6 +333,45 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer {
             putResultDataBody(dataMap, "url", url); // set again
         }
 
+        // from config
+        final Map<String, String> xpathConfigMap = crawlingConfig
+                .getConfigParameterMap(ConfigName.XPATH);
+        final Map<String, String> scriptConfigMap = crawlingConfig
+                .getConfigParameterMap(ConfigName.SCRIPT);
+        for (final Map.Entry<String, String> entry : xpathConfigMap.entrySet()) {
+            String value = getSingleNodeValue(document, entry.getValue(), true);
+            final String key = entry.getKey();
+            final String template = scriptConfigMap.get(key);
+            if (template != null) {
+                final Map<String, Object> paramMap = new HashMap<>(
+                        dataMap.size());
+                paramMap.putAll(dataMap);
+                paramMap.put("value", value);
+                value = convertValue(template, paramMap);
+            }
+            if (value != null) {
+                putResultDataBody(dataMap, key, value);
+            }
+        }
+    }
+
+    protected String convertValue(final String template,
+            final Map<String, Object> paramMap) {
+        if (StringUtil.isEmpty(template)) {
+            return Constants.EMPTY_STRING;
+        }
+
+        try {
+            final Object exp = OgnlUtil.parseExpression(template);
+            final Object value = OgnlUtil.getValue(exp, paramMap);
+            if (value == null) {
+                return null;
+            }
+            return value.toString();
+        } catch (final Exception e) {
+            logger.warn("Invalid value format: " + template, e);
+            return null;
+        }
     }
 
     protected String getCanonicalUrl(final ResponseData responseData,