diff --git a/src/main/java/jp/sf/fess/db/exentity/CrawlingConfig.java b/src/main/java/jp/sf/fess/db/exentity/CrawlingConfig.java index 81c5ee546..4208426b4 100644 --- a/src/main/java/jp/sf/fess/db/exentity/CrawlingConfig.java +++ b/src/main/java/jp/sf/fess/db/exentity/CrawlingConfig.java @@ -62,6 +62,6 @@ public interface CrawlingConfig { } public enum ConfigName { - CLIENT, XPATH, SCRIPT, FIELD; + CLIENT, XPATH, VALUE, SCRIPT, FIELD; } } \ No newline at end of file diff --git a/src/main/java/jp/sf/fess/transformer/AbstractFessFileTransformer.java b/src/main/java/jp/sf/fess/transformer/AbstractFessFileTransformer.java index c8877d730..4b4225d0c 100644 --- a/src/main/java/jp/sf/fess/transformer/AbstractFessFileTransformer.java +++ b/src/main/java/jp/sf/fess/transformer/AbstractFessFileTransformer.java @@ -97,13 +97,6 @@ public abstract class AbstractFessFileTransformer extends protected abstract Extractor getExtractor(ResponseData responseData); - protected void putResultDataBody(final Map dataMap, - final String key, final Object value) { - if (!dataMap.containsKey(key)) { - dataMap.put(key, value); - } - } - @Override public ResultData transform(final ResponseData responseData) { if (responseData == null || responseData.getResponseBody() == null) { @@ -339,6 +332,17 @@ public abstract class AbstractFessFileTransformer extends putResultDataBody(dataMap, "url", url); // set again } + // from config + final Map scriptConfigMap = crawlingConfig + .getConfigParameterMap(ConfigName.SCRIPT); + final Map valueConfigMap = crawlingConfig + .getConfigParameterMap(ConfigName.VALUE); + for (final Map.Entry entry : valueConfigMap.entrySet()) { + final String key = entry.getKey(); + putResultDataWithTemplate(dataMap, key, entry.getValue(), + scriptConfigMap.get(key)); + } + try { resultData.setData(SerializeUtil.fromObjectToBinary(dataMap)); } catch (final Exception e) { diff --git a/src/main/java/jp/sf/fess/transformer/AbstractFessXpathTransformer.java b/src/main/java/jp/sf/fess/transformer/AbstractFessXpathTransformer.java index 289142b52..8172a4694 100644 --- a/src/main/java/jp/sf/fess/transformer/AbstractFessXpathTransformer.java +++ b/src/main/java/jp/sf/fess/transformer/AbstractFessXpathTransformer.java @@ -17,12 +17,20 @@ package jp.sf.fess.transformer; import java.net.URLDecoder; +import java.util.HashMap; +import java.util.Map; import org.apache.commons.lang.StringUtils; import org.codelibs.core.util.StringUtil; +import org.seasar.framework.util.OgnlUtil; import org.seasar.robot.transformer.impl.XpathTransformer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public abstract class AbstractFessXpathTransformer extends XpathTransformer { + private static final Logger logger = LoggerFactory + .getLogger(AbstractFessXpathTransformer.class); + public int maxSiteLength = 50; public String unknownHostname = "unknown"; @@ -104,4 +112,41 @@ public abstract class AbstractFessXpathTransformer extends XpathTransformer { } return content.replaceAll("\\s+", " "); } + + protected void putResultDataBody(final Map dataMap, + final String key, final Object value) { + dataMap.put(key, value); + } + + protected void putResultDataWithTemplate(final Map dataMap, + final String key, String value, final String template) { + if (template != null) { + final Map paramMap = new HashMap<>(dataMap.size()); + paramMap.putAll(dataMap); + paramMap.put("value", value); + value = convertValue(template, paramMap); + } + if (value != null) { + putResultDataBody(dataMap, key, value); + } + } + + protected String convertValue(final String template, + final Map paramMap) { + if (StringUtil.isEmpty(template)) { + return StringUtil.EMPTY; + } + + try { + final Object exp = OgnlUtil.parseExpression(template); + final Object value = OgnlUtil.getValue(exp, paramMap); + if (value == null) { + return null; + } + return value.toString(); + } catch (final Exception e) { + logger.warn("Invalid value format: " + template, e); + return null; + } + } } \ No newline at end of file diff --git a/src/main/java/jp/sf/fess/transformer/FessXpathTransformer.java b/src/main/java/jp/sf/fess/transformer/FessXpathTransformer.java index c5c57d5df..34ccbb4e0 100644 --- a/src/main/java/jp/sf/fess/transformer/FessXpathTransformer.java +++ b/src/main/java/jp/sf/fess/transformer/FessXpathTransformer.java @@ -51,7 +51,6 @@ import org.apache.xpath.objects.XObject; import org.codelibs.core.util.StringUtil; import org.cyberneko.html.parsers.DOMParser; import org.seasar.framework.util.InputStreamUtil; -import org.seasar.framework.util.OgnlUtil; import org.seasar.framework.util.SerializeUtil; import org.seasar.robot.RobotCrawlAccessException; import org.seasar.robot.RobotSystemException; @@ -97,11 +96,6 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer { public Map convertUrlMap = new HashMap(); - protected void putResultDataBody(final Map dataMap, - final String key, final Object value) { - dataMap.put(key, value); - } - @Override protected void storeData(final ResponseData responseData, final ResultData resultData) { @@ -348,38 +342,18 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer { final Map scriptConfigMap = crawlingConfig .getConfigParameterMap(ConfigName.SCRIPT); for (final Map.Entry entry : xpathConfigMap.entrySet()) { - String value = getSingleNodeValue(document, entry.getValue(), true); final String key = entry.getKey(); - final String template = scriptConfigMap.get(key); - if (template != null) { - final Map paramMap = new HashMap<>( - dataMap.size()); - paramMap.putAll(dataMap); - paramMap.put("value", value); - value = convertValue(template, paramMap); - } - if (value != null) { - putResultDataBody(dataMap, key, value); - } + final String value = getSingleNodeValue(document, entry.getValue(), + true); + putResultDataWithTemplate(dataMap, key, value, + scriptConfigMap.get(key)); } - } - - protected String convertValue(final String template, - final Map paramMap) { - if (StringUtil.isEmpty(template)) { - return StringUtil.EMPTY; - } - - try { - final Object exp = OgnlUtil.parseExpression(template); - final Object value = OgnlUtil.getValue(exp, paramMap); - if (value == null) { - return null; - } - return value.toString(); - } catch (final Exception e) { - logger.warn("Invalid value format: " + template, e); - return null; + final Map valueConfigMap = crawlingConfig + .getConfigParameterMap(ConfigName.VALUE); + for (final Map.Entry entry : valueConfigMap.entrySet()) { + final String key = entry.getKey(); + putResultDataWithTemplate(dataMap, key, entry.getValue(), + scriptConfigMap.get(key)); } } diff --git a/src/main/java/jp/sf/fess/util/ParameterUtil.java b/src/main/java/jp/sf/fess/util/ParameterUtil.java index 5d4d29bb6..a8a45fbb6 100644 --- a/src/main/java/jp/sf/fess/util/ParameterUtil.java +++ b/src/main/java/jp/sf/fess/util/ParameterUtil.java @@ -27,6 +27,8 @@ import org.codelibs.core.util.StringUtil; public class ParameterUtil { protected static final String XPATH_PREFIX = "field.xpath."; + protected static final String VALUE_PREFIX = "field.value."; + protected static final String SCRIPT_PREFIX = "field.script."; protected static final String CLIENT_PREFIX = "client."; @@ -77,10 +79,12 @@ public class ParameterUtil { final Map> map = new HashMap<>(); final Map clientConfigMap = new HashMap<>(); final Map xpathConfigMap = new HashMap<>(); + final Map valueConfigMap = new HashMap<>(); final Map scriptConfigMap = new HashMap<>(); final Map fieldConfigMap = new HashMap<>(); map.put(ConfigName.CLIENT, clientConfigMap); map.put(ConfigName.XPATH, xpathConfigMap); + map.put(ConfigName.VALUE, valueConfigMap); map.put(ConfigName.SCRIPT, scriptConfigMap); map.put(ConfigName.FIELD, fieldConfigMap); for (final Map.Entry entry : ParameterUtil.parse( @@ -92,6 +96,9 @@ public class ParameterUtil { } else if (key.startsWith(XPATH_PREFIX)) { xpathConfigMap.put(key.substring(XPATH_PREFIX.length()), entry.getValue()); + } else if (key.startsWith(VALUE_PREFIX)) { + valueConfigMap.put(key.substring(VALUE_PREFIX.length()), + entry.getValue()); } else if (key.startsWith(SCRIPT_PREFIX)) { scriptConfigMap.put(key.substring(SCRIPT_PREFIX.length()), entry.getValue());