This commit is contained in:
Shinsuke Sugaya 2014-03-08 08:03:25 +09:00
parent 256a0da50e
commit 19f949a0b2
5 changed files with 74 additions and 44 deletions

View file

@ -62,6 +62,6 @@ public interface CrawlingConfig {
}
/**
 * Names the groups of crawling configuration parameters. A constant is
 * passed to getConfigParameterMap(...) to select the key/value map of that
 * group.
 */
public enum ConfigName {
// NOTE(review): this view appears to list the constants twice (before and
// after the change); the second list adds VALUE -- confirm against the file.
CLIENT, XPATH, SCRIPT, FIELD;
CLIENT, XPATH, VALUE, SCRIPT, FIELD;
}
}

View file

@ -97,13 +97,6 @@ public abstract class AbstractFessFileTransformer extends
protected abstract Extractor getExtractor(ResponseData responseData);
/**
 * Adds an entry to the result data map unless the key is already present.
 * An existing mapping (even one whose value is {@code null}) is never
 * overwritten.
 *
 * @param dataMap map collecting the result data; modified in place
 * @param key     entry name
 * @param value   entry value to store when the key is absent
 */
protected void putResultDataBody(final Map<String, Object> dataMap,
        final String key, final Object value) {
    if (dataMap.containsKey(key)) {
        // first writer wins: keep whatever was stored earlier
        return;
    }
    dataMap.put(key, value);
}
@Override
public ResultData transform(final ResponseData responseData) {
if (responseData == null || responseData.getResponseBody() == null) {
@ -339,6 +332,17 @@ public abstract class AbstractFessFileTransformer extends
putResultDataBody(dataMap, "url", url); // set again
}
// from config
final Map<String, String> scriptConfigMap = crawlingConfig
.getConfigParameterMap(ConfigName.SCRIPT);
final Map<String, String> valueConfigMap = crawlingConfig
.getConfigParameterMap(ConfigName.VALUE);
for (final Map.Entry<String, String> entry : valueConfigMap.entrySet()) {
final String key = entry.getKey();
putResultDataWithTemplate(dataMap, key, entry.getValue(),
scriptConfigMap.get(key));
}
try {
resultData.setData(SerializeUtil.fromObjectToBinary(dataMap));
} catch (final Exception e) {

View file

@ -17,12 +17,20 @@
package jp.sf.fess.transformer;
import java.net.URLDecoder;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
import org.codelibs.core.util.StringUtil;
import org.seasar.framework.util.OgnlUtil;
import org.seasar.robot.transformer.impl.XpathTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public abstract class AbstractFessXpathTransformer extends XpathTransformer {
private static final Logger logger = LoggerFactory
.getLogger(AbstractFessXpathTransformer.class);
public int maxSiteLength = 50;
public String unknownHostname = "unknown";
@ -104,4 +112,41 @@ public abstract class AbstractFessXpathTransformer extends XpathTransformer {
}
return content.replaceAll("\\s+", " ");
}
/**
 * Stores {@code value} under {@code key} in the result data map. Uses a
 * plain {@code Map.put}, so an existing entry for the key is replaced.
 *
 * @param dataMap map collecting the result data; modified in place
 * @param key     entry name
 * @param value   entry value
 */
protected void putResultDataBody(final Map<String, Object> dataMap,
final String key, final Object value) {
dataMap.put(key, value);
}
/**
 * Stores a value in the result data map, optionally passing it through a
 * template first.
 * <p>
 * When {@code template} is not {@code null}, a copy of {@code dataMap}
 * extended with the entry {@code "value"} (the raw value) is handed to
 * {@link #convertValue(String, Map)} and the converted result is used
 * instead of the raw value. A {@code null} final value is not stored.
 *
 * @param dataMap  map collecting the result data; modified in place
 * @param key      entry name
 * @param value    raw value, used as-is when no template is given
 * @param template expression applied to the value, or {@code null}
 */
protected void putResultDataWithTemplate(final Map<String, Object> dataMap,
        final String key, String value, final String template) {
    String resolved = value;
    if (template != null) {
        // evaluate against a copy so the "value" entry never leaks into dataMap
        final Map<String, Object> context = new HashMap<>(dataMap.size());
        context.putAll(dataMap);
        context.put("value", value);
        resolved = convertValue(template, context);
    }
    if (resolved == null) {
        return;
    }
    putResultDataBody(dataMap, key, resolved);
}
/**
 * Evaluates {@code template} as an expression (via {@code OgnlUtil})
 * against {@code paramMap} and returns the result as a string.
 *
 * @param template expression text
 * @param paramMap object the expression is evaluated against
 * @return {@code StringUtil.EMPTY} when the template is empty; the
 *         result's {@code toString()} otherwise; {@code null} when the
 *         expression yields {@code null} or evaluation throws (the failure
 *         is logged at warn level)
 */
protected String convertValue(final String template,
        final Map<String, Object> paramMap) {
    if (StringUtil.isEmpty(template)) {
        return StringUtil.EMPTY;
    }
    try {
        final Object evaluated = OgnlUtil.getValue(
                OgnlUtil.parseExpression(template), paramMap);
        // toString() stays inside the try so its failures are logged too
        return evaluated == null ? null : evaluated.toString();
    } catch (final Exception e) {
        logger.warn("Invalid value format: " + template, e);
        return null;
    }
}
}

View file

@ -51,7 +51,6 @@ import org.apache.xpath.objects.XObject;
import org.codelibs.core.util.StringUtil;
import org.cyberneko.html.parsers.DOMParser;
import org.seasar.framework.util.InputStreamUtil;
import org.seasar.framework.util.OgnlUtil;
import org.seasar.framework.util.SerializeUtil;
import org.seasar.robot.RobotCrawlAccessException;
import org.seasar.robot.RobotSystemException;
@ -97,11 +96,6 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer {
public Map<String, String> convertUrlMap = new HashMap<String, String>();
/**
 * Stores {@code value} under {@code key} in the result data map, replacing
 * any entry already present for that key.
 *
 * @param dataMap map collecting the result data; modified in place
 * @param key     entry name
 * @param value   entry value
 */
protected void putResultDataBody(final Map<String, Object> dataMap,
final String key, final Object value) {
dataMap.put(key, value);
}
@Override
protected void storeData(final ResponseData responseData,
final ResultData resultData) {
@ -348,38 +342,18 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer {
final Map<String, String> scriptConfigMap = crawlingConfig
.getConfigParameterMap(ConfigName.SCRIPT);
for (final Map.Entry<String, String> entry : xpathConfigMap.entrySet()) {
String value = getSingleNodeValue(document, entry.getValue(), true);
final String key = entry.getKey();
final String template = scriptConfigMap.get(key);
if (template != null) {
final Map<String, Object> paramMap = new HashMap<>(
dataMap.size());
paramMap.putAll(dataMap);
paramMap.put("value", value);
value = convertValue(template, paramMap);
}
if (value != null) {
putResultDataBody(dataMap, key, value);
}
final String value = getSingleNodeValue(document, entry.getValue(),
true);
putResultDataWithTemplate(dataMap, key, value,
scriptConfigMap.get(key));
}
}
protected String convertValue(final String template,
final Map<String, Object> paramMap) {
if (StringUtil.isEmpty(template)) {
return StringUtil.EMPTY;
}
try {
final Object exp = OgnlUtil.parseExpression(template);
final Object value = OgnlUtil.getValue(exp, paramMap);
if (value == null) {
return null;
}
return value.toString();
} catch (final Exception e) {
logger.warn("Invalid value format: " + template, e);
return null;
final Map<String, String> valueConfigMap = crawlingConfig
.getConfigParameterMap(ConfigName.VALUE);
for (final Map.Entry<String, String> entry : valueConfigMap.entrySet()) {
final String key = entry.getKey();
putResultDataWithTemplate(dataMap, key, entry.getValue(),
scriptConfigMap.get(key));
}
}

View file

@ -27,6 +27,8 @@ import org.codelibs.core.util.StringUtil;
public class ParameterUtil {
protected static final String XPATH_PREFIX = "field.xpath.";
protected static final String VALUE_PREFIX = "field.value.";
protected static final String SCRIPT_PREFIX = "field.script.";
protected static final String CLIENT_PREFIX = "client.";
@ -77,10 +79,12 @@ public class ParameterUtil {
final Map<ConfigName, Map<String, String>> map = new HashMap<>();
final Map<String, String> clientConfigMap = new HashMap<>();
final Map<String, String> xpathConfigMap = new HashMap<>();
final Map<String, String> valueConfigMap = new HashMap<>();
final Map<String, String> scriptConfigMap = new HashMap<>();
final Map<String, String> fieldConfigMap = new HashMap<>();
map.put(ConfigName.CLIENT, clientConfigMap);
map.put(ConfigName.XPATH, xpathConfigMap);
map.put(ConfigName.VALUE, valueConfigMap);
map.put(ConfigName.SCRIPT, scriptConfigMap);
map.put(ConfigName.FIELD, fieldConfigMap);
for (final Map.Entry<String, String> entry : ParameterUtil.parse(
@ -92,6 +96,9 @@ public class ParameterUtil {
} else if (key.startsWith(XPATH_PREFIX)) {
xpathConfigMap.put(key.substring(XPATH_PREFIX.length()),
entry.getValue());
} else if (key.startsWith(VALUE_PREFIX)) {
valueConfigMap.put(key.substring(VALUE_PREFIX.length()),
entry.getValue());
} else if (key.startsWith(SCRIPT_PREFIX)) {
scriptConfigMap.put(key.substring(SCRIPT_PREFIX.length()),
entry.getValue());