fix #127
This commit is contained in:
parent
256a0da50e
commit
19f949a0b2
5 changed files with 74 additions and 44 deletions
|
@ -62,6 +62,6 @@ public interface CrawlingConfig {
|
|||
}
|
||||
|
||||
public enum ConfigName {
|
||||
CLIENT, XPATH, SCRIPT, FIELD;
|
||||
CLIENT, XPATH, VALUE, SCRIPT, FIELD;
|
||||
}
|
||||
}
|
|
@ -97,13 +97,6 @@ public abstract class AbstractFessFileTransformer extends
|
|||
|
||||
protected abstract Extractor getExtractor(ResponseData responseData);
|
||||
|
||||
protected void putResultDataBody(final Map<String, Object> dataMap,
|
||||
final String key, final Object value) {
|
||||
if (!dataMap.containsKey(key)) {
|
||||
dataMap.put(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ResultData transform(final ResponseData responseData) {
|
||||
if (responseData == null || responseData.getResponseBody() == null) {
|
||||
|
@ -339,6 +332,17 @@ public abstract class AbstractFessFileTransformer extends
|
|||
putResultDataBody(dataMap, "url", url); // set again
|
||||
}
|
||||
|
||||
// from config
|
||||
final Map<String, String> scriptConfigMap = crawlingConfig
|
||||
.getConfigParameterMap(ConfigName.SCRIPT);
|
||||
final Map<String, String> valueConfigMap = crawlingConfig
|
||||
.getConfigParameterMap(ConfigName.VALUE);
|
||||
for (final Map.Entry<String, String> entry : valueConfigMap.entrySet()) {
|
||||
final String key = entry.getKey();
|
||||
putResultDataWithTemplate(dataMap, key, entry.getValue(),
|
||||
scriptConfigMap.get(key));
|
||||
}
|
||||
|
||||
try {
|
||||
resultData.setData(SerializeUtil.fromObjectToBinary(dataMap));
|
||||
} catch (final Exception e) {
|
||||
|
|
|
@ -17,12 +17,20 @@
|
|||
package jp.sf.fess.transformer;
|
||||
|
||||
import java.net.URLDecoder;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.codelibs.core.util.StringUtil;
|
||||
import org.seasar.framework.util.OgnlUtil;
|
||||
import org.seasar.robot.transformer.impl.XpathTransformer;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public abstract class AbstractFessXpathTransformer extends XpathTransformer {
|
||||
private static final Logger logger = LoggerFactory
|
||||
.getLogger(AbstractFessXpathTransformer.class);
|
||||
|
||||
public int maxSiteLength = 50;
|
||||
|
||||
public String unknownHostname = "unknown";
|
||||
|
@ -104,4 +112,41 @@ public abstract class AbstractFessXpathTransformer extends XpathTransformer {
|
|||
}
|
||||
return content.replaceAll("\\s+", " ");
|
||||
}
|
||||
|
||||
protected void putResultDataBody(final Map<String, Object> dataMap,
|
||||
final String key, final Object value) {
|
||||
dataMap.put(key, value);
|
||||
}
|
||||
|
||||
protected void putResultDataWithTemplate(final Map<String, Object> dataMap,
|
||||
final String key, String value, final String template) {
|
||||
if (template != null) {
|
||||
final Map<String, Object> paramMap = new HashMap<>(dataMap.size());
|
||||
paramMap.putAll(dataMap);
|
||||
paramMap.put("value", value);
|
||||
value = convertValue(template, paramMap);
|
||||
}
|
||||
if (value != null) {
|
||||
putResultDataBody(dataMap, key, value);
|
||||
}
|
||||
}
|
||||
|
||||
protected String convertValue(final String template,
|
||||
final Map<String, Object> paramMap) {
|
||||
if (StringUtil.isEmpty(template)) {
|
||||
return StringUtil.EMPTY;
|
||||
}
|
||||
|
||||
try {
|
||||
final Object exp = OgnlUtil.parseExpression(template);
|
||||
final Object value = OgnlUtil.getValue(exp, paramMap);
|
||||
if (value == null) {
|
||||
return null;
|
||||
}
|
||||
return value.toString();
|
||||
} catch (final Exception e) {
|
||||
logger.warn("Invalid value format: " + template, e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -51,7 +51,6 @@ import org.apache.xpath.objects.XObject;
|
|||
import org.codelibs.core.util.StringUtil;
|
||||
import org.cyberneko.html.parsers.DOMParser;
|
||||
import org.seasar.framework.util.InputStreamUtil;
|
||||
import org.seasar.framework.util.OgnlUtil;
|
||||
import org.seasar.framework.util.SerializeUtil;
|
||||
import org.seasar.robot.RobotCrawlAccessException;
|
||||
import org.seasar.robot.RobotSystemException;
|
||||
|
@ -97,11 +96,6 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer {
|
|||
|
||||
public Map<String, String> convertUrlMap = new HashMap<String, String>();
|
||||
|
||||
protected void putResultDataBody(final Map<String, Object> dataMap,
|
||||
final String key, final Object value) {
|
||||
dataMap.put(key, value);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void storeData(final ResponseData responseData,
|
||||
final ResultData resultData) {
|
||||
|
@ -348,38 +342,18 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer {
|
|||
final Map<String, String> scriptConfigMap = crawlingConfig
|
||||
.getConfigParameterMap(ConfigName.SCRIPT);
|
||||
for (final Map.Entry<String, String> entry : xpathConfigMap.entrySet()) {
|
||||
String value = getSingleNodeValue(document, entry.getValue(), true);
|
||||
final String key = entry.getKey();
|
||||
final String template = scriptConfigMap.get(key);
|
||||
if (template != null) {
|
||||
final Map<String, Object> paramMap = new HashMap<>(
|
||||
dataMap.size());
|
||||
paramMap.putAll(dataMap);
|
||||
paramMap.put("value", value);
|
||||
value = convertValue(template, paramMap);
|
||||
}
|
||||
if (value != null) {
|
||||
putResultDataBody(dataMap, key, value);
|
||||
}
|
||||
final String value = getSingleNodeValue(document, entry.getValue(),
|
||||
true);
|
||||
putResultDataWithTemplate(dataMap, key, value,
|
||||
scriptConfigMap.get(key));
|
||||
}
|
||||
}
|
||||
|
||||
protected String convertValue(final String template,
|
||||
final Map<String, Object> paramMap) {
|
||||
if (StringUtil.isEmpty(template)) {
|
||||
return StringUtil.EMPTY;
|
||||
}
|
||||
|
||||
try {
|
||||
final Object exp = OgnlUtil.parseExpression(template);
|
||||
final Object value = OgnlUtil.getValue(exp, paramMap);
|
||||
if (value == null) {
|
||||
return null;
|
||||
}
|
||||
return value.toString();
|
||||
} catch (final Exception e) {
|
||||
logger.warn("Invalid value format: " + template, e);
|
||||
return null;
|
||||
final Map<String, String> valueConfigMap = crawlingConfig
|
||||
.getConfigParameterMap(ConfigName.VALUE);
|
||||
for (final Map.Entry<String, String> entry : valueConfigMap.entrySet()) {
|
||||
final String key = entry.getKey();
|
||||
putResultDataWithTemplate(dataMap, key, entry.getValue(),
|
||||
scriptConfigMap.get(key));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -27,6 +27,8 @@ import org.codelibs.core.util.StringUtil;
|
|||
public class ParameterUtil {
|
||||
protected static final String XPATH_PREFIX = "field.xpath.";
|
||||
|
||||
protected static final String VALUE_PREFIX = "field.value.";
|
||||
|
||||
protected static final String SCRIPT_PREFIX = "field.script.";
|
||||
|
||||
protected static final String CLIENT_PREFIX = "client.";
|
||||
|
@ -77,10 +79,12 @@ public class ParameterUtil {
|
|||
final Map<ConfigName, Map<String, String>> map = new HashMap<>();
|
||||
final Map<String, String> clientConfigMap = new HashMap<>();
|
||||
final Map<String, String> xpathConfigMap = new HashMap<>();
|
||||
final Map<String, String> valueConfigMap = new HashMap<>();
|
||||
final Map<String, String> scriptConfigMap = new HashMap<>();
|
||||
final Map<String, String> fieldConfigMap = new HashMap<>();
|
||||
map.put(ConfigName.CLIENT, clientConfigMap);
|
||||
map.put(ConfigName.XPATH, xpathConfigMap);
|
||||
map.put(ConfigName.VALUE, valueConfigMap);
|
||||
map.put(ConfigName.SCRIPT, scriptConfigMap);
|
||||
map.put(ConfigName.FIELD, fieldConfigMap);
|
||||
for (final Map.Entry<String, String> entry : ParameterUtil.parse(
|
||||
|
@ -92,6 +96,9 @@ public class ParameterUtil {
|
|||
} else if (key.startsWith(XPATH_PREFIX)) {
|
||||
xpathConfigMap.put(key.substring(XPATH_PREFIX.length()),
|
||||
entry.getValue());
|
||||
} else if (key.startsWith(VALUE_PREFIX)) {
|
||||
valueConfigMap.put(key.substring(VALUE_PREFIX.length()),
|
||||
entry.getValue());
|
||||
} else if (key.startsWith(SCRIPT_PREFIX)) {
|
||||
scriptConfigMap.put(key.substring(SCRIPT_PREFIX.length()),
|
||||
entry.getValue());
|
||||
|
|
Loading…
Add table
Reference in a new issue