fix #122
This commit is contained in:
parent
a637c889f8
commit
29e7c2a34c
5 changed files with 141 additions and 6 deletions
|
@ -16,9 +16,16 @@
|
|||
|
||||
package jp.sf.fess.db.exentity;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.seasar.robot.client.S2RobotClientFactory;
|
||||
|
||||
public interface CrawlingConfig {
|
||||
public static final String XPATH_PREFIX = "field.xpath.";
|
||||
|
||||
public static final String SCRIPT_PREFIX = "field.script.";
|
||||
|
||||
public static final String CLIENT_PREFIX = "client.";
|
||||
|
||||
Long getId();
|
||||
|
||||
|
@ -38,6 +45,8 @@ public interface CrawlingConfig {
|
|||
|
||||
void initializeClientFactory(S2RobotClientFactory s2RobotClientFactory);
|
||||
|
||||
Map<String, String> getConfigParameterMap(ConfigName name);
|
||||
|
||||
public enum ConfigType {
|
||||
WEB("W"), FILE("F"), DATA("D");
|
||||
|
||||
|
@ -59,4 +68,7 @@ public interface CrawlingConfig {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Names of the parameter groups that can be requested through
 * {@code getConfigParameterMap(ConfigName)}.
 */
public enum ConfigName {
    /** Group for client parameters. */
    CLIENT,
    /** Group for XPath parameters. */
    XPATH,
    /** Group for script parameters. */
    SCRIPT
}
|
||||
}
|
|
@ -18,6 +18,7 @@ package jp.sf.fess.db.exentity;
|
|||
|
||||
import java.math.BigDecimal;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
@ -383,4 +384,9 @@ public class DataCrawlingConfig extends BsDataCrawlingConfig implements
|
|||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, String> getConfigParameterMap(final ConfigName name) {
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@ package jp.sf.fess.db.exentity;
|
|||
|
||||
import java.math.BigDecimal;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
@ -60,6 +61,8 @@ public class FileCrawlingConfig extends BsFileCrawlingConfig implements
|
|||
|
||||
protected volatile Pattern[] excludedDocPathPatterns;
|
||||
|
||||
protected volatile Map<ConfigName, Map<String, String>> configParameterMap;
|
||||
|
||||
public FileCrawlingConfig() {
|
||||
super();
|
||||
setBoost(BigDecimal.ONE);
|
||||
|
@ -235,9 +238,10 @@ public class FileCrawlingConfig extends BsFileCrawlingConfig implements
|
|||
// Parameters
|
||||
final Map<String, Object> paramMap = new HashMap<String, Object>();
|
||||
clientFactory.setInitParameterMap(paramMap);
|
||||
final String configParam = getConfigParameter();
|
||||
if (StringUtil.isNotBlank(configParam)) {
|
||||
ParameterUtil.loadConfigParams(paramMap, configParam);
|
||||
|
||||
final Map<String, String> clientConfigMap = getConfigParameterMap(ConfigName.CLIENT);
|
||||
if (clientConfigMap != null) {
|
||||
paramMap.putAll(clientConfigMap);
|
||||
}
|
||||
|
||||
// auth params
|
||||
|
@ -262,4 +266,39 @@ public class FileCrawlingConfig extends BsFileCrawlingConfig implements
|
|||
smbAuthList.toArray(new SmbAuthentication[smbAuthList.size()]));
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, String> getConfigParameterMap(final ConfigName name) {
|
||||
if (configParameterMap == null) {
|
||||
final Map<ConfigName, Map<String, String>> map = new HashMap<>();
|
||||
final Map<String, String> clientConfigMap = new HashMap<>();
|
||||
final Map<String, String> xpathConfigMap = new HashMap<>();
|
||||
final Map<String, String> scriptConfigMap = new HashMap<>();
|
||||
map.put(ConfigName.CLIENT, clientConfigMap);
|
||||
map.put(ConfigName.XPATH, xpathConfigMap);
|
||||
map.put(ConfigName.SCRIPT, scriptConfigMap);
|
||||
for (final Map.Entry<String, String> entry : ParameterUtil.parse(
|
||||
getConfigParameter()).entrySet()) {
|
||||
final String key = entry.getKey();
|
||||
if (key.startsWith(CLIENT_PREFIX)) {
|
||||
clientConfigMap.put(key.substring(CLIENT_PREFIX.length()),
|
||||
entry.getValue());
|
||||
} else if (key.startsWith(XPATH_PREFIX)) {
|
||||
xpathConfigMap.put(key.substring(XPATH_PREFIX.length()),
|
||||
entry.getValue());
|
||||
} else if (key.startsWith(SCRIPT_PREFIX)) {
|
||||
scriptConfigMap.put(key.substring(SCRIPT_PREFIX.length()),
|
||||
entry.getValue());
|
||||
}
|
||||
}
|
||||
|
||||
configParameterMap = map;
|
||||
}
|
||||
|
||||
final Map<String, String> configMap = configParameterMap.get(name);
|
||||
if (configMap == null) {
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
return configMap;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@ package jp.sf.fess.db.exentity;
|
|||
|
||||
import java.math.BigDecimal;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
@ -59,6 +60,8 @@ public class WebCrawlingConfig extends BsWebCrawlingConfig implements
|
|||
|
||||
protected volatile Pattern[] excludedDocUrlPatterns;
|
||||
|
||||
protected volatile Map<ConfigName, Map<String, String>> configParameterMap;
|
||||
|
||||
public WebCrawlingConfig() {
|
||||
super();
|
||||
setBoost(BigDecimal.ONE);
|
||||
|
@ -242,9 +245,9 @@ public class WebCrawlingConfig extends BsWebCrawlingConfig implements
|
|||
final Map<String, Object> paramMap = new HashMap<String, Object>();
|
||||
clientFactory.setInitParameterMap(paramMap);
|
||||
|
||||
final String configParam = getConfigParameter();
|
||||
if (StringUtil.isNotBlank(configParam)) {
|
||||
ParameterUtil.loadConfigParams(paramMap, configParam);
|
||||
final Map<String, String> clientConfigMap = getConfigParameterMap(ConfigName.CLIENT);
|
||||
if (clientConfigMap != null) {
|
||||
paramMap.putAll(clientConfigMap);
|
||||
}
|
||||
|
||||
final String userAgent = getUserAgent();
|
||||
|
@ -274,4 +277,38 @@ public class WebCrawlingConfig extends BsWebCrawlingConfig implements
|
|||
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, String> getConfigParameterMap(final ConfigName name) {
|
||||
if (configParameterMap == null) {
|
||||
final Map<ConfigName, Map<String, String>> map = new HashMap<>();
|
||||
final Map<String, String> clientConfigMap = new HashMap<>();
|
||||
final Map<String, String> xpathConfigMap = new HashMap<>();
|
||||
final Map<String, String> scriptConfigMap = new HashMap<>();
|
||||
map.put(ConfigName.CLIENT, clientConfigMap);
|
||||
map.put(ConfigName.XPATH, xpathConfigMap);
|
||||
map.put(ConfigName.SCRIPT, scriptConfigMap);
|
||||
for (final Map.Entry<String, String> entry : ParameterUtil.parse(
|
||||
getConfigParameter()).entrySet()) {
|
||||
final String key = entry.getKey();
|
||||
if (key.startsWith(CLIENT_PREFIX)) {
|
||||
clientConfigMap.put(key.substring(CLIENT_PREFIX.length()),
|
||||
entry.getValue());
|
||||
} else if (key.startsWith(XPATH_PREFIX)) {
|
||||
xpathConfigMap.put(key.substring(XPATH_PREFIX.length()),
|
||||
entry.getValue());
|
||||
} else if (key.startsWith(SCRIPT_PREFIX)) {
|
||||
scriptConfigMap.put(key.substring(SCRIPT_PREFIX.length()),
|
||||
entry.getValue());
|
||||
}
|
||||
}
|
||||
|
||||
configParameterMap = map;
|
||||
}
|
||||
|
||||
final Map<String, String> configMap = configParameterMap.get(name);
|
||||
if (configMap == null) {
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
return configMap;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -38,6 +38,7 @@ import javax.xml.transform.TransformerException;
|
|||
|
||||
import jp.sf.fess.Constants;
|
||||
import jp.sf.fess.db.exentity.CrawlingConfig;
|
||||
import jp.sf.fess.db.exentity.CrawlingConfig.ConfigName;
|
||||
import jp.sf.fess.helper.CrawlingConfigHelper;
|
||||
import jp.sf.fess.helper.CrawlingSessionHelper;
|
||||
import jp.sf.fess.helper.FileTypeHelper;
|
||||
|
@ -52,6 +53,7 @@ import org.apache.commons.io.IOUtils;
|
|||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.xpath.objects.XObject;
|
||||
import org.cyberneko.html.parsers.DOMParser;
|
||||
import org.seasar.framework.util.OgnlUtil;
|
||||
import org.seasar.framework.util.SerializeUtil;
|
||||
import org.seasar.framework.util.StringUtil;
|
||||
import org.seasar.robot.RobotCrawlAccessException;
|
||||
|
@ -331,6 +333,45 @@ public class FessXpathTransformer extends AbstractFessXpathTransformer {
|
|||
putResultDataBody(dataMap, "url", url); // set again
|
||||
}
|
||||
|
||||
// from config
|
||||
final Map<String, String> xpathConfigMap = crawlingConfig
|
||||
.getConfigParameterMap(ConfigName.XPATH);
|
||||
final Map<String, String> scriptConfigMap = crawlingConfig
|
||||
.getConfigParameterMap(ConfigName.SCRIPT);
|
||||
for (final Map.Entry<String, String> entry : xpathConfigMap.entrySet()) {
|
||||
String value = getSingleNodeValue(document, entry.getValue(), true);
|
||||
final String key = entry.getKey();
|
||||
final String template = scriptConfigMap.get(key);
|
||||
if (template != null) {
|
||||
final Map<String, Object> paramMap = new HashMap<>(
|
||||
dataMap.size());
|
||||
paramMap.putAll(dataMap);
|
||||
paramMap.put("value", value);
|
||||
value = convertValue(template, paramMap);
|
||||
}
|
||||
if (value != null) {
|
||||
putResultDataBody(dataMap, key, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected String convertValue(final String template,
|
||||
final Map<String, Object> paramMap) {
|
||||
if (StringUtil.isEmpty(template)) {
|
||||
return Constants.EMPTY_STRING;
|
||||
}
|
||||
|
||||
try {
|
||||
final Object exp = OgnlUtil.parseExpression(template);
|
||||
final Object value = OgnlUtil.getValue(exp, paramMap);
|
||||
if (value == null) {
|
||||
return null;
|
||||
}
|
||||
return value.toString();
|
||||
} catch (final Exception e) {
|
||||
logger.warn("Invalid value format: " + template, e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
protected String getCanonicalUrl(final ResponseData responseData,
|
||||
|
|
Loading…
Add table
Reference in a new issue