#23 initialize S2RobotClientFactory

This commit is contained in:
Shinsuke Sugaya 2013-10-14 21:36:05 +09:00
parent e56083224e
commit 15b76c3c3a
8 changed files with 359 additions and 296 deletions

View file

@ -16,6 +16,8 @@
package jp.sf.fess.db.exentity;
import org.seasar.robot.client.S2RobotClientFactory;
public interface CrawlingConfig {
Long getId();
@ -31,4 +33,6 @@ public interface CrawlingConfig {
String getIndexingTarget(String input);
String getConfigId();
/**
 * Initializes the given S2Robot client factory for this crawling
 * configuration.
 *
 * @param s2RobotClientFactory the client factory to initialize
 */
void initializeClientFactory(S2RobotClientFactory s2RobotClientFactory);
}

View file

@ -18,13 +18,33 @@ package jp.sf.fess.db.exentity;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import jp.sf.fess.Constants;
import jp.sf.fess.db.bsentity.BsDataCrawlingConfig;
import jp.sf.fess.util.ParameterUtil;
import org.apache.http.auth.AuthScheme;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.Credentials;
import org.apache.http.auth.NTCredentials;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.impl.auth.BasicScheme;
import org.apache.http.impl.auth.DigestScheme;
import org.apache.http.impl.auth.NTLMScheme;
import org.seasar.framework.util.StringUtil;
import org.seasar.robot.client.S2RobotClientFactory;
import org.seasar.robot.client.http.Authentication;
import org.seasar.robot.client.http.HcHttpClient;
import org.seasar.robot.client.http.impl.AuthenticationImpl;
import org.seasar.robot.client.http.ntlm.JcifsEngine;
import org.seasar.robot.client.smb.SmbAuthentication;
import org.seasar.robot.client.smb.SmbClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* The entity of DATA_CRAWLING_CONFIG.
@ -40,6 +60,19 @@ public class DataCrawlingConfig extends BsDataCrawlingConfig implements
/** Serial version UID. (Default) */
private static final long serialVersionUID = 1L;
private static final Logger logger = LoggerFactory
.getLogger(DataCrawlingConfig.class);
/**
 * Prefix of the numbered request header parameters
 * ({@code s2robot.web.header.<n>.name} / {@code s2robot.web.header.<n>.value}).
 */
private static final String S2ROBOT_WEB_HEADER_PREFIX = "s2robot.web.header.";
/**
 * Key of the comma-separated web authentication names; also the base of
 * the per-name keys ({@code s2robot.web.auth.<name>.<attribute>}).
 */
private static final String S2ROBOT_WEB_AUTH = "s2robot.web.auth";
/** Key of the user agent handler parameter. */
private static final String S2ROBOT_USERAGENT = "s2robot.useragent";
/**
 * Prefix of handler parameters that are copied, with the prefix removed,
 * into the client factory's init parameters.
 */
private static final String S2ROBOT_PARAM_PREFIX = "s2robot.param.";
/**
 * Key of the comma-separated file authentication names; also the base of
 * the per-name keys ({@code s2robot.file.auth.<name>.<attribute>}).
 * Declared as {@code String} like the other keys: it is only ever used as
 * a map key and as a string prefix.
 */
private static final String S2ROBOT_FILE_AUTH = "s2robot.file.auth";
private String[] browserTypeIds;
private String[] labelTypeIds;
@ -50,6 +83,10 @@ public class DataCrawlingConfig extends BsDataCrawlingConfig implements
protected Pattern[] excludedDocPathPatterns;
// Parsed form of getHandlerParameter(); filled on first call of getHandlerParameterMap().
private Map<String, String> handlerParameterMap;
// Parsed form of getHandlerScript(); filled on first call of getHandlerScriptMap().
private Map<String, String> handlerScriptMap;
public DataCrawlingConfig() {
super();
setBoost(BigDecimal.ONE);
@ -158,4 +195,195 @@ public class DataCrawlingConfig extends BsDataCrawlingConfig implements
}
return null;
}
/**
 * Returns the handler parameter string parsed into a map.
 * <p>
 * The string is parsed with {@code ParameterUtil.parse} on the first call
 * and the resulting map is kept in {@code handlerParameterMap} for later
 * calls.
 * NOTE(review): nothing in this section resets the cached map when the
 * handler parameter changes - confirm that is acceptable for callers.
 *
 * @return the parsed handler parameters
 */
public Map<String, String> getHandlerParameterMap() {
    if (handlerParameterMap != null) {
        return handlerParameterMap;
    }
    handlerParameterMap = ParameterUtil.parse(getHandlerParameter());
    return handlerParameterMap;
}
/**
 * Returns the handler script string parsed into a map.
 * <p>
 * The string is parsed with {@code ParameterUtil.parse} on the first call
 * and the resulting map is kept in {@code handlerScriptMap} for later
 * calls.
 * NOTE(review): nothing in this section resets the cached map when the
 * handler script changes - confirm that is acceptable for callers.
 *
 * @return the parsed handler scripts
 */
public Map<String, String> getHandlerScriptMap() {
    if (handlerScriptMap != null) {
        return handlerScriptMap;
    }
    handlerScriptMap = ParameterUtil.parse(getHandlerScript());
    return handlerScriptMap;
}
/**
 * Initializes the given client factory from this configuration's handler
 * parameters (see {@link #getHandlerParameterMap()}).
 * <p>
 * A new init-parameter map is installed on the factory and then filled
 * from the following handler parameters:
 * <ul>
 * <li>{@code s2robot.param.<key>}: copied as {@code <key>}</li>
 * <li>{@code s2robot.useragent}: the user agent, when not blank</li>
 * <li>{@code s2robot.web.auth}: comma-separated names, each configured by
 * {@code s2robot.web.auth.<name>.scheme}, {@code .host}, {@code .port},
 * {@code .realm}, {@code .username}, {@code .password} (and
 * {@code .workstation}, {@code .domain} for NTLM)</li>
 * <li>{@code s2robot.web.header.<n>.name} / {@code .value}: request
 * headers, with {@code n} counting up from 1 until a name is blank</li>
 * <li>{@code s2robot.file.auth}: comma-separated names, each configured by
 * {@code s2robot.file.auth.<name>.scheme}, {@code .domain}, {@code .host},
 * {@code .port}, {@code .username}, {@code .password}; only entries whose
 * scheme equals {@code Constants.SAMBA} are used</li>
 * </ul>
 *
 * @param robotClientFactory the client factory to initialize
 */
@Override
public void initializeClientFactory(
        final S2RobotClientFactory robotClientFactory) {
    final Map<String, String> paramMap = getHandlerParameterMap();

    // The map is handed to the factory first and populated afterwards;
    // this order is kept from the original implementation.
    final Map<String, Object> factoryParamMap = new HashMap<String, Object>();
    robotClientFactory.setInitParameterMap(factoryParamMap);

    // parameters
    for (final Map.Entry<String, String> entry : paramMap.entrySet()) {
        final String key = entry.getKey();
        if (key.startsWith(S2ROBOT_PARAM_PREFIX)) {
            factoryParamMap.put(
                    key.substring(S2ROBOT_PARAM_PREFIX.length()),
                    entry.getValue());
        }
    }

    // user agent
    final String userAgent = paramMap.get(S2ROBOT_USERAGENT);
    if (StringUtil.isNotBlank(userAgent)) {
        factoryParamMap.put(HcHttpClient.USER_AGENT_PROPERTY, userAgent);
    }

    // web auth: the property is set whenever names are given, even if
    // every entry was skipped and the resulting array is empty
    final String webAuthStr = paramMap.get(S2ROBOT_WEB_AUTH);
    if (StringUtil.isNotBlank(webAuthStr)) {
        final List<Authentication> basicAuthList = createWebAuthentications(
                paramMap, webAuthStr.split(","));
        factoryParamMap.put(HcHttpClient.BASIC_AUTHENTICATIONS_PROPERTY,
                basicAuthList.toArray(new Authentication[basicAuthList
                        .size()]));
    }

    // request header
    final List<org.seasar.robot.client.http.RequestHeader> rhList = createRequestHeaders(paramMap);
    if (!rhList.isEmpty()) {
        factoryParamMap
                .put(HcHttpClient.REQUERT_HEADERS_PROPERTY,
                        rhList.toArray(new org.seasar.robot.client.http.RequestHeader[rhList
                                .size()]));
    }

    // file auth: unlike web auth, the property is only set when at least
    // one authentication was built
    final String fileAuthStr = paramMap.get(S2ROBOT_FILE_AUTH);
    if (StringUtil.isNotBlank(fileAuthStr)) {
        final List<SmbAuthentication> smbAuthList = createSmbAuthentications(
                paramMap, fileAuthStr.split(","));
        if (!smbAuthList.isEmpty()) {
            factoryParamMap.put(SmbClient.SMB_AUTHENTICATIONS_PROPERTY,
                    smbAuthList.toArray(new SmbAuthentication[smbAuthList
                            .size()]));
        }
    }
}

/**
 * Builds one web authentication per given name; a name whose username is
 * empty is skipped with a warning.
 */
private static List<Authentication> createWebAuthentications(
        final Map<String, String> paramMap, final String[] webAuthNames) {
    final List<Authentication> basicAuthList = new ArrayList<Authentication>();
    for (final String webAuthName : webAuthNames) {
        final String prefix = S2ROBOT_WEB_AUTH + "." + webAuthName + ".";
        final String scheme = paramMap.get(prefix + "scheme");
        final String hostname = paramMap.get(prefix + "host");
        final String port = paramMap.get(prefix + "port");
        final String realm = paramMap.get(prefix + "realm");
        final String username = paramMap.get(prefix + "username");
        final String password = paramMap.get(prefix + "password");

        if (StringUtil.isEmpty(username)) {
            logger.warn("username is empty. webAuth:" + webAuthName);
            continue;
        }

        final AuthScheme authScheme = createAuthScheme(scheme);
        final AuthScope authScope = createAuthScope(hostname, port, realm,
                scheme);
        final Credentials credentials = createCredentials(paramMap, prefix,
                scheme, username, password);

        basicAuthList.add(new AuthenticationImpl(authScope, credentials,
                authScheme));
    }
    return basicAuthList;
}

/**
 * Maps a scheme name to its HttpClient scheme object; returns null when
 * the name is none of BASIC, DIGEST or NTLM.
 */
private static AuthScheme createAuthScheme(final String scheme) {
    if (Constants.BASIC.equals(scheme)) {
        return new BasicScheme();
    }
    if (Constants.DIGEST.equals(scheme)) {
        return new DigestScheme();
    }
    if (Constants.NTLM.equals(scheme)) {
        return new NTLMScheme(new JcifsEngine());
    }
    return null;
}

/**
 * Builds the authentication scope; a blank host yields AuthScope.ANY and
 * blank or unparsable parts fall back to the corresponding ANY_* value.
 */
private static AuthScope createAuthScope(final String hostname,
        final String port, final String realm, final String scheme) {
    if (StringUtil.isBlank(hostname)) {
        return AuthScope.ANY;
    }

    int p = AuthScope.ANY_PORT;
    if (StringUtil.isNotBlank(port)) {
        try {
            p = Integer.parseInt(port);
        } catch (final NumberFormatException e) {
            // keep ANY_PORT when the configured port is not a number
            logger.warn("Failed to parse " + port, e);
        }
    }

    final String r = StringUtil.isBlank(realm) ? AuthScope.ANY_REALM
            : realm;

    // NTLM is registered with ANY_SCHEME, exactly like a blank scheme
    final String s = StringUtil.isBlank(scheme)
            || Constants.NTLM.equals(scheme) ? AuthScope.ANY_SCHEME
            : scheme;

    return new AuthScope(hostname, p, r, s);
}

/**
 * Builds NT credentials for NTLM and username/password credentials for
 * every other scheme; missing optional values become empty strings.
 */
private static Credentials createCredentials(
        final Map<String, String> paramMap, final String prefix,
        final String scheme, final String username, final String password) {
    final String pass = password == null ? "" : password;
    if (Constants.NTLM.equals(scheme)) {
        final String workstation = paramMap.get(prefix + "workstation");
        final String domain = paramMap.get(prefix + "domain");
        return new NTCredentials(username, pass,
                workstation == null ? "" : workstation,
                domain == null ? "" : domain);
    }
    return new UsernamePasswordCredentials(username, pass);
}

/**
 * Collects the numbered request headers, starting at 1 and stopping at the
 * first index whose name is blank.
 */
private static List<org.seasar.robot.client.http.RequestHeader> createRequestHeaders(
        final Map<String, String> paramMap) {
    final List<org.seasar.robot.client.http.RequestHeader> rhList = new ArrayList<org.seasar.robot.client.http.RequestHeader>();
    int count = 1;
    String headerName = paramMap.get(S2ROBOT_WEB_HEADER_PREFIX + count
            + ".name");
    while (StringUtil.isNotBlank(headerName)) {
        final String headerValue = paramMap.get(S2ROBOT_WEB_HEADER_PREFIX
                + count + ".value");
        rhList.add(new org.seasar.robot.client.http.RequestHeader(
                headerName, headerValue));
        count++;
        headerName = paramMap.get(S2ROBOT_WEB_HEADER_PREFIX + count
                + ".name");
    }
    return rhList;
}

/**
 * Builds one SMB authentication per given name whose scheme is SAMBA; a
 * name whose username is empty is skipped with a warning.
 */
private static List<SmbAuthentication> createSmbAuthentications(
        final Map<String, String> paramMap, final String[] fileAuthNames) {
    final List<SmbAuthentication> smbAuthList = new ArrayList<SmbAuthentication>();
    for (final String fileAuthName : fileAuthNames) {
        final String prefix = S2ROBOT_FILE_AUTH + "." + fileAuthName + ".";
        final String scheme = paramMap.get(prefix + "scheme");
        if (!Constants.SAMBA.equals(scheme)) {
            continue;
        }

        final String domain = paramMap.get(prefix + "domain");
        final String hostname = paramMap.get(prefix + "host");
        final String port = paramMap.get(prefix + "port");
        final String username = paramMap.get(prefix + "username");
        final String password = paramMap.get(prefix + "password");

        if (StringUtil.isEmpty(username)) {
            logger.warn("username is empty. fileAuth:" + fileAuthName);
            continue;
        }

        final SmbAuthentication smbAuth = new SmbAuthentication();
        smbAuth.setDomain(domain == null ? "" : domain);
        smbAuth.setServer(hostname);
        if (StringUtil.isNotBlank(port)) {
            try {
                smbAuth.setPort(Integer.parseInt(port));
            } catch (final NumberFormatException e) {
                // leave the port unset when it is not a number
                logger.warn("Failed to parse " + port, e);
            }
        }
        smbAuth.setUsername(username);
        smbAuth.setPassword(password == null ? "" : password);
        smbAuthList.add(smbAuth);
    }
    return smbAuthList;
}
}

View file

@ -18,15 +18,22 @@ package jp.sf.fess.db.exentity;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import jp.sf.fess.Constants;
import jp.sf.fess.db.bsentity.BsFileCrawlingConfig;
import jp.sf.fess.helper.SystemHelper;
import jp.sf.fess.service.FileAuthenticationService;
import jp.sf.fess.util.ParameterUtil;
import org.seasar.framework.container.SingletonS2Container;
import org.seasar.framework.util.StringUtil;
import org.seasar.robot.client.S2RobotClientFactory;
import org.seasar.robot.client.smb.SmbAuthentication;
import org.seasar.robot.client.smb.SmbClient;
/**
* The entity of FILE_CRAWLING_CONFIG.
@ -222,4 +229,39 @@ public class FileCrawlingConfig extends BsFileCrawlingConfig implements
}
return null;
}
/**
 * Initializes the given client factory for this file crawling
 * configuration.
 * <p>
 * A new init-parameter map is installed on the factory. It receives the
 * entries parsed from the config parameter string (when not blank) and,
 * under {@code SmbClient.SMB_AUTHENTICATIONS_PROPERTY}, the SMB
 * authentications built from the file authentications registered for this
 * configuration's id. Only file authentications whose protocol scheme
 * equals {@code Constants.SAMBA} are used; the property is set even when
 * the resulting array is empty.
 *
 * @param clientFactory the client factory to initialize
 */
@Override
public void initializeClientFactory(final S2RobotClientFactory clientFactory) {
    final FileAuthenticationService fileAuthenticationService = SingletonS2Container
            .getComponent(FileAuthenticationService.class);

    // Parameters
    final Map<String, Object> paramMap = new HashMap<String, Object>();
    clientFactory.setInitParameterMap(paramMap);

    final String configParam = getConfigParameter();
    if (StringUtil.isNotBlank(configParam)) {
        ParameterUtil.loadConfigParams(paramMap, configParam);
    }

    // auth params
    final List<FileAuthentication> fileAuthList = fileAuthenticationService
            .getFileAuthenticationList(getId());
    final List<SmbAuthentication> smbAuthList = new ArrayList<SmbAuthentication>();
    for (final FileAuthentication fileAuth : fileAuthList) {
        if (Constants.SAMBA.equals(fileAuth.getProtocolScheme())) {
            final SmbAuthentication smbAuth = new SmbAuthentication();
            // the domain comes from the authentication's free-form
            // parameters; a missing value becomes an empty string
            final Map<String, String> map = ParameterUtil.parse(fileAuth
                    .getParameters());
            final String domain = map.get("domain");
            smbAuth.setDomain(domain == null ? "" : domain);
            smbAuth.setServer(fileAuth.getHostname());
            smbAuth.setPort(fileAuth.getPort());
            smbAuth.setUsername(fileAuth.getUsername());
            smbAuth.setPassword(fileAuth.getPassword());
            smbAuthList.add(smbAuth);
        }
    }
    paramMap.put(SmbClient.SMB_AUTHENTICATIONS_PROPERTY,
            smbAuthList.toArray(new SmbAuthentication[smbAuthList.size()]));
}
}

View file

@ -18,13 +18,22 @@ package jp.sf.fess.db.exentity;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import jp.sf.fess.Constants;
import jp.sf.fess.db.bsentity.BsWebCrawlingConfig;
import jp.sf.fess.service.RequestHeaderService;
import jp.sf.fess.service.WebAuthenticationService;
import jp.sf.fess.util.ParameterUtil;
import org.seasar.framework.container.SingletonS2Container;
import org.seasar.framework.util.StringUtil;
import org.seasar.robot.client.S2RobotClientFactory;
import org.seasar.robot.client.http.Authentication;
import org.seasar.robot.client.http.HcHttpClient;
/**
* The entity of WEB_CRAWLING_CONFIG.
@ -225,4 +234,47 @@ public class WebCrawlingConfig extends BsWebCrawlingConfig implements
return null;
}
/**
 * Initializes the given client factory for this web crawling
 * configuration.
 * <p>
 * A new init-parameter map is installed on the factory. It receives the
 * entries parsed from the config parameter string (when not blank), the
 * user agent (when not blank), the web authentications registered for
 * this configuration's id and the request headers registered for this
 * configuration's id. The authentication and request header properties
 * are set even when the resulting arrays are empty.
 *
 * @param clientFactory the client factory to initialize
 */
@Override
public void initializeClientFactory(
        final S2RobotClientFactory clientFactory) {
    final WebAuthenticationService webAuthenticationService = SingletonS2Container
            .getComponent(WebAuthenticationService.class);
    final RequestHeaderService requestHeaderService = SingletonS2Container
            .getComponent(RequestHeaderService.class);

    // HttpClient Parameters
    final Map<String, Object> paramMap = new HashMap<String, Object>();
    clientFactory.setInitParameterMap(paramMap);

    final String configParam = getConfigParameter();
    if (StringUtil.isNotBlank(configParam)) {
        ParameterUtil.loadConfigParams(paramMap, configParam);
    }

    final String userAgent = getUserAgent();
    if (StringUtil.isNotBlank(userAgent)) {
        paramMap.put(HcHttpClient.USER_AGENT_PROPERTY, userAgent);
    }

    // web authentication
    final List<WebAuthentication> webAuthList = webAuthenticationService
            .getWebAuthenticationList(getId());
    final List<Authentication> basicAuthList = new ArrayList<Authentication>();
    for (final WebAuthentication webAuth : webAuthList) {
        basicAuthList.add(webAuth.getAuthentication());
    }
    paramMap.put(HcHttpClient.BASIC_AUTHENTICATIONS_PROPERTY,
            basicAuthList.toArray(new Authentication[basicAuthList.size()]));

    // request header
    final List<RequestHeader> requestHeaderList = requestHeaderService
            .getRequestHeaderList(getId());
    final List<org.seasar.robot.client.http.RequestHeader> rhList = new ArrayList<org.seasar.robot.client.http.RequestHeader>();
    for (final RequestHeader requestHeader : requestHeaderList) {
        rhList.add(requestHeader.getS2RobotRequestHeader());
    }
    paramMap.put(HcHttpClient.REQUERT_HEADERS_PROPERTY, rhList
            .toArray(new org.seasar.robot.client.http.RequestHeader[rhList
                    .size()]));
}
}

View file

@ -29,7 +29,6 @@ import jp.sf.fess.ds.IndexUpdateCallback;
import jp.sf.fess.helper.CrawlingConfigHelper;
import jp.sf.fess.helper.CrawlingSessionHelper;
import jp.sf.fess.taglib.FessFunctions;
import jp.sf.fess.util.ParameterUtil;
import org.seasar.framework.container.SingletonS2Container;
import org.seasar.framework.util.OgnlUtil;
@ -55,18 +54,18 @@ public abstract class AbstractDataStoreImpl implements DataStore {
public void store(final DataCrawlingConfig config,
final IndexUpdateCallback callback,
final Map<String, String> initParamMap) {
Map<String, String> paramMap = ParameterUtil.parse(config
.getHandlerParameter());
final Map<String, String> scriptMap = ParameterUtil.parse(config
.getHandlerScript());
final Map<String, String> configParamMap = config
.getHandlerParameterMap();
final Map<String, String> configScriptMap = config
.getHandlerScriptMap();
final CrawlingSessionHelper crawlingSessionHelper = SingletonS2Container
.getComponent("crawlingSessionHelper");
final Date documentExpires = crawlingSessionHelper.getDocumentExpires();
final CrawlingConfigHelper crawlingConfigHelper = SingletonS2Container
.getComponent("crawlingConfigHelper");
initParamMap.putAll(paramMap);
paramMap = initParamMap;
initParamMap.putAll(configParamMap);
final Map<String, String> paramMap = initParamMap;
// default values
final Map<String, Object> defaultDataMap = new HashMap<String, Object>();
@ -120,7 +119,7 @@ public abstract class AbstractDataStoreImpl implements DataStore {
// lastModified
// id
storeData(callback, paramMap, scriptMap, defaultDataMap);
storeData(callback, paramMap, configScriptMap, defaultDataMap);
}

View file

@ -19,37 +19,22 @@ package jp.sf.fess.ds.impl;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import jp.sf.fess.Constants;
import jp.sf.fess.db.exentity.DataCrawlingConfig;
import jp.sf.fess.ds.DataStoreException;
import jp.sf.fess.ds.IndexUpdateCallback;
import jp.sf.fess.helper.CrawlingSessionHelper;
import jp.sf.orangesignal.csv.CsvConfig;
import org.apache.http.auth.AuthScheme;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.Credentials;
import org.apache.http.auth.NTCredentials;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.impl.auth.BasicScheme;
import org.apache.http.impl.auth.DigestScheme;
import org.apache.http.impl.auth.NTLMScheme;
import org.codelibs.solr.lib.SolrGroup;
import org.seasar.framework.container.SingletonS2Container;
import org.seasar.framework.util.SerializeUtil;
import org.seasar.framework.util.StringUtil;
import org.seasar.robot.RobotSystemException;
import org.seasar.robot.client.S2RobotClient;
import org.seasar.robot.client.S2RobotClientFactory;
import org.seasar.robot.client.http.Authentication;
import org.seasar.robot.client.http.HcHttpClient;
import org.seasar.robot.client.http.impl.AuthenticationImpl;
import org.seasar.robot.client.http.ntlm.JcifsEngine;
import org.seasar.robot.client.smb.SmbAuthentication;
import org.seasar.robot.client.smb.SmbClient;
import org.seasar.robot.entity.ResponseData;
import org.seasar.robot.entity.ResultData;
import org.seasar.robot.processor.ResponseProcessor;
@ -63,16 +48,6 @@ import org.slf4j.LoggerFactory;
public class FileListDataStoreImpl extends CsvDataStoreImpl {
private static final String S2ROBOT_WEB_HEADER_PREFIX = "s2robot.web.header.";
private static final String S2ROBOT_WEB_AUTH = "s2robot.web.auth";
private static final String S2ROBOT_USERAGENT = "s2robot.useragent";
private static final String S2ROBOT_PARAM_PREFIX = "s2robot.param.";
private static final Object S2ROBOT_FILE_AUTH = "s2robot.file.auth";
private static final Logger logger = LoggerFactory
.getLogger(FileListDataStoreImpl.class);
@ -114,185 +89,24 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl {
return false;
}
/**
 * Looks up the S2Robot client factory, lets the given configuration
 * initialize it, and then delegates to the superclass implementation.
 *
 * @param config the data crawling configuration to store data for
 * @param callback the callback passed on to the superclass
 * @param initParamMap the initial parameters passed on to the superclass
 */
@Override
public void store(final DataCrawlingConfig config,
        final IndexUpdateCallback callback,
        final Map<String, String> initParamMap) {
    final S2RobotClientFactory clientFactory = SingletonS2Container
            .getComponent(S2RobotClientFactory.class);
    // keep the factory on this instance before the config initializes it
    robotClientFactory = clientFactory;
    config.initializeClientFactory(clientFactory);

    super.store(config, callback, initParamMap);
}
@Override
protected void storeData(final IndexUpdateCallback callback,
final Map<String, String> paramMap,
final Map<String, String> scriptMap,
final Map<String, Object> defaultDataMap) {
robotClientFactory = SingletonS2Container
.getComponent(S2RobotClientFactory.class);
final Map<String, Object> initParamMap = new HashMap<String, Object>();
robotClientFactory.setInitParameterMap(initParamMap);
// parameters
for (final Map.Entry<String, String> entry : paramMap.entrySet()) {
final String key = entry.getKey();
if (key.startsWith(S2ROBOT_PARAM_PREFIX)) {
initParamMap.put(key.substring(S2ROBOT_PARAM_PREFIX.length()),
entry.getValue());
}
}
// user agent
final String userAgent = paramMap.get(S2ROBOT_USERAGENT);
if (StringUtil.isNotBlank(userAgent)) {
initParamMap.put(HcHttpClient.USER_AGENT_PROPERTY, userAgent);
}
// web auth
final String webAuthStr = paramMap.get(S2ROBOT_WEB_AUTH);
if (StringUtil.isNotBlank(webAuthStr)) {
final String[] webAuthNames = webAuthStr.split(",");
final List<Authentication> basicAuthList = new ArrayList<Authentication>();
for (final String webAuthName : webAuthNames) {
final String scheme = paramMap.get(S2ROBOT_WEB_AUTH + "."
+ webAuthName + ".scheme");
final String hostname = paramMap.get(S2ROBOT_WEB_AUTH + "."
+ webAuthName + ".host");
final String port = paramMap.get(S2ROBOT_WEB_AUTH + "."
+ webAuthName + ".port");
final String realm = paramMap.get(S2ROBOT_WEB_AUTH + "."
+ webAuthName + ".realm");
final String username = paramMap.get(S2ROBOT_WEB_AUTH + "."
+ webAuthName + ".username");
final String password = paramMap.get(S2ROBOT_WEB_AUTH + "."
+ webAuthName + ".password");
if (StringUtil.isEmpty(username)) {
logger.warn("username is empty. webAuth:" + webAuthName);
continue;
}
AuthScheme authScheme = null;
if (Constants.BASIC.equals(scheme)) {
authScheme = new BasicScheme();
} else if (Constants.DIGEST.equals(scheme)) {
authScheme = new DigestScheme();
} else if (Constants.NTLM.equals(scheme)) {
authScheme = new NTLMScheme(new JcifsEngine());
}
AuthScope authScope;
if (StringUtil.isBlank(hostname)) {
authScope = AuthScope.ANY;
} else {
int p = AuthScope.ANY_PORT;
if (StringUtil.isNotBlank(port)) {
try {
p = Integer.parseInt(port);
} catch (final NumberFormatException e) {
logger.warn("Failed to parse " + port, e);
}
}
String r = realm;
if (StringUtil.isBlank(realm)) {
r = AuthScope.ANY_REALM;
}
String s = scheme;
if (StringUtil.isBlank(scheme)
|| Constants.NTLM.equals(scheme)) {
s = AuthScope.ANY_SCHEME;
}
authScope = new AuthScope(hostname, p, r, s);
}
Credentials credentials;
if (Constants.NTLM.equals(scheme)) {
final String workstation = paramMap.get(S2ROBOT_WEB_AUTH
+ "." + webAuthName + ".workstation");
final String domain = paramMap.get(S2ROBOT_WEB_AUTH + "."
+ webAuthName + ".domain");
credentials = new NTCredentials(username,
password == null ? "" : password,
workstation == null ? "" : workstation,
domain == null ? "" : domain);
} else {
credentials = new UsernamePasswordCredentials(username,
password == null ? "" : password);
}
basicAuthList.add(new AuthenticationImpl(authScope,
credentials, authScheme));
}
initParamMap.put(HcHttpClient.BASIC_AUTHENTICATIONS_PROPERTY,
basicAuthList.toArray(new Authentication[basicAuthList
.size()]));
}
// request header
final List<org.seasar.robot.client.http.RequestHeader> rhList = new ArrayList<org.seasar.robot.client.http.RequestHeader>();
int count = 1;
String headerName = paramMap.get(S2ROBOT_WEB_HEADER_PREFIX + count
+ ".name");
while (StringUtil.isNotBlank(headerName)) {
final String headerValue = paramMap.get(S2ROBOT_WEB_HEADER_PREFIX
+ count + ".value");
rhList.add(new org.seasar.robot.client.http.RequestHeader(
headerName, headerValue));
count++;
headerName = paramMap.get(S2ROBOT_WEB_HEADER_PREFIX + count
+ ".name");
}
if (!rhList.isEmpty()) {
initParamMap
.put(HcHttpClient.REQUERT_HEADERS_PROPERTY,
rhList.toArray(new org.seasar.robot.client.http.RequestHeader[rhList
.size()]));
}
// file auth
final String fileAuthStr = paramMap.get(S2ROBOT_FILE_AUTH);
if (StringUtil.isNotBlank(fileAuthStr)) {
final String[] fileAuthNames = fileAuthStr.split(",");
final List<SmbAuthentication> smbAuthList = new ArrayList<SmbAuthentication>();
for (final String fileAuthName : fileAuthNames) {
final String scheme = paramMap.get(S2ROBOT_FILE_AUTH + "."
+ fileAuthName + ".scheme");
if (Constants.SAMBA.equals(scheme)) {
final String domain = paramMap.get(S2ROBOT_FILE_AUTH + "."
+ fileAuthName + ".domain");
final String hostname = paramMap.get(S2ROBOT_FILE_AUTH
+ "." + fileAuthName + ".host");
final String port = paramMap.get(S2ROBOT_FILE_AUTH + "."
+ fileAuthName + ".port");
final String username = paramMap.get(S2ROBOT_FILE_AUTH
+ "." + fileAuthName + ".username");
final String password = paramMap.get(S2ROBOT_FILE_AUTH
+ "." + fileAuthName + ".password");
if (StringUtil.isEmpty(username)) {
logger.warn("username is empty. fileAuth:"
+ fileAuthName);
continue;
}
final SmbAuthentication smbAuth = new SmbAuthentication();
smbAuth.setDomain(domain == null ? "" : domain);
smbAuth.setServer(hostname);
if (StringUtil.isNotBlank(port)) {
try {
smbAuth.setPort(Integer.parseInt(port));
} catch (final NumberFormatException e) {
logger.warn("Failed to parse " + port, e);
}
}
smbAuth.setUsername(username);
smbAuth.setPassword(password == null ? "" : password);
smbAuthList.add(smbAuth);
}
}
if (!smbAuthList.isEmpty()) {
initParamMap.put(SmbClient.SMB_AUTHENTICATIONS_PROPERTY,
smbAuthList.toArray(new SmbAuthentication[smbAuthList
.size()]));
}
}
crawlingSessionHelper = SingletonS2Container
.getComponent(CrawlingSessionHelper.class);
super.storeData(new FileListIndexUpdateCallback(callback), paramMap,
scriptMap, defaultDataMap);

View file

@ -19,27 +19,19 @@ package jp.sf.fess.helper;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.annotation.Resource;
import jp.sf.fess.Constants;
import jp.sf.fess.db.exentity.FileAuthentication;
import jp.sf.fess.db.exentity.FileCrawlingConfig;
import jp.sf.fess.db.exentity.RequestHeader;
import jp.sf.fess.db.exentity.WebAuthentication;
import jp.sf.fess.db.exentity.WebCrawlingConfig;
import jp.sf.fess.interval.FessIntervalController;
import jp.sf.fess.service.FailureUrlService;
import jp.sf.fess.service.FileAuthenticationService;
import jp.sf.fess.service.FileCrawlingConfigService;
import jp.sf.fess.service.RequestHeaderService;
import jp.sf.fess.service.WebAuthenticationService;
import jp.sf.fess.service.WebCrawlingConfigService;
import jp.sf.fess.solr.IndexUpdater;
import jp.sf.fess.util.ParameterUtil;
import org.codelibs.core.util.DynamicProperties;
import org.codelibs.solr.lib.SolrGroup;
@ -47,10 +39,6 @@ import org.seasar.framework.container.SingletonS2Container;
import org.seasar.framework.util.StringUtil;
import org.seasar.robot.S2Robot;
import org.seasar.robot.S2RobotContext;
import org.seasar.robot.client.http.Authentication;
import org.seasar.robot.client.http.HcHttpClient;
import org.seasar.robot.client.smb.SmbAuthentication;
import org.seasar.robot.client.smb.SmbClient;
import org.seasar.robot.service.DataService;
import org.seasar.robot.service.UrlFilterService;
import org.seasar.robot.service.UrlQueueService;
@ -70,12 +58,6 @@ public class WebFsIndexHelper implements Serializable {
@Resource
public WebCrawlingConfigService webCrawlingConfigService;
@Resource
protected WebAuthenticationService webAuthenticationService;
@Resource
protected RequestHeaderService requestHeaderService;
@Resource
protected FileCrawlingConfigService fileCrawlingConfigService;
@ -209,40 +191,8 @@ public class WebFsIndexHelper implements Serializable {
.getMaxAccessCount() : maxAccessCount;
robotContext.setMaxAccessCount(maxCount);
// HttpClient Parameters
final Map<String, Object> paramMap = new HashMap<String, Object>();
s2Robot.getClientFactory().setInitParameterMap(paramMap);
final String configParam = webCrawlingConfig.getConfigParameter();
if (StringUtil.isNotBlank(configParam)) {
loadConfigParams(paramMap, configParam);
}
final String userAgent = webCrawlingConfig.getUserAgent();
if (StringUtil.isNotBlank(userAgent)) {
paramMap.put(HcHttpClient.USER_AGENT_PROPERTY, userAgent);
}
final List<WebAuthentication> webAuthList = webAuthenticationService
.getWebAuthenticationList(webCrawlingConfig.getId());
final List<Authentication> basicAuthList = new ArrayList<Authentication>();
for (final WebAuthentication webAuth : webAuthList) {
basicAuthList.add(webAuth.getAuthentication());
}
paramMap.put(HcHttpClient.BASIC_AUTHENTICATIONS_PROPERTY,
basicAuthList.toArray(new Authentication[basicAuthList
.size()]));
// request header
final List<RequestHeader> requestHeaderList = requestHeaderService
.getRequestHeaderList(webCrawlingConfig.getId());
final List<org.seasar.robot.client.http.RequestHeader> rhList = new ArrayList<org.seasar.robot.client.http.RequestHeader>();
for (final RequestHeader requestHeader : requestHeaderList) {
rhList.add(requestHeader.getS2RobotRequestHeader());
}
paramMap.put(
HcHttpClient.REQUERT_HEADERS_PROPERTY,
rhList.toArray(new org.seasar.robot.client.http.RequestHeader[rhList
.size()]));
webCrawlingConfig.initializeClientFactory(s2Robot
.getClientFactory());
// set urls
final String[] urls = urlsStr.split("[\r\n]");
@ -358,34 +308,8 @@ public class WebFsIndexHelper implements Serializable {
.getMaxAccessCount() : maxAccessCount;
robotContext.setMaxAccessCount(maxCount);
// Parameters
final Map<String, Object> paramMap = new HashMap<String, Object>();
s2Robot.getClientFactory().setInitParameterMap(paramMap);
final String configParam = fileCrawlingConfig.getConfigParameter();
if (StringUtil.isNotBlank(configParam)) {
loadConfigParams(paramMap, configParam);
}
// auth params
final List<FileAuthentication> fileAuthList = fileAuthenticationService
.getFileAuthenticationList(fileCrawlingConfig.getId());
final List<SmbAuthentication> smbAuthList = new ArrayList<SmbAuthentication>();
for (final FileAuthentication fileAuth : fileAuthList) {
if (Constants.SAMBA.equals(fileAuth.getProtocolScheme())) {
final SmbAuthentication smbAuth = new SmbAuthentication();
final Map<String, String> map = ParameterUtil
.parse(fileAuth.getParameters());
final String domain = map.get("domain");
smbAuth.setDomain(domain == null ? "" : domain);
smbAuth.setServer(fileAuth.getHostname());
smbAuth.setPort(fileAuth.getPort());
smbAuth.setUsername(fileAuth.getUsername());
smbAuth.setPassword(fileAuth.getPassword());
smbAuthList.add(smbAuth);
}
}
paramMap.put(SmbClient.SMB_AUTHENTICATIONS_PROPERTY, smbAuthList
.toArray(new SmbAuthentication[smbAuthList.size()]));
fileCrawlingConfig.initializeClientFactory(s2Robot
.getClientFactory());
// set paths
final String[] paths = pathsStr.split("[\r\n]");
@ -586,12 +510,4 @@ public class WebFsIndexHelper implements Serializable {
}
/**
 * Parses the given config parameter string with
 * {@code ParameterUtil.parse} and copies the resulting entries into the
 * given map. The target map is left untouched when nothing was parsed.
 *
 * @param paramMap the map that receives the parsed entries
 * @param configParam the config parameter string to parse
 */
protected void loadConfigParams(final Map<String, Object> paramMap,
        final String configParam) {
    final Map<String, String> parsed = ParameterUtil.parse(configParam);
    if (parsed.isEmpty()) {
        return;
    }
    paramMap.putAll(parsed);
}
}

View file

@ -55,4 +55,12 @@ public class ParameterUtil {
}
return paramMap;
}
/**
 * Parses the given config parameter string with {@code parse} and copies
 * the resulting entries into the given map. The target map is left
 * untouched when nothing was parsed.
 *
 * @param paramMap the map that receives the parsed entries
 * @param configParam the config parameter string to parse
 */
public static void loadConfigParams(final Map<String, Object> paramMap,
        final String configParam) {
    final Map<String, String> parsedParams = ParameterUtil
            .parse(configParam);
    if (parsedParams.isEmpty()) {
        return;
    }
    paramMap.putAll(parsedParams);
}
}