ソースを参照

#7 load parameters from a data store config.

Shinsuke Sugaya 11 年 前
コミット
9c36921692
1 ファイル変更243 行追加25 行削除
  1. 243 25
      src/main/java/jp/sf/fess/ds/impl/FileListDataStoreImpl.java

+ 243 - 25
src/main/java/jp/sf/fess/ds/impl/FileListDataStoreImpl.java

@@ -19,30 +19,59 @@ package jp.sf.fess.ds.impl;
 import java.io.File;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
+import jp.sf.fess.Constants;
 import jp.sf.fess.ds.DataStoreException;
 import jp.sf.fess.ds.IndexUpdateCallback;
 import jp.sf.fess.helper.CrawlingSessionHelper;
 import jp.sf.orangesignal.csv.CsvConfig;
 
+import org.apache.http.auth.AuthScheme;
+import org.apache.http.auth.AuthScope;
+import org.apache.http.auth.Credentials;
+import org.apache.http.auth.NTCredentials;
+import org.apache.http.auth.UsernamePasswordCredentials;
+import org.apache.http.impl.auth.BasicScheme;
+import org.apache.http.impl.auth.DigestScheme;
+import org.apache.http.impl.auth.NTLMScheme;
 import org.codelibs.solr.lib.SolrGroup;
 import org.seasar.framework.container.SingletonS2Container;
 import org.seasar.framework.util.SerializeUtil;
+import org.seasar.framework.util.StringUtil;
 import org.seasar.robot.RobotSystemException;
 import org.seasar.robot.client.S2RobotClient;
+import org.seasar.robot.client.S2RobotClientFactory;
+import org.seasar.robot.client.http.Authentication;
+import org.seasar.robot.client.http.HcHttpClient;
+import org.seasar.robot.client.http.impl.AuthenticationImpl;
+import org.seasar.robot.client.http.ntlm.JcifsEngine;
+import org.seasar.robot.client.smb.SmbAuthentication;
+import org.seasar.robot.client.smb.SmbClient;
 import org.seasar.robot.entity.ResponseData;
 import org.seasar.robot.entity.ResultData;
+import org.seasar.robot.processor.ResponseProcessor;
+import org.seasar.robot.processor.impl.DefaultResponseProcessor;
+import org.seasar.robot.rule.Rule;
+import org.seasar.robot.rule.RuleManager;
 import org.seasar.robot.transformer.Transformer;
 import org.seasar.robot.util.LruHashMap;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 public class FileListDataStoreImpl extends CsvDataStoreImpl {
-    private static final String S2ROBOT_TRANSFORMER_PARAM = "s2robot.transformer";
 
-    private static final String S2ROBOT_CLIENT_PARAM = "s2robot.client";
+    private static final String S2ROBOT_WEB_HEADER_PREFIX = "s2robot.web.header.";
+
+    private static final String S2ROBOT_WEB_AUTH = "s2robot.web.auth";
+
+    private static final String S2ROBOT_USERAGENT = "s2robot.useragent";
+
+    private static final String S2ROBOT_PARAM_PREFIX = "s2robot.param.";
+
+    private static final Object S2ROBOT_FILE_AUTH = "s2robot.file.auth";
 
     private static final Logger logger = LoggerFactory
             .getLogger(FileListDataStoreImpl.class);
@@ -65,9 +94,7 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl {
 
     public int maxDeleteDocumentCacheSize = 100;
 
-    protected S2RobotClient robotClient;
-
-    protected Transformer transformer;
+    protected S2RobotClientFactory robotClientFactory;
 
     protected CrawlingSessionHelper crawlingSessionHelper;
 
@@ -89,10 +116,173 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl {
             final Map<String, String> paramMap,
             final Map<String, String> scriptMap,
             final Map<String, Object> defaultDataMap) {
-        robotClient = SingletonS2Container.getComponent(paramMap
-                .get(S2ROBOT_CLIENT_PARAM));
-        transformer = SingletonS2Container.getComponent(paramMap
-                .get(S2ROBOT_TRANSFORMER_PARAM));
+        robotClientFactory = SingletonS2Container
+                .getComponent(S2RobotClientFactory.class);
+
+        final Map<String, Object> initParamMap = new HashMap<String, Object>();
+        robotClientFactory.setInitParameterMap(initParamMap);
+
+        // parameters
+        for (final Map.Entry<String, String> entry : paramMap.entrySet()) {
+            final String key = entry.getKey();
+            if (key.startsWith(S2ROBOT_PARAM_PREFIX)) {
+                initParamMap.put(key.substring(S2ROBOT_PARAM_PREFIX.length()),
+                        entry.getValue());
+            }
+        }
+
+        // user agent
+        final String userAgent = paramMap.get(S2ROBOT_USERAGENT);
+        if (StringUtil.isNotBlank(userAgent)) {
+            initParamMap.put(HcHttpClient.USER_AGENT_PROPERTY, userAgent);
+        }
+
+        // web auth
+        final String webAuthStr = paramMap.get(S2ROBOT_WEB_AUTH);
+        if (StringUtil.isNotBlank(webAuthStr)) {
+            final String[] webAuthNames = webAuthStr.split(",");
+            final List<Authentication> basicAuthList = new ArrayList<Authentication>();
+            for (final String webAuthName : webAuthNames) {
+                final String scheme = paramMap.get(S2ROBOT_WEB_AUTH + "."
+                        + webAuthName + ".scheme");
+                final String hostname = paramMap.get(S2ROBOT_WEB_AUTH + "."
+                        + webAuthName + ".host");
+                final String port = paramMap.get(S2ROBOT_WEB_AUTH + "."
+                        + webAuthName + ".port");
+                final String realm = paramMap.get(S2ROBOT_WEB_AUTH + "."
+                        + webAuthName + ".realm");
+                final String username = paramMap.get(S2ROBOT_WEB_AUTH + "."
+                        + webAuthName + ".username");
+                final String password = paramMap.get(S2ROBOT_WEB_AUTH + "."
+                        + webAuthName + ".password");
+
+                if (StringUtil.isEmpty(username)) {
+                    logger.warn("username is empty. webAuth:" + webAuthName);
+                    continue;
+                }
+
+                AuthScheme authScheme = null;
+                if (Constants.BASIC.equals(scheme)) {
+                    authScheme = new BasicScheme();
+                } else if (Constants.DIGEST.equals(scheme)) {
+                    authScheme = new DigestScheme();
+                } else if (Constants.NTLM.equals(scheme)) {
+                    authScheme = new NTLMScheme(new JcifsEngine());
+                }
+
+                AuthScope authScope;
+                if (StringUtil.isBlank(hostname)) {
+                    authScope = AuthScope.ANY;
+                } else {
+                    int p = AuthScope.ANY_PORT;
+                    if (StringUtil.isNotBlank(port)) {
+                        try {
+                            p = Integer.parseInt(port);
+                        } catch (final NumberFormatException e) {
+                            logger.warn("Failed to parse " + port, e);
+                        }
+                    }
+
+                    String r = realm;
+                    if (StringUtil.isBlank(realm)) {
+                        r = AuthScope.ANY_REALM;
+                    }
+
+                    String s = scheme;
+                    if (StringUtil.isBlank(scheme)
+                            || Constants.NTLM.equals(scheme)) {
+                        s = AuthScope.ANY_SCHEME;
+                    }
+                    authScope = new AuthScope(hostname, p, r, s);
+                }
+
+                Credentials credentials;
+                if (Constants.NTLM.equals(scheme)) {
+                    final String workstation = paramMap.get(S2ROBOT_WEB_AUTH
+                            + "." + webAuthName + ".workstation");
+                    final String domain = paramMap.get(S2ROBOT_WEB_AUTH + "."
+                            + webAuthName + ".domain");
+                    credentials = new NTCredentials(username,
+                            password == null ? "" : password,
+                            workstation == null ? "" : workstation,
+                            domain == null ? "" : domain);
+                } else {
+                    credentials = new UsernamePasswordCredentials(username,
+                            password == null ? "" : password);
+                }
+
+                basicAuthList.add(new AuthenticationImpl(authScope,
+                        credentials, authScheme));
+            }
+            initParamMap.put(HcHttpClient.BASIC_AUTHENTICATIONS_PROPERTY,
+                    basicAuthList.toArray(new Authentication[basicAuthList
+                            .size()]));
+        }
+
+        // request header
+        final List<org.seasar.robot.client.http.RequestHeader> rhList = new ArrayList<org.seasar.robot.client.http.RequestHeader>();
+        int count = 1;
+        String headerName = paramMap.get(S2ROBOT_WEB_HEADER_PREFIX + count
+                + ".name");
+        while (StringUtil.isNotBlank(headerName)) {
+            final String headerValue = paramMap.get(S2ROBOT_WEB_HEADER_PREFIX
+                    + count + ".value");
+            rhList.add(new org.seasar.robot.client.http.RequestHeader(
+                    headerName, headerValue));
+            count++;
+            headerName = paramMap.get(S2ROBOT_WEB_HEADER_PREFIX + count
+                    + ".name");
+        }
+        initParamMap.put(HcHttpClient.REQUERT_HEADERS_PROPERTY, rhList
+                .toArray(new org.seasar.robot.client.http.RequestHeader[rhList
+                        .size()]));
+
+        // file auth
+        final String fileAuthStr = paramMap.get(S2ROBOT_FILE_AUTH);
+        if (StringUtil.isNotBlank(fileAuthStr)) {
+            final String[] fileAuthNames = fileAuthStr.split(",");
+            final List<SmbAuthentication> smbAuthList = new ArrayList<SmbAuthentication>();
+            for (final String fileAuthName : fileAuthNames) {
+                final String scheme = paramMap.get(S2ROBOT_FILE_AUTH + "."
+                        + fileAuthName + ".scheme");
+                if (Constants.SAMBA.equals(scheme)) {
+                    final String domain = paramMap.get(S2ROBOT_FILE_AUTH + "."
+                            + fileAuthName + ".domain");
+                    final String hostname = paramMap.get(S2ROBOT_FILE_AUTH
+                            + "." + fileAuthName + ".host");
+                    final String port = paramMap.get(S2ROBOT_FILE_AUTH + "."
+                            + fileAuthName + ".port");
+                    final String username = paramMap.get(S2ROBOT_FILE_AUTH
+                            + "." + fileAuthName + ".username");
+                    final String password = paramMap.get(S2ROBOT_FILE_AUTH
+                            + "." + fileAuthName + ".password");
+
+                    if (StringUtil.isEmpty(username)) {
+                        logger.warn("username is empty. fileAuth:"
+                                + fileAuthName);
+                        continue;
+                    }
+
+                    final SmbAuthentication smbAuth = new SmbAuthentication();
+                    smbAuth.setDomain(domain == null ? "" : domain);
+                    smbAuth.setServer(hostname);
+                    try {
+                        smbAuth.setPort(Integer.parseInt(port));
+                    } catch (final NumberFormatException e) {
+                        logger.warn("Failed to parse " + port, e);
+                    }
+                    smbAuth.setUsername(username);
+                    smbAuth.setPassword(password == null ? "" : password);
+                    smbAuthList.add(smbAuth);
+                }
+            }
+            if (!smbAuthList.isEmpty()) {
+                initParamMap.put(SmbClient.SMB_AUTHENTICATIONS_PROPERTY,
+                        smbAuthList.toArray(new SmbAuthentication[smbAuthList
+                                .size()]));
+            }
+        }
+
         crawlingSessionHelper = SingletonS2Container
                 .getComponent(CrawlingSessionHelper.class);
 
@@ -172,29 +362,57 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl {
                     return false;
                 }
 
+                final String url = dataMap.get(urlField).toString();
+                final S2RobotClientFactory robotClientFactory = SingletonS2Container
+                        .getComponent(S2RobotClientFactory.class);
+                final S2RobotClient client = robotClientFactory.getClient(url);
+                if (client == null) {
+                    logger.warn("S2RobotClient is null. Data: " + dataMap);
+                    return false;
+                }
+
                 final long startTime = System.currentTimeMillis();
-                final ResponseData responseData = robotClient.doGet(dataMap
-                        .get(urlField).toString());
+                final ResponseData responseData = client.doGet(url);
                 responseData.setExecutionTime(System.currentTimeMillis()
                         - startTime);
                 responseData.setSessionId((String) dataMap.get("sessionId"));
 
-                // TODO inefficient conversion...
-                final ResultData resultData = transformer
-                        .transform(responseData);
-                final byte[] data = resultData.getData();
-                if (data != null) {
-                    try {
-                        final Map<String, Object> responseDataMap = (Map<String, Object>) SerializeUtil
-                                .fromBinaryToObject(data);
-                        dataMap.putAll(responseDataMap);
-                    } catch (final Exception e) {
-                        throw new RobotSystemException(
-                                "Could not create an instanced from bytes.", e);
+                final RuleManager ruleManager = SingletonS2Container
+                        .getComponent(RuleManager.class);
+                final Rule rule = ruleManager.getRule(responseData);
+                if (rule == null) {
+                    logger.warn("No url rule. Data: " + dataMap);
+                    return false;
+                } else {
+                    responseData.setRuleId(rule.getRuleId());
+                    final ResponseProcessor responseProcessor = rule
+                            .getResponseProcessor();
+                    if (responseProcessor instanceof DefaultResponseProcessor) {
+                        final Transformer transformer = ((DefaultResponseProcessor) responseProcessor)
+                                .getTransformer();
+                        final ResultData resultData = transformer
+                                .transform(responseData);
+                        final byte[] data = resultData.getData();
+                        if (data != null) {
+                            try {
+                                @SuppressWarnings("unchecked")
+                                final Map<String, Object> responseDataMap = (Map<String, Object>) SerializeUtil
+                                        .fromBinaryToObject(data);
+                                dataMap.putAll(responseDataMap);
+                            } catch (final Exception e) {
+                                throw new RobotSystemException(
+                                        "Could not create an instanced from bytes.",
+                                        e);
+                            }
+                        }
+
+                        return indexUpdateCallback.store(dataMap);
+                    } else {
+                        logger.warn("The response processor is not DefaultResponseProcessor. responseProcessor: "
+                                + responseProcessor + ", Data: " + dataMap);
+                        return false;
                     }
                 }
-
-                return indexUpdateCallback.store(dataMap);
             }
         }