Shinsuke Sugaya 11 gadi atpakaļ
vecāks
revīzija
41ab30560d

+ 21 - 0
src/main/java/jp/sf/fess/ds/DataStoreCrawlingException.java

@@ -0,0 +1,21 @@
+package jp.sf.fess.ds;
+
+import org.seasar.robot.RobotCrawlAccessException;
+
+public class DataStoreCrawlingException extends RobotCrawlAccessException { // Crawl access exception that additionally carries a URL string supplied by the thrower.
+
+    private static final long serialVersionUID = 1L;
+
+    private final String url; // Set once in the constructor and exposed through getUrl().
+
+    public DataStoreCrawlingException(final String url, final String message,
+            final Exception e) { // url is kept on this instance; message and e are forwarded to the superclass constructor.
+        super(message, e);
+        this.url = url;
+    }
+
+    public String getUrl() { // Returns the url value passed to the constructor (null if null was passed).
+        return url;
+    }
+
+}

+ 15 - 9
src/main/java/jp/sf/fess/ds/impl/CsvDataStoreImpl.java

@@ -30,6 +30,7 @@ import java.util.regex.Pattern;
 
 import jp.sf.fess.Constants;
 import jp.sf.fess.db.exentity.DataCrawlingConfig;
+import jp.sf.fess.ds.DataStoreCrawlingException;
 import jp.sf.fess.ds.DataStoreException;
 import jp.sf.fess.ds.IndexUpdateCallback;
 import jp.sf.fess.service.FailureUrlService;
@@ -261,28 +262,33 @@ public class CsvDataStoreImpl extends AbstractDataStoreImpl {
                     }
 
                     String errorName;
-                    final Throwable cause = e.getCause();
+                    final Throwable cause = target.getCause();
                     if (cause != null) {
                         errorName = cause.getClass().getCanonicalName();
                     } else {
-                        errorName = e.getClass().getCanonicalName();
+                        errorName = target.getClass().getCanonicalName();
                     }
 
+                    String url;
+                    if (target instanceof DataStoreCrawlingException) {
+                        url = ((DataStoreCrawlingException) target).getUrl();
+                    } else {
+                        url = csvFile.getAbsolutePath() + ":"
+                                + csvReader.getLineNumber();
+
+                    }
                     final FailureUrlService failureUrlService = SingletonS2Container
                             .getComponent(FailureUrlService.class);
-                    failureUrlService.store(
-                            dataConfig,
-                            errorName,
-                            csvFile.getAbsolutePath() + ":"
-                                    + csvReader.getLineNumber(), e);
+                    failureUrlService.store(dataConfig, errorName, url, target);
 
                     logger.warn("Crawling Access Exception at : " + dataMap, e);
                 } catch (final Exception e) {
+                    final String url = csvFile.getAbsolutePath() + ":"
+                            + csvReader.getLineNumber();
                     final FailureUrlService failureUrlService = SingletonS2Container
                             .getComponent(FailureUrlService.class);
                     failureUrlService.store(dataConfig, e.getClass()
-                            .getCanonicalName(), csvFile.getAbsolutePath()
-                            + ":" + csvReader.getLineNumber(), e);
+                            .getCanonicalName(), url, e);
 
                     logger.warn("Crawling Access Exception at : " + dataMap, e);
                 }

+ 55 - 49
src/main/java/jp/sf/fess/ds/impl/FileListDataStoreImpl.java

@@ -24,6 +24,7 @@ import java.util.Map;
 
 import jp.sf.fess.Constants;
 import jp.sf.fess.db.exentity.DataCrawlingConfig;
+import jp.sf.fess.ds.DataStoreCrawlingException;
 import jp.sf.fess.ds.DataStoreException;
 import jp.sf.fess.ds.IndexUpdateCallback;
 import jp.sf.fess.helper.CrawlingSessionHelper;
@@ -178,7 +179,6 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl {
 
         protected boolean addDocument(final Map<String, Object> dataMap) {
             synchronized (indexUpdateCallback) {
-
                 //   required check
                 if (!dataMap.containsKey(urlField)
                         || dataMap.get(urlField) == null) {
@@ -187,59 +187,65 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl {
                 }
 
                 final String url = dataMap.get(urlField).toString();
-                final S2RobotClient client = robotClientFactory.getClient(url);
-                if (client == null) {
-                    logger.warn("S2RobotClient is null. Data: " + dataMap);
-                    return false;
-                }
+                try {
+                    final S2RobotClient client = robotClientFactory
+                            .getClient(url);
+                    if (client == null) {
+                        logger.warn("S2RobotClient is null. Data: " + dataMap);
+                        return false;
+                    }
 
-                final long startTime = System.currentTimeMillis();
-                final ResponseData responseData = client.doGet(url);
-                responseData.setExecutionTime(System.currentTimeMillis()
-                        - startTime);
-                responseData.setSessionId((String) dataMap
-                        .get(Constants.SESSION_ID));
-
-                final RuleManager ruleManager = SingletonS2Container
-                        .getComponent(RuleManager.class);
-                final Rule rule = ruleManager.getRule(responseData);
-                if (rule == null) {
-                    logger.warn("No url rule. Data: " + dataMap);
-                    return false;
-                } else {
-                    responseData.setRuleId(rule.getRuleId());
-                    final ResponseProcessor responseProcessor = rule
-                            .getResponseProcessor();
-                    if (responseProcessor instanceof DefaultResponseProcessor) {
-                        final Transformer transformer = ((DefaultResponseProcessor) responseProcessor)
-                                .getTransformer();
-                        final ResultData resultData = transformer
-                                .transform(responseData);
-                        final byte[] data = resultData.getData();
-                        if (data != null) {
-                            try {
-                                @SuppressWarnings("unchecked")
-                                final Map<String, Object> responseDataMap = (Map<String, Object>) SerializeUtil
-                                        .fromBinaryToObject(data);
-                                dataMap.putAll(responseDataMap);
-                            } catch (final Exception e) {
-                                throw new RobotSystemException(
-                                        "Could not create an instanced from bytes.",
-                                        e);
+                    final long startTime = System.currentTimeMillis();
+                    final ResponseData responseData = client.doGet(url);
+                    responseData.setExecutionTime(System.currentTimeMillis()
+                            - startTime);
+                    responseData.setSessionId((String) dataMap
+                            .get(Constants.SESSION_ID));
+
+                    final RuleManager ruleManager = SingletonS2Container
+                            .getComponent(RuleManager.class);
+                    final Rule rule = ruleManager.getRule(responseData);
+                    if (rule == null) {
+                        logger.warn("No url rule. Data: " + dataMap);
+                        return false;
+                    } else {
+                        responseData.setRuleId(rule.getRuleId());
+                        final ResponseProcessor responseProcessor = rule
+                                .getResponseProcessor();
+                        if (responseProcessor instanceof DefaultResponseProcessor) {
+                            final Transformer transformer = ((DefaultResponseProcessor) responseProcessor)
+                                    .getTransformer();
+                            final ResultData resultData = transformer
+                                    .transform(responseData);
+                            final byte[] data = resultData.getData();
+                            if (data != null) {
+                                try {
+                                    @SuppressWarnings("unchecked")
+                                    final Map<String, Object> responseDataMap = (Map<String, Object>) SerializeUtil
+                                            .fromBinaryToObject(data);
+                                    dataMap.putAll(responseDataMap);
+                                } catch (final Exception e) {
+                                    throw new RobotSystemException(
+                                            "Could not create an instance from bytes.",
+                                            e);
+                                }
                             }
-                        }
 
-                        // remove
-                        for (final String fieldName : ignoreFieldNames) {
-                            dataMap.remove(fieldName);
-                        }
+                            // remove
+                            for (final String fieldName : ignoreFieldNames) {
+                                dataMap.remove(fieldName);
+                            }
 
-                        return indexUpdateCallback.store(dataMap);
-                    } else {
-                        logger.warn("The response processor is not DefaultResponseProcessor. responseProcessor: "
-                                + responseProcessor + ", Data: " + dataMap);
-                        return false;
+                            return indexUpdateCallback.store(dataMap);
+                        } else {
+                            logger.warn("The response processor is not DefaultResponseProcessor. responseProcessor: "
+                                    + responseProcessor + ", Data: " + dataMap);
+                            return false;
+                        }
                     }
+                } catch (final Exception e) {
+                    throw new DataStoreCrawlingException(url, "Failed to add: "
+                            + dataMap, e);
                 }
             }
         }