This commit is contained in:
Shinsuke Sugaya 2013-11-30 20:07:35 +09:00
parent d57a7ae563
commit 41ab30560d
3 changed files with 94 additions and 61 deletions

View file

@ -0,0 +1,21 @@
package jp.sf.fess.ds;
import org.seasar.robot.RobotCrawlAccessException;
/**
 * Crawl access exception that additionally carries a URL string supplied by
 * the code that throws it, so that a handler can retrieve it via
 * {@link #getUrl()}.
 */
public class DataStoreCrawlingException extends RobotCrawlAccessException {

    private static final long serialVersionUID = 1L;

    /** URL given to the constructor; never reassigned afterwards. */
    private final String url;

    /**
     * Creates the exception, forwarding the message and cause to the
     * superclass and remembering the URL.
     *
     * @param url     URL to associate with this failure
     * @param message detail message passed to the superclass
     * @param e       underlying exception passed to the superclass as the cause
     */
    public DataStoreCrawlingException(final String url, final String message,
            final Exception e) {
        super(message, e);
        this.url = url;
    }

    /**
     * Returns the URL associated with this failure.
     *
     * @return the URL passed to the constructor
     */
    public String getUrl() {
        return this.url;
    }
}

View file

@ -30,6 +30,7 @@ import java.util.regex.Pattern;
import jp.sf.fess.Constants;
import jp.sf.fess.db.exentity.DataCrawlingConfig;
import jp.sf.fess.ds.DataStoreCrawlingException;
import jp.sf.fess.ds.DataStoreException;
import jp.sf.fess.ds.IndexUpdateCallback;
import jp.sf.fess.service.FailureUrlService;
@ -261,28 +262,33 @@ public class CsvDataStoreImpl extends AbstractDataStoreImpl {
}
String errorName;
final Throwable cause = e.getCause();
final Throwable cause = target.getCause();
if (cause != null) {
errorName = cause.getClass().getCanonicalName();
} else {
errorName = e.getClass().getCanonicalName();
errorName = target.getClass().getCanonicalName();
}
String url;
if (target instanceof DataStoreCrawlingException) {
url = ((DataStoreCrawlingException) target).getUrl();
} else {
url = csvFile.getAbsolutePath() + ":"
+ csvReader.getLineNumber();
}
final FailureUrlService failureUrlService = SingletonS2Container
.getComponent(FailureUrlService.class);
failureUrlService.store(
dataConfig,
errorName,
csvFile.getAbsolutePath() + ":"
+ csvReader.getLineNumber(), e);
failureUrlService.store(dataConfig, errorName, url, target);
logger.warn("Crawling Access Exception at : " + dataMap, e);
} catch (final Exception e) {
final String url = csvFile.getAbsolutePath() + ":"
+ csvReader.getLineNumber();
final FailureUrlService failureUrlService = SingletonS2Container
.getComponent(FailureUrlService.class);
failureUrlService.store(dataConfig, e.getClass()
.getCanonicalName(), csvFile.getAbsolutePath()
+ ":" + csvReader.getLineNumber(), e);
.getCanonicalName(), url, e);
logger.warn("Crawling Access Exception at : " + dataMap, e);
}

View file

@ -24,6 +24,7 @@ import java.util.Map;
import jp.sf.fess.Constants;
import jp.sf.fess.db.exentity.DataCrawlingConfig;
import jp.sf.fess.ds.DataStoreCrawlingException;
import jp.sf.fess.ds.DataStoreException;
import jp.sf.fess.ds.IndexUpdateCallback;
import jp.sf.fess.helper.CrawlingSessionHelper;
@ -178,7 +179,6 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl {
protected boolean addDocument(final Map<String, Object> dataMap) {
synchronized (indexUpdateCallback) {
// required check
if (!dataMap.containsKey(urlField)
|| dataMap.get(urlField) == null) {
@ -187,59 +187,65 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl {
}
final String url = dataMap.get(urlField).toString();
final S2RobotClient client = robotClientFactory.getClient(url);
if (client == null) {
logger.warn("S2RobotClient is null. Data: " + dataMap);
return false;
}
final long startTime = System.currentTimeMillis();
final ResponseData responseData = client.doGet(url);
responseData.setExecutionTime(System.currentTimeMillis()
- startTime);
responseData.setSessionId((String) dataMap
.get(Constants.SESSION_ID));
final RuleManager ruleManager = SingletonS2Container
.getComponent(RuleManager.class);
final Rule rule = ruleManager.getRule(responseData);
if (rule == null) {
logger.warn("No url rule. Data: " + dataMap);
return false;
} else {
responseData.setRuleId(rule.getRuleId());
final ResponseProcessor responseProcessor = rule
.getResponseProcessor();
if (responseProcessor instanceof DefaultResponseProcessor) {
final Transformer transformer = ((DefaultResponseProcessor) responseProcessor)
.getTransformer();
final ResultData resultData = transformer
.transform(responseData);
final byte[] data = resultData.getData();
if (data != null) {
try {
@SuppressWarnings("unchecked")
final Map<String, Object> responseDataMap = (Map<String, Object>) SerializeUtil
.fromBinaryToObject(data);
dataMap.putAll(responseDataMap);
} catch (final Exception e) {
throw new RobotSystemException(
"Could not create an instanced from bytes.",
e);
}
}
// remove
for (final String fieldName : ignoreFieldNames) {
dataMap.remove(fieldName);
}
return indexUpdateCallback.store(dataMap);
} else {
logger.warn("The response processor is not DefaultResponseProcessor. responseProcessor: "
+ responseProcessor + ", Data: " + dataMap);
try {
final S2RobotClient client = robotClientFactory
.getClient(url);
if (client == null) {
logger.warn("S2RobotClient is null. Data: " + dataMap);
return false;
}
final long startTime = System.currentTimeMillis();
final ResponseData responseData = client.doGet(url);
responseData.setExecutionTime(System.currentTimeMillis()
- startTime);
responseData.setSessionId((String) dataMap
.get(Constants.SESSION_ID));
final RuleManager ruleManager = SingletonS2Container
.getComponent(RuleManager.class);
final Rule rule = ruleManager.getRule(responseData);
if (rule == null) {
logger.warn("No url rule. Data: " + dataMap);
return false;
} else {
responseData.setRuleId(rule.getRuleId());
final ResponseProcessor responseProcessor = rule
.getResponseProcessor();
if (responseProcessor instanceof DefaultResponseProcessor) {
final Transformer transformer = ((DefaultResponseProcessor) responseProcessor)
.getTransformer();
final ResultData resultData = transformer
.transform(responseData);
final byte[] data = resultData.getData();
if (data != null) {
try {
@SuppressWarnings("unchecked")
final Map<String, Object> responseDataMap = (Map<String, Object>) SerializeUtil
.fromBinaryToObject(data);
dataMap.putAll(responseDataMap);
} catch (final Exception e) {
throw new RobotSystemException(
"Could not create an instance from bytes.",
e);
}
}
// remove
for (final String fieldName : ignoreFieldNames) {
dataMap.remove(fieldName);
}
return indexUpdateCallback.store(dataMap);
} else {
logger.warn("The response processor is not DefaultResponseProcessor. responseProcessor: "
+ responseProcessor + ", Data: " + dataMap);
return false;
}
}
} catch (final Exception e) {
throw new DataStoreCrawlingException(url, "Failed to add: "
+ dataMap, e);
}
}
}