fix #2823: log the failure URL ID with CrawlingAccessException entries in fess.log
This commit is contained in:
parent
0a9a4f7306
commit
78070c9e74
2 changed files with 31 additions and 16 deletions
|
@ -128,9 +128,9 @@ public class FailureUrlService {
|
|||
});
|
||||
}
|
||||
|
||||
public void store(final CrawlingConfig crawlingConfig, final String errorName, final String url, final Throwable e) {
|
||||
public FailureUrl store(final CrawlingConfig crawlingConfig, final String errorName, final String url, final Throwable e) {
|
||||
if (e instanceof ContainerNotAvailableException) {
|
||||
return;
|
||||
return null;
|
||||
}
|
||||
|
||||
final FailureUrlBhv bhv = ComponentUtil.getComponent(FailureUrlBhv.class);
|
||||
|
@ -160,6 +160,7 @@ public class FailureUrlService {
|
|||
bhv.insertOrUpdate(failureUrl, op -> {
|
||||
op.setRefreshPolicy(Constants.TRUE);
|
||||
});
|
||||
return failureUrl;
|
||||
}
|
||||
|
||||
private String getStackTrace(final Throwable t) {
|
||||
|
|
|
@ -20,10 +20,12 @@ import org.apache.logging.log4j.Logger;
|
|||
import org.codelibs.fess.app.service.FailureUrlService;
|
||||
import org.codelibs.fess.crawler.CrawlerContext;
|
||||
import org.codelibs.fess.crawler.entity.UrlQueue;
|
||||
import org.codelibs.fess.crawler.exception.CrawlingAccessException;
|
||||
import org.codelibs.fess.crawler.exception.MultipleCrawlingAccessException;
|
||||
import org.codelibs.fess.crawler.helper.impl.LogHelperImpl;
|
||||
import org.codelibs.fess.crawler.log.LogType;
|
||||
import org.codelibs.fess.es.config.exentity.CrawlingConfig;
|
||||
import org.codelibs.fess.es.config.exentity.FailureUrl;
|
||||
import org.codelibs.fess.exception.ContainerNotAvailableException;
|
||||
import org.codelibs.fess.helper.CrawlerStatsHelper.StatsAction;
|
||||
import org.codelibs.fess.util.ComponentUtil;
|
||||
|
@ -84,25 +86,30 @@ public class CrawlerLogHelper extends LogHelperImpl {
|
|||
|
||||
@Override
|
||||
protected void processCrawlingAccessException(final Object... objs) {
|
||||
String failureUrlId = "?";
|
||||
final CrawlerContext crawlerContext = (CrawlerContext) objs[0];
|
||||
final UrlQueue<?> urlQueue = (UrlQueue<?>) objs[1];
|
||||
final CrawlingAccessException cae = (CrawlingAccessException) objs[2];
|
||||
try {
|
||||
final CrawlerContext crawlerContext = (CrawlerContext) objs[0];
|
||||
final UrlQueue<?> urlQueue = (UrlQueue<?>) objs[1];
|
||||
Throwable e = (Throwable) objs[2];
|
||||
if (e instanceof MultipleCrawlingAccessException) {
|
||||
final Throwable[] causes = ((MultipleCrawlingAccessException) e).getCauses();
|
||||
Throwable t = cae;
|
||||
if (t instanceof MultipleCrawlingAccessException mcae) {
|
||||
final Throwable[] causes = mcae.getCauses();
|
||||
if (causes.length > 0) {
|
||||
e = causes[causes.length - 1];
|
||||
t = causes[causes.length - 1];
|
||||
}
|
||||
}
|
||||
|
||||
String errorName;
|
||||
final Throwable cause = e.getCause();
|
||||
final Throwable cause = t.getCause();
|
||||
if (cause != null) {
|
||||
errorName = cause.getClass().getCanonicalName();
|
||||
} else {
|
||||
errorName = e.getClass().getCanonicalName();
|
||||
errorName = t.getClass().getCanonicalName();
|
||||
}
|
||||
FailureUrl failureUrl = storeFailureUrl(crawlerContext, urlQueue, errorName, t);
|
||||
if (failureUrl != null) {
|
||||
failureUrlId = failureUrl.getId();
|
||||
}
|
||||
storeFailureUrl(crawlerContext, urlQueue, errorName, e);
|
||||
} catch (final ContainerNotAvailableException e) {
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("container was destroyed.");
|
||||
|
@ -118,10 +125,17 @@ public class CrawlerLogHelper extends LogHelperImpl {
|
|||
logger.warn("Failed to store a failure url.", e);
|
||||
}
|
||||
|
||||
super.processCrawlingAccessException(objs);
|
||||
if (objs.length > 1 && objs[1] instanceof final UrlQueue<?> urlQueue) {
|
||||
ComponentUtil.getCrawlerStatsHelper().record(urlQueue, StatsAction.ACCESS_EXCEPTION);
|
||||
if (cae.isDebugEnabled()) {
|
||||
logger.debug("[{}] Crawling Access Exception at {}", failureUrlId, urlQueue.getUrl(), cae);
|
||||
} else if (cae.isInfoEnabled()) {
|
||||
logger.info("[{}] {}", failureUrlId, cae.getMessage());
|
||||
} else if (cae.isWarnEnabled()) {
|
||||
logger.warn("[{}] Crawling Access Exception at {}", failureUrlId, urlQueue.getUrl(), cae);
|
||||
} else if (cae.isErrorEnabled()) {
|
||||
logger.error("[{}] Crawling Access Exception at {}", failureUrlId, urlQueue.getUrl(), cae);
|
||||
}
|
||||
|
||||
ComponentUtil.getCrawlerStatsHelper().record(urlQueue, StatsAction.ACCESS_EXCEPTION);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -153,14 +167,14 @@ public class CrawlerLogHelper extends LogHelperImpl {
|
|||
}
|
||||
}
|
||||
|
||||
protected void storeFailureUrl(final CrawlerContext crawlerContext, final UrlQueue<?> urlQueue, final String errorName,
|
||||
protected FailureUrl storeFailureUrl(final CrawlerContext crawlerContext, final UrlQueue<?> urlQueue, final String errorName,
|
||||
final Throwable e) {
|
||||
|
||||
final CrawlingConfig crawlingConfig = getCrawlingConfig(crawlerContext.getSessionId());
|
||||
final String url = urlQueue.getUrl();
|
||||
|
||||
final FailureUrlService failureUrlService = ComponentUtil.getComponent(FailureUrlService.class);
|
||||
failureUrlService.store(crawlingConfig, errorName, url, e);
|
||||
return failureUrlService.store(crawlingConfig, errorName, url, e);
|
||||
}
|
||||
|
||||
protected CrawlingConfig getCrawlingConfig(final String sessionCountId) {
|
||||
|
|
Loading…
Add table
Reference in a new issue