Fix #2823: log the failure URL ID for each CrawlingAccessException in fess.log

This commit is contained in:
Shinsuke Sugaya 2024-06-24 10:20:17 +09:00
parent 0a9a4f7306
commit 78070c9e74
2 changed files with 31 additions and 16 deletions

View file

@ -128,9 +128,9 @@ public class FailureUrlService {
});
}
public void store(final CrawlingConfig crawlingConfig, final String errorName, final String url, final Throwable e) {
public FailureUrl store(final CrawlingConfig crawlingConfig, final String errorName, final String url, final Throwable e) {
if (e instanceof ContainerNotAvailableException) {
return;
return null;
}
final FailureUrlBhv bhv = ComponentUtil.getComponent(FailureUrlBhv.class);
@ -160,6 +160,7 @@ public class FailureUrlService {
bhv.insertOrUpdate(failureUrl, op -> {
op.setRefreshPolicy(Constants.TRUE);
});
return failureUrl;
}
private String getStackTrace(final Throwable t) {

View file

@ -20,10 +20,12 @@ import org.apache.logging.log4j.Logger;
import org.codelibs.fess.app.service.FailureUrlService;
import org.codelibs.fess.crawler.CrawlerContext;
import org.codelibs.fess.crawler.entity.UrlQueue;
import org.codelibs.fess.crawler.exception.CrawlingAccessException;
import org.codelibs.fess.crawler.exception.MultipleCrawlingAccessException;
import org.codelibs.fess.crawler.helper.impl.LogHelperImpl;
import org.codelibs.fess.crawler.log.LogType;
import org.codelibs.fess.es.config.exentity.CrawlingConfig;
import org.codelibs.fess.es.config.exentity.FailureUrl;
import org.codelibs.fess.exception.ContainerNotAvailableException;
import org.codelibs.fess.helper.CrawlerStatsHelper.StatsAction;
import org.codelibs.fess.util.ComponentUtil;
@ -84,25 +86,30 @@ public class CrawlerLogHelper extends LogHelperImpl {
@Override
protected void processCrawlingAccessException(final Object... objs) {
String failureUrlId = "?";
final CrawlerContext crawlerContext = (CrawlerContext) objs[0];
final UrlQueue<?> urlQueue = (UrlQueue<?>) objs[1];
final CrawlingAccessException cae = (CrawlingAccessException) objs[2];
try {
final CrawlerContext crawlerContext = (CrawlerContext) objs[0];
final UrlQueue<?> urlQueue = (UrlQueue<?>) objs[1];
Throwable e = (Throwable) objs[2];
if (e instanceof MultipleCrawlingAccessException) {
final Throwable[] causes = ((MultipleCrawlingAccessException) e).getCauses();
Throwable t = cae;
if (t instanceof MultipleCrawlingAccessException mcae) {
final Throwable[] causes = mcae.getCauses();
if (causes.length > 0) {
e = causes[causes.length - 1];
t = causes[causes.length - 1];
}
}
String errorName;
final Throwable cause = e.getCause();
final Throwable cause = t.getCause();
if (cause != null) {
errorName = cause.getClass().getCanonicalName();
} else {
errorName = e.getClass().getCanonicalName();
errorName = t.getClass().getCanonicalName();
}
FailureUrl failureUrl = storeFailureUrl(crawlerContext, urlQueue, errorName, t);
if (failureUrl != null) {
failureUrlId = failureUrl.getId();
}
storeFailureUrl(crawlerContext, urlQueue, errorName, e);
} catch (final ContainerNotAvailableException e) {
if (logger.isDebugEnabled()) {
logger.debug("container was destroyed.");
@ -118,10 +125,17 @@ public class CrawlerLogHelper extends LogHelperImpl {
logger.warn("Failed to store a failure url.", e);
}
super.processCrawlingAccessException(objs);
if (objs.length > 1 && objs[1] instanceof final UrlQueue<?> urlQueue) {
ComponentUtil.getCrawlerStatsHelper().record(urlQueue, StatsAction.ACCESS_EXCEPTION);
if (cae.isDebugEnabled()) {
logger.debug("[{}] Crawling Access Exception at {}", failureUrlId, urlQueue.getUrl(), cae);
} else if (cae.isInfoEnabled()) {
logger.info("[{}] {}", failureUrlId, cae.getMessage());
} else if (cae.isWarnEnabled()) {
logger.warn("[{}] Crawling Access Exception at {}", failureUrlId, urlQueue.getUrl(), cae);
} else if (cae.isErrorEnabled()) {
logger.error("[{}] Crawling Access Exception at {}", failureUrlId, urlQueue.getUrl(), cae);
}
ComponentUtil.getCrawlerStatsHelper().record(urlQueue, StatsAction.ACCESS_EXCEPTION);
}
@Override
@ -153,14 +167,14 @@ public class CrawlerLogHelper extends LogHelperImpl {
}
}
protected void storeFailureUrl(final CrawlerContext crawlerContext, final UrlQueue<?> urlQueue, final String errorName,
protected FailureUrl storeFailureUrl(final CrawlerContext crawlerContext, final UrlQueue<?> urlQueue, final String errorName,
final Throwable e) {
final CrawlingConfig crawlingConfig = getCrawlingConfig(crawlerContext.getSessionId());
final String url = urlQueue.getUrl();
final FailureUrlService failureUrlService = ComponentUtil.getComponent(FailureUrlService.class);
failureUrlService.store(crawlingConfig, errorName, url, e);
return failureUrlService.store(crawlingConfig, errorName, url, e);
}
protected CrawlingConfig getCrawlingConfig(final String sessionCountId) {