#2640 add StatsAction

This commit is contained in:
Shinsuke Sugaya 2022-04-12 21:39:58 +09:00
parent 4126a9dc69
commit 45b062c6e4
3 changed files with 32 additions and 11 deletions

View file

@ -49,6 +49,7 @@ import org.codelibs.fess.entity.DataStoreParams;
import org.codelibs.fess.es.client.SearchEngineClient;
import org.codelibs.fess.exception.DataStoreCrawlingException;
import org.codelibs.fess.helper.CrawlerStatsHelper;
import org.codelibs.fess.helper.CrawlerStatsHelper.StatsAction;
import org.codelibs.fess.helper.CrawlerStatsHelper.StatsKeyObject;
import org.codelibs.fess.helper.IndexingHelper;
import org.codelibs.fess.mylasta.direction.FessConfig;
@ -147,20 +148,20 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
if (keyObj != null) {
keyObj.setUrl(processingUrl);
}
crawlerStatsHelper.record(keyObj, "prepared");
crawlerStatsHelper.record(keyObj, StatsAction.PREPARED);
processingUrl = processRequest(paramMap, localDataMap, processingUrl, client);
if (processingUrl == null) {
break;
}
counter++;
localDataMap.put(fessConfig.getIndexFieldUrl(), processingUrl);
crawlerStatsHelper.record(keyObj, "redirected");
crawlerStatsHelper.record(keyObj, StatsAction.REDIRECTED);
}
} catch (final ChildUrlsException e) {
crawlerStatsHelper.record(keyObj, "child_urls");
crawlerStatsHelper.record(keyObj, StatsAction.CHILD_URLS);
e.getChildUrlList().stream().map(RequestData::getUrl).forEach(urlQueue::offer);
} catch (final DataStoreCrawlingException e) {
crawlerStatsHelper.record(keyObj, "crawling_exception");
crawlerStatsHelper.record(keyObj, StatsAction.CRAWLING_EXCEPTION);
final Throwable cause = e.getCause();
if (cause instanceof ChildUrlsException) {
((ChildUrlsException) cause).getChildUrlList().stream().map(RequestData::getUrl).forEach(urlQueue::offer);
@ -225,7 +226,7 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
throw new CrawlerSystemException("Could not create an instance from bytes.", e);
}
}
crawlerStatsHelper.record(keyObj, "accessed");
crawlerStatsHelper.record(keyObj, StatsAction.ACCESSED);
// remove
String[] ignoreFields;
@ -237,7 +238,7 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
stream(ignoreFields).of(stream -> stream.map(String::trim).forEach(s -> dataMap.remove(s)));
indexUpdateCallback.store(paramMap, dataMap);
crawlerStatsHelper.record(keyObj, "processed");
crawlerStatsHelper.record(keyObj, StatsAction.PROCESSED);
} else {
logger.warn("The response processor is not DefaultResponseProcessor. responseProcessor: {}, Data: {}",
responseProcessor, dataMap);

View file

@ -25,6 +25,7 @@ import org.codelibs.fess.crawler.helper.impl.LogHelperImpl;
import org.codelibs.fess.crawler.log.LogType;
import org.codelibs.fess.es.config.exentity.CrawlingConfig;
import org.codelibs.fess.exception.ContainerNotAvailableException;
import org.codelibs.fess.helper.CrawlerStatsHelper.StatsAction;
import org.codelibs.fess.util.ComponentUtil;
public class CrawlerLogHelper extends LogHelperImpl {
@ -61,7 +62,7 @@ public class CrawlerLogHelper extends LogHelperImpl {
/**
 * Delegates to the parent handler, then records a {@code CHILD_URL} stats action when the
 * second argument is a {@link UrlQueue}.
 *
 * @param objs log arguments; only {@code objs[1]} is inspected here
 */
protected void processProcessChildUrlByException(Object... objs) {
    super.processProcessChildUrlByException(objs);
    if (objs.length > 1 && objs[1] instanceof UrlQueue<?> urlQueue) {
        // Record exactly once, through the typed overload; the constant name is
        // lower-cased to "child_url", the same key the string literal used.
        ComponentUtil.getCrawlerStatsHelper().record(urlQueue, StatsAction.CHILD_URL);
    }
}
@ -69,7 +70,7 @@ public class CrawlerLogHelper extends LogHelperImpl {
/**
 * Delegates to the parent handler, then records a {@code CHILD_URLS} stats action when the
 * second argument is a {@link UrlQueue}.
 *
 * @param objs log arguments; only {@code objs[1]} is inspected here
 */
protected void processProcessChildUrlsByException(Object... objs) {
    super.processProcessChildUrlsByException(objs);
    if (objs.length > 1 && objs[1] instanceof UrlQueue<?> urlQueue) {
        // Record exactly once, through the typed overload; the constant name is
        // lower-cased to "child_urls", the same key the string literal used.
        ComponentUtil.getCrawlerStatsHelper().record(urlQueue, StatsAction.CHILD_URLS);
    }
}
@ -77,7 +78,7 @@ public class CrawlerLogHelper extends LogHelperImpl {
/**
 * Delegates to the parent handler, then records a {@code FINISHED} stats action when the
 * second argument is a {@link UrlQueue}.
 *
 * @param objs log arguments; only {@code objs[1]} is inspected here
 */
protected void processFinishedCrawling(Object... objs) {
    super.processFinishedCrawling(objs);
    if (objs.length > 1 && objs[1] instanceof UrlQueue<?> urlQueue) {
        // Record exactly once, through the typed overload; the constant name is
        // lower-cased to "finished", the same key the string literal used.
        ComponentUtil.getCrawlerStatsHelper().record(urlQueue, StatsAction.FINISHED);
    }
}
@ -119,7 +120,7 @@ public class CrawlerLogHelper extends LogHelperImpl {
super.processCrawlingAccessException(objs);
if (objs.length > 1 && objs[1] instanceof UrlQueue<?> urlQueue) {
    // Keep the "access_exception" key. StatsAction.CRAWLING_EXCEPTION would be recorded as
    // "crawling_exception" and merge access failures with data-store crawling failures.
    ComponentUtil.getCrawlerStatsHelper().record(urlQueue, "access_exception");
}
}
@ -148,7 +149,7 @@ public class CrawlerLogHelper extends LogHelperImpl {
super.processCrawlingException(objs);
if (objs.length > 1 && objs[1] instanceof UrlQueue<?> urlQueue) {
    // StatsAction.EXCEPTION lower-cases to "exception", the key this handler records;
    // CRAWLING_EXCEPTION would be recorded as "crawling_exception" instead.
    ComponentUtil.getCrawlerStatsHelper().record(urlQueue, StatsAction.EXCEPTION);
}
}

View file

@ -17,6 +17,7 @@ package org.codelibs.fess.helper;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.TimeUnit;
@ -76,6 +77,10 @@ public class CrawlerStatsHelper {
});
}
/**
 * Records a statistics action given as a {@link StatsAction} constant.
 * The constant's name is lower-cased with {@link Locale#ENGLISH} and handed to
 * {@link #record(Object, String)}.
 *
 * @param keyObj object passed unchanged to the String overload
 * @param action constant whose lower-cased name is recorded
 */
public void record(final Object keyObj, final StatsAction action) {
    final String actionName = action.name().toLowerCase(Locale.ENGLISH);
    record(keyObj, actionName);
}
public void record(final Object keyObj, final String action) {
getCacheKey(keyObj).ifPresent(key -> {
try {
@ -195,4 +200,18 @@ public class CrawlerStatsHelper {
return id;
}
}
/**
 * Actions that can be recorded for a crawled key object. The typed
 * {@code record(Object, StatsAction)} overload stores the lower-cased constant name,
 * so each constant corresponds to one action key (for example {@code CHILD_URLS} to
 * "child_urls").
 */
public enum StatsAction {
    ACCESSED, //
    // Matches the "access_exception" key recorded for crawling access failures,
    // keeping them distinct from CRAWLING_EXCEPTION.
    ACCESS_EXCEPTION, //
    CHILD_URL, //
    CHILD_URLS, //
    CRAWLING_EXCEPTION, //
    EVALUATED, //
    EXCEPTION, //
    FINISHED, //
    PARSED, //
    PREPARED, //
    REDIRECTED, //
    PROCESSED, //
}
}