#2640 add StatsAction
parent 4126a9dc69
commit 45b062c6e4
3 changed files with 32 additions and 11 deletions

@@ -49,6 +49,7 @@ import org.codelibs.fess.entity.DataStoreParams;
 import org.codelibs.fess.es.client.SearchEngineClient;
 import org.codelibs.fess.exception.DataStoreCrawlingException;
 import org.codelibs.fess.helper.CrawlerStatsHelper;
+import org.codelibs.fess.helper.CrawlerStatsHelper.StatsAction;
 import org.codelibs.fess.helper.CrawlerStatsHelper.StatsKeyObject;
 import org.codelibs.fess.helper.IndexingHelper;
 import org.codelibs.fess.mylasta.direction.FessConfig;

@@ -147,20 +148,20 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
                 if (keyObj != null) {
                     keyObj.setUrl(processingUrl);
                 }
-                crawlerStatsHelper.record(keyObj, "prepared");
+                crawlerStatsHelper.record(keyObj, StatsAction.PREPARED);
                 processingUrl = processRequest(paramMap, localDataMap, processingUrl, client);
                 if (processingUrl == null) {
                     break;
                 }
                 counter++;
                 localDataMap.put(fessConfig.getIndexFieldUrl(), processingUrl);
-                crawlerStatsHelper.record(keyObj, "redirected");
+                crawlerStatsHelper.record(keyObj, StatsAction.REDIRECTED);
             }
         } catch (final ChildUrlsException e) {
-            crawlerStatsHelper.record(keyObj, "child_urls");
+            crawlerStatsHelper.record(keyObj, StatsAction.CHILD_URLS);
             e.getChildUrlList().stream().map(RequestData::getUrl).forEach(urlQueue::offer);
         } catch (final DataStoreCrawlingException e) {
-            crawlerStatsHelper.record(keyObj, "crawling_exception");
+            crawlerStatsHelper.record(keyObj, StatsAction.CRAWLING_EXCEPTION);
             final Throwable cause = e.getCause();
             if (cause instanceof ChildUrlsException) {
                 ((ChildUrlsException) cause).getChildUrlList().stream().map(RequestData::getUrl).forEach(urlQueue::offer);

@@ -225,7 +226,7 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
                     throw new CrawlerSystemException("Could not create an instance from bytes.", e);
                 }
             }
-            crawlerStatsHelper.record(keyObj, "accessed");
+            crawlerStatsHelper.record(keyObj, StatsAction.ACCESSED);
 
             // remove
             String[] ignoreFields;

@@ -237,7 +238,7 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
                 stream(ignoreFields).of(stream -> stream.map(String::trim).forEach(s -> dataMap.remove(s)));
 
                 indexUpdateCallback.store(paramMap, dataMap);
-                crawlerStatsHelper.record(keyObj, "processed");
+                crawlerStatsHelper.record(keyObj, StatsAction.PROCESSED);
             } else {
                 logger.warn("The response processor is not DefaultResponseProcessor. responseProcessor: {}, Data: {}",
                         responseProcessor, dataMap);
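
Taken together, the hunks above change the per-URL bookkeeping in this callback from free-form strings to typed StatsAction values. A condensed sketch of that call order, assuming the Fess artifact on the classpath and an initialized container; the class below is hypothetical and the surrounding crawl and indexing logic is omitted:

import org.codelibs.fess.helper.CrawlerStatsHelper;
import org.codelibs.fess.helper.CrawlerStatsHelper.StatsAction;
import org.codelibs.fess.util.ComponentUtil;

// Condensed sketch of the recording order in the hunks above; not part of the commit.
public final class FileListStatsFlowSketch {

    public static void recordLifecycle(final Object keyObj) {
        final CrawlerStatsHelper statsHelper = ComponentUtil.getCrawlerStatsHelper();

        statsHelper.record(keyObj, StatsAction.PREPARED);   // before processRequest runs
        statsHelper.record(keyObj, StatsAction.REDIRECTED); // only when processRequest returns a new URL
        statsHelper.record(keyObj, StatsAction.ACCESSED);   // response turned into a data map
        statsHelper.record(keyObj, StatsAction.PROCESSED);  // document handed to indexUpdateCallback.store
        // Failure paths record StatsAction.CHILD_URLS or StatsAction.CRAWLING_EXCEPTION instead.
    }
}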

@@ -25,6 +25,7 @@ import org.codelibs.fess.crawler.helper.impl.LogHelperImpl;
 import org.codelibs.fess.crawler.log.LogType;
 import org.codelibs.fess.es.config.exentity.CrawlingConfig;
 import org.codelibs.fess.exception.ContainerNotAvailableException;
+import org.codelibs.fess.helper.CrawlerStatsHelper.StatsAction;
 import org.codelibs.fess.util.ComponentUtil;
 
 public class CrawlerLogHelper extends LogHelperImpl {

@@ -61,7 +62,7 @@ public class CrawlerLogHelper extends LogHelperImpl {
     protected void processProcessChildUrlByException(Object... objs) {
         super.processProcessChildUrlByException(objs);
         if (objs.length > 1 && objs[1] instanceof UrlQueue<?> urlQueue) {
-            ComponentUtil.getCrawlerStatsHelper().record(urlQueue, "child_url");
+            ComponentUtil.getCrawlerStatsHelper().record(urlQueue, StatsAction.CHILD_URL);
         }
     }
 

@@ -69,7 +70,7 @@ public class CrawlerLogHelper extends LogHelperImpl {
     protected void processProcessChildUrlsByException(Object... objs) {
         super.processProcessChildUrlsByException(objs);
         if (objs.length > 1 && objs[1] instanceof UrlQueue<?> urlQueue) {
-            ComponentUtil.getCrawlerStatsHelper().record(urlQueue, "child_urls");
+            ComponentUtil.getCrawlerStatsHelper().record(urlQueue, StatsAction.CHILD_URLS);
         }
     }
 

@@ -77,7 +78,7 @@ public class CrawlerLogHelper extends LogHelperImpl {
     protected void processFinishedCrawling(Object... objs) {
         super.processFinishedCrawling(objs);
         if (objs.length > 1 && objs[1] instanceof UrlQueue<?> urlQueue) {
-            ComponentUtil.getCrawlerStatsHelper().record(urlQueue, "finished");
+            ComponentUtil.getCrawlerStatsHelper().record(urlQueue, StatsAction.FINISHED);
         }
     }
 

@@ -119,7 +120,7 @@ public class CrawlerLogHelper extends LogHelperImpl {
 
         super.processCrawlingAccessException(objs);
         if (objs.length > 1 && objs[1] instanceof UrlQueue<?> urlQueue) {
-            ComponentUtil.getCrawlerStatsHelper().record(urlQueue, "access_exception");
+            ComponentUtil.getCrawlerStatsHelper().record(urlQueue, StatsAction.CRAWLING_EXCEPTION);
         }
     }
 

@@ -148,7 +149,7 @@ public class CrawlerLogHelper extends LogHelperImpl {
 
         super.processCrawlingException(objs);
         if (objs.length > 1 && objs[1] instanceof UrlQueue<?> urlQueue) {
-            ComponentUtil.getCrawlerStatsHelper().record(urlQueue, "exception");
+            ComponentUtil.getCrawlerStatsHelper().record(urlQueue, StatsAction.CRAWLING_EXCEPTION);
         }
     }
 
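
Each handler above uses the same guard: the log-event payload arrives as an Object[] whose second element is expected to carry the crawler's UrlQueue, and a pattern-matching instanceof (Java 16+) binds it as the stats key in one step. A minimal standalone sketch of that guard, assuming fess and fess-crawler on the classpath; the class name is hypothetical and the event wiring is not shown:

import org.codelibs.fess.crawler.entity.UrlQueue;
import org.codelibs.fess.helper.CrawlerStatsHelper.StatsAction;
import org.codelibs.fess.util.ComponentUtil;

// Hypothetical handler mirroring the guard used by the CrawlerLogHelper methods above.
final class UrlQueueGuardSketch {

    static void onFinishedCrawling(final Object... objs) {
        // The second element of the payload is expected to be the UrlQueue entry.
        if (objs.length > 1 && objs[1] instanceof UrlQueue<?> urlQueue) {
            ComponentUtil.getCrawlerStatsHelper().record(urlQueue, StatsAction.FINISHED);
        }
    }
}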

@@ -17,6 +17,7 @@ package org.codelibs.fess.helper;
 
 import java.util.Date;
 import java.util.LinkedHashMap;
+import java.util.Locale;
 import java.util.Map;
 import java.util.concurrent.TimeUnit;
 

@@ -76,6 +77,10 @@ public class CrawlerStatsHelper {
         });
     }
 
+    public void record(final Object keyObj, final StatsAction action) {
+        record(keyObj, action.name().toLowerCase(Locale.ENGLISH));
+    }
+
     public void record(final Object keyObj, final String action) {
         getCacheKey(keyObj).ifPresent(key -> {
             try {
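
With this overload, both entry points coexist: typed lifecycle actions go through the enum and are lower-cased into the labels the helper already logs, while ad-hoc labels can still use the String overload. A small usage sketch, assuming an initialized Fess container; the caller class is hypothetical:

import org.codelibs.fess.helper.CrawlerStatsHelper;
import org.codelibs.fess.helper.CrawlerStatsHelper.StatsAction;
import org.codelibs.fess.util.ComponentUtil;

// Hypothetical caller showing both record(...) overloads side by side.
final class RecordOverloadSketch {

    static void recordBoth(final Object keyObj) {
        final CrawlerStatsHelper helper = ComponentUtil.getCrawlerStatsHelper();
        helper.record(keyObj, StatsAction.PREPARED); // stored under the label "prepared"
        helper.record(keyObj, "custom_step");        // free-form label, behaviour unchanged
    }
}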

@@ -195,4 +200,18 @@ public class CrawlerStatsHelper {
             return id;
         }
     }
+
+    public enum StatsAction {
+        ACCESSED, //
+        CHILD_URL, //
+        CHILD_URLS, //
+        CRAWLING_EXCEPTION, //
+        EVALUATED, //
+        EXCEPTION, //
+        FINISHED, //
+        PARSED, //
+        PREPARED, //
+        REDIRECTED, //
+        PROCESSED, //
+    }
 }
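
The enum constants are meant to reproduce the string labels they replace, via name().toLowerCase(Locale.ENGLISH) in the overload added above. A quick standalone check of that mapping, assuming the Fess classes are on the classpath; the class name is hypothetical:

import java.util.Locale;

import org.codelibs.fess.helper.CrawlerStatsHelper.StatsAction;

// Prints each StatsAction next to the label the record(Object, StatsAction) overload emits,
// e.g. PREPARED -> "prepared", CRAWLING_EXCEPTION -> "crawling_exception".
public final class StatsActionLabelCheck {

    public static void main(final String[] args) {
        for (final StatsAction action : StatsAction.values()) {
            System.out.println(action + " -> \"" + action.name().toLowerCase(Locale.ENGLISH) + "\"");
        }
    }
}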