fix #2640 add fess-urls.log

This commit is contained in:
Shinsuke Sugaya 2022-04-09 11:14:47 +09:00
parent 7d05a2885b
commit 5870c8c8bd
7 changed files with 447 additions and 34 deletions

View file

@ -38,40 +38,70 @@ public class CrawlerLogHelper extends LogHelperImpl {
}
return;
}
super.log(key, objs);
}
@Override
protected void processStartCrawling(Object... objs) {
super.processStartCrawling(objs);
if (objs.length > 1 && objs[1] instanceof UrlQueue<?> urlQueue) {
ComponentUtil.getCrawlerStatsHelper().begin(urlQueue);
}
}
@Override
protected void processCleanupCrawling(Object... objs) {
super.processCleanupCrawling(objs);
if (objs.length > 1 && objs[1] instanceof UrlQueue<?> urlQueue) {
ComponentUtil.getCrawlerStatsHelper().done(urlQueue);
}
}
@Override
protected void processProcessChildUrlByException(Object... objs) {
super.processProcessChildUrlByException(objs);
if (objs.length > 1 && objs[1] instanceof UrlQueue<?> urlQueue) {
ComponentUtil.getCrawlerStatsHelper().record(urlQueue, "child_url");
}
}
@Override
protected void processProcessChildUrlsByException(Object... objs) {
super.processProcessChildUrlsByException(objs);
if (objs.length > 1 && objs[1] instanceof UrlQueue<?> urlQueue) {
ComponentUtil.getCrawlerStatsHelper().record(urlQueue, "child_urls");
}
}
@Override
protected void processFinishedCrawling(Object... objs) {
super.processFinishedCrawling(objs);
if (objs.length > 1 && objs[1] instanceof UrlQueue<?> urlQueue) {
ComponentUtil.getCrawlerStatsHelper().record(urlQueue, "finished");
}
}
@Override
protected void processCrawlingAccessException(final Object... objs) {
try {
switch (key) {
case CRAWLING_ACCESS_EXCEPTION: {
final CrawlerContext crawlerContext = (CrawlerContext) objs[0];
final UrlQueue<?> urlQueue = (UrlQueue<?>) objs[1];
Throwable e = (Throwable) objs[2];
if (e instanceof MultipleCrawlingAccessException) {
final Throwable[] causes = ((MultipleCrawlingAccessException) e).getCauses();
if (causes.length > 0) {
e = causes[causes.length - 1];
}
final CrawlerContext crawlerContext = (CrawlerContext) objs[0];
final UrlQueue<?> urlQueue = (UrlQueue<?>) objs[1];
Throwable e = (Throwable) objs[2];
if (e instanceof MultipleCrawlingAccessException) {
final Throwable[] causes = ((MultipleCrawlingAccessException) e).getCauses();
if (causes.length > 0) {
e = causes[causes.length - 1];
}
}
String errorName;
final Throwable cause = e.getCause();
if (cause != null) {
errorName = cause.getClass().getCanonicalName();
} else {
errorName = e.getClass().getCanonicalName();
}
storeFailureUrl(crawlerContext, urlQueue, errorName, e);
break;
}
case CRAWLING_EXCETPION: {
final CrawlerContext crawlerContext = (CrawlerContext) objs[0];
final UrlQueue<?> urlQueue = (UrlQueue<?>) objs[1];
final Throwable e = (Throwable) objs[2];
storeFailureUrl(crawlerContext, urlQueue, e.getClass().getCanonicalName(), e);
break;
}
default:
break;
String errorName;
final Throwable cause = e.getCause();
if (cause != null) {
errorName = cause.getClass().getCanonicalName();
} else {
errorName = e.getClass().getCanonicalName();
}
storeFailureUrl(crawlerContext, urlQueue, errorName, e);
} catch (final ContainerNotAvailableException e) {
if (logger.isDebugEnabled()) {
logger.debug("container was destroyed.");
@ -87,10 +117,42 @@ public class CrawlerLogHelper extends LogHelperImpl {
logger.warn("Failed to store a failure url.", e);
}
super.log(key, objs);
super.processCrawlingAccessException(objs);
if (objs.length > 1 && objs[1] instanceof UrlQueue<?> urlQueue) {
ComponentUtil.getCrawlerStatsHelper().record(urlQueue, "access_exception");
}
}
private void storeFailureUrl(final CrawlerContext crawlerContext, final UrlQueue<?> urlQueue, final String errorName,
@Override
protected void processCrawlingException(final Object... objs) {
try {
final CrawlerContext crawlerContext = (CrawlerContext) objs[0];
final UrlQueue<?> urlQueue = (UrlQueue<?>) objs[1];
final Throwable e = (Throwable) objs[2];
storeFailureUrl(crawlerContext, urlQueue, e.getClass().getCanonicalName(), e);
} catch (final ContainerNotAvailableException e) {
if (logger.isDebugEnabled()) {
logger.debug("container was destroyed.");
}
return;
} catch (final Exception e) {
if (!ComponentUtil.available()) {
if (logger.isDebugEnabled()) {
logger.debug("container was destroyed.");
}
return;
}
logger.warn("Failed to store a failure url.", e);
}
super.processCrawlingException(objs);
if (objs.length > 1 && objs[1] instanceof UrlQueue<?> urlQueue) {
ComponentUtil.getCrawlerStatsHelper().record(urlQueue, "exception");
}
}
protected void storeFailureUrl(final CrawlerContext crawlerContext, final UrlQueue<?> urlQueue, final String errorName,
final Throwable e) {
final CrawlingConfig crawlingConfig = getCrawlingConfig(crawlerContext.getSessionId());
@ -100,7 +162,7 @@ public class CrawlerLogHelper extends LogHelperImpl {
failureUrlService.store(crawlingConfig, errorName, url, e);
}
private CrawlingConfig getCrawlingConfig(final String sessionCountId) {
protected CrawlingConfig getCrawlingConfig(final String sessionCountId) {
return ComponentUtil.getCrawlingConfigHelper().get(sessionCountId);
}

View file

@ -0,0 +1,169 @@
/*
* Copyright 2012-2022 CodeLibs Project and the Others.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
package org.codelibs.fess.helper;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import javax.annotation.PostConstruct;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codelibs.fess.crawler.entity.UrlQueue;
import org.codelibs.fess.taglib.FessFunctions;
import org.dbflute.optional.OptionalThing;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
/**
* @author shinsuke
*
*/
public class CrawlerStatsHelper {
private static final String BEGIN_KEY = "begin";
protected Logger logger = null;
protected String loggerName = "fess.log.crawler.stats";
protected long maxCacheSize = 1000;
protected long cacheExpireAfterWrite = 10 * 60 * 1000L;
protected LoadingCache<String, Map<String, Long>> statsCache;
@PostConstruct
public void init() {
logger = LogManager.getLogger(loggerName);
statsCache = CacheBuilder.newBuilder().maximumSize(maxCacheSize).expireAfterWrite(cacheExpireAfterWrite, TimeUnit.MILLISECONDS)
.build(new CacheLoader<String, Map<String, Long>>() {
@Override
public Map<String, Long> load(String key) {
Map<String, Long> map = new LinkedHashMap<>();
map.put(BEGIN_KEY, System.currentTimeMillis());
return map;
}
});
}
public void begin(final Object keyObj) {
getCacheKey(keyObj).ifPresent(key -> {
try {
statsCache.get(key);
} catch (Exception e) {
final StringBuilder buf = createStringBuffer(keyObj, System.currentTimeMillis());
buf.append('\t').append("action:begin");
buf.append('\t').append("error:").append(escapeValue(e.getLocalizedMessage()).replaceAll("\\s", " "));
log(buf);
}
});
}
public void record(final Object keyObj, final String action) {
getCacheKey(keyObj).ifPresent(key -> {
try {
Map<String, Long> data = statsCache.getIfPresent(key);
if (data != null) {
data.put(escapeValue(action), System.currentTimeMillis());
}
} catch (Exception e) {
final StringBuilder buf = createStringBuffer(keyObj, System.currentTimeMillis());
buf.append('\t').append("action:record");
buf.append('\t').append("error:").append(escapeValue(e.getLocalizedMessage()).replaceAll("\\s", " "));
log(buf);
}
});
}
public void done(final Object keyObj) {
getCacheKey(keyObj).ifPresent(key -> {
try {
final Map<String, Long> data = statsCache.getIfPresent(key);
if (data != null) {
statsCache.invalidate(key);
final Long begin = data.remove(BEGIN_KEY);
if (begin != null) {
final StringBuilder buf = createStringBuffer(keyObj, begin.longValue());
buf.append('\t').append("done:").append(System.currentTimeMillis() - begin.longValue());
data.entrySet().stream().map(e -> escapeValue(e.getKey()) + ":" + (e.getValue().longValue() - begin.longValue()))
.map(s -> "\t" + s).forEach(s -> buf.append(s));
log(buf);
}
}
} catch (Exception e) {
final StringBuilder buf = createStringBuffer(keyObj, System.currentTimeMillis());
buf.append('\t').append("action:done");
buf.append('\t').append("error:").append(escapeValue(e.getLocalizedMessage()).replaceAll("\\s", " "));
log(buf);
}
});
}
private StringBuilder createStringBuffer(final Object keyObj, final long time) {
final StringBuilder buf = new StringBuilder(1000);
buf.append("url:").append(getUrl(keyObj));
buf.append('\t');
buf.append("time:").append(FessFunctions.formatDate(new Date(time)));
return buf;
}
protected String getUrl(final Object keyObj) {
if (keyObj instanceof UrlQueue<?> urlQueue) {
return escapeValue(urlQueue.getUrl());
} else if (keyObj instanceof String key) {
return escapeValue(key);
} else if (keyObj instanceof Number key) {
return key.toString();
}
return "-";
}
protected OptionalThing<String> getCacheKey(final Object keyObj) {
if (keyObj instanceof UrlQueue<?> urlQueue) {
return OptionalThing.of(urlQueue.getId().toString());
} else if (keyObj instanceof String key) {
return OptionalThing.of(key);
} else if (keyObj instanceof Number key) {
return OptionalThing.of(key.toString());
}
return OptionalThing.empty();
}
protected String escapeValue(String action) {
return action.replace('\t', ' ');
}
protected void log(final StringBuilder buf) {
logger.info(buf.toString());
}
public void setLoggerName(String loggerName) {
this.loggerName = loggerName;
}
public void setMaxCacheSize(long maxCacheSize) {
this.maxCacheSize = maxCacheSize;
}
public void setCacheExpireAfterWrite(long cacheExpireAfterWrite) {
this.cacheExpireAfterWrite = cacheExpireAfterWrite;
}
}

View file

@ -39,6 +39,7 @@ import org.codelibs.fess.es.client.SearchEngineClient;
import org.codelibs.fess.exception.ContainerNotAvailableException;
import org.codelibs.fess.helper.AccessTokenHelper;
import org.codelibs.fess.helper.ActivityHelper;
import org.codelibs.fess.helper.CrawlerStatsHelper;
import org.codelibs.fess.helper.CrawlingConfigHelper;
import org.codelibs.fess.helper.CrawlingInfoHelper;
import org.codelibs.fess.helper.CurlHelper;
@ -208,6 +209,8 @@ public final class ComponentUtil {
private static final String RELATED_QUERY_HELPER = "relatedQueryHelper";
private static final String CRAWLER_STATS_HELPER = "crawlerStatsHelper";
private static IndexingHelper indexingHelper;
private static CrawlingConfigHelper crawlingConfigHelper;
@ -505,6 +508,10 @@ public final class ComponentUtil {
return getComponent(SCRIPT_ENGINE_FACTORY);
}
public static CrawlerStatsHelper getCrawlerStatsHelper() {
return getComponent(CRAWLER_STATS_HELPER);
}
public static <T> T getComponent(final Class<T> clazz) {
try {
return SingletonLaContainer.getComponent(clazz);

View file

@ -44,6 +44,9 @@
</postConstruct>
-->
</component>
<component name="crawlerStatsHelper"
class="org.codelibs.fess.helper.CrawlerStatsHelper">
</component>
<component name="fessCrawler" class="org.codelibs.fess.exec.Crawler"
instance="prototype">
</component>

View file

@ -0,0 +1,78 @@
<?xml version="1.0" encoding="UTF-8"?>
<Configuration status="WARN">
<Properties>
<Property name="domain.name" value="${sys:fess.log.name:-fess}" />
<Property name="log.level" value="${sys:fess.log.level:-debug}" />
<Property name="root.log.level" value="${sys:fess.log.level:-info}" />
<Property name="log.pattern" value="%d [%t] %-5p %msg%n" />
<Property name="log.file.basedir" value="${sys:fess.log.path:-target/logs}" />
<Property name="backup.date.suffix" value="_%d{yyyyMMdd}" />
<Property name="backup.max.history" value="90" />
<Property name="stats.log.pattern" value="%msg%n" />
</Properties>
<Appenders>
<Routing name="Console">
<Routes>
<Script name="RoutingInit" language="JavaScript"><![CDATA[
java.lang.System.getProperty("lasta.env") == null ? "Output" : "NoOutput";]]>
</Script>
<Route key="Output">
<Console name="OutputConsole" target="SYSTEM_OUT">
<PatternLayout pattern="${log.pattern}" />
</Console>
</Route>
<Route key="NoOutput">
<CountingNoOp name="DevNull" />
</Route>
</Routes>
</Routing>
<RollingFile name="AppFile" fileName="${log.file.basedir}/${domain.name}.log"
filePattern="${log.file.basedir}/${domain.name}${backup.date.suffix}-%i.log.gz">
<PatternLayout>
<Pattern>${log.pattern}</Pattern>
</PatternLayout>
<Policies>
<TimeBasedTriggeringPolicy />
<SizeBasedTriggeringPolicy size="100 MB" />
</Policies>
<DefaultRolloverStrategy fileIndex="max" min="1"
max="${backup.max.history}" compressionLevel="9" />
</RollingFile>
<RollingFile name="StatsFile" fileName="${log.file.basedir}/fess-urls.log"
filePattern="${log.file.basedir}/fess-urls${backup.date.suffix}-%i.log.gz">
<PatternLayout>
<Pattern>${stats.log.pattern}</Pattern>
</PatternLayout>
<Policies>
<TimeBasedTriggeringPolicy />
<SizeBasedTriggeringPolicy size="100 MB" />
</Policies>
<DefaultRolloverStrategy fileIndex="max" min="1"
max="${backup.max.history}" compressionLevel="9" />
</RollingFile>
</Appenders>
<Loggers>
<Logger name="org.codelibs" additivity="false" level="${log.level}">
<AppenderRef ref="Console" />
<AppenderRef ref="AppFile" />
</Logger>
<Logger name="org.dbflute" additivity="false" level="${log.level}">
<AppenderRef ref="Console" />
<AppenderRef ref="AppFile" />
</Logger>
<Logger name="org.lastaflute" additivity="false" level="${log.level}">
<AppenderRef ref="Console" />
<AppenderRef ref="AppFile" />
</Logger>
<Logger name="fess.log.crawler.stats" additivity="false" level="info">
<AppenderRef ref="StatsFile" />
</Logger>
<Root level="${root.log.level}">
<AppenderRef ref="Console" />
<AppenderRef ref="AppFile" />
</Root>
</Loggers>
</Configuration>

View file

@ -9,7 +9,6 @@
<Property name="log.file.basedir" value="${sys:fess.log.path:-target/logs}" />
<Property name="backup.date.suffix" value="_%d{yyyyMMdd}" />
<Property name="backup.max.history" value="90" />
<Property name="audit.log.pattern" value="%msg%n" />
</Properties>
<Appenders>

View file

@ -0,0 +1,95 @@
/*
* Copyright 2012-2022 CodeLibs Project and the Others.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
package org.codelibs.fess.helper;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codelibs.fess.unit.UnitFessTestCase;
public class CrawlerStatsHelperTest extends UnitFessTestCase {
private static final Logger logger = LogManager.getLogger(CrawlerStatsHelperTest.class);
private CrawlerStatsHelper crawlerStatsHelper;
private ThreadLocal<String> localLogMsg = new ThreadLocal<>();
@Override
public void setUp() throws Exception {
super.setUp();
crawlerStatsHelper = new CrawlerStatsHelper() {
@Override
protected void log(final StringBuilder buf) {
localLogMsg.set(buf.toString());
}
};
crawlerStatsHelper.init();
}
public void test_beginDone() {
String key = "test";
crawlerStatsHelper.begin(key);
crawlerStatsHelper.done(key);
logger.info(localLogMsg.get());
String[] values = localLogMsg.get().split("\t");
assertEquals(3, values.length);
assertEquals("url:test", values[0]);
assertTrue(values[1].startsWith("time:"));
assertTrue(values[2].startsWith("done:"));
localLogMsg.remove();
crawlerStatsHelper.done(key);
assertNull(localLogMsg.get());
}
public void test_beginDoneWithRecord1() {
String key = "test";
crawlerStatsHelper.begin(key);
crawlerStatsHelper.record(key, "aaa");
crawlerStatsHelper.done(key);
logger.info(localLogMsg.get());
String[] values = localLogMsg.get().split("\t");
assertEquals(4, values.length);
assertEquals("url:test", values[0]);
assertTrue(values[1].startsWith("time:"));
assertTrue(values[2].startsWith("done:"));
assertTrue(values[3].startsWith("aaa:"));
localLogMsg.remove();
crawlerStatsHelper.done(key);
assertNull(localLogMsg.get());
}
public void test_beginDoneWithRecord2() {
String key = "test";
crawlerStatsHelper.begin(key);
crawlerStatsHelper.record(key, "aaa");
crawlerStatsHelper.record(key, "bbb");
crawlerStatsHelper.done(key);
logger.info(localLogMsg.get());
String[] values = localLogMsg.get().split("\t");
assertEquals(5, values.length);
assertEquals("url:test", values[0]);
assertTrue(values[1].startsWith("time:"));
assertTrue(values[2].startsWith("done:"));
assertTrue(values[3].startsWith("aaa:"));
assertTrue(values[4].startsWith("bbb:"));
localLogMsg.remove();
crawlerStatsHelper.done(key);
assertNull(localLogMsg.get());
}
}