Przeglądaj źródła

fix #2640 add fess-urls.log

Shinsuke Sugaya 3 lat temu
rodzic
commit
5870c8c8bd

+ 93 - 31
src/main/java/org/codelibs/fess/helper/CrawlerLogHelper.java

@@ -38,40 +38,99 @@ public class CrawlerLogHelper extends LogHelperImpl {
             }
             return;
         }
-        try {
-            switch (key) {
-            case CRAWLING_ACCESS_EXCEPTION: {
-                final CrawlerContext crawlerContext = (CrawlerContext) objs[0];
-                final UrlQueue<?> urlQueue = (UrlQueue<?>) objs[1];
-                Throwable e = (Throwable) objs[2];
-                if (e instanceof MultipleCrawlingAccessException) {
-                    final Throwable[] causes = ((MultipleCrawlingAccessException) e).getCauses();
-                    if (causes.length > 0) {
-                        e = causes[causes.length - 1];
-                    }
-                }
+        super.log(key, objs);
+    }
+
+    @Override
+    protected void processStartCrawling(Object... objs) {
+        super.processStartCrawling(objs);
+        if (objs.length > 1 && objs[1] instanceof UrlQueue<?> urlQueue) {
+            ComponentUtil.getCrawlerStatsHelper().begin(urlQueue);
+        }
+    }
+
+    @Override
+    protected void processCleanupCrawling(Object... objs) {
+        super.processCleanupCrawling(objs);
+        if (objs.length > 1 && objs[1] instanceof UrlQueue<?> urlQueue) {
+            ComponentUtil.getCrawlerStatsHelper().done(urlQueue);
+        }
+    }
+
+    @Override
+    protected void processProcessChildUrlByException(Object... objs) {
+        super.processProcessChildUrlByException(objs);
+        if (objs.length > 1 && objs[1] instanceof UrlQueue<?> urlQueue) {
+            ComponentUtil.getCrawlerStatsHelper().record(urlQueue, "child_url");
+        }
+    }
+
+    @Override
+    protected void processProcessChildUrlsByException(Object... objs) {
+        super.processProcessChildUrlsByException(objs);
+        if (objs.length > 1 && objs[1] instanceof UrlQueue<?> urlQueue) {
+            ComponentUtil.getCrawlerStatsHelper().record(urlQueue, "child_urls");
+        }
+    }
+
+    @Override
+    protected void processFinishedCrawling(Object... objs) {
+        super.processFinishedCrawling(objs);
+        if (objs.length > 1 && objs[1] instanceof UrlQueue<?> urlQueue) {
+            ComponentUtil.getCrawlerStatsHelper().record(urlQueue, "finished");
+        }
+    }
 
-                String errorName;
-                final Throwable cause = e.getCause();
-                if (cause != null) {
-                    errorName = cause.getClass().getCanonicalName();
-                } else {
-                    errorName = e.getClass().getCanonicalName();
+    @Override
+    protected void processCrawlingAccessException(final Object... objs) {
+        try {
+            final CrawlerContext crawlerContext = (CrawlerContext) objs[0];
+            final UrlQueue<?> urlQueue = (UrlQueue<?>) objs[1];
+            Throwable e = (Throwable) objs[2];
+            if (e instanceof MultipleCrawlingAccessException) {
+                final Throwable[] causes = ((MultipleCrawlingAccessException) e).getCauses();
+                if (causes.length > 0) {
+                    e = causes[causes.length - 1];
                 }
-                storeFailureUrl(crawlerContext, urlQueue, errorName, e);
-                break;
             }
-            case CRAWLING_EXCETPION: {
-                final CrawlerContext crawlerContext = (CrawlerContext) objs[0];
-                final UrlQueue<?> urlQueue = (UrlQueue<?>) objs[1];
-                final Throwable e = (Throwable) objs[2];
 
-                storeFailureUrl(crawlerContext, urlQueue, e.getClass().getCanonicalName(), e);
-                break;
+            String errorName;
+            final Throwable cause = e.getCause();
+            if (cause != null) {
+                errorName = cause.getClass().getCanonicalName();
+            } else {
+                errorName = e.getClass().getCanonicalName();
+            }
+            storeFailureUrl(crawlerContext, urlQueue, errorName, e);
+        } catch (final ContainerNotAvailableException e) {
+            if (logger.isDebugEnabled()) {
+                logger.debug("container was destroyed.");
             }
-            default:
-                break;
+            return;
+        } catch (final Exception e) {
+            if (!ComponentUtil.available()) {
+                if (logger.isDebugEnabled()) {
+                    logger.debug("container was destroyed.");
+                }
+                return;
             }
+            logger.warn("Failed to store a failure url.", e);
+        }
+
+        super.processCrawlingAccessException(objs);
+        if (objs.length > 1 && objs[1] instanceof UrlQueue<?> urlQueue) {
+            ComponentUtil.getCrawlerStatsHelper().record(urlQueue, "access_exception");
+        }
+    }
+
+    @Override
+    protected void processCrawlingException(final Object... objs) {
+        try {
+            final CrawlerContext crawlerContext = (CrawlerContext) objs[0];
+            final UrlQueue<?> urlQueue = (UrlQueue<?>) objs[1];
+            final Throwable e = (Throwable) objs[2];
+
+            storeFailureUrl(crawlerContext, urlQueue, e.getClass().getCanonicalName(), e);
         } catch (final ContainerNotAvailableException e) {
             if (logger.isDebugEnabled()) {
                 logger.debug("container was destroyed.");
@@ -87,10 +146,13 @@ public class CrawlerLogHelper extends LogHelperImpl {
             logger.warn("Failed to store a failure url.", e);
         }
 
-        super.log(key, objs);
+        super.processCrawlingException(objs);
+        if (objs.length > 1 && objs[1] instanceof UrlQueue<?> urlQueue) {
+            ComponentUtil.getCrawlerStatsHelper().record(urlQueue, "exception");
+        }
     }
 
-    private void storeFailureUrl(final CrawlerContext crawlerContext, final UrlQueue<?> urlQueue, final String errorName,
+    protected void storeFailureUrl(final CrawlerContext crawlerContext, final UrlQueue<?> urlQueue, final String errorName,
             final Throwable e) {
 
         final CrawlingConfig crawlingConfig = getCrawlingConfig(crawlerContext.getSessionId());
@@ -100,7 +162,7 @@ public class CrawlerLogHelper extends LogHelperImpl {
         failureUrlService.store(crawlingConfig, errorName, url, e);
     }
 
-    private CrawlingConfig getCrawlingConfig(final String sessionCountId) {
+    protected CrawlingConfig getCrawlingConfig(final String sessionCountId) {
         return ComponentUtil.getCrawlingConfigHelper().get(sessionCountId);
     }
 

+ 169 - 0
src/main/java/org/codelibs/fess/helper/CrawlerStatsHelper.java

@@ -0,0 +1,169 @@
+/*
+ * Copyright 2012-2022 CodeLibs Project and the Others.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+package org.codelibs.fess.helper;
+
+import java.util.Date;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+
+import javax.annotation.PostConstruct;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.codelibs.fess.crawler.entity.UrlQueue;
+import org.codelibs.fess.taglib.FessFunctions;
+import org.dbflute.optional.OptionalThing;
+
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.CacheLoader;
+import com.google.common.cache.LoadingCache;
+
+/**
+ * @author shinsuke
+ *
+ */
+public class CrawlerStatsHelper {
+    private static final String BEGIN_KEY = "begin";
+
+    protected Logger logger = null;
+
+    protected String loggerName = "fess.log.crawler.stats";
+
+    protected long maxCacheSize = 1000;
+
+    protected long cacheExpireAfterWrite = 10 * 60 * 1000L;
+
+    protected LoadingCache<String, Map<String, Long>> statsCache;
+
+    @PostConstruct
+    public void init() {
+        logger = LogManager.getLogger(loggerName);
+        statsCache = CacheBuilder.newBuilder().maximumSize(maxCacheSize).expireAfterWrite(cacheExpireAfterWrite, TimeUnit.MILLISECONDS)
+                .build(new CacheLoader<String, Map<String, Long>>() {
+                    @Override
+                    public Map<String, Long> load(String key) {
+                        Map<String, Long> map = new LinkedHashMap<>();
+                        map.put(BEGIN_KEY, System.currentTimeMillis());
+                        return map;
+                    }
+                });
+    }
+
+    public void begin(final Object keyObj) {
+        getCacheKey(keyObj).ifPresent(key -> {
+            try {
+                statsCache.get(key);
+            } catch (Exception e) {
+                final StringBuilder buf = createStringBuffer(keyObj, System.currentTimeMillis());
+                buf.append('\t').append("action:begin");
+                buf.append('\t').append("error:").append(escapeValue(e.getLocalizedMessage()).replaceAll("\\s", " "));
+                log(buf);
+            }
+        });
+    }
+
+    public void record(final Object keyObj, final String action) {
+        getCacheKey(keyObj).ifPresent(key -> {
+            try {
+                Map<String, Long> data = statsCache.getIfPresent(key);
+                if (data != null) {
+                    data.put(escapeValue(action), System.currentTimeMillis());
+                }
+            } catch (Exception e) {
+                final StringBuilder buf = createStringBuffer(keyObj, System.currentTimeMillis());
+                buf.append('\t').append("action:record");
+                buf.append('\t').append("error:").append(escapeValue(e.getLocalizedMessage()).replaceAll("\\s", " "));
+                log(buf);
+            }
+        });
+    }
+
+    public void done(final Object keyObj) {
+        getCacheKey(keyObj).ifPresent(key -> {
+            try {
+                final Map<String, Long> data = statsCache.getIfPresent(key);
+                if (data != null) {
+                    statsCache.invalidate(key);
+                    final Long begin = data.remove(BEGIN_KEY);
+                    if (begin != null) {
+                        final StringBuilder buf = createStringBuffer(keyObj, begin.longValue());
+                        buf.append('\t').append("done:").append(System.currentTimeMillis() - begin.longValue());
+                        data.entrySet().stream().map(e -> escapeValue(e.getKey()) + ":" + (e.getValue().longValue() - begin.longValue()))
+                                .map(s -> "\t" + s).forEach(s -> buf.append(s));
+                        log(buf);
+                    }
+                }
+            } catch (Exception e) {
+                final StringBuilder buf = createStringBuffer(keyObj, System.currentTimeMillis());
+                buf.append('\t').append("action:done");
+                buf.append('\t').append("error:").append(escapeValue(e.getLocalizedMessage()).replaceAll("\\s", " "));
+                log(buf);
+            }
+        });
+    }
+
+    private StringBuilder createStringBuffer(final Object keyObj, final long time) {
+        final StringBuilder buf = new StringBuilder(1000);
+        buf.append("url:").append(getUrl(keyObj));
+        buf.append('\t');
+        buf.append("time:").append(FessFunctions.formatDate(new Date(time)));
+        return buf;
+    }
+
+    protected String getUrl(final Object keyObj) {
+        if (keyObj instanceof UrlQueue<?> urlQueue) {
+            return escapeValue(urlQueue.getUrl());
+        } else if (keyObj instanceof String key) {
+            return escapeValue(key);
+        } else if (keyObj instanceof Number key) {
+            return key.toString();
+        }
+        return "-";
+    }
+
+    protected OptionalThing<String> getCacheKey(final Object keyObj) {
+        if (keyObj instanceof UrlQueue<?> urlQueue) {
+            return OptionalThing.of(urlQueue.getId().toString());
+        } else if (keyObj instanceof String key) {
+            return OptionalThing.of(key);
+        } else if (keyObj instanceof Number key) {
+            return OptionalThing.of(key.toString());
+        }
+        return OptionalThing.empty();
+    }
+
+    protected String escapeValue(String action) {
+        return action.replace('\t', ' ');
+    }
+
+    protected void log(final StringBuilder buf) {
+        logger.info(buf.toString());
+    }
+
+    public void setLoggerName(String loggerName) {
+        this.loggerName = loggerName;
+    }
+
+    public void setMaxCacheSize(long maxCacheSize) {
+        this.maxCacheSize = maxCacheSize;
+    }
+
+    public void setCacheExpireAfterWrite(long cacheExpireAfterWrite) {
+        this.cacheExpireAfterWrite = cacheExpireAfterWrite;
+    }
+
+}

+ 7 - 0
src/main/java/org/codelibs/fess/util/ComponentUtil.java

@@ -39,6 +39,7 @@ import org.codelibs.fess.es.client.SearchEngineClient;
 import org.codelibs.fess.exception.ContainerNotAvailableException;
 import org.codelibs.fess.helper.AccessTokenHelper;
 import org.codelibs.fess.helper.ActivityHelper;
+import org.codelibs.fess.helper.CrawlerStatsHelper;
 import org.codelibs.fess.helper.CrawlingConfigHelper;
 import org.codelibs.fess.helper.CrawlingInfoHelper;
 import org.codelibs.fess.helper.CurlHelper;
@@ -208,6 +209,8 @@ public final class ComponentUtil {
 
     private static final String RELATED_QUERY_HELPER = "relatedQueryHelper";
 
+    private static final String CRAWLER_STATS_HELPER = "crawlerStatsHelper";
+
     private static IndexingHelper indexingHelper;
 
     private static CrawlingConfigHelper crawlingConfigHelper;
@@ -505,6 +508,10 @@ public final class ComponentUtil {
         return getComponent(SCRIPT_ENGINE_FACTORY);
     }
 
+    public static CrawlerStatsHelper getCrawlerStatsHelper() {
+        return getComponent(CRAWLER_STATS_HELPER);
+    }
+
     public static <T> T getComponent(final Class<T> clazz) {
         try {
             return SingletonLaContainer.getComponent(clazz);

+ 3 - 0
src/main/webapp/WEB-INF/env/crawler/resources/app.xml

@@ -44,6 +44,9 @@
 		</postConstruct>
 		 -->
 	</component>
+	<component name="crawlerStatsHelper"
+		class="org.codelibs.fess.helper.CrawlerStatsHelper">
+	</component>
 	<component name="fessCrawler" class="org.codelibs.fess.exec.Crawler"
 		instance="prototype">
 	</component>

+ 78 - 0
src/main/webapp/WEB-INF/env/crawler/resources/log4j2.xml

@@ -0,0 +1,78 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Configuration status="WARN">
+
+	<Properties>
+		<Property name="domain.name" value="${sys:fess.log.name:-fess}" />
+		<Property name="log.level" value="${sys:fess.log.level:-debug}" />
+		<Property name="root.log.level" value="${sys:fess.log.level:-info}" />
+		<Property name="log.pattern" value="%d [%t] %-5p %msg%n" />
+		<Property name="log.file.basedir" value="${sys:fess.log.path:-target/logs}" />
+		<Property name="backup.date.suffix" value="_%d{yyyyMMdd}" />
+		<Property name="backup.max.history" value="90" />
+		<Property name="stats.log.pattern" value="%msg%n" />
+	</Properties>
+
+	<Appenders>
+		<Routing name="Console">
+			<Routes>
+				<Script name="RoutingInit" language="JavaScript"><![CDATA[
+					java.lang.System.getProperty("lasta.env") == null ? "Output" : "NoOutput";]]>
+				</Script>
+				<Route key="Output">
+					<Console name="OutputConsole" target="SYSTEM_OUT">
+						<PatternLayout pattern="${log.pattern}" />
+					</Console>
+				</Route>
+				<Route key="NoOutput">
+					<CountingNoOp name="DevNull" />
+				</Route>
+			</Routes>
+		</Routing>
+		<RollingFile name="AppFile" fileName="${log.file.basedir}/${domain.name}.log"
+			filePattern="${log.file.basedir}/${domain.name}${backup.date.suffix}-%i.log.gz">
+			<PatternLayout>
+				<Pattern>${log.pattern}</Pattern>
+			</PatternLayout>
+			<Policies>
+				<TimeBasedTriggeringPolicy />
+				<SizeBasedTriggeringPolicy size="100 MB" />
+			</Policies>
+			<DefaultRolloverStrategy fileIndex="max" min="1"
+				max="${backup.max.history}" compressionLevel="9" />
+		</RollingFile>
+		<RollingFile name="StatsFile" fileName="${log.file.basedir}/fess-urls.log"
+			filePattern="${log.file.basedir}/fess-urls${backup.date.suffix}-%i.log.gz">
+			<PatternLayout>
+				<Pattern>${stats.log.pattern}</Pattern>
+			</PatternLayout>
+			<Policies>
+				<TimeBasedTriggeringPolicy />
+				<SizeBasedTriggeringPolicy size="100 MB" />
+			</Policies>
+			<DefaultRolloverStrategy fileIndex="max" min="1"
+				max="${backup.max.history}" compressionLevel="9" />
+		</RollingFile>
+	</Appenders>
+
+	<Loggers>
+		<Logger name="org.codelibs" additivity="false" level="${log.level}">
+			<AppenderRef ref="Console" />
+			<AppenderRef ref="AppFile" />
+		</Logger>
+		<Logger name="org.dbflute" additivity="false" level="${log.level}">
+			<AppenderRef ref="Console" />
+			<AppenderRef ref="AppFile" />
+		</Logger>
+		<Logger name="org.lastaflute" additivity="false" level="${log.level}">
+			<AppenderRef ref="Console" />
+			<AppenderRef ref="AppFile" />
+		</Logger>
+		<Logger name="fess.log.crawler.stats" additivity="false" level="info">
+			<AppenderRef ref="StatsFile" />
+		</Logger>
+		<Root level="${root.log.level}">
+			<AppenderRef ref="Console" />
+			<AppenderRef ref="AppFile" />
+		</Root>
+	</Loggers>
+</Configuration>

+ 0 - 1
src/main/webapp/WEB-INF/env/thumbnail/resources/log4j2.xml

@@ -9,7 +9,6 @@
 		<Property name="log.file.basedir" value="${sys:fess.log.path:-target/logs}" />
 		<Property name="backup.date.suffix" value="_%d{yyyyMMdd}" />
 		<Property name="backup.max.history" value="90" />
-		<Property name="audit.log.pattern" value="%msg%n" />
 	</Properties>
 
 	<Appenders>

+ 95 - 0
src/test/java/org/codelibs/fess/helper/CrawlerStatsHelperTest.java

@@ -0,0 +1,95 @@
+/*
+ * Copyright 2012-2022 CodeLibs Project and the Others.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+package org.codelibs.fess.helper;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.codelibs.fess.unit.UnitFessTestCase;
+
+public class CrawlerStatsHelperTest extends UnitFessTestCase {
+
+    private static final Logger logger = LogManager.getLogger(CrawlerStatsHelperTest.class);
+
+    private CrawlerStatsHelper crawlerStatsHelper;
+
+    private ThreadLocal<String> localLogMsg = new ThreadLocal<>();
+
+    @Override
+    public void setUp() throws Exception {
+        super.setUp();
+        crawlerStatsHelper = new CrawlerStatsHelper() {
+            @Override
+            protected void log(final StringBuilder buf) {
+                localLogMsg.set(buf.toString());
+            }
+        };
+        crawlerStatsHelper.init();
+    }
+
+    public void test_beginDone() {
+        String key = "test";
+        crawlerStatsHelper.begin(key);
+        crawlerStatsHelper.done(key);
+        logger.info(localLogMsg.get());
+        String[] values = localLogMsg.get().split("\t");
+        assertEquals(3, values.length);
+        assertEquals("url:test", values[0]);
+        assertTrue(values[1].startsWith("time:"));
+        assertTrue(values[2].startsWith("done:"));
+
+        localLogMsg.remove();
+        crawlerStatsHelper.done(key);
+        assertNull(localLogMsg.get());
+    }
+
+    public void test_beginDoneWithRecord1() {
+        String key = "test";
+        crawlerStatsHelper.begin(key);
+        crawlerStatsHelper.record(key, "aaa");
+        crawlerStatsHelper.done(key);
+        logger.info(localLogMsg.get());
+        String[] values = localLogMsg.get().split("\t");
+        assertEquals(4, values.length);
+        assertEquals("url:test", values[0]);
+        assertTrue(values[1].startsWith("time:"));
+        assertTrue(values[2].startsWith("done:"));
+        assertTrue(values[3].startsWith("aaa:"));
+
+        localLogMsg.remove();
+        crawlerStatsHelper.done(key);
+        assertNull(localLogMsg.get());
+    }
+
+    public void test_beginDoneWithRecord2() {
+        String key = "test";
+        crawlerStatsHelper.begin(key);
+        crawlerStatsHelper.record(key, "aaa");
+        crawlerStatsHelper.record(key, "bbb");
+        crawlerStatsHelper.done(key);
+        logger.info(localLogMsg.get());
+        String[] values = localLogMsg.get().split("\t");
+        assertEquals(5, values.length);
+        assertEquals("url:test", values[0]);
+        assertTrue(values[1].startsWith("time:"));
+        assertTrue(values[2].startsWith("done:"));
+        assertTrue(values[3].startsWith("aaa:"));
+        assertTrue(values[4].startsWith("bbb:"));
+
+        localLogMsg.remove();
+        crawlerStatsHelper.done(key);
+        assertNull(localLogMsg.get());
+    }
+}