fix #2666 add HotThreadMonitorTarget

This commit is contained in:
Shinsuke Sugaya 2022-07-09 15:22:17 +09:00
parent 1c25ecf20c
commit b776e427b5
7 changed files with 286 additions and 27 deletions

View file

@ -59,6 +59,7 @@ import org.codelibs.fess.helper.PathMappingHelper;
import org.codelibs.fess.helper.WebFsIndexHelper;
import org.codelibs.fess.mylasta.direction.FessConfig;
import org.codelibs.fess.mylasta.mail.CrawlerPostcard;
import org.codelibs.fess.timer.HotThreadMonitorTarget;
import org.codelibs.fess.timer.SystemMonitorTarget;
import org.codelibs.fess.util.ComponentUtil;
import org.codelibs.fess.util.ThreadDumpUtil;
@ -130,6 +131,9 @@ public class Crawler {
@Option(name = "-e", aliases = "--expires", metaVar = "expires", usage = "Expires for documents")
public String expires;
@Option(name = "-h", aliases = "--hotThread", metaVar = "hotThread", usage = "Interval for Hot Thread logging")
public Integer hotThread;
protected Options() {
// nothing
}
@ -168,7 +172,7 @@ public class Crawler {
public String toString() {
return "Options [sessionId=" + sessionId + ", name=" + name + ", webConfigIds=" + webConfigIds + ", fileConfigIds="
+ fileConfigIds + ", dataConfigIds=" + dataConfigIds + ", propertiesPath=" + propertiesPath + ", expires=" + expires
+ "]";
+ ", hotThread=" + hotThread + "]";
}
}
@ -212,6 +216,7 @@ public class Crawler {
}
TimeoutTask systemMonitorTask = null;
TimeoutTask hotThreadMonitorTask = null;
Thread commandThread = null;
int exitCode;
try {
@ -263,6 +268,10 @@ public class Crawler {
systemMonitorTask = TimeoutManager.getInstance().addTimeoutTarget(new SystemMonitorTarget(),
ComponentUtil.getFessConfig().getCrawlerSystemMonitorIntervalAsInteger(), true);
if (options.hotThread != null) {
hotThreadMonitorTask = TimeoutManager.getInstance().addTimeoutTarget(new HotThreadMonitorTarget(), options.hotThread, true);
}
exitCode = process(options);
} catch (final ContainerNotAvailableException e) {
if (logger.isDebugEnabled()) {
@ -281,6 +290,9 @@ public class Crawler {
if (systemMonitorTask != null) {
systemMonitorTask.cancel();
}
if (hotThreadMonitorTask != null) {
hotThreadMonitorTask.cancel();
}
destroyContainer();
}

View file

@ -58,6 +58,8 @@ public class CrawlJob extends ExecJob {
protected int documentExpires = -2;
protected int hotThreadInterval = -1;
public CrawlJob namespace(final String namespace) {
this.namespace = namespace;
return this;
@ -83,6 +85,11 @@ public class CrawlJob extends ExecJob {
return this;
}
/**
 * Sets the interval used for hot thread logging in the crawler process.
 * <p>
 * The value is only stored here; when the crawler command line is built, a value
 * greater than -1 is passed to the crawler as the argument of the {@code -h} option.
 * NOTE(review): the time unit is not visible in this class - presumably the same unit
 * the timer uses for the system monitor interval; confirm against TimeoutManager.
 *
 * @param hotThreadInterval the interval for hot thread logging
 * @return this job instance, for method chaining
 */
public CrawlJob hotThread(final int hotThreadInterval) {
this.hotThreadInterval = hotThreadInterval;
return this;
}
@Override
public String execute() {
// check # of crawler processes
@ -322,6 +329,10 @@ public class CrawlJob extends ExecJob {
cmdList.add("-e");
cmdList.add(Integer.toString(documentExpires));
}
if (hotThreadInterval > -1) {
cmdList.add("-h");
cmdList.add(Integer.toString(hotThreadInterval));
}
final File propFile = ComponentUtil.getSystemHelper().createTempFile(getExecuteType() + "_", ".properties");
try {

View file

@ -358,6 +358,24 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
/** The key of the configuration. e.g. 60 */
String CRAWLER_SYSTEM_MONITOR_INTERVAL = "crawler.system.monitor.interval";
/** The key of the configuration. e.g. true */
String CRAWLER_HOTTHREAD_ignore_idle_threads = "crawler.hotthread.ignore_idle_threads";
/** The key of the configuration. e.g. 500ms */
String CRAWLER_HOTTHREAD_INTERVAL = "crawler.hotthread.interval";
/** The key of the configuration. e.g. 10 */
String CRAWLER_HOTTHREAD_SNAPSHOTS = "crawler.hotthread.snapshots";
/** The key of the configuration. e.g. 3 */
String CRAWLER_HOTTHREAD_THREADS = "crawler.hotthread.threads";
/** The key of the configuration. e.g. 30s */
String CRAWLER_HOTTHREAD_TIMEOUT = "crawler.hotthread.timeout";
/** The key of the configuration. e.g. cpu */
String CRAWLER_HOTTHREAD_TYPE = "crawler.hotthread.type";
/** The key of the configuration. e.g. resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.* */
String CRAWLER_METADATA_CONTENT_EXCLUDES = "crawler.metadata.content.excludes";
@ -2713,6 +2731,71 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
*/
Integer getCrawlerSystemMonitorIntervalAsInteger();
/**
* Get the value for the key 'crawler.hotthread.ignore_idle_threads'. <br>
* The value is, e.g. true <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getCrawlerHotthreadIgnoreIdleThreads();
/**
* Is the property for the key 'crawler.hotthread.ignore_idle_threads' true? <br>
* The value is, e.g. true <br>
* @return The determination, true or false. (if not found, exception but basically no way)
*/
boolean isCrawlerHotthreadIgnoreIdleThreads();
/**
* Get the value for the key 'crawler.hotthread.interval'. <br>
* The value is, e.g. 500ms <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getCrawlerHotthreadInterval();
/**
* Get the value for the key 'crawler.hotthread.snapshots'. <br>
* The value is, e.g. 10 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getCrawlerHotthreadSnapshots();
/**
* Get the value for the key 'crawler.hotthread.snapshots' as {@link Integer}. <br>
* The value is, e.g. 10 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
* @throws NumberFormatException When the property is not integer.
*/
Integer getCrawlerHotthreadSnapshotsAsInteger();
/**
* Get the value for the key 'crawler.hotthread.threads'. <br>
* The value is, e.g. 3 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getCrawlerHotthreadThreads();
/**
* Get the value for the key 'crawler.hotthread.threads' as {@link Integer}. <br>
* The value is, e.g. 3 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
* @throws NumberFormatException When the property is not integer.
*/
Integer getCrawlerHotthreadThreadsAsInteger();
/**
* Get the value for the key 'crawler.hotthread.timeout'. <br>
* The value is, e.g. 30s <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getCrawlerHotthreadTimeout();
/**
* Get the value for the key 'crawler.hotthread.type'. <br>
* The value is, e.g. cpu <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getCrawlerHotthreadType();
/**
* Get the value for the key 'crawler.metadata.content.excludes'. <br>
* The value is, e.g. resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.* <br>
@ -7766,6 +7849,42 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
return getAsInteger(FessConfig.CRAWLER_SYSTEM_MONITOR_INTERVAL);
}
public String getCrawlerHotthreadIgnoreIdleThreads() {
return get(FessConfig.CRAWLER_HOTTHREAD_ignore_idle_threads);
}
public boolean isCrawlerHotthreadIgnoreIdleThreads() {
return is(FessConfig.CRAWLER_HOTTHREAD_ignore_idle_threads);
}
public String getCrawlerHotthreadInterval() {
return get(FessConfig.CRAWLER_HOTTHREAD_INTERVAL);
}
public String getCrawlerHotthreadSnapshots() {
return get(FessConfig.CRAWLER_HOTTHREAD_SNAPSHOTS);
}
public Integer getCrawlerHotthreadSnapshotsAsInteger() {
return getAsInteger(FessConfig.CRAWLER_HOTTHREAD_SNAPSHOTS);
}
public String getCrawlerHotthreadThreads() {
return get(FessConfig.CRAWLER_HOTTHREAD_THREADS);
}
public Integer getCrawlerHotthreadThreadsAsInteger() {
return getAsInteger(FessConfig.CRAWLER_HOTTHREAD_THREADS);
}
public String getCrawlerHotthreadTimeout() {
return get(FessConfig.CRAWLER_HOTTHREAD_TIMEOUT);
}
public String getCrawlerHotthreadType() {
return get(FessConfig.CRAWLER_HOTTHREAD_TYPE);
}
public String getCrawlerMetadataContentExcludes() {
return get(FessConfig.CRAWLER_METADATA_CONTENT_EXCLUDES);
}
@ -10292,6 +10411,12 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
defaultMap.put(FessConfig.CRAWLER_IGNORE_CONTENT_EXCEPTION, "true");
defaultMap.put(FessConfig.CRAWLER_FAILURE_URL_STATUS_CODES, "404");
defaultMap.put(FessConfig.CRAWLER_SYSTEM_MONITOR_INTERVAL, "60");
defaultMap.put(FessConfig.CRAWLER_HOTTHREAD_ignore_idle_threads, "true");
defaultMap.put(FessConfig.CRAWLER_HOTTHREAD_INTERVAL, "500ms");
defaultMap.put(FessConfig.CRAWLER_HOTTHREAD_SNAPSHOTS, "10");
defaultMap.put(FessConfig.CRAWLER_HOTTHREAD_THREADS, "3");
defaultMap.put(FessConfig.CRAWLER_HOTTHREAD_TIMEOUT, "30s");
defaultMap.put(FessConfig.CRAWLER_HOTTHREAD_TYPE, "cpu");
defaultMap.put(FessConfig.CRAWLER_METADATA_CONTENT_EXCLUDES,
"resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.*");
defaultMap.put(FessConfig.CRAWLER_METADATA_NAME_MAPPING, "title=title:string\nTitle=title:string\n");

View file

@ -0,0 +1,70 @@
/*
* Copyright 2012-2022 CodeLibs Project and the Others.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
package org.codelibs.fess.timer;
import java.util.stream.Collectors;
import org.apache.commons.text.StringEscapeUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codelibs.fess.Constants;
import org.codelibs.fess.es.client.SearchEngineClient;
import org.codelibs.fess.mylasta.direction.FessConfig;
import org.codelibs.fess.util.ComponentUtil;
import org.opensearch.action.admin.cluster.node.hotthreads.NodesHotThreadsResponse;
import org.opensearch.common.unit.TimeValue;
/**
 * Timer target that asks the search engine for its hot threads and writes the
 * result to the log as a single JSON-like line prefixed with {@code [HOTTHREAD MONITOR]}.
 * <p>
 * The request is configured from the {@code crawler.hotthread.*} settings
 * (ignore_idle_threads, interval, threads, timeout, type).
 * NOTE(review): {@code crawler.hotthread.snapshots} is defined in the configuration
 * but is not applied to the request here - confirm whether that is intended.
 */
public class HotThreadMonitorTarget extends MonitorTarget {

    private static final Logger logger = LogManager.getLogger(HotThreadMonitorTarget.class);

    /**
     * Collects the hot threads of every node and logs them.
     * <p>
     * Any failure (invalid configuration value, search engine not reachable, timeout)
     * is reported inside the logged object as an {@code exception} entry instead of
     * being propagated to the timer.
     */
    @Override
    public void expired() {
        final StringBuilder buf = new StringBuilder(1000);

        buf.append("[HOTTHREAD MONITOR] ");
        buf.append('{');

        try {
            // Configuration is read inside the try block so that a malformed value
            // (e.g. an unparsable interval) ends up in the monitor output.
            final FessConfig fessConfig = ComponentUtil.getFessConfig();
            final boolean ignoreIdleThreads = Constants.TRUE.equalsIgnoreCase(fessConfig.getCrawlerHotthreadIgnoreIdleThreads());
            final TimeValue interval = TimeValue.parseTimeValue(fessConfig.getCrawlerHotthreadInterval(), "crawler.hotthread.interval");
            final int threads = fessConfig.getCrawlerHotthreadThreadsAsInteger();
            final String timeout = fessConfig.getCrawlerHotthreadTimeout();
            final String type = fessConfig.getCrawlerHotthreadType();

            final SearchEngineClient esClient = ComponentUtil.getSearchEngineClient();
            final NodesHotThreadsResponse response =
                    esClient.admin().cluster().prepareNodesHotThreads().setIgnoreIdleThreads(ignoreIdleThreads).setInterval(interval)
                            .setThreads(threads).setTimeout(timeout).setType(type).execute().actionGet(timeout);
            append(buf, "cluster_name", () -> response.getClusterName().value()).append(',');

            // append() already JSON-escapes string values, so the raw hot thread text is
            // handed over as-is; only the key needs escaping because append() writes it verbatim.
            final String hotThreads = response.getNodesMap().entrySet().stream()
                    .map(entry -> append(new StringBuilder(), StringEscapeUtils.escapeJson(entry.getKey()),
                            () -> entry.getValue().getHotThreads()).toString())
                    .collect(Collectors.joining(","));
            // Skip the separator when no node reported anything; otherwise the output would contain ",,".
            if (!hotThreads.isEmpty()) {
                buf.append(hotThreads).append(',');
            }
        } catch (final Exception e) {
            appendException(buf, e).append(',');
        }

        appendTimestamp(buf);

        buf.append('}');

        logger.info(buf.toString());
    }
}

View file

@ -0,0 +1,58 @@
package org.codelibs.fess.timer;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.function.Supplier;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.commons.text.StringEscapeUtils;
import org.codelibs.core.timer.TimeoutTarget;
import org.codelibs.fess.Constants;
import org.codelibs.fess.helper.SystemHelper;
import org.codelibs.fess.taglib.FessFunctions;
import org.codelibs.fess.util.ComponentUtil;
/**
 * Base class for timer targets that build a JSON-like monitoring line.
 * <p>
 * It provides helpers to append {@code "key":value} pairs, a timestamp entry and an
 * exception entry to a {@link StringBuilder}. Callers are responsible for the
 * surrounding braces and for the commas between entries.
 */
public abstract class MonitorTarget implements TimeoutTarget {

    /**
     * Appends {@code "key":value} to the buffer.
     * <p>
     * The value is rendered as follows: {@code null} as {@code null}; Integer, Long and
     * Short as bare numbers; {@code double[]} via {@link Arrays#toString(double[])};
     * anything else as a quoted, JSON-escaped string. If the supplier throws, {@code null}
     * is written instead. The key is written verbatim, so callers must escape it if needed.
     *
     * @param buf the buffer to append to
     * @param key the entry name, written without escaping
     * @param supplier provides the value; exceptions thrown by it are swallowed on purpose
     * @return the given buffer
     */
    protected StringBuilder append(final StringBuilder buf, final String key, final Supplier<Object> supplier) {
        // Build the entry separately so the buffer only ever receives a complete pair.
        final StringBuilder tempBuf = new StringBuilder();
        tempBuf.append('"').append(key).append("\":");
        try {
            final Object value = supplier.get();
            if (value == null) {
                tempBuf.append("null");
            } else if ((value instanceof Integer) || (value instanceof Long)) {
                tempBuf.append(value);
            } else if (value instanceof Short) {
                tempBuf.append(((Short) value).shortValue());
            } else if (value instanceof double[]) {
                tempBuf.append(Arrays.toString((double[]) value));
            } else {
                tempBuf.append('"').append(StringEscapeUtils.escapeJson(value.toString())).append('"');
            }
        } catch (final Exception e) {
            // Best effort: a value that cannot be obtained is reported as null.
            tempBuf.append("null");
        }
        buf.append(tempBuf.toString());
        return buf;
    }

    /**
     * Appends a {@code "timestamp"} entry holding the formatted current time of the system helper.
     *
     * @param buf the buffer to append to
     * @return the given buffer
     */
    protected StringBuilder appendTimestamp(final StringBuilder buf) {
        final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
        append(buf, "timestamp", () -> FessFunctions.formatDate(systemHelper.getCurrentTime()));
        return buf;
    }

    /**
     * Appends an {@code "exception"} entry containing the stack trace of the given exception.
     * <p>
     * The text is passed to {@link #append(StringBuilder, String, Supplier)} unescaped,
     * because that method already JSON-escapes string values; escaping here as well would
     * double-escape line breaks and quotes in the logged stack trace.
     *
     * @param buf the buffer to append to
     * @param exception the exception to report
     * @return the given buffer
     */
    protected StringBuilder appendException(final StringBuilder buf, final Exception exception) {
        try (ByteArrayOutputStream baos = new ByteArrayOutputStream();
                PrintWriter writer = new PrintWriter(baos, false, Constants.CHARSET_UTF_8)) {
            exception.printStackTrace(writer);
            writer.flush();
            append(buf, "exception", () -> new String(baos.toByteArray(), Constants.CHARSET_UTF_8));
        } catch (final IOException ioe) {
            // Closing the in-memory stream failed; report that failure's message instead.
            append(buf, "exception", ioe::getMessage);
        }
        return buf;
    }
}

View file

@ -19,13 +19,11 @@ import java.io.ByteArrayOutputStream;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.List;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import org.apache.commons.text.StringEscapeUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codelibs.core.timer.TimeoutTarget;
import org.codelibs.fess.Constants;
import org.codelibs.fess.es.client.SearchEngineClient;
import org.codelibs.fess.util.ComponentUtil;
@ -43,30 +41,9 @@ import org.opensearch.monitor.os.OsProbe;
import org.opensearch.monitor.os.OsStats;
import org.opensearch.monitor.process.ProcessProbe;
public class SystemMonitorTarget implements TimeoutTarget {
public class SystemMonitorTarget extends MonitorTarget {
private static final Logger logger = LogManager.getLogger(SystemMonitorTarget.class);
protected StringBuilder append(final StringBuilder buf, final String key, final Supplier<Object> supplier) {
buf.append('"').append(key).append("\":");
try {
final Object value = supplier.get();
if (value == null) {
buf.append("null");
} else if ((value instanceof Integer) || (value instanceof Long)) {
buf.append((value));
} else if (value instanceof Short) {
buf.append(((Short) value).shortValue());
} else if (value instanceof double[]) {
buf.append(Arrays.toString((double[]) value));
} else {
buf.append('"').append(StringEscapeUtils.escapeJson(value.toString())).append('"');
}
} catch (final Exception e) {
buf.append("null");
}
return buf;
}
@Override
public void expired() {
final StringBuilder buf = new StringBuilder(1000);
@ -79,7 +56,7 @@ public class SystemMonitorTarget implements TimeoutTarget {
appendJvmStats(buf);
appendFesenStats(buf);
append(buf, "timestamp", System::currentTimeMillis);
appendTimestamp(buf);
buf.append('}');
logger.info(buf.toString());
@ -189,7 +166,7 @@ public class SystemMonitorTarget implements TimeoutTarget {
stats = ((ByteArrayOutputStream) out).toString(Constants.UTF_8);
}
} catch (final Exception e) {
logger.debug("Failed to access Fesen stats.", e);
appendException(buf, e).append(',');
}
buf.append("\"elasticsearch\":").append(stats).append(',');
}

View file

@ -216,6 +216,12 @@ crawler.ignore.robots.tags=false
crawler.ignore.content.exception=true
crawler.failure.url.status.codes=404
crawler.system.monitor.interval=60
crawler.hotthread.ignore_idle_threads=true
crawler.hotthread.interval=500ms
crawler.hotthread.snapshots=10
crawler.hotthread.threads=3
crawler.hotthread.timeout=30s
crawler.hotthread.type=cpu
crawler.metadata.content.excludes=resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.*
crawler.metadata.name.mapping=\
title=title:string\n\