diff --git a/src/main/java/org/codelibs/fess/ds/IndexUpdateCallback.java b/src/main/java/org/codelibs/fess/ds/IndexUpdateCallback.java index 6a152485d..a9947b2f6 100644 --- a/src/main/java/org/codelibs/fess/ds/IndexUpdateCallback.java +++ b/src/main/java/org/codelibs/fess/ds/IndexUpdateCallback.java @@ -17,16 +17,10 @@ package org.codelibs.fess.ds; import java.util.Map; -import org.codelibs.fess.es.client.FessEsClient; - public interface IndexUpdateCallback { boolean store(Map dataMap); - void setEsClient(FessEsClient fessEsClient); - - FessEsClient getsClient(); - long getDocumentSize(); long getExecuteTime(); diff --git a/src/main/java/org/codelibs/fess/ds/impl/AbstractDataStoreImpl.java b/src/main/java/org/codelibs/fess/ds/impl/AbstractDataStoreImpl.java index 8709aad48..e2cfa9d86 100644 --- a/src/main/java/org/codelibs/fess/ds/impl/AbstractDataStoreImpl.java +++ b/src/main/java/org/codelibs/fess/ds/impl/AbstractDataStoreImpl.java @@ -121,7 +121,7 @@ public abstract class AbstractDataStoreImpl implements DataStore { } return value; } catch (final Exception e) { - logger.warn("Invalid value format: " + template, e); + logger.warn("Invalid value format: " + template + " => " + paramMap, e); return null; } } diff --git a/src/main/java/org/codelibs/fess/ds/impl/CsvDataStoreImpl.java b/src/main/java/org/codelibs/fess/ds/impl/CsvDataStoreImpl.java index d3464d4c2..f169960ef 100644 --- a/src/main/java/org/codelibs/fess/ds/impl/CsvDataStoreImpl.java +++ b/src/main/java/org/codelibs/fess/ds/impl/CsvDataStoreImpl.java @@ -192,12 +192,16 @@ public class CsvDataStoreImpl extends AbstractDataStoreImpl { resultMap.putAll(paramMap); resultMap.put("csvfile", csvFile.getAbsolutePath()); resultMap.put("csvfilename", csvFile.getName()); + boolean foundValues = false; for (int i = 0; i < list.size(); i++) { String key = null; String value = list.get(i); if (value == null) { value = StringUtil.EMPTY; } + if (StringUtil.isNotBlank(value)) { + foundValues = true; + } if (headerList != null && headerList.size() > i) { key = headerList.get(i); if (StringUtil.isNotBlank(key)) { @@ -207,6 +211,10 @@ public class CsvDataStoreImpl extends AbstractDataStoreImpl { key = CELL_PREFIX + Integer.toString(i + 1); resultMap.put(key, value); } + if (!foundValues) { + logger.debug("No data in line: {}", resultMap); + continue; + } if (logger.isDebugEnabled()) { for (final Map.Entry entry : resultMap.entrySet()) { @@ -230,6 +238,8 @@ public class CsvDataStoreImpl extends AbstractDataStoreImpl { try { loop = callback.store(dataMap); } catch (final CrawlingAccessException e) { + logger.warn("Crawling Access Exception at : " + dataMap, e); + Throwable target = e; if (target instanceof MultipleCrawlingAccessException) { final Throwable[] causes = ((MultipleCrawlingAccessException) target).getCauses(); @@ -251,12 +261,9 @@ public class CsvDataStoreImpl extends AbstractDataStoreImpl { url = ((DataStoreCrawlingException) target).getUrl(); } else { url = csvFile.getAbsolutePath() + ":" + csvReader.getLineNumber(); - } final FailureUrlService failureUrlService = SingletonLaContainer.getComponent(FailureUrlService.class); failureUrlService.store(dataConfig, errorName, url, target); - - logger.warn("Crawling Access Exception at : " + dataMap, e); } catch (final Exception e) { final String url = csvFile.getAbsolutePath() + ":" + csvReader.getLineNumber(); final FailureUrlService failureUrlService = SingletonLaContainer.getComponent(FailureUrlService.class); diff --git a/src/main/java/org/codelibs/fess/ds/impl/FileListDataStoreImpl.java b/src/main/java/org/codelibs/fess/ds/impl/FileListDataStoreImpl.java index b4eee8237..49438f57c 100644 --- a/src/main/java/org/codelibs/fess/ds/impl/FileListDataStoreImpl.java +++ b/src/main/java/org/codelibs/fess/ds/impl/FileListDataStoreImpl.java @@ -239,9 +239,10 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl { deleteIdList.add(crawlingInfoHelper.generateId(dataMap)); if (deleteIdList.size() >= maxDeleteDocumentCacheSize) { + final FessEsClient fessEsClient = ComponentUtil.getElasticsearchClient(); final IndexingHelper indexingHelper = ComponentUtil.getIndexingHelper(); for (final String id : deleteIdList) { - indexingHelper.deleteDocument(indexUpdateCallback.getsClient(), id); + indexingHelper.deleteDocument(fessEsClient, id); } if (logger.isDebugEnabled()) { logger.debug("Deleted " + deleteIdList); @@ -256,9 +257,10 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl { @Override public void commit() { if (!deleteIdList.isEmpty()) { + final FessEsClient fessEsClient = ComponentUtil.getElasticsearchClient(); final IndexingHelper indexingHelper = ComponentUtil.getIndexingHelper(); for (final String id : deleteIdList) { - indexingHelper.deleteDocument(indexUpdateCallback.getsClient(), id); + indexingHelper.deleteDocument(fessEsClient, id); } if (logger.isDebugEnabled()) { logger.debug("Deleted " + deleteIdList); @@ -267,11 +269,6 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl { indexUpdateCallback.commit(); } - @Override - public void setEsClient(final FessEsClient fessEsClient) { - indexUpdateCallback.setEsClient(fessEsClient); - } - @Override public long getDocumentSize() { return indexUpdateCallback.getDocumentSize(); @@ -282,9 +279,5 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl { return indexUpdateCallback.getExecuteTime(); } - @Override - public FessEsClient getsClient() { - return indexUpdateCallback.getsClient(); - } } } diff --git a/src/main/java/org/codelibs/fess/ds/impl/IndexUpdateCallbackImpl.java b/src/main/java/org/codelibs/fess/ds/impl/IndexUpdateCallbackImpl.java index 4866af0ea..9d7dda4ee 100644 --- a/src/main/java/org/codelibs/fess/ds/impl/IndexUpdateCallbackImpl.java +++ b/src/main/java/org/codelibs/fess/ds/impl/IndexUpdateCallbackImpl.java @@ -35,14 +35,6 @@ import org.slf4j.LoggerFactory; public class IndexUpdateCallbackImpl implements IndexUpdateCallback { private static final Logger logger = LoggerFactory.getLogger(IndexUpdateCallbackImpl.class); - protected FessEsClient fessEsClient; - - public int maxDocumentCacheSize = 5; - - public boolean clickCountEnabled = true; - - public boolean favoriteCountEnabled = true; - protected volatile AtomicLong documentSize = new AtomicLong(0); protected volatile long executeTime = 0; @@ -56,6 +48,7 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback { public synchronized boolean store(final Map dataMap) { final long startTime = System.currentTimeMillis(); final FessConfig fessConfig = ComponentUtil.getFessConfig(); + final FessEsClient fessEsClient = ComponentUtil.getElasticsearchClient(); if (logger.isDebugEnabled()) { logger.debug("Adding " + dataMap); @@ -73,11 +66,11 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback { final String url = dataMap.get(fessConfig.getIndexFieldUrl()).toString(); - if (clickCountEnabled) { + if (fessConfig.getIndexerClickCountEnabledAsBoolean()) { addClickCountField(dataMap, url, fessConfig.getIndexFieldClickCount()); } - if (favoriteCountEnabled) { + if (fessConfig.getIndexerFavoriteCountEnabledAsBoolean()) { addFavoriteCountField(dataMap, url, fessConfig.getIndexFieldFavoriteCount()); } @@ -91,14 +84,11 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback { logger.debug("Added the document. " + "The number of a document cache is " + docList.size() + "."); } - if (docList.size() >= maxDocumentCacheSize) { + if (docList.size() >= fessConfig.getIndexerDataMaxDocumentCacheSizeAsInteger().intValue()) { indexingHelper.sendDocuments(fessEsClient, docList); } documentSize.getAndIncrement(); - if (!docList.isEmpty()) { - indexingHelper.sendDocuments(fessEsClient, docList); - } if (logger.isDebugEnabled()) { logger.debug("The number of an added document is " + documentSize.get() + "."); } @@ -111,6 +101,7 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback { public void commit() { if (!docList.isEmpty()) { final IndexingHelper indexingHelper = ComponentUtil.getIndexingHelper(); + final FessEsClient fessEsClient = ComponentUtil.getElasticsearchClient(); indexingHelper.sendDocuments(fessEsClient, docList); } } @@ -143,14 +134,4 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback { return executeTime; } - @Override - public FessEsClient getsClient() { - return fessEsClient; - } - - @Override - public void setEsClient(final FessEsClient fessEsClient) { - this.fessEsClient = fessEsClient; - } - } diff --git a/src/main/java/org/codelibs/fess/exec/Crawler.java b/src/main/java/org/codelibs/fess/exec/Crawler.java index c1ddaea95..a3d4f748f 100644 --- a/src/main/java/org/codelibs/fess/exec/Crawler.java +++ b/src/main/java/org/codelibs/fess/exec/Crawler.java @@ -30,7 +30,6 @@ import java.util.Map; import javax.annotation.Resource; import org.codelibs.core.CoreLibConstants; -import org.codelibs.core.beans.util.BeanUtil; import org.codelibs.core.lang.StringUtil; import org.codelibs.core.misc.DynamicProperties; import org.codelibs.fess.Constants; @@ -276,14 +275,11 @@ public class Crawler implements Serializable { final String toStrs = (String) crawlerProperties.get(Constants.NOTIFICATION_TO_PROPERTY); if (StringUtil.isNotBlank(toStrs)) { final String[] toAddresses = toStrs.split(","); - final Map dataMap = new HashMap(); + final Map dataMap = new HashMap<>(); for (final Map.Entry entry : infoMap.entrySet()) { dataMap.put(StringUtil.decapitalize(entry.getKey()), entry.getValue()); } - if (Constants.T.equals(infoMap.get(Constants.CRAWLER_STATUS))) { - dataMap.put("success", true); - } try { dataMap.put("hostname", InetAddress.getLocalHost().getHostAddress()); } catch (final UnknownHostException e) { @@ -298,11 +294,40 @@ public class Crawler implements Serializable { StreamUtil.of(toAddresses).forEach(address -> { postcard.addTo(address); }); - BeanUtil.copyMapToBean(dataMap, postcard); + postcard.setCommitEndTime(getValueOrEmpty(dataMap, "commitEndTime")); + postcard.setCommitExecTime(getValueOrEmpty(dataMap, "commitExecTime")); + postcard.setCommitStartTime(getValueOrEmpty(dataMap, "commitStartTime")); + postcard.setCrawlerEndTime(getValueOrEmpty(dataMap, "crawlerEndTime")); + postcard.setCrawlerExecTime(getValueOrEmpty(dataMap, "crawlerExecTime")); + postcard.setCrawlerStartTime(getValueOrEmpty(dataMap, "crawlerStartTime")); + postcard.setDataCrawlEndTime(getValueOrEmpty(dataMap, "dataCrawlEndTime")); + postcard.setDataCrawlExecTime(getValueOrEmpty(dataMap, "dataCrawlExecTime")); + postcard.setDataCrawlStartTime(getValueOrEmpty(dataMap, "dataCrawlStartTime")); + postcard.setDataFsIndexSize(getValueOrEmpty(dataMap, "dataFsIndexSize")); + postcard.setDataIndexExecTime(getValueOrEmpty(dataMap, "dataIndexExecTime")); + postcard.setHostname(getValueOrEmpty(dataMap, "hostname")); + postcard.setWebFsCrawlEndTime(getValueOrEmpty(dataMap, "webFsCrawlEndTime")); + postcard.setWebFsCrawlExecTime(getValueOrEmpty(dataMap, "webFsCrawlExecTime")); + postcard.setWebFsCrawlStartTime(getValueOrEmpty(dataMap, "webFsCrawlStartTime")); + postcard.setWebFsIndexExecTime(getValueOrEmpty(dataMap, "webFsIndexExecTime")); + postcard.setWebFsIndexSize(getValueOrEmpty(dataMap, "webFsIndexSize")); + if (Constants.T.equals(infoMap.get(Constants.CRAWLER_STATUS))) { + postcard.setStatus(Constants.OK); + } else { + postcard.setStatus(Constants.FAIL); + } }); } } + private String getValueOrEmpty(Map dataMap, String key) { + String value = dataMap.get(key); + if (value == null) { + return StringUtil.EMPTY; + } + return value; + } + public int doCrawl(final Options options) { if (logger.isInfoEnabled()) { logger.info("Starting Crawler.."); diff --git a/src/main/java/org/codelibs/fess/helper/DataIndexHelper.java b/src/main/java/org/codelibs/fess/helper/DataIndexHelper.java index 41c28e23f..2778f7094 100644 --- a/src/main/java/org/codelibs/fess/helper/DataIndexHelper.java +++ b/src/main/java/org/codelibs/fess/helper/DataIndexHelper.java @@ -28,6 +28,7 @@ import org.codelibs.core.lang.StringUtil; import org.codelibs.core.misc.DynamicProperties; import org.codelibs.fess.Constants; import org.codelibs.fess.app.service.DataConfigService; +import org.codelibs.fess.app.service.FailureUrlService; import org.codelibs.fess.ds.DataStore; import org.codelibs.fess.ds.DataStoreFactory; import org.codelibs.fess.ds.IndexUpdateCallback; @@ -235,6 +236,8 @@ public class DataIndexHelper implements Serializable { dataStore.store(dataConfig, indexUpdateCallback, initParamMap); } catch (final Exception e) { logger.error("Failed to process a data crawling: " + dataConfig.getName(), e); + ComponentUtil.getComponent(FailureUrlService.class).store(dataConfig, e.getClass().getCanonicalName(), + dataConfig.getConfigId() + ":" + dataConfig.getName(), e); } finally { indexUpdateCallback.commit(); deleteOldDocs(); diff --git a/src/main/java/org/codelibs/fess/indexer/IndexUpdater.java b/src/main/java/org/codelibs/fess/indexer/IndexUpdater.java index 8c0ee77a0..306b858cd 100644 --- a/src/main/java/org/codelibs/fess/indexer/IndexUpdater.java +++ b/src/main/java/org/codelibs/fess/indexer/IndexUpdater.java @@ -83,14 +83,8 @@ public class IndexUpdater extends Thread { @Resource protected IndexingHelper indexingHelper; - public int maxDocumentCacheSize = 5; - - public int maxInvalidDocumentSize = 100; - protected boolean finishCrawling = false; - public long updateInterval = 60000; // 1 min - protected long executeTime; protected long documentSize; @@ -103,16 +97,6 @@ public class IndexUpdater extends Thread { protected List finishedSessionIdList = new ArrayList<>(); - public long commitMarginTime = 10000; // 10ms - - public int maxEmptyListCount = 60; // 1hour - - public boolean threadDump = false; - - public boolean clickCountEnabled = true; - - public boolean favoriteCountEnabled = true; - private final List docBoostMatcherList = new ArrayList<>(); private final Map docValueMap = new HashMap<>(); @@ -160,6 +144,9 @@ public class IndexUpdater extends Thread { executeTime = 0; documentSize = 0; + final FessConfig fessConfig = ComponentUtil.getFessConfig(); + final long updateInterval = fessConfig.getIndexerWebfsUpdateIntervalAsInteger().longValue(); + final int maxEmptyListCount = fessConfig.getIndexerWebfsMaxEmptyListConuntAsInteger().intValue(); final IntervalControlHelper intervalControlHelper = ComponentUtil.getIntervalControlHelper(); try { final Consumer cb = @@ -172,10 +159,8 @@ public class IndexUpdater extends Thread { org.codelibs.fess.crawler.Constants.OK_STATUS)); builder.setQuery(queryBuilder); builder.setFrom(0); - if (maxDocumentCacheSize <= 0) { - maxDocumentCacheSize = 1; - } - builder.setSize(maxDocumentCacheSize); + final int maxDocumentCacheSize = fessConfig.getIndexerWebfsMaxDocumentCacheSizeAsInteger().intValue(); + builder.setSize(maxDocumentCacheSize <= 0 ? 1 : maxDocumentCacheSize); builder.addSort(EsAccessResult.CREATE_TIME, SortOrder.ASC); }; @@ -269,7 +254,7 @@ public class IndexUpdater extends Thread { // terminate crawling finishCrawling = true; forceStop(); - if (threadDump) { + if (fessConfig.getIndexerThreadDumpEnabledAsBoolean()) { printThreadDump(); } @@ -304,6 +289,8 @@ public class IndexUpdater extends Thread { private void processAccessResults(final List> docList, final List accessResultList, final List arList) { + final FessConfig fessConfig = ComponentUtil.getFessConfig(); + final int maxDocumentCacheSize = fessConfig.getIndexerWebfsMaxDocumentCacheSizeAsInteger().intValue(); for (final EsAccessResult accessResult : arList) { if (logger.isDebugEnabled()) { logger.debug("Indexing " + accessResult.getUrl()); @@ -319,9 +306,9 @@ public class IndexUpdater extends Thread { continue; } - final AccessResultData accessResultData = accessResult.getAccessResultData(); + final AccessResultData accessResultData = accessResult.getAccessResultData(); if (accessResultData != null) { - accessResult.setAccessResultData((AccessResultData) null); + accessResult.setAccessResultData(null); try { final Transformer transformer = SingletonLaContainer.getComponent(accessResultData.getTransformerName()); if (transformer == null) { @@ -373,11 +360,13 @@ public class IndexUpdater extends Thread { } protected void updateDocument(final Map map) { - if (clickCountEnabled) { + final FessConfig fessConfig = ComponentUtil.getFessConfig(); + + if (fessConfig.getIndexerClickCountEnabledAsBoolean()) { addClickCountField(map); } - if (favoriteCountEnabled) { + if (fessConfig.getIndexerFavoriteCountEnabledAsBoolean()) { addFavoriteCountField(map); } @@ -402,7 +391,6 @@ public class IndexUpdater extends Thread { addBoostValue(map, documentBoost); } - final FessConfig fessConfig = ComponentUtil.getFessConfig(); if (!map.containsKey(fessConfig.getIndexFieldDocId())) { map.put(fessConfig.getIndexFieldDocId(), systemHelper.generateDocId(map)); } @@ -460,7 +448,9 @@ public class IndexUpdater extends Thread { final long execTime = System.currentTimeMillis(); final List arList = ((EsDataService) dataService).getAccessResultList(cb); if (!arList.isEmpty()) { - for (final AccessResult ar : arList.toArray(new AccessResult[arList.size()])) { + final FessConfig fessConfig = ComponentUtil.getFessConfig(); + final long commitMarginTime = fessConfig.getIndexerWebfsCommitMarginTimeAsInteger().longValue(); + for (final AccessResult ar : arList.toArray(new AccessResult[arList.size()])) { if (ar.getCreateTime().longValue() > execTime - commitMarginTime) { arList.remove(ar); } diff --git a/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java b/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java index 97ee77f42..5ae47d362 100644 --- a/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java +++ b/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java @@ -135,6 +135,30 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction /** The key of the configuration. e.g. text/html */ String CRAWLER_DOCUMENT_CACHE_HTML_MIMETYPES = "crawler.document.cache.html.mimetypes"; + /** The key of the configuration. e.g. true */ + String INDEXER_THREAD_DUMP_ENABLED = "indexer.thread.dump.enabled"; + + /** The key of the configuration. e.g. true */ + String INDEXER_CLICK_COUNT_ENABLED = "indexer.click.count.enabled"; + + /** The key of the configuration. e.g. true */ + String INDEXER_FAVORITE_COUNT_ENABLED = "indexer.favorite.count.enabled"; + + /** The key of the configuration. e.g. 10000 */ + String INDEXER_WEBFS_COMMIT_MARGIN_TIME = "indexer.webfs.commit.margin.time"; + + /** The key of the configuration. e.g. 60 */ + String INDEXER_WEBFS_MAX_EMPTY_LIST_CONUNT = "indexer.webfs.max.empty.list.conunt"; + + /** The key of the configuration. e.g. 60000 */ + String INDEXER_WEBFS_UPDATE_INTERVAL = "indexer.webfs.update.interval"; + + /** The key of the configuration. e.g. 5 */ + String INDEXER_WEBFS_MAX_DOCUMENT_CACHE_SIZE = "indexer.webfs.max.document.cache.size"; + + /** The key of the configuration. e.g. 5 */ + String INDEXER_DATA_MAX_DOCUMENT_CACHE_SIZE = "indexer.data.max.document.cache.size"; + /** The key of the configuration. e.g. favorite_count */ String INDEX_FIELD_favorite_count = "index.field.favorite_count"; @@ -810,6 +834,125 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction */ String getCrawlerDocumentCacheHtmlMimetypes(); + /** + * Get the value for the key 'indexer.thread.dump.enabled'.
+ * The value is, e.g. true
+ * comment: indexer + * @return The value of found property. (NotNull: if not found, exception but basically no way) + */ + String getIndexerThreadDumpEnabled(); + + /** + * Is the property for the key 'indexer.thread.dump.enabled' true?
+ * The value is, e.g. true
+ * comment: indexer + * @return The determination, true or false. (if not found, exception but basically no way) + */ + boolean isIndexerThreadDumpEnabled(); + + /** + * Get the value for the key 'indexer.click.count.enabled'.
+ * The value is, e.g. true
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + */ + String getIndexerClickCountEnabled(); + + /** + * Is the property for the key 'indexer.click.count.enabled' true?
+ * The value is, e.g. true
+ * @return The determination, true or false. (if not found, exception but basically no way) + */ + boolean isIndexerClickCountEnabled(); + + /** + * Get the value for the key 'indexer.favorite.count.enabled'.
+ * The value is, e.g. true
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + */ + String getIndexerFavoriteCountEnabled(); + + /** + * Is the property for the key 'indexer.favorite.count.enabled' true?
+ * The value is, e.g. true
+ * @return The determination, true or false. (if not found, exception but basically no way) + */ + boolean isIndexerFavoriteCountEnabled(); + + /** + * Get the value for the key 'indexer.webfs.commit.margin.time'.
+ * The value is, e.g. 10000
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + */ + String getIndexerWebfsCommitMarginTime(); + + /** + * Get the value for the key 'indexer.webfs.commit.margin.time' as {@link Integer}.
+ * The value is, e.g. 10000
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + * @throws NumberFormatException When the property is not integer. + */ + Integer getIndexerWebfsCommitMarginTimeAsInteger(); + + /** + * Get the value for the key 'indexer.webfs.max.empty.list.conunt'.
+ * The value is, e.g. 60
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + */ + String getIndexerWebfsMaxEmptyListConunt(); + + /** + * Get the value for the key 'indexer.webfs.max.empty.list.conunt' as {@link Integer}.
+ * The value is, e.g. 60
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + * @throws NumberFormatException When the property is not integer. + */ + Integer getIndexerWebfsMaxEmptyListConuntAsInteger(); + + /** + * Get the value for the key 'indexer.webfs.update.interval'.
+ * The value is, e.g. 60000
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + */ + String getIndexerWebfsUpdateInterval(); + + /** + * Get the value for the key 'indexer.webfs.update.interval' as {@link Integer}.
+ * The value is, e.g. 60000
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + * @throws NumberFormatException When the property is not integer. + */ + Integer getIndexerWebfsUpdateIntervalAsInteger(); + + /** + * Get the value for the key 'indexer.webfs.max.document.cache.size'.
+ * The value is, e.g. 5
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + */ + String getIndexerWebfsMaxDocumentCacheSize(); + + /** + * Get the value for the key 'indexer.webfs.max.document.cache.size' as {@link Integer}.
+ * The value is, e.g. 5
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + * @throws NumberFormatException When the property is not integer. + */ + Integer getIndexerWebfsMaxDocumentCacheSizeAsInteger(); + + /** + * Get the value for the key 'indexer.data.max.document.cache.size'.
+ * The value is, e.g. 5
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + */ + String getIndexerDataMaxDocumentCacheSize(); + + /** + * Get the value for the key 'indexer.data.max.document.cache.size' as {@link Integer}.
+ * The value is, e.g. 5
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + * @throws NumberFormatException When the property is not integer. + */ + Integer getIndexerDataMaxDocumentCacheSizeAsInteger(); + /** * Get the value for the key 'index.field.favorite_count'.
* The value is, e.g. favorite_count
@@ -1998,6 +2141,70 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction return get(FessConfig.CRAWLER_DOCUMENT_CACHE_HTML_MIMETYPES); } + public String getIndexerThreadDumpEnabled() { + return get(FessConfig.INDEXER_THREAD_DUMP_ENABLED); + } + + public boolean isIndexerThreadDumpEnabled() { + return is(FessConfig.INDEXER_THREAD_DUMP_ENABLED); + } + + public String getIndexerClickCountEnabled() { + return get(FessConfig.INDEXER_CLICK_COUNT_ENABLED); + } + + public boolean isIndexerClickCountEnabled() { + return is(FessConfig.INDEXER_CLICK_COUNT_ENABLED); + } + + public String getIndexerFavoriteCountEnabled() { + return get(FessConfig.INDEXER_FAVORITE_COUNT_ENABLED); + } + + public boolean isIndexerFavoriteCountEnabled() { + return is(FessConfig.INDEXER_FAVORITE_COUNT_ENABLED); + } + + public String getIndexerWebfsCommitMarginTime() { + return get(FessConfig.INDEXER_WEBFS_COMMIT_MARGIN_TIME); + } + + public Integer getIndexerWebfsCommitMarginTimeAsInteger() { + return getAsInteger(FessConfig.INDEXER_WEBFS_COMMIT_MARGIN_TIME); + } + + public String getIndexerWebfsMaxEmptyListConunt() { + return get(FessConfig.INDEXER_WEBFS_MAX_EMPTY_LIST_CONUNT); + } + + public Integer getIndexerWebfsMaxEmptyListConuntAsInteger() { + return getAsInteger(FessConfig.INDEXER_WEBFS_MAX_EMPTY_LIST_CONUNT); + } + + public String getIndexerWebfsUpdateInterval() { + return get(FessConfig.INDEXER_WEBFS_UPDATE_INTERVAL); + } + + public Integer getIndexerWebfsUpdateIntervalAsInteger() { + return getAsInteger(FessConfig.INDEXER_WEBFS_UPDATE_INTERVAL); + } + + public String getIndexerWebfsMaxDocumentCacheSize() { + return get(FessConfig.INDEXER_WEBFS_MAX_DOCUMENT_CACHE_SIZE); + } + + public Integer getIndexerWebfsMaxDocumentCacheSizeAsInteger() { + return getAsInteger(FessConfig.INDEXER_WEBFS_MAX_DOCUMENT_CACHE_SIZE); + } + + public String getIndexerDataMaxDocumentCacheSize() { + return get(FessConfig.INDEXER_DATA_MAX_DOCUMENT_CACHE_SIZE); + } + + public Integer getIndexerDataMaxDocumentCacheSizeAsInteger() { + return getAsInteger(FessConfig.INDEXER_DATA_MAX_DOCUMENT_CACHE_SIZE); + } + public String getIndexFieldFavoriteCount() { return get(FessConfig.INDEX_FIELD_favorite_count); } diff --git a/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java b/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java index 536790da7..8a4334ae4 100644 --- a/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java +++ b/src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java @@ -106,4 +106,22 @@ public interface FessProp { return StreamUtil.of(mimetypes).anyMatch(s -> s.equalsIgnoreCase(mimetype)); } + String getIndexerClickCountEnabled(); + + public default boolean getIndexerClickCountEnabledAsBoolean() { + return Constants.TRUE.equalsIgnoreCase(getIndexerClickCountEnabled()); + } + + String getIndexerFavoriteCountEnabled(); + + public default boolean getIndexerFavoriteCountEnabledAsBoolean() { + return Constants.TRUE.equalsIgnoreCase(getIndexerFavoriteCountEnabled()); + } + + String getIndexerThreadDumpEnabled(); + + public default boolean getIndexerThreadDumpEnabledAsBoolean() { + return Constants.TRUE.equalsIgnoreCase(getIndexerThreadDumpEnabled()); + } + } diff --git a/src/main/java/org/codelibs/fess/mylasta/mail/CrawlerPostcard.java b/src/main/java/org/codelibs/fess/mylasta/mail/CrawlerPostcard.java index ed46c9b1e..afb9951af 100644 --- a/src/main/java/org/codelibs/fess/mylasta/mail/CrawlerPostcard.java +++ b/src/main/java/org/codelibs/fess/mylasta/mail/CrawlerPostcard.java @@ -52,7 +52,7 @@ public class CrawlerPostcard extends LaTypicalPostcard { protected String[] getPropertyNames() { return new String[] { "hostname", "webFsCrawlStartTime", "webFsCrawlEndTime", "webFsCrawlExecTime", "webFsIndexExecTime", "webFsIndexSize", "dataCrawlStartTime", "dataCrawlEndTime", "dataCrawlExecTime", "dataIndexExecTime", "dataFsIndexSize", - "commitStartTime", "commitEndTime", "commitExecTime", "crawlerStartTime", "crawlerEndTime", "crawlerExecTime" }; + "commitStartTime", "commitEndTime", "commitExecTime", "crawlerStartTime", "crawlerEndTime", "crawlerExecTime", "status" }; } // =================================================================================== @@ -252,4 +252,13 @@ public class CrawlerPostcard extends LaTypicalPostcard { public void setCrawlerExecTime(String crawlerExecTime) { registerVariable("crawlerExecTime", crawlerExecTime); } + + /** + * Set the value of status, used in parameter comment.
+ * Even if empty string, treated as empty plainly. So "IF pmb != null" is false if empty. + * @param status The parameter value of status. (NotNull) + */ + public void setStatus(String status) { + registerVariable("status", status); + } } diff --git a/src/main/resources/fess_config.properties b/src/main/resources/fess_config.properties index 6d68f3c58..c640dab84 100644 --- a/src/main/resources/fess_config.properties +++ b/src/main/resources/fess_config.properties @@ -82,6 +82,16 @@ crawler.document.cache.supported.mimetypes=text/html #,text/plain,application/xml,application/pdf,application/msword,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/vnd.ms-excel,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/vnd.ms-powerpoint,application/vnd.openxmlformats-officedocument.presentationml.presentation crawler.document.cache.html.mimetypes=text/html +# indexer +indexer.thread.dump.enabled=true +indexer.click.count.enabled=true +indexer.favorite.count.enabled=true +indexer.webfs.commit.margin.time=10000 +indexer.webfs.max.empty.list.conunt=60 +indexer.webfs.update.interval=60000 +indexer.webfs.max.document.cache.size=5 +indexer.data.max.document.cache.size=5 + # field names index.field.favorite_count=favorite_count index.field.click_count=click_count diff --git a/src/main/resources/fess_ds.xml b/src/main/resources/fess_ds.xml index 5bbb7a444..7edc1c10c 100644 --- a/src/main/resources/fess_ds.xml +++ b/src/main/resources/fess_ds.xml @@ -28,9 +28,6 @@ - diff --git a/src/main/resources/mail/crawler.dfmail b/src/main/resources/mail/crawler.dfmail index dd7227467..c8675e8aa 100644 --- a/src/main/resources/mail/crawler.dfmail +++ b/src/main/resources/mail/crawler.dfmail @@ -7,32 +7,32 @@ subject: [FESS] Crawler completed: /*pmb.hostname*/ --- Server Info --- Host Name: /*IF pmb.hostname != null*//*pmb.hostname*//*END*//*IF pmb.hostname == null*/Unknown/*END*/ /*IF pmb.webFsIndexSize != null*/ ---- Web/FileSystem Crawler --- -Start Time: /*pmb.webFsCrawlStartTime*/ -End Time: /*pmb.webFsCrawlEndTime*/ -Exec Time: /*pmb.webFsCrawlExecTime*/ms +--- Web/FileSystem Crawler ---/*IF pmb.webFsCrawlStartTime != null*/ +Start Time: /*pmb.webFsCrawlStartTime*//*END*//*IF pmb.webFsCrawlEndTime != null*/ +End Time: /*pmb.webFsCrawlEndTime*//*END*//*IF pmb.webFsCrawlExecTime != null*/ +Exec Time: /*pmb.webFsCrawlExecTime*/ms/*END*/ ---- Web/FileSystem Indexer --- -Exec Time: /*pmb.webFsIndexExecTime*/ -Num of Doc: /*pmb.webFsIndexSize*/ docs +--- Web/FileSystem Indexer ---/*IF pmb.webFsIndexExecTime != null*/ +Exec Time: /*pmb.webFsIndexExecTime*//*END*//*IF pmb.webFsIndexSize != null*/ +Num of Doc: /*pmb.webFsIndexSize*/ docs/*END*/ /*END*//*IF pmb.dataFsIndexSize != null*/ ---- Data Store Crawler --- -Start Time: /*pmb.dataCrawlStartTime*/ -End Time: /*pmb.dataCrawlEndTime*/ -Exec Time: /*pmb.dataCrawlExecTime*/ms +--- Data Store Crawler ---/*IF pmb.dataCrawlStartTime != null*/ +Start Time: /*pmb.dataCrawlStartTime*//*END*//*IF pmb.dataCrawlEndTime != null*/ +End Time: /*pmb.dataCrawlEndTime*//*END*//*IF pmb.dataCrawlExecTime != null*/ +Exec Time: /*pmb.dataCrawlExecTime*/ms/*END*/ ---- Data Store Indexer --- -Exec Time: /*pmb.dataIndexExecTime*/ -Num of Doc: /*pmb.dataFsIndexSize*/ docs +--- Data Store Indexer ---/*IF pmb.dataIndexExecTime != null*/ +Exec Time: /*pmb.dataIndexExecTime*//*END*//*IF pmb.dataFsIndexSize != null*/ +Num of Doc: /*pmb.dataFsIndexSize*/ docs/*END*/ /*END*//*IF pmb.commitExecTime != null*/ ---- Indexer(Commit) --- -Start Time: /*pmb.commitStartTime*/ -End Time: /*pmb.commitEndTime*/ -Exec Time: /*pmb.commitExecTime*/ms +--- Indexer(Commit) ---/*IF pmb.commitStartTime != null*/ +Start Time: /*pmb.commitStartTime*//*END*//*IF pmb.commitEndTime != null*/ +End Time: /*pmb.commitEndTime*//*END*//*IF pmb.commitExecTime != null*/ +Exec Time: /*pmb.commitExecTime*/ms/*END*/ /*END*/ ---- Total --- -Start Time: /*pmb.crawlerStartTime*/ -End Time: /*pmb.crawlerEndTime*/ -Exec Time: /*pmb.crawlerExecTime*/ms -Status: /*IF pmb.success != null*/Success/*END*//*IF pmb.success == null*/Fail/*END*/ +--- Total ---/*IF pmb.crawlerStartTime != null*/ +Start Time: /*pmb.crawlerStartTime*//*END*//*IF pmb.crawlerEndTime != null*/ +End Time: /*pmb.crawlerEndTime*//*END*//*IF pmb.crawlerExecTime != null*/ +Exec Time: /*pmb.crawlerExecTime*/ms/*END*/ +Status: /*pmb.status*/