Browse Source

modify data store crawling

Shinsuke Sugaya 9 years ago
parent
commit
5adaf030a3

+ 0 - 6
src/main/java/org/codelibs/fess/ds/IndexUpdateCallback.java

@@ -17,16 +17,10 @@ package org.codelibs.fess.ds;
 
 import java.util.Map;
 
-import org.codelibs.fess.es.client.FessEsClient;
-
 public interface IndexUpdateCallback {
 
     boolean store(Map<String, Object> dataMap);
 
-    void setEsClient(FessEsClient fessEsClient);
-
-    FessEsClient getsClient();
-
     long getDocumentSize();
 
     long getExecuteTime();

+ 1 - 1
src/main/java/org/codelibs/fess/ds/impl/AbstractDataStoreImpl.java

@@ -121,7 +121,7 @@ public abstract class AbstractDataStoreImpl implements DataStore {
             }
             return value;
         } catch (final Exception e) {
-            logger.warn("Invalid value format: " + template, e);
+            logger.warn("Invalid value format: " + template + " => " + paramMap, e);
             return null;
         }
     }

+ 10 - 3
src/main/java/org/codelibs/fess/ds/impl/CsvDataStoreImpl.java

@@ -192,12 +192,16 @@ public class CsvDataStoreImpl extends AbstractDataStoreImpl {
                 resultMap.putAll(paramMap);
                 resultMap.put("csvfile", csvFile.getAbsolutePath());
                 resultMap.put("csvfilename", csvFile.getName());
+                boolean foundValues = false;
                 for (int i = 0; i < list.size(); i++) {
                     String key = null;
                     String value = list.get(i);
                     if (value == null) {
                         value = StringUtil.EMPTY;
                     }
+                    if (StringUtil.isNotBlank(value)) {
+                        foundValues = true;
+                    }
                     if (headerList != null && headerList.size() > i) {
                         key = headerList.get(i);
                         if (StringUtil.isNotBlank(key)) {
@@ -207,6 +211,10 @@ public class CsvDataStoreImpl extends AbstractDataStoreImpl {
                     key = CELL_PREFIX + Integer.toString(i + 1);
                     resultMap.put(key, value);
                 }
+                if (!foundValues) {
+                    logger.debug("No data in line: {}", resultMap);
+                    continue;
+                }
 
                 if (logger.isDebugEnabled()) {
                     for (final Map.Entry<String, String> entry : resultMap.entrySet()) {
@@ -230,6 +238,8 @@ public class CsvDataStoreImpl extends AbstractDataStoreImpl {
                 try {
                     loop = callback.store(dataMap);
                 } catch (final CrawlingAccessException e) {
+                    logger.warn("Crawling Access Exception at : " + dataMap, e);
+
                     Throwable target = e;
                     if (target instanceof MultipleCrawlingAccessException) {
                         final Throwable[] causes = ((MultipleCrawlingAccessException) target).getCauses();
@@ -251,12 +261,9 @@ public class CsvDataStoreImpl extends AbstractDataStoreImpl {
                         url = ((DataStoreCrawlingException) target).getUrl();
                     } else {
                         url = csvFile.getAbsolutePath() + ":" + csvReader.getLineNumber();
-
                     }
                     final FailureUrlService failureUrlService = SingletonLaContainer.getComponent(FailureUrlService.class);
                     failureUrlService.store(dataConfig, errorName, url, target);
-
-                    logger.warn("Crawling Access Exception at : " + dataMap, e);
                 } catch (final Exception e) {
                     final String url = csvFile.getAbsolutePath() + ":" + csvReader.getLineNumber();
                     final FailureUrlService failureUrlService = SingletonLaContainer.getComponent(FailureUrlService.class);

+ 4 - 11
src/main/java/org/codelibs/fess/ds/impl/FileListDataStoreImpl.java

@@ -239,9 +239,10 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl {
                 deleteIdList.add(crawlingInfoHelper.generateId(dataMap));
 
                 if (deleteIdList.size() >= maxDeleteDocumentCacheSize) {
+                    final FessEsClient fessEsClient = ComponentUtil.getElasticsearchClient();
                     final IndexingHelper indexingHelper = ComponentUtil.getIndexingHelper();
                     for (final String id : deleteIdList) {
-                        indexingHelper.deleteDocument(indexUpdateCallback.getsClient(), id);
+                        indexingHelper.deleteDocument(fessEsClient, id);
                     }
                     if (logger.isDebugEnabled()) {
                         logger.debug("Deleted " + deleteIdList);
@@ -256,9 +257,10 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl {
         @Override
         public void commit() {
             if (!deleteIdList.isEmpty()) {
+                final FessEsClient fessEsClient = ComponentUtil.getElasticsearchClient();
                 final IndexingHelper indexingHelper = ComponentUtil.getIndexingHelper();
                 for (final String id : deleteIdList) {
-                    indexingHelper.deleteDocument(indexUpdateCallback.getsClient(), id);
+                    indexingHelper.deleteDocument(fessEsClient, id);
                 }
                 if (logger.isDebugEnabled()) {
                     logger.debug("Deleted " + deleteIdList);
@@ -267,11 +269,6 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl {
             indexUpdateCallback.commit();
         }
 
-        @Override
-        public void setEsClient(final FessEsClient fessEsClient) {
-            indexUpdateCallback.setEsClient(fessEsClient);
-        }
-
         @Override
         public long getDocumentSize() {
             return indexUpdateCallback.getDocumentSize();
@@ -282,9 +279,5 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl {
             return indexUpdateCallback.getExecuteTime();
         }
 
-        @Override
-        public FessEsClient getsClient() {
-            return indexUpdateCallback.getsClient();
-        }
     }
 }

+ 5 - 24
src/main/java/org/codelibs/fess/ds/impl/IndexUpdateCallbackImpl.java

@@ -35,14 +35,6 @@ import org.slf4j.LoggerFactory;
 public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
     private static final Logger logger = LoggerFactory.getLogger(IndexUpdateCallbackImpl.class);
 
-    protected FessEsClient fessEsClient;
-
-    public int maxDocumentCacheSize = 5;
-
-    public boolean clickCountEnabled = true;
-
-    public boolean favoriteCountEnabled = true;
-
     protected volatile AtomicLong documentSize = new AtomicLong(0);
 
     protected volatile long executeTime = 0;
@@ -56,6 +48,7 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
     public synchronized boolean store(final Map<String, Object> dataMap) {
         final long startTime = System.currentTimeMillis();
         final FessConfig fessConfig = ComponentUtil.getFessConfig();
+        final FessEsClient fessEsClient = ComponentUtil.getElasticsearchClient();
 
         if (logger.isDebugEnabled()) {
             logger.debug("Adding " + dataMap);
@@ -73,11 +66,11 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
 
         final String url = dataMap.get(fessConfig.getIndexFieldUrl()).toString();
 
-        if (clickCountEnabled) {
+        if (fessConfig.getIndexerClickCountEnabledAsBoolean()) {
             addClickCountField(dataMap, url, fessConfig.getIndexFieldClickCount());
         }
 
-        if (favoriteCountEnabled) {
+        if (fessConfig.getIndexerFavoriteCountEnabledAsBoolean()) {
             addFavoriteCountField(dataMap, url, fessConfig.getIndexFieldFavoriteCount());
         }
 
@@ -91,14 +84,11 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
             logger.debug("Added the document. " + "The number of a document cache is " + docList.size() + ".");
         }
 
-        if (docList.size() >= maxDocumentCacheSize) {
+        if (docList.size() >= fessConfig.getIndexerDataMaxDocumentCacheSizeAsInteger().intValue()) {
             indexingHelper.sendDocuments(fessEsClient, docList);
         }
         documentSize.getAndIncrement();
 
-        if (!docList.isEmpty()) {
-            indexingHelper.sendDocuments(fessEsClient, docList);
-        }
         if (logger.isDebugEnabled()) {
             logger.debug("The number of an added document is " + documentSize.get() + ".");
         }
@@ -111,6 +101,7 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
     public void commit() {
         if (!docList.isEmpty()) {
             final IndexingHelper indexingHelper = ComponentUtil.getIndexingHelper();
+            final FessEsClient fessEsClient = ComponentUtil.getElasticsearchClient();
             indexingHelper.sendDocuments(fessEsClient, docList);
         }
     }
@@ -143,14 +134,4 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
         return executeTime;
     }
 
-    @Override
-    public FessEsClient getsClient() {
-        return fessEsClient;
-    }
-
-    @Override
-    public void setEsClient(final FessEsClient fessEsClient) {
-        this.fessEsClient = fessEsClient;
-    }
-
 }

+ 31 - 6
src/main/java/org/codelibs/fess/exec/Crawler.java

@@ -30,7 +30,6 @@ import java.util.Map;
 import javax.annotation.Resource;
 
 import org.codelibs.core.CoreLibConstants;
-import org.codelibs.core.beans.util.BeanUtil;
 import org.codelibs.core.lang.StringUtil;
 import org.codelibs.core.misc.DynamicProperties;
 import org.codelibs.fess.Constants;
@@ -276,14 +275,11 @@ public class Crawler implements Serializable {
         final String toStrs = (String) crawlerProperties.get(Constants.NOTIFICATION_TO_PROPERTY);
         if (StringUtil.isNotBlank(toStrs)) {
             final String[] toAddresses = toStrs.split(",");
-            final Map<String, Object> dataMap = new HashMap<String, Object>();
+            final Map<String, String> dataMap = new HashMap<>();
             for (final Map.Entry<String, String> entry : infoMap.entrySet()) {
                 dataMap.put(StringUtil.decapitalize(entry.getKey()), entry.getValue());
             }
 
-            if (Constants.T.equals(infoMap.get(Constants.CRAWLER_STATUS))) {
-                dataMap.put("success", true);
-            }
             try {
                 dataMap.put("hostname", InetAddress.getLocalHost().getHostAddress());
             } catch (final UnknownHostException e) {
@@ -298,11 +294,40 @@ public class Crawler implements Serializable {
                 StreamUtil.of(toAddresses).forEach(address -> {
                     postcard.addTo(address);
                 });
-                BeanUtil.copyMapToBean(dataMap, postcard);
+                postcard.setCommitEndTime(getValueOrEmpty(dataMap, "commitEndTime"));
+                postcard.setCommitExecTime(getValueOrEmpty(dataMap, "commitExecTime"));
+                postcard.setCommitStartTime(getValueOrEmpty(dataMap, "commitStartTime"));
+                postcard.setCrawlerEndTime(getValueOrEmpty(dataMap, "crawlerEndTime"));
+                postcard.setCrawlerExecTime(getValueOrEmpty(dataMap, "crawlerExecTime"));
+                postcard.setCrawlerStartTime(getValueOrEmpty(dataMap, "crawlerStartTime"));
+                postcard.setDataCrawlEndTime(getValueOrEmpty(dataMap, "dataCrawlEndTime"));
+                postcard.setDataCrawlExecTime(getValueOrEmpty(dataMap, "dataCrawlExecTime"));
+                postcard.setDataCrawlStartTime(getValueOrEmpty(dataMap, "dataCrawlStartTime"));
+                postcard.setDataFsIndexSize(getValueOrEmpty(dataMap, "dataFsIndexSize"));
+                postcard.setDataIndexExecTime(getValueOrEmpty(dataMap, "dataIndexExecTime"));
+                postcard.setHostname(getValueOrEmpty(dataMap, "hostname"));
+                postcard.setWebFsCrawlEndTime(getValueOrEmpty(dataMap, "webFsCrawlEndTime"));
+                postcard.setWebFsCrawlExecTime(getValueOrEmpty(dataMap, "webFsCrawlExecTime"));
+                postcard.setWebFsCrawlStartTime(getValueOrEmpty(dataMap, "webFsCrawlStartTime"));
+                postcard.setWebFsIndexExecTime(getValueOrEmpty(dataMap, "webFsIndexExecTime"));
+                postcard.setWebFsIndexSize(getValueOrEmpty(dataMap, "webFsIndexSize"));
+                if (Constants.T.equals(infoMap.get(Constants.CRAWLER_STATUS))) {
+                    postcard.setStatus(Constants.OK);
+                } else {
+                    postcard.setStatus(Constants.FAIL);
+                }
             });
         }
     }
 
+    private String getValueOrEmpty(Map<String, String> dataMap, String key) {
+        String value = dataMap.get(key);
+        if (value == null) {
+            return StringUtil.EMPTY;
+        }
+        return value;
+    }
+
     public int doCrawl(final Options options) {
         if (logger.isInfoEnabled()) {
             logger.info("Starting Crawler..");

+ 3 - 0
src/main/java/org/codelibs/fess/helper/DataIndexHelper.java

@@ -28,6 +28,7 @@ import org.codelibs.core.lang.StringUtil;
 import org.codelibs.core.misc.DynamicProperties;
 import org.codelibs.fess.Constants;
 import org.codelibs.fess.app.service.DataConfigService;
+import org.codelibs.fess.app.service.FailureUrlService;
 import org.codelibs.fess.ds.DataStore;
 import org.codelibs.fess.ds.DataStoreFactory;
 import org.codelibs.fess.ds.IndexUpdateCallback;
@@ -235,6 +236,8 @@ public class DataIndexHelper implements Serializable {
                     dataStore.store(dataConfig, indexUpdateCallback, initParamMap);
                 } catch (final Exception e) {
                     logger.error("Failed to process a data crawling: " + dataConfig.getName(), e);
+                    ComponentUtil.getComponent(FailureUrlService.class).store(dataConfig, e.getClass().getCanonicalName(),
+                            dataConfig.getConfigId() + ":" + dataConfig.getName(), e);
                 } finally {
                     indexUpdateCallback.commit();
                     deleteOldDocs();

+ 17 - 27
src/main/java/org/codelibs/fess/indexer/IndexUpdater.java

@@ -83,14 +83,8 @@ public class IndexUpdater extends Thread {
     @Resource
     protected IndexingHelper indexingHelper;
 
-    public int maxDocumentCacheSize = 5;
-
-    public int maxInvalidDocumentSize = 100;
-
     protected boolean finishCrawling = false;
 
-    public long updateInterval = 60000; // 1 min
-
     protected long executeTime;
 
     protected long documentSize;
@@ -103,16 +97,6 @@ public class IndexUpdater extends Thread {
 
     protected List<String> finishedSessionIdList = new ArrayList<>();
 
-    public long commitMarginTime = 10000; // 10ms
-
-    public int maxEmptyListCount = 60; // 1hour
-
-    public boolean threadDump = false;
-
-    public boolean clickCountEnabled = true;
-
-    public boolean favoriteCountEnabled = true;
-
     private final List<DocBoostMatcher> docBoostMatcherList = new ArrayList<>();
 
     private final Map<String, Object> docValueMap = new HashMap<>();
@@ -160,6 +144,9 @@ public class IndexUpdater extends Thread {
         executeTime = 0;
         documentSize = 0;
 
+        final FessConfig fessConfig = ComponentUtil.getFessConfig();
+        final long updateInterval = fessConfig.getIndexerWebfsUpdateIntervalAsInteger().longValue();
+        final int maxEmptyListCount = fessConfig.getIndexerWebfsMaxEmptyListConuntAsInteger().intValue();
         final IntervalControlHelper intervalControlHelper = ComponentUtil.getIntervalControlHelper();
         try {
             final Consumer<SearchRequestBuilder> cb =
@@ -172,10 +159,8 @@ public class IndexUpdater extends Thread {
                                                 org.codelibs.fess.crawler.Constants.OK_STATUS));
                         builder.setQuery(queryBuilder);
                         builder.setFrom(0);
-                        if (maxDocumentCacheSize <= 0) {
-                            maxDocumentCacheSize = 1;
-                        }
-                        builder.setSize(maxDocumentCacheSize);
+                        final int maxDocumentCacheSize = fessConfig.getIndexerWebfsMaxDocumentCacheSizeAsInteger().intValue();
+                        builder.setSize(maxDocumentCacheSize <= 0 ? 1 : maxDocumentCacheSize);
                         builder.addSort(EsAccessResult.CREATE_TIME, SortOrder.ASC);
                     };
 
@@ -269,7 +254,7 @@ public class IndexUpdater extends Thread {
                     // terminate crawling
                     finishCrawling = true;
                     forceStop();
-                    if (threadDump) {
+                    if (fessConfig.getIndexerThreadDumpEnabledAsBoolean()) {
                         printThreadDump();
                     }
 
@@ -304,6 +289,8 @@ public class IndexUpdater extends Thread {
 
     private void processAccessResults(final List<Map<String, Object>> docList, final List<EsAccessResult> accessResultList,
             final List<EsAccessResult> arList) {
+        final FessConfig fessConfig = ComponentUtil.getFessConfig();
+        final int maxDocumentCacheSize = fessConfig.getIndexerWebfsMaxDocumentCacheSizeAsInteger().intValue();
         for (final EsAccessResult accessResult : arList) {
             if (logger.isDebugEnabled()) {
                 logger.debug("Indexing " + accessResult.getUrl());
@@ -319,9 +306,9 @@ public class IndexUpdater extends Thread {
                 continue;
             }
 
-            final AccessResultData accessResultData = accessResult.getAccessResultData();
+            final AccessResultData<?> accessResultData = accessResult.getAccessResultData();
             if (accessResultData != null) {
-                accessResult.setAccessResultData((AccessResultData) null);
+                accessResult.setAccessResultData(null);
                 try {
                     final Transformer transformer = SingletonLaContainer.getComponent(accessResultData.getTransformerName());
                     if (transformer == null) {
@@ -373,11 +360,13 @@ public class IndexUpdater extends Thread {
     }
 
     protected void updateDocument(final Map<String, Object> map) {
-        if (clickCountEnabled) {
+        final FessConfig fessConfig = ComponentUtil.getFessConfig();
+
+        if (fessConfig.getIndexerClickCountEnabledAsBoolean()) {
             addClickCountField(map);
         }
 
-        if (favoriteCountEnabled) {
+        if (fessConfig.getIndexerFavoriteCountEnabledAsBoolean()) {
             addFavoriteCountField(map);
         }
 
@@ -402,7 +391,6 @@ public class IndexUpdater extends Thread {
             addBoostValue(map, documentBoost);
         }
 
-        final FessConfig fessConfig = ComponentUtil.getFessConfig();
         if (!map.containsKey(fessConfig.getIndexFieldDocId())) {
             map.put(fessConfig.getIndexFieldDocId(), systemHelper.generateDocId(map));
         }
@@ -460,7 +448,9 @@ public class IndexUpdater extends Thread {
         final long execTime = System.currentTimeMillis();
         final List<EsAccessResult> arList = ((EsDataService) dataService).getAccessResultList(cb);
         if (!arList.isEmpty()) {
-            for (final AccessResult ar : arList.toArray(new AccessResult[arList.size()])) {
+            final FessConfig fessConfig = ComponentUtil.getFessConfig();
+            final long commitMarginTime = fessConfig.getIndexerWebfsCommitMarginTimeAsInteger().longValue();
+            for (final AccessResult<?> ar : arList.toArray(new AccessResult[arList.size()])) {
                 if (ar.getCreateTime().longValue() > execTime - commitMarginTime) {
                     arList.remove(ar);
                 }

+ 207 - 0
src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java

@@ -135,6 +135,30 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
     /** The key of the configuration. e.g. text/html */
     String CRAWLER_DOCUMENT_CACHE_HTML_MIMETYPES = "crawler.document.cache.html.mimetypes";
 
+    /** The key of the configuration. e.g. true */
+    String INDEXER_THREAD_DUMP_ENABLED = "indexer.thread.dump.enabled";
+
+    /** The key of the configuration. e.g. true */
+    String INDEXER_CLICK_COUNT_ENABLED = "indexer.click.count.enabled";
+
+    /** The key of the configuration. e.g. true */
+    String INDEXER_FAVORITE_COUNT_ENABLED = "indexer.favorite.count.enabled";
+
+    /** The key of the configuration. e.g. 10000 */
+    String INDEXER_WEBFS_COMMIT_MARGIN_TIME = "indexer.webfs.commit.margin.time";
+
+    /** The key of the configuration. e.g. 60 */
+    String INDEXER_WEBFS_MAX_EMPTY_LIST_CONUNT = "indexer.webfs.max.empty.list.conunt";
+
+    /** The key of the configuration. e.g. 60000 */
+    String INDEXER_WEBFS_UPDATE_INTERVAL = "indexer.webfs.update.interval";
+
+    /** The key of the configuration. e.g. 5 */
+    String INDEXER_WEBFS_MAX_DOCUMENT_CACHE_SIZE = "indexer.webfs.max.document.cache.size";
+
+    /** The key of the configuration. e.g. 5 */
+    String INDEXER_DATA_MAX_DOCUMENT_CACHE_SIZE = "indexer.data.max.document.cache.size";
+
     /** The key of the configuration. e.g. favorite_count */
     String INDEX_FIELD_favorite_count = "index.field.favorite_count";
 
@@ -810,6 +834,125 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
      */
     String getCrawlerDocumentCacheHtmlMimetypes();
 
+    /**
+     * Get the value for the key 'indexer.thread.dump.enabled'. <br>
+     * The value is, e.g. true <br>
+     * comment: indexer
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getIndexerThreadDumpEnabled();
+
+    /**
+     * Is the property for the key 'indexer.thread.dump.enabled' true? <br>
+     * The value is, e.g. true <br>
+     * comment: indexer
+     * @return The determination, true or false. (if not found, exception but basically no way)
+     */
+    boolean isIndexerThreadDumpEnabled();
+
+    /**
+     * Get the value for the key 'indexer.click.count.enabled'. <br>
+     * The value is, e.g. true <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getIndexerClickCountEnabled();
+
+    /**
+     * Is the property for the key 'indexer.click.count.enabled' true? <br>
+     * The value is, e.g. true <br>
+     * @return The determination, true or false. (if not found, exception but basically no way)
+     */
+    boolean isIndexerClickCountEnabled();
+
+    /**
+     * Get the value for the key 'indexer.favorite.count.enabled'. <br>
+     * The value is, e.g. true <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getIndexerFavoriteCountEnabled();
+
+    /**
+     * Is the property for the key 'indexer.favorite.count.enabled' true? <br>
+     * The value is, e.g. true <br>
+     * @return The determination, true or false. (if not found, exception but basically no way)
+     */
+    boolean isIndexerFavoriteCountEnabled();
+
+    /**
+     * Get the value for the key 'indexer.webfs.commit.margin.time'. <br>
+     * The value is, e.g. 10000 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getIndexerWebfsCommitMarginTime();
+
+    /**
+     * Get the value for the key 'indexer.webfs.commit.margin.time' as {@link Integer}. <br>
+     * The value is, e.g. 10000 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     * @throws NumberFormatException When the property is not integer.
+     */
+    Integer getIndexerWebfsCommitMarginTimeAsInteger();
+
+    /**
+     * Get the value for the key 'indexer.webfs.max.empty.list.conunt'. <br>
+     * The value is, e.g. 60 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getIndexerWebfsMaxEmptyListConunt();
+
+    /**
+     * Get the value for the key 'indexer.webfs.max.empty.list.conunt' as {@link Integer}. <br>
+     * The value is, e.g. 60 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     * @throws NumberFormatException When the property is not integer.
+     */
+    Integer getIndexerWebfsMaxEmptyListConuntAsInteger();
+
+    /**
+     * Get the value for the key 'indexer.webfs.update.interval'. <br>
+     * The value is, e.g. 60000 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getIndexerWebfsUpdateInterval();
+
+    /**
+     * Get the value for the key 'indexer.webfs.update.interval' as {@link Integer}. <br>
+     * The value is, e.g. 60000 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     * @throws NumberFormatException When the property is not integer.
+     */
+    Integer getIndexerWebfsUpdateIntervalAsInteger();
+
+    /**
+     * Get the value for the key 'indexer.webfs.max.document.cache.size'. <br>
+     * The value is, e.g. 5 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getIndexerWebfsMaxDocumentCacheSize();
+
+    /**
+     * Get the value for the key 'indexer.webfs.max.document.cache.size' as {@link Integer}. <br>
+     * The value is, e.g. 5 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     * @throws NumberFormatException When the property is not integer.
+     */
+    Integer getIndexerWebfsMaxDocumentCacheSizeAsInteger();
+
+    /**
+     * Get the value for the key 'indexer.data.max.document.cache.size'. <br>
+     * The value is, e.g. 5 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getIndexerDataMaxDocumentCacheSize();
+
+    /**
+     * Get the value for the key 'indexer.data.max.document.cache.size' as {@link Integer}. <br>
+     * The value is, e.g. 5 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     * @throws NumberFormatException When the property is not integer.
+     */
+    Integer getIndexerDataMaxDocumentCacheSizeAsInteger();
+
     /**
      * Get the value for the key 'index.field.favorite_count'. <br>
      * The value is, e.g. favorite_count <br>
@@ -1998,6 +2141,70 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             return get(FessConfig.CRAWLER_DOCUMENT_CACHE_HTML_MIMETYPES);
         }
 
+        public String getIndexerThreadDumpEnabled() {
+            return get(FessConfig.INDEXER_THREAD_DUMP_ENABLED);
+        }
+
+        public boolean isIndexerThreadDumpEnabled() {
+            return is(FessConfig.INDEXER_THREAD_DUMP_ENABLED);
+        }
+
+        public String getIndexerClickCountEnabled() {
+            return get(FessConfig.INDEXER_CLICK_COUNT_ENABLED);
+        }
+
+        public boolean isIndexerClickCountEnabled() {
+            return is(FessConfig.INDEXER_CLICK_COUNT_ENABLED);
+        }
+
+        public String getIndexerFavoriteCountEnabled() {
+            return get(FessConfig.INDEXER_FAVORITE_COUNT_ENABLED);
+        }
+
+        public boolean isIndexerFavoriteCountEnabled() {
+            return is(FessConfig.INDEXER_FAVORITE_COUNT_ENABLED);
+        }
+
+        public String getIndexerWebfsCommitMarginTime() {
+            return get(FessConfig.INDEXER_WEBFS_COMMIT_MARGIN_TIME);
+        }
+
+        public Integer getIndexerWebfsCommitMarginTimeAsInteger() {
+            return getAsInteger(FessConfig.INDEXER_WEBFS_COMMIT_MARGIN_TIME);
+        }
+
+        public String getIndexerWebfsMaxEmptyListConunt() {
+            return get(FessConfig.INDEXER_WEBFS_MAX_EMPTY_LIST_CONUNT);
+        }
+
+        public Integer getIndexerWebfsMaxEmptyListConuntAsInteger() {
+            return getAsInteger(FessConfig.INDEXER_WEBFS_MAX_EMPTY_LIST_CONUNT);
+        }
+
+        public String getIndexerWebfsUpdateInterval() {
+            return get(FessConfig.INDEXER_WEBFS_UPDATE_INTERVAL);
+        }
+
+        public Integer getIndexerWebfsUpdateIntervalAsInteger() {
+            return getAsInteger(FessConfig.INDEXER_WEBFS_UPDATE_INTERVAL);
+        }
+
+        public String getIndexerWebfsMaxDocumentCacheSize() {
+            return get(FessConfig.INDEXER_WEBFS_MAX_DOCUMENT_CACHE_SIZE);
+        }
+
+        public Integer getIndexerWebfsMaxDocumentCacheSizeAsInteger() {
+            return getAsInteger(FessConfig.INDEXER_WEBFS_MAX_DOCUMENT_CACHE_SIZE);
+        }
+
+        public String getIndexerDataMaxDocumentCacheSize() {
+            return get(FessConfig.INDEXER_DATA_MAX_DOCUMENT_CACHE_SIZE);
+        }
+
+        public Integer getIndexerDataMaxDocumentCacheSizeAsInteger() {
+            return getAsInteger(FessConfig.INDEXER_DATA_MAX_DOCUMENT_CACHE_SIZE);
+        }
+
         public String getIndexFieldFavoriteCount() {
             return get(FessConfig.INDEX_FIELD_favorite_count);
         }

+ 18 - 0
src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java

@@ -106,4 +106,22 @@ public interface FessProp {
         return StreamUtil.of(mimetypes).anyMatch(s -> s.equalsIgnoreCase(mimetype));
     }
 
+    String getIndexerClickCountEnabled();
+
+    public default boolean getIndexerClickCountEnabledAsBoolean() { // boolean view of getIndexerClickCountEnabled()
+        return Constants.TRUE.equalsIgnoreCase(getIndexerClickCountEnabled()); // true only on a case-insensitive match with Constants.TRUE
+    }
+
+    String getIndexerFavoriteCountEnabled();
+
+    public default boolean getIndexerFavoriteCountEnabledAsBoolean() { // boolean view of getIndexerFavoriteCountEnabled()
+        return Constants.TRUE.equalsIgnoreCase(getIndexerFavoriteCountEnabled()); // true only on a case-insensitive match with Constants.TRUE
+    }
+
+    String getIndexerThreadDumpEnabled();
+
+    public default boolean getIndexerThreadDumpEnabledAsBoolean() { // boolean view of getIndexerThreadDumpEnabled()
+        return Constants.TRUE.equalsIgnoreCase(getIndexerThreadDumpEnabled()); // true only on a case-insensitive match with Constants.TRUE
+    }
+
 }

+ 10 - 1
src/main/java/org/codelibs/fess/mylasta/mail/CrawlerPostcard.java

@@ -52,7 +52,7 @@ public class CrawlerPostcard extends LaTypicalPostcard {
     protected String[] getPropertyNames() {
         return new String[] { "hostname", "webFsCrawlStartTime", "webFsCrawlEndTime", "webFsCrawlExecTime", "webFsIndexExecTime",
                 "webFsIndexSize", "dataCrawlStartTime", "dataCrawlEndTime", "dataCrawlExecTime", "dataIndexExecTime", "dataFsIndexSize",
-                "commitStartTime", "commitEndTime", "commitExecTime", "crawlerStartTime", "crawlerEndTime", "crawlerExecTime" };
+                "commitStartTime", "commitEndTime", "commitExecTime", "crawlerStartTime", "crawlerEndTime", "crawlerExecTime", "status" };
     }
 
     // ===================================================================================
@@ -252,4 +252,13 @@ public class CrawlerPostcard extends LaTypicalPostcard {
     public void setCrawlerExecTime(String crawlerExecTime) {
         registerVariable("crawlerExecTime", crawlerExecTime);
     }
+
+    /**
+     * Set the value of status, used in parameter comment. <br>
+     * Even if empty string, treated as empty plainly. So "IF pmb != null" is false if empty.
+     * @param status The parameter value of status. (NotNull)
+     */
+    public void setStatus(String status) {
+        registerVariable("status", status);
+    }
 }

+ 10 - 0
src/main/resources/fess_config.properties

@@ -82,6 +82,16 @@ crawler.document.cache.supported.mimetypes=text/html
 #,text/plain,application/xml,application/pdf,application/msword,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/vnd.ms-excel,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/vnd.ms-powerpoint,application/vnd.openxmlformats-officedocument.presentationml.presentation
 crawler.document.cache.html.mimetypes=text/html
 
+# indexer
+indexer.thread.dump.enabled=true
+indexer.click.count.enabled=true
+indexer.favorite.count.enabled=true
+indexer.webfs.commit.margin.time=10000
+indexer.webfs.max.empty.list.conunt=60
+indexer.webfs.update.interval=60000
+indexer.webfs.max.document.cache.size=5
+indexer.data.max.document.cache.size=5
+
 # field names
 index.field.favorite_count=favorite_count
 index.field.click_count=click_count

+ 0 - 3
src/main/resources/fess_ds.xml

@@ -28,9 +28,6 @@
 	</component>
 
 	<component name="indexUpdateCallback" class="org.codelibs.fess.ds.impl.IndexUpdateCallbackImpl" instance="prototype">
-		<!-- 
-		<property name="maxDocumentCacheSize">10</property>
-		 -->
 	</component>
 
 </components>

+ 23 - 23
src/main/resources/mail/crawler.dfmail

@@ -7,32 +7,32 @@ subject: [FESS] Crawler completed: /*pmb.hostname*/
 --- Server Info ---
 Host Name: /*IF pmb.hostname != null*//*pmb.hostname*//*END*//*IF pmb.hostname == null*/Unknown/*END*/
 /*IF pmb.webFsIndexSize != null*/
---- Web/FileSystem Crawler ---
-Start Time: /*pmb.webFsCrawlStartTime*/
-End Time:   /*pmb.webFsCrawlEndTime*/
-Exec Time:  /*pmb.webFsCrawlExecTime*/ms
+--- Web/FileSystem Crawler ---/*IF pmb.webFsCrawlStartTime != null*/
+Start Time: /*pmb.webFsCrawlStartTime*//*END*//*IF pmb.webFsCrawlEndTime != null*/
+End Time:   /*pmb.webFsCrawlEndTime*//*END*//*IF pmb.webFsCrawlExecTime != null*/
+Exec Time:  /*pmb.webFsCrawlExecTime*/ms/*END*/
 
---- Web/FileSystem Indexer ---
-Exec Time:  /*pmb.webFsIndexExecTime*/
-Num of Doc: /*pmb.webFsIndexSize*/ docs
+--- Web/FileSystem Indexer ---/*IF pmb.webFsIndexExecTime != null*/
+Exec Time:  /*pmb.webFsIndexExecTime*//*END*//*IF pmb.webFsIndexSize != null*/
+Num of Doc: /*pmb.webFsIndexSize*/ docs/*END*/
 /*END*//*IF pmb.dataFsIndexSize != null*/
---- Data Store Crawler ---
-Start Time: /*pmb.dataCrawlStartTime*/
-End Time:   /*pmb.dataCrawlEndTime*/
-Exec Time:  /*pmb.dataCrawlExecTime*/ms
+--- Data Store Crawler ---/*IF pmb.dataCrawlStartTime != null*/
+Start Time: /*pmb.dataCrawlStartTime*//*END*//*IF pmb.dataCrawlEndTime != null*/
+End Time:   /*pmb.dataCrawlEndTime*//*END*//*IF pmb.dataCrawlExecTime != null*/
+Exec Time:  /*pmb.dataCrawlExecTime*/ms/*END*/
 
---- Data Store Indexer ---
-Exec Time:  /*pmb.dataIndexExecTime*/
-Num of Doc: /*pmb.dataFsIndexSize*/ docs
+--- Data Store Indexer ---/*IF pmb.dataIndexExecTime != null*/
+Exec Time:  /*pmb.dataIndexExecTime*//*END*//*IF pmb.dataFsIndexSize != null*/
+Num of Doc: /*pmb.dataFsIndexSize*/ docs/*END*/
 /*END*//*IF pmb.commitExecTime != null*/
---- Indexer(Commit) ---
-Start Time: /*pmb.commitStartTime*/
-End Time:   /*pmb.commitEndTime*/
-Exec Time:  /*pmb.commitExecTime*/ms
+--- Indexer(Commit) ---/*IF pmb.commitStartTime != null*/
+Start Time: /*pmb.commitStartTime*//*END*//*IF pmb.commitEndTime != null*/
+End Time:   /*pmb.commitEndTime*//*END*//*IF pmb.commitExecTime != null*/
+Exec Time:  /*pmb.commitExecTime*/ms/*END*/
 /*END*/
---- Total ---
-Start Time: /*pmb.crawlerStartTime*/
-End Time:   /*pmb.crawlerEndTime*/
-Exec Time:  /*pmb.crawlerExecTime*/ms
-Status:     /*IF pmb.success != null*/Success/*END*//*IF pmb.success == null*/Fail/*END*/
+--- Total ---/*IF pmb.crawlerStartTime != null*/
+Start Time: /*pmb.crawlerStartTime*//*END*//*IF pmb.crawlerEndTime != null*/
+End Time:   /*pmb.crawlerEndTime*//*END*//*IF pmb.crawlerExecTime != null*/
+Exec Time:  /*pmb.crawlerExecTime*/ms/*END*/
+Status:     /*pmb.status*/