Browse Source

fix #401 : add indexer.unprocessed.document.size

Shinsuke Sugaya 9 years ago
parent
commit
c6f827e15d

+ 6 - 14
src/main/java/org/codelibs/fess/indexer/IndexUpdater.java

@@ -93,8 +93,6 @@ public class IndexUpdater extends Thread {
 
     protected int maxErrorCount = 2;
 
-    protected int unprocessedDocumentSize = 100;
-
     protected List<String> finishedSessionIdList = new ArrayList<>();
 
     private final List<DocBoostMatcher> docBoostMatcherList = new ArrayList<>();
@@ -181,7 +179,7 @@ public class IndexUpdater extends Thread {
                     if (interval > 0) {
                         // sleep
                         try {
-                            Thread.sleep(interval); // 1 min (default)
+                            Thread.sleep(interval); // 10 sec (default)
                         } catch (final InterruptedException e) {
                             logger.warn("Interrupted index update.", e);
                         }
@@ -206,15 +204,9 @@ public class IndexUpdater extends Thread {
                     }
                     while (!arList.isEmpty()) {
                         processAccessResults(docList, accessResultList, arList);
-
                         cleanupAccessResults(accessResultList);
-
-                        if (logger.isDebugEnabled()) {
-                            logger.debug("Getting documents in IndexUpdater queue.");
-                        }
                         arList = getAccessResultList(cb);
                     }
-
                     if (!docList.isEmpty()) {
                         indexingHelper.sendDocuments(fessEsClient, docList);
                     }
@@ -445,10 +437,13 @@ public class IndexUpdater extends Thread {
     }
 
     private List<EsAccessResult> getAccessResultList(final Consumer<SearchRequestBuilder> cb) {
+        if (logger.isDebugEnabled()) {
+            logger.debug("Getting documents in IndexUpdater queue.");
+        }
         final long execTime = System.currentTimeMillis();
         final List<EsAccessResult> arList = ((EsDataService) dataService).getAccessResultList(cb);
+        final FessConfig fessConfig = ComponentUtil.getFessConfig();
         if (!arList.isEmpty()) {
-            final FessConfig fessConfig = ComponentUtil.getFessConfig();
             final long commitMarginTime = fessConfig.getIndexerWebfsCommitMarginTimeAsInteger().longValue();
             for (final AccessResult<?> ar : arList.toArray(new AccessResult[arList.size()])) {
                 if (ar.getCreateTime().longValue() > execTime - commitMarginTime) {
@@ -460,6 +455,7 @@ public class IndexUpdater extends Thread {
         if (logger.isInfoEnabled()) {
             logger.info("Processing " + arList.size() + "/" + totalHits + " docs (" + (System.currentTimeMillis() - execTime) + "ms)");
         }
+        final long unprocessedDocumentSize = fessConfig.getIndexerUnprocessedDocumentSizeAsInteger().longValue();
         if (totalHits > unprocessedDocumentSize) {
             if (logger.isInfoEnabled()) {
                 logger.info("Stopped all crawler threads. " + " You have " + totalHits + " (>" + unprocessedDocumentSize + ") "
@@ -532,10 +528,6 @@ public class IndexUpdater extends Thread {
         this.maxIndexerErrorCount = maxIndexerErrorCount;
     }
 
-    public void setUnprocessedDocumentSize(final int unprocessedDocumentSize) {
-        this.unprocessedDocumentSize = unprocessedDocumentSize;
-    }
-
     public void addDocBoostMatcher(final DocBoostMatcher rule) {
         docBoostMatcherList.add(rule);
     }

+ 42 - 8
src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java

@@ -63,7 +63,11 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
     -XX:CMSInitiatingOccupancyFraction=75
     -XX:+UseParNewGC
     -XX:+UseTLAB
-    -XX:+DisableExplicitGC */
+    -XX:+DisableExplicitGC
+    -Djcifs.smb.client.connTimeout=60000
+    -Djcifs.smb.client.soTimeout=35000
+    -Djcifs.smb.client.responseTimeout=30000
+    */
     String JVM_SUGGEST_OPTIONS = "jvm.suggest.options";
 
     /** The key of the configuration. e.g. default_crawler */
@@ -200,19 +204,22 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
     /** The key of the configuration. e.g. true */
     String INDEXER_THREAD_DUMP_ENABLED = "indexer.thread.dump.enabled";
 
+    /** The key of the configuration. e.g. 1000 */
+    String INDEXER_UNPROCESSED_DOCUMENT_SIZE = "indexer.unprocessed.document.size";
+
     /** The key of the configuration. e.g. true */
     String INDEXER_CLICK_COUNT_ENABLED = "indexer.click.count.enabled";
 
     /** The key of the configuration. e.g. true */
     String INDEXER_FAVORITE_COUNT_ENABLED = "indexer.favorite.count.enabled";
 
-    /** The key of the configuration. e.g. 10000 */
+    /** The key of the configuration. e.g. 1000 */
     String INDEXER_WEBFS_COMMIT_MARGIN_TIME = "indexer.webfs.commit.margin.time";
 
     /** The key of the configuration. e.g. 60 */
     String INDEXER_WEBFS_MAX_EMPTY_LIST_CONUNT = "indexer.webfs.max.empty.list.conunt";
 
-    /** The key of the configuration. e.g. 60000 */
+    /** The key of the configuration. e.g. 10000 */
     String INDEXER_WEBFS_UPDATE_INTERVAL = "indexer.webfs.update.interval";
 
     /** The key of the configuration. e.g. 5 */
@@ -777,7 +784,11 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
     -XX:CMSInitiatingOccupancyFraction=75
     -XX:+UseParNewGC
     -XX:+UseTLAB
-    -XX:+DisableExplicitGC <br>
+    -XX:+DisableExplicitGC
+    -Djcifs.smb.client.connTimeout=60000
+    -Djcifs.smb.client.soTimeout=35000
+    -Djcifs.smb.client.responseTimeout=30000
+    <br>
      * @return The value of found property. (NotNull: if not found, exception but basically no way)
      */
     String getJvmSuggestOptions();
@@ -1231,6 +1242,21 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
      */
     boolean isIndexerThreadDumpEnabled();
 
+    /**
+     * Get the value for the key 'indexer.unprocessed.document.size'. <br>
+     * The value is, e.g. 1000 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getIndexerUnprocessedDocumentSize();
+
+    /**
+     * Get the value for the key 'indexer.unprocessed.document.size' as {@link Integer}. <br>
+     * The value is, e.g. 1000 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     * @throws NumberFormatException When the property is not integer.
+     */
+    Integer getIndexerUnprocessedDocumentSizeAsInteger();
+
     /**
      * Get the value for the key 'indexer.click.count.enabled'. <br>
      * The value is, e.g. true <br>
@@ -1261,14 +1287,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
 
     /**
      * Get the value for the key 'indexer.webfs.commit.margin.time'. <br>
-     * The value is, e.g. 10000 <br>
+     * The value is, e.g. 1000 <br>
      * @return The value of found property. (NotNull: if not found, exception but basically no way)
      */
     String getIndexerWebfsCommitMarginTime();
 
     /**
      * Get the value for the key 'indexer.webfs.commit.margin.time' as {@link Integer}. <br>
-     * The value is, e.g. 10000 <br>
+     * The value is, e.g. 1000 <br>
      * @return The value of found property. (NotNull: if not found, exception but basically no way)
      * @throws NumberFormatException When the property is not integer.
      */
@@ -1291,14 +1317,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
 
     /**
      * Get the value for the key 'indexer.webfs.update.interval'. <br>
-     * The value is, e.g. 60000 <br>
+     * The value is, e.g. 10000 <br>
      * @return The value of found property. (NotNull: if not found, exception but basically no way)
      */
     String getIndexerWebfsUpdateInterval();
 
     /**
      * Get the value for the key 'indexer.webfs.update.interval' as {@link Integer}. <br>
-     * The value is, e.g. 60000 <br>
+     * The value is, e.g. 10000 <br>
      * @return The value of found property. (NotNull: if not found, exception but basically no way)
      * @throws NumberFormatException When the property is not integer.
      */
@@ -3010,6 +3036,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             return is(FessConfig.INDEXER_THREAD_DUMP_ENABLED);
         }
 
+        public String getIndexerUnprocessedDocumentSize() {
+            return get(FessConfig.INDEXER_UNPROCESSED_DOCUMENT_SIZE);
+        }
+
+        public Integer getIndexerUnprocessedDocumentSizeAsInteger() {
+            return getAsInteger(FessConfig.INDEXER_UNPROCESSED_DOCUMENT_SIZE);
+        }
+
         public String getIndexerClickCountEnabled() {
             return get(FessConfig.INDEXER_CLICK_COUNT_ENABLED);
         }

+ 0 - 1
src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java

@@ -25,7 +25,6 @@ import java.util.stream.Collectors;
 
 import javax.naming.directory.Attribute;
 import javax.naming.directory.BasicAttribute;
-import javax.servlet.http.HttpServletRequest;
 
 import org.codelibs.core.exception.ClassNotFoundRuntimeException;
 import org.codelibs.core.lang.StringUtil;

+ 2 - 1
src/main/resources/fess_config.properties

@@ -113,9 +113,10 @@ crawler.document.cache.html.mimetypes=text/html
 
 # indexer
 indexer.thread.dump.enabled=true
+indexer.unprocessed.document.size=1000
 indexer.click.count.enabled=true
 indexer.favorite.count.enabled=true
-indexer.webfs.commit.margin.time=10000
+indexer.webfs.commit.margin.time=1000
 indexer.webfs.max.empty.list.conunt=60
 indexer.webfs.update.interval=10000
 indexer.webfs.max.document.cache.size=5