diff --git a/pom.xml b/pom.xml index 91a05d1fc..e28c0a43c 100644 --- a/pom.xml +++ b/pom.xml @@ -59,7 +59,7 @@ 0.6.0F - 1.0.6 + 1.0.7-SNAPSHOT 2.1.1 diff --git a/src/main/java/org/codelibs/fess/helper/IndexingHelper.java b/src/main/java/org/codelibs/fess/helper/IndexingHelper.java index ed3a0d031..cd2c66387 100644 --- a/src/main/java/org/codelibs/fess/helper/IndexingHelper.java +++ b/src/main/java/org/codelibs/fess/helper/IndexingHelper.java @@ -38,6 +38,9 @@ public class IndexingHelper { public long requestInterval = 500; public void sendDocuments(final FessEsClient fessEsClient, final List> docList) { + if (docList.isEmpty()) { + return; + } final long execTime = System.currentTimeMillis(); if (logger.isDebugEnabled()) { logger.debug("Sending " + docList.size() + " documents to a server."); diff --git a/src/main/java/org/codelibs/fess/indexer/IndexUpdater.java b/src/main/java/org/codelibs/fess/indexer/IndexUpdater.java index 9a22aa43a..abe361ef1 100644 --- a/src/main/java/org/codelibs/fess/indexer/IndexUpdater.java +++ b/src/main/java/org/codelibs/fess/indexer/IndexUpdater.java @@ -282,7 +282,8 @@ public class IndexUpdater extends Thread { private void processAccessResults(final List> docList, final List accessResultList, final List arList) { final FessConfig fessConfig = ComponentUtil.getFessConfig(); - final int maxDocumentCacheSize = fessConfig.getIndexerWebfsMaxDocumentCacheSizeAsInteger().intValue(); + final long maxDocumentRequestSize = fessConfig.getIndexerWebfsMaxDocumentRequestSizeAsInteger().longValue(); + long contentSize = 0; for (final EsAccessResult accessResult : arList) { if (logger.isDebugEnabled()) { logger.debug("Indexing " + accessResult.getUrl()); @@ -329,11 +330,19 @@ public class IndexUpdater extends Thread { docList.add(map); if (logger.isDebugEnabled()) { - logger.debug("Added the document. " + "The number of a document cache is " + docList.size() + "."); + logger.debug("Added the document(" + contentSize + " bytes). " + "The number of a document cache is " + + docList.size() + "."); } - if (docList.size() >= maxDocumentCacheSize) { + if (accessResult.getContentLength() == null) { indexingHelper.sendDocuments(fessEsClient, docList); + contentSize = 0; + } else { + contentSize += accessResult.getContentLength().longValue(); + if (contentSize >= maxDocumentRequestSize) { + indexingHelper.sendDocuments(fessEsClient, docList); + contentSize = 0; + } } documentSize++; if (logger.isDebugEnabled()) { diff --git a/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java b/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java index 4d910daee..72cf65f38 100644 --- a/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java +++ b/src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java @@ -222,9 +222,12 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction /** The key of the configuration. e.g. 10000 */ String INDEXER_WEBFS_UPDATE_INTERVAL = "indexer.webfs.update.interval"; - /** The key of the configuration. e.g. 5 */ + /** The key of the configuration. e.g. 100 */ String INDEXER_WEBFS_MAX_DOCUMENT_CACHE_SIZE = "indexer.webfs.max.document.cache.size"; + /** The key of the configuration. e.g. 10485760 */ + String INDEXER_WEBFS_MAX_DOCUMENT_REQUEST_SIZE = "indexer.webfs.max.document.request.size"; + /** The key of the configuration. e.g. 5 */ String INDEXER_DATA_MAX_DOCUMENT_CACHE_SIZE = "indexer.data.max.document.cache.size"; @@ -1401,19 +1404,34 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction /** * Get the value for the key 'indexer.webfs.max.document.cache.size'.
- * The value is, e.g. 5
+ * The value is, e.g. 100
* @return The value of found property. (NotNull: if not found, exception but basically no way) */ String getIndexerWebfsMaxDocumentCacheSize(); /** * Get the value for the key 'indexer.webfs.max.document.cache.size' as {@link Integer}.
- * The value is, e.g. 5
+ * The value is, e.g. 100
* @return The value of found property. (NotNull: if not found, exception but basically no way) * @throws NumberFormatException When the property is not integer. */ Integer getIndexerWebfsMaxDocumentCacheSizeAsInteger(); + /** + * Get the value for the key 'indexer.webfs.max.document.request.size'.
+ * The value is, e.g. 10485760
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + */ + String getIndexerWebfsMaxDocumentRequestSize(); + + /** + * Get the value for the key 'indexer.webfs.max.document.request.size' as {@link Integer}.
+ * The value is, e.g. 10485760
+ * @return The value of found property. (NotNull: if not found, exception but basically no way) + * @throws NumberFormatException When the property is not integer. + */ + Integer getIndexerWebfsMaxDocumentRequestSizeAsInteger(); + /** * Get the value for the key 'indexer.data.max.document.cache.size'.
* The value is, e.g. 5
@@ -3451,6 +3469,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction return getAsInteger(FessConfig.INDEXER_WEBFS_MAX_DOCUMENT_CACHE_SIZE); } + public String getIndexerWebfsMaxDocumentRequestSize() { + return get(FessConfig.INDEXER_WEBFS_MAX_DOCUMENT_REQUEST_SIZE); + } + + public Integer getIndexerWebfsMaxDocumentRequestSizeAsInteger() { + return getAsInteger(FessConfig.INDEXER_WEBFS_MAX_DOCUMENT_REQUEST_SIZE); + } + public String getIndexerDataMaxDocumentCacheSize() { return get(FessConfig.INDEXER_DATA_MAX_DOCUMENT_CACHE_SIZE); } diff --git a/src/main/resources/fess_config.properties b/src/main/resources/fess_config.properties index 8112c90e0..8ac39a003 100644 --- a/src/main/resources/fess_config.properties +++ b/src/main/resources/fess_config.properties @@ -116,10 +116,11 @@ indexer.thread.dump.enabled=true indexer.unprocessed.document.size=1000 indexer.click.count.enabled=true indexer.favorite.count.enabled=true -indexer.webfs.commit.margin.time=1000 +indexer.webfs.commit.margin.time=5000 indexer.webfs.max.empty.list.conunt=60 indexer.webfs.update.interval=10000 -indexer.webfs.max.document.cache.size=5 +indexer.webfs.max.document.cache.size=100 +indexer.webfs.max.document.request.size=10485760 indexer.data.max.document.cache.size=5 # field names