fix #419 : lazy loading for access result data

This commit is contained in:
Shinsuke Sugaya 2016-03-07 23:25:42 +09:00
parent 203a43a24e
commit 6d64a84865
5 changed files with 48 additions and 9 deletions

View file

@ -59,7 +59,7 @@
<utflute.version>0.6.0F</utflute.version>
<!-- Crawler -->
<crawler.version>1.0.6</crawler.version>
<crawler.version>1.0.7-SNAPSHOT</crawler.version>
<!-- Suggest -->
<suggest.version>2.1.1</suggest.version>

View file

@ -38,6 +38,9 @@ public class IndexingHelper {
public long requestInterval = 500;
public void sendDocuments(final FessEsClient fessEsClient, final List<Map<String, Object>> docList) {
if (docList.isEmpty()) {
return;
}
final long execTime = System.currentTimeMillis();
if (logger.isDebugEnabled()) {
logger.debug("Sending " + docList.size() + " documents to a server.");

View file

@ -282,7 +282,8 @@ public class IndexUpdater extends Thread {
private void processAccessResults(final List<Map<String, Object>> docList, final List<EsAccessResult> accessResultList,
final List<EsAccessResult> arList) {
final FessConfig fessConfig = ComponentUtil.getFessConfig();
final int maxDocumentCacheSize = fessConfig.getIndexerWebfsMaxDocumentCacheSizeAsInteger().intValue();
final long maxDocumentRequestSize = fessConfig.getIndexerWebfsMaxDocumentRequestSizeAsInteger().longValue();
long contentSize = 0;
for (final EsAccessResult accessResult : arList) {
if (logger.isDebugEnabled()) {
logger.debug("Indexing " + accessResult.getUrl());
@ -329,11 +330,19 @@ public class IndexUpdater extends Thread {
docList.add(map);
if (logger.isDebugEnabled()) {
logger.debug("Added the document. " + "The number of a document cache is " + docList.size() + ".");
logger.debug("Added the document(" + contentSize + " bytes). " + "The number of a document cache is "
+ docList.size() + ".");
}
if (docList.size() >= maxDocumentCacheSize) {
if (accessResult.getContentLength() == null) {
indexingHelper.sendDocuments(fessEsClient, docList);
contentSize = 0;
} else {
contentSize += accessResult.getContentLength().longValue();
if (contentSize >= maxDocumentRequestSize) {
indexingHelper.sendDocuments(fessEsClient, docList);
contentSize = 0;
}
}
documentSize++;
if (logger.isDebugEnabled()) {

View file

@ -222,9 +222,12 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
/** The key of the configuration. e.g. 10000 */
String INDEXER_WEBFS_UPDATE_INTERVAL = "indexer.webfs.update.interval";
/** The key of the configuration. e.g. 5 */
/** The key of the configuration. e.g. 100 */
String INDEXER_WEBFS_MAX_DOCUMENT_CACHE_SIZE = "indexer.webfs.max.document.cache.size";
/** The key of the configuration. e.g. 10485760 */
String INDEXER_WEBFS_MAX_DOCUMENT_REQUEST_SIZE = "indexer.webfs.max.document.request.size";
/** The key of the configuration. e.g. 5 */
String INDEXER_DATA_MAX_DOCUMENT_CACHE_SIZE = "indexer.data.max.document.cache.size";
@ -1401,19 +1404,34 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
/**
* Get the value for the key 'indexer.webfs.max.document.cache.size'. <br>
* The value is, e.g. 5 <br>
* The value is, e.g. 100 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getIndexerWebfsMaxDocumentCacheSize();
/**
* Get the value for the key 'indexer.webfs.max.document.cache.size' as {@link Integer}. <br>
* The value is, e.g. 5 <br>
* The value is, e.g. 100 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
* @throws NumberFormatException When the property is not integer.
*/
Integer getIndexerWebfsMaxDocumentCacheSizeAsInteger();
/**
* Get the value for the key 'indexer.webfs.max.document.request.size'. <br>
* The value is, e.g. 10485760 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getIndexerWebfsMaxDocumentRequestSize();
/**
* Get the value for the key 'indexer.webfs.max.document.request.size' as {@link Integer}. <br>
* The value is, e.g. 10485760 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
* @throws NumberFormatException When the property is not integer.
*/
Integer getIndexerWebfsMaxDocumentRequestSizeAsInteger();
/**
* Get the value for the key 'indexer.data.max.document.cache.size'. <br>
* The value is, e.g. 5 <br>
@ -3451,6 +3469,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
return getAsInteger(FessConfig.INDEXER_WEBFS_MAX_DOCUMENT_CACHE_SIZE);
}
/**
 * Looks up the raw string value configured under
 * {@link FessConfig#INDEXER_WEBFS_MAX_DOCUMENT_REQUEST_SIZE}.
 * @return The value returned by {@code get} for that key.
 */
public String getIndexerWebfsMaxDocumentRequestSize() {
    final String propertyKey = FessConfig.INDEXER_WEBFS_MAX_DOCUMENT_REQUEST_SIZE;
    return get(propertyKey);
}
/**
 * Looks up the value configured under
 * {@link FessConfig#INDEXER_WEBFS_MAX_DOCUMENT_REQUEST_SIZE} as an {@link Integer}.
 * @return The value returned by {@code getAsInteger} for that key.
 */
public Integer getIndexerWebfsMaxDocumentRequestSizeAsInteger() {
    final String propertyKey = FessConfig.INDEXER_WEBFS_MAX_DOCUMENT_REQUEST_SIZE;
    return getAsInteger(propertyKey);
}
/**
 * Looks up the raw string value configured under
 * {@link FessConfig#INDEXER_DATA_MAX_DOCUMENT_CACHE_SIZE}.
 * @return The value returned by {@code get} for that key.
 */
public String getIndexerDataMaxDocumentCacheSize() {
    final String propertyKey = FessConfig.INDEXER_DATA_MAX_DOCUMENT_CACHE_SIZE;
    return get(propertyKey);
}

View file

@ -116,10 +116,11 @@ indexer.thread.dump.enabled=true
indexer.unprocessed.document.size=1000
indexer.click.count.enabled=true
indexer.favorite.count.enabled=true
indexer.webfs.commit.margin.time=1000
indexer.webfs.commit.margin.time=5000
indexer.webfs.max.empty.list.conunt=60
indexer.webfs.update.interval=10000
indexer.webfs.max.document.cache.size=5
indexer.webfs.max.document.cache.size=100
indexer.webfs.max.document.request.size=10485760
indexer.data.max.document.cache.size=5
# field names