fix #419 : lazy loading for access result data
This commit is contained in:
parent
203a43a24e
commit
6d64a84865
5 changed files with 48 additions and 9 deletions
2
pom.xml
2
pom.xml
|
@ -59,7 +59,7 @@
|
|||
<utflute.version>0.6.0F</utflute.version>
|
||||
|
||||
<!-- Crawler -->
|
||||
<crawler.version>1.0.6</crawler.version>
|
||||
<crawler.version>1.0.7-SNAPSHOT</crawler.version>
|
||||
|
||||
<!-- Suggest -->
|
||||
<suggest.version>2.1.1</suggest.version>
|
||||
|
|
|
@ -38,6 +38,9 @@ public class IndexingHelper {
|
|||
public long requestInterval = 500;
|
||||
|
||||
public void sendDocuments(final FessEsClient fessEsClient, final List<Map<String, Object>> docList) {
|
||||
if (docList.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
final long execTime = System.currentTimeMillis();
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("Sending " + docList.size() + " documents to a server.");
|
||||
|
|
|
@ -282,7 +282,8 @@ public class IndexUpdater extends Thread {
|
|||
private void processAccessResults(final List<Map<String, Object>> docList, final List<EsAccessResult> accessResultList,
|
||||
final List<EsAccessResult> arList) {
|
||||
final FessConfig fessConfig = ComponentUtil.getFessConfig();
|
||||
final int maxDocumentCacheSize = fessConfig.getIndexerWebfsMaxDocumentCacheSizeAsInteger().intValue();
|
||||
final long maxDocumentRequestSize = fessConfig.getIndexerWebfsMaxDocumentRequestSizeAsInteger().longValue();
|
||||
long contentSize = 0;
|
||||
for (final EsAccessResult accessResult : arList) {
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("Indexing " + accessResult.getUrl());
|
||||
|
@ -329,11 +330,19 @@ public class IndexUpdater extends Thread {
|
|||
|
||||
docList.add(map);
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("Added the document. " + "The number of a document cache is " + docList.size() + ".");
|
||||
logger.debug("Added the document(" + contentSize + " bytes). " + "The number of a document cache is "
|
||||
+ docList.size() + ".");
|
||||
}
|
||||
|
||||
if (docList.size() >= maxDocumentCacheSize) {
|
||||
if (accessResult.getContentLength() == null) {
|
||||
indexingHelper.sendDocuments(fessEsClient, docList);
|
||||
contentSize = 0;
|
||||
} else {
|
||||
contentSize += accessResult.getContentLength().longValue();
|
||||
if (contentSize >= maxDocumentRequestSize) {
|
||||
indexingHelper.sendDocuments(fessEsClient, docList);
|
||||
contentSize = 0;
|
||||
}
|
||||
}
|
||||
documentSize++;
|
||||
if (logger.isDebugEnabled()) {
|
||||
|
|
|
@ -222,9 +222,12 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
/** The key of the configuration. e.g. 10000 */
|
||||
String INDEXER_WEBFS_UPDATE_INTERVAL = "indexer.webfs.update.interval";
|
||||
|
||||
/** The key of the configuration. e.g. 5 */
|
||||
/** The key of the configuration. e.g. 100 */
|
||||
String INDEXER_WEBFS_MAX_DOCUMENT_CACHE_SIZE = "indexer.webfs.max.document.cache.size";
|
||||
|
||||
/** The key of the configuration. e.g. 10485760 */
|
||||
String INDEXER_WEBFS_MAX_DOCUMENT_REQUEST_SIZE = "indexer.webfs.max.document.request.size";
|
||||
|
||||
/** The key of the configuration. e.g. 5 */
|
||||
String INDEXER_DATA_MAX_DOCUMENT_CACHE_SIZE = "indexer.data.max.document.cache.size";
|
||||
|
||||
|
@ -1401,19 +1404,34 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
|
||||
/**
|
||||
* Get the value for the key 'indexer.webfs.max.document.cache.size'. <br>
|
||||
* The value is, e.g. 5 <br>
|
||||
* The value is, e.g. 100 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getIndexerWebfsMaxDocumentCacheSize();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'indexer.webfs.max.document.cache.size' as {@link Integer}. <br>
|
||||
* The value is, e.g. 5 <br>
|
||||
* The value is, e.g. 100 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not integer.
|
||||
*/
|
||||
Integer getIndexerWebfsMaxDocumentCacheSizeAsInteger();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'indexer.webfs.max.document.request.size'. <br>
|
||||
* The value is, e.g. 10485760 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getIndexerWebfsMaxDocumentRequestSize();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'indexer.webfs.max.document.request.size' as {@link Integer}. <br>
|
||||
* The value is, e.g. 10485760 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not integer.
|
||||
*/
|
||||
Integer getIndexerWebfsMaxDocumentRequestSizeAsInteger();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'indexer.data.max.document.cache.size'. <br>
|
||||
* The value is, e.g. 5 <br>
|
||||
|
@ -3451,6 +3469,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
return getAsInteger(FessConfig.INDEXER_WEBFS_MAX_DOCUMENT_CACHE_SIZE);
|
||||
}
|
||||
|
||||
public String getIndexerWebfsMaxDocumentRequestSize() {
|
||||
return get(FessConfig.INDEXER_WEBFS_MAX_DOCUMENT_REQUEST_SIZE);
|
||||
}
|
||||
|
||||
public Integer getIndexerWebfsMaxDocumentRequestSizeAsInteger() {
|
||||
return getAsInteger(FessConfig.INDEXER_WEBFS_MAX_DOCUMENT_REQUEST_SIZE);
|
||||
}
|
||||
|
||||
public String getIndexerDataMaxDocumentCacheSize() {
|
||||
return get(FessConfig.INDEXER_DATA_MAX_DOCUMENT_CACHE_SIZE);
|
||||
}
|
||||
|
|
|
@ -116,10 +116,11 @@ indexer.thread.dump.enabled=true
|
|||
indexer.unprocessed.document.size=1000
|
||||
indexer.click.count.enabled=true
|
||||
indexer.favorite.count.enabled=true
|
||||
indexer.webfs.commit.margin.time=1000
|
||||
indexer.webfs.commit.margin.time=5000
|
||||
indexer.webfs.max.empty.list.conunt=60
|
||||
indexer.webfs.update.interval=10000
|
||||
indexer.webfs.max.document.cache.size=5
|
||||
indexer.webfs.max.document.cache.size=100
|
||||
indexer.webfs.max.document.request.size=10485760
|
||||
indexer.data.max.document.cache.size=5
|
||||
|
||||
# field names
|
||||
|
|
Loading…
Add table
Reference in a new issue