fix #401 : add indexer.unprocessed.document.size
This commit is contained in:
parent
5bdd1c86cc
commit
c6f827e15d
4 changed files with 50 additions and 24 deletions
|
@ -93,8 +93,6 @@ public class IndexUpdater extends Thread {
|
|||
|
||||
protected int maxErrorCount = 2;
|
||||
|
||||
protected int unprocessedDocumentSize = 100;
|
||||
|
||||
protected List<String> finishedSessionIdList = new ArrayList<>();
|
||||
|
||||
private final List<DocBoostMatcher> docBoostMatcherList = new ArrayList<>();
|
||||
|
@ -181,7 +179,7 @@ public class IndexUpdater extends Thread {
|
|||
if (interval > 0) {
|
||||
// sleep
|
||||
try {
|
||||
Thread.sleep(interval); // 1 min (default)
|
||||
Thread.sleep(interval); // 10 sec (default)
|
||||
} catch (final InterruptedException e) {
|
||||
logger.warn("Interrupted index update.", e);
|
||||
}
|
||||
|
@ -206,15 +204,9 @@ public class IndexUpdater extends Thread {
|
|||
}
|
||||
while (!arList.isEmpty()) {
|
||||
processAccessResults(docList, accessResultList, arList);
|
||||
|
||||
cleanupAccessResults(accessResultList);
|
||||
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("Getting documents in IndexUpdater queue.");
|
||||
}
|
||||
arList = getAccessResultList(cb);
|
||||
}
|
||||
|
||||
if (!docList.isEmpty()) {
|
||||
indexingHelper.sendDocuments(fessEsClient, docList);
|
||||
}
|
||||
|
@ -445,10 +437,13 @@ public class IndexUpdater extends Thread {
|
|||
}
|
||||
|
||||
private List<EsAccessResult> getAccessResultList(final Consumer<SearchRequestBuilder> cb) {
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("Getting documents in IndexUpdater queue.");
|
||||
}
|
||||
final long execTime = System.currentTimeMillis();
|
||||
final List<EsAccessResult> arList = ((EsDataService) dataService).getAccessResultList(cb);
|
||||
final FessConfig fessConfig = ComponentUtil.getFessConfig();
|
||||
if (!arList.isEmpty()) {
|
||||
final FessConfig fessConfig = ComponentUtil.getFessConfig();
|
||||
final long commitMarginTime = fessConfig.getIndexerWebfsCommitMarginTimeAsInteger().longValue();
|
||||
for (final AccessResult<?> ar : arList.toArray(new AccessResult[arList.size()])) {
|
||||
if (ar.getCreateTime().longValue() > execTime - commitMarginTime) {
|
||||
|
@ -460,6 +455,7 @@ public class IndexUpdater extends Thread {
|
|||
if (logger.isInfoEnabled()) {
|
||||
logger.info("Processing " + arList.size() + "/" + totalHits + " docs (" + (System.currentTimeMillis() - execTime) + "ms)");
|
||||
}
|
||||
final long unprocessedDocumentSize = fessConfig.getIndexerUnprocessedDocumentSizeAsInteger().longValue();
|
||||
if (totalHits > unprocessedDocumentSize) {
|
||||
if (logger.isInfoEnabled()) {
|
||||
logger.info("Stopped all crawler threads. " + " You have " + totalHits + " (>" + unprocessedDocumentSize + ") "
|
||||
|
@ -532,10 +528,6 @@ public class IndexUpdater extends Thread {
|
|||
this.maxIndexerErrorCount = maxIndexerErrorCount;
|
||||
}
|
||||
|
||||
public void setUnprocessedDocumentSize(final int unprocessedDocumentSize) {
|
||||
this.unprocessedDocumentSize = unprocessedDocumentSize;
|
||||
}
|
||||
|
||||
public void addDocBoostMatcher(final DocBoostMatcher rule) {
|
||||
docBoostMatcherList.add(rule);
|
||||
}
|
||||
|
|
|
@ -63,7 +63,11 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
-XX:CMSInitiatingOccupancyFraction=75
|
||||
-XX:+UseParNewGC
|
||||
-XX:+UseTLAB
|
||||
-XX:+DisableExplicitGC */
|
||||
-XX:+DisableExplicitGC
|
||||
-Djcifs.smb.client.connTimeout=60000
|
||||
-Djcifs.smb.client.soTimeout=35000
|
||||
-Djcifs.smb.client.responseTimeout=30000
|
||||
*/
|
||||
String JVM_SUGGEST_OPTIONS = "jvm.suggest.options";
|
||||
|
||||
/** The key of the configuration. e.g. default_crawler */
|
||||
|
@ -200,19 +204,22 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
/** The key of the configuration. e.g. true */
|
||||
String INDEXER_THREAD_DUMP_ENABLED = "indexer.thread.dump.enabled";
|
||||
|
||||
/** The key of the configuration. e.g. 1000 */
|
||||
String INDEXER_UNPROCESSED_DOCUMENT_SIZE = "indexer.unprocessed.document.size";
|
||||
|
||||
/** The key of the configuration. e.g. true */
|
||||
String INDEXER_CLICK_COUNT_ENABLED = "indexer.click.count.enabled";
|
||||
|
||||
/** The key of the configuration. e.g. true */
|
||||
String INDEXER_FAVORITE_COUNT_ENABLED = "indexer.favorite.count.enabled";
|
||||
|
||||
/** The key of the configuration. e.g. 10000 */
|
||||
/** The key of the configuration. e.g. 1000 */
|
||||
String INDEXER_WEBFS_COMMIT_MARGIN_TIME = "indexer.webfs.commit.margin.time";
|
||||
|
||||
/** The key of the configuration. e.g. 60 */
|
||||
String INDEXER_WEBFS_MAX_EMPTY_LIST_CONUNT = "indexer.webfs.max.empty.list.conunt";
|
||||
|
||||
/** The key of the configuration. e.g. 60000 */
|
||||
/** The key of the configuration. e.g. 10000 */
|
||||
String INDEXER_WEBFS_UPDATE_INTERVAL = "indexer.webfs.update.interval";
|
||||
|
||||
/** The key of the configuration. e.g. 5 */
|
||||
|
@ -777,7 +784,11 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
-XX:CMSInitiatingOccupancyFraction=75
|
||||
-XX:+UseParNewGC
|
||||
-XX:+UseTLAB
|
||||
-XX:+DisableExplicitGC <br>
|
||||
-XX:+DisableExplicitGC
|
||||
-Djcifs.smb.client.connTimeout=60000
|
||||
-Djcifs.smb.client.soTimeout=35000
|
||||
-Djcifs.smb.client.responseTimeout=30000
|
||||
<br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getJvmSuggestOptions();
|
||||
|
@ -1231,6 +1242,21 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
*/
|
||||
boolean isIndexerThreadDumpEnabled();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'indexer.unprocessed.document.size'. <br>
|
||||
* The value is, e.g. 1000 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getIndexerUnprocessedDocumentSize();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'indexer.unprocessed.document.size' as {@link Integer}. <br>
|
||||
* The value is, e.g. 1000 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not integer.
|
||||
*/
|
||||
Integer getIndexerUnprocessedDocumentSizeAsInteger();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'indexer.click.count.enabled'. <br>
|
||||
* The value is, e.g. true <br>
|
||||
|
@ -1261,14 +1287,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
|
||||
/**
|
||||
* Get the value for the key 'indexer.webfs.commit.margin.time'. <br>
|
||||
* The value is, e.g. 10000 <br>
|
||||
* The value is, e.g. 1000 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getIndexerWebfsCommitMarginTime();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'indexer.webfs.commit.margin.time' as {@link Integer}. <br>
|
||||
* The value is, e.g. 10000 <br>
|
||||
* The value is, e.g. 1000 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not integer.
|
||||
*/
|
||||
|
@ -1291,14 +1317,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
|
||||
/**
|
||||
* Get the value for the key 'indexer.webfs.update.interval'. <br>
|
||||
* The value is, e.g. 60000 <br>
|
||||
* The value is, e.g. 10000 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getIndexerWebfsUpdateInterval();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'indexer.webfs.update.interval' as {@link Integer}. <br>
|
||||
* The value is, e.g. 60000 <br>
|
||||
* The value is, e.g. 10000 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not integer.
|
||||
*/
|
||||
|
@ -3010,6 +3036,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
return is(FessConfig.INDEXER_THREAD_DUMP_ENABLED);
|
||||
}
|
||||
|
||||
public String getIndexerUnprocessedDocumentSize() {
|
||||
return get(FessConfig.INDEXER_UNPROCESSED_DOCUMENT_SIZE);
|
||||
}
|
||||
|
||||
public Integer getIndexerUnprocessedDocumentSizeAsInteger() {
|
||||
return getAsInteger(FessConfig.INDEXER_UNPROCESSED_DOCUMENT_SIZE);
|
||||
}
|
||||
|
||||
public String getIndexerClickCountEnabled() {
|
||||
return get(FessConfig.INDEXER_CLICK_COUNT_ENABLED);
|
||||
}
|
||||
|
|
|
@ -25,7 +25,6 @@ import java.util.stream.Collectors;
|
|||
|
||||
import javax.naming.directory.Attribute;
|
||||
import javax.naming.directory.BasicAttribute;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
|
||||
import org.codelibs.core.exception.ClassNotFoundRuntimeException;
|
||||
import org.codelibs.core.lang.StringUtil;
|
||||
|
|
|
@ -113,9 +113,10 @@ crawler.document.cache.html.mimetypes=text/html
|
|||
|
||||
# indexer
|
||||
indexer.thread.dump.enabled=true
|
||||
indexer.unprocessed.document.size=1000
|
||||
indexer.click.count.enabled=true
|
||||
indexer.favorite.count.enabled=true
|
||||
indexer.webfs.commit.margin.time=10000
|
||||
indexer.webfs.commit.margin.time=1000
|
||||
indexer.webfs.max.empty.list.conunt=60
|
||||
indexer.webfs.update.interval=10000
|
||||
indexer.webfs.max.document.cache.size=5
|
||||
|
|
Loading…
Add table
Reference in a new issue