fix #401 : add indexer.unprocessed.document.size

This commit is contained in:
Shinsuke Sugaya 2016-02-28 19:13:37 +09:00
parent 5bdd1c86cc
commit c6f827e15d
4 changed files with 50 additions and 24 deletions

View file

@ -93,8 +93,6 @@ public class IndexUpdater extends Thread {
protected int maxErrorCount = 2;
protected int unprocessedDocumentSize = 100;
protected List<String> finishedSessionIdList = new ArrayList<>();
private final List<DocBoostMatcher> docBoostMatcherList = new ArrayList<>();
@ -181,7 +179,7 @@ public class IndexUpdater extends Thread {
if (interval > 0) {
// sleep
try {
Thread.sleep(interval); // 1 min (default)
Thread.sleep(interval); // 10 sec (default)
} catch (final InterruptedException e) {
logger.warn("Interrupted index update.", e);
}
@ -206,15 +204,9 @@ public class IndexUpdater extends Thread {
}
while (!arList.isEmpty()) {
processAccessResults(docList, accessResultList, arList);
cleanupAccessResults(accessResultList);
if (logger.isDebugEnabled()) {
logger.debug("Getting documents in IndexUpdater queue.");
}
arList = getAccessResultList(cb);
}
if (!docList.isEmpty()) {
indexingHelper.sendDocuments(fessEsClient, docList);
}
@ -445,10 +437,13 @@ public class IndexUpdater extends Thread {
}
private List<EsAccessResult> getAccessResultList(final Consumer<SearchRequestBuilder> cb) {
if (logger.isDebugEnabled()) {
logger.debug("Getting documents in IndexUpdater queue.");
}
final long execTime = System.currentTimeMillis();
final List<EsAccessResult> arList = ((EsDataService) dataService).getAccessResultList(cb);
final FessConfig fessConfig = ComponentUtil.getFessConfig();
if (!arList.isEmpty()) {
final FessConfig fessConfig = ComponentUtil.getFessConfig();
final long commitMarginTime = fessConfig.getIndexerWebfsCommitMarginTimeAsInteger().longValue();
for (final AccessResult<?> ar : arList.toArray(new AccessResult[arList.size()])) {
if (ar.getCreateTime().longValue() > execTime - commitMarginTime) {
@ -460,6 +455,7 @@ public class IndexUpdater extends Thread {
if (logger.isInfoEnabled()) {
logger.info("Processing " + arList.size() + "/" + totalHits + " docs (" + (System.currentTimeMillis() - execTime) + "ms)");
}
final long unprocessedDocumentSize = fessConfig.getIndexerUnprocessedDocumentSizeAsInteger().longValue();
if (totalHits > unprocessedDocumentSize) {
if (logger.isInfoEnabled()) {
logger.info("Stopped all crawler threads. " + " You have " + totalHits + " (>" + unprocessedDocumentSize + ") "
@ -532,10 +528,6 @@ public class IndexUpdater extends Thread {
this.maxIndexerErrorCount = maxIndexerErrorCount;
}
/**
 * Stores the given value in the {@code unprocessedDocumentSize} field of this instance.
 *
 * @param unprocessedDocumentSize the value assigned to the field
 */
// NOTE(review): the enclosing hunk header (10 lines before, 6 after) suggests this setter is
// among the lines removed by the commit — confirm against the raw diff.
public void setUnprocessedDocumentSize(final int unprocessedDocumentSize) {
this.unprocessedDocumentSize = unprocessedDocumentSize;
}
/**
 * Appends the given matcher to {@code docBoostMatcherList}.
 *
 * @param rule the matcher to add to the list
 */
public void addDocBoostMatcher(final DocBoostMatcher rule) {
docBoostMatcherList.add(rule);
}

View file

@ -63,7 +63,11 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
-XX:CMSInitiatingOccupancyFraction=75
-XX:+UseParNewGC
-XX:+UseTLAB
-XX:+DisableExplicitGC */
-XX:+DisableExplicitGC
-Djcifs.smb.client.connTimeout=60000
-Djcifs.smb.client.soTimeout=35000
-Djcifs.smb.client.responseTimeout=30000
*/
String JVM_SUGGEST_OPTIONS = "jvm.suggest.options";
/** The key of the configuration. e.g. default_crawler */
@ -200,19 +204,22 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
/** The key of the configuration. e.g. true */
String INDEXER_THREAD_DUMP_ENABLED = "indexer.thread.dump.enabled";
/** The key of the configuration. e.g. 1000 */
String INDEXER_UNPROCESSED_DOCUMENT_SIZE = "indexer.unprocessed.document.size";
/** The key of the configuration. e.g. true */
String INDEXER_CLICK_COUNT_ENABLED = "indexer.click.count.enabled";
/** The key of the configuration. e.g. true */
String INDEXER_FAVORITE_COUNT_ENABLED = "indexer.favorite.count.enabled";
/** The key of the configuration. e.g. 10000 */
/** The key of the configuration. e.g. 1000 */
String INDEXER_WEBFS_COMMIT_MARGIN_TIME = "indexer.webfs.commit.margin.time";
/** The key of the configuration. e.g. 60 */
String INDEXER_WEBFS_MAX_EMPTY_LIST_CONUNT = "indexer.webfs.max.empty.list.conunt";
/** The key of the configuration. e.g. 60000 */
/** The key of the configuration. e.g. 10000 */
String INDEXER_WEBFS_UPDATE_INTERVAL = "indexer.webfs.update.interval";
/** The key of the configuration. e.g. 5 */
@ -777,7 +784,11 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
-XX:CMSInitiatingOccupancyFraction=75
-XX:+UseParNewGC
-XX:+UseTLAB
-XX:+DisableExplicitGC <br>
-XX:+DisableExplicitGC
-Djcifs.smb.client.connTimeout=60000
-Djcifs.smb.client.soTimeout=35000
-Djcifs.smb.client.responseTimeout=30000
<br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getJvmSuggestOptions();
@ -1231,6 +1242,21 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
*/
boolean isIndexerThreadDumpEnabled();
/**
* Get the value for the key 'indexer.unprocessed.document.size'. <br>
* The value is, e.g. 1000 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getIndexerUnprocessedDocumentSize();
/**
* Get the value for the key 'indexer.unprocessed.document.size' as {@link Integer}. <br>
* The value is, e.g. 1000 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
* @throws NumberFormatException When the property is not integer.
*/
Integer getIndexerUnprocessedDocumentSizeAsInteger();
/**
* Get the value for the key 'indexer.click.count.enabled'. <br>
* The value is, e.g. true <br>
@ -1261,14 +1287,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
/**
* Get the value for the key 'indexer.webfs.commit.margin.time'. <br>
* The value is, e.g. 10000 <br>
* The value is, e.g. 1000 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getIndexerWebfsCommitMarginTime();
/**
* Get the value for the key 'indexer.webfs.commit.margin.time' as {@link Integer}. <br>
* The value is, e.g. 10000 <br>
* The value is, e.g. 1000 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
* @throws NumberFormatException When the property is not integer.
*/
@ -1291,14 +1317,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
/**
* Get the value for the key 'indexer.webfs.update.interval'. <br>
* The value is, e.g. 60000 <br>
* The value is, e.g. 10000 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getIndexerWebfsUpdateInterval();
/**
* Get the value for the key 'indexer.webfs.update.interval' as {@link Integer}. <br>
* The value is, e.g. 60000 <br>
* The value is, e.g. 10000 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
* @throws NumberFormatException When the property is not integer.
*/
@ -3010,6 +3036,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
return is(FessConfig.INDEXER_THREAD_DUMP_ENABLED);
}
/**
 * Returns the result of {@code get} for the key held in
 * {@code FessConfig.INDEXER_UNPROCESSED_DOCUMENT_SIZE}
 * ({@code "indexer.unprocessed.document.size"}).
 *
 * @return the value {@code get} yields for that key
 */
public String getIndexerUnprocessedDocumentSize() {
return get(FessConfig.INDEXER_UNPROCESSED_DOCUMENT_SIZE);
}
/**
 * Returns the result of {@code getAsInteger} for the key held in
 * {@code FessConfig.INDEXER_UNPROCESSED_DOCUMENT_SIZE}
 * ({@code "indexer.unprocessed.document.size"}).
 *
 * @return the value {@code getAsInteger} yields for that key
 * @throws NumberFormatException per the interface declaration, when the property is not an integer
 */
public Integer getIndexerUnprocessedDocumentSizeAsInteger() {
return getAsInteger(FessConfig.INDEXER_UNPROCESSED_DOCUMENT_SIZE);
}
/**
 * Returns the result of {@code get} for the key held in
 * {@code FessConfig.INDEXER_CLICK_COUNT_ENABLED} ({@code "indexer.click.count.enabled"}).
 *
 * @return the value {@code get} yields for that key
 */
public String getIndexerClickCountEnabled() {
return get(FessConfig.INDEXER_CLICK_COUNT_ENABLED);
}

View file

@ -25,7 +25,6 @@ import java.util.stream.Collectors;
import javax.naming.directory.Attribute;
import javax.naming.directory.BasicAttribute;
import javax.servlet.http.HttpServletRequest;
import org.codelibs.core.exception.ClassNotFoundRuntimeException;
import org.codelibs.core.lang.StringUtil;

View file

@ -113,9 +113,10 @@ crawler.document.cache.html.mimetypes=text/html
# indexer
indexer.thread.dump.enabled=true
indexer.unprocessed.document.size=1000
indexer.click.count.enabled=true
indexer.favorite.count.enabled=true
indexer.webfs.commit.margin.time=10000
indexer.webfs.commit.margin.time=1000
indexer.webfs.max.empty.list.conunt=60
indexer.webfs.update.interval=10000
indexer.webfs.max.document.cache.size=5