modify data store crawling

This commit is contained in:
Shinsuke Sugaya 2015-12-30 08:44:07 +09:00
parent 0d3a299e8a
commit 5adaf030a3
14 changed files with 339 additions and 105 deletions

View file

@ -17,16 +17,10 @@ package org.codelibs.fess.ds;
import java.util.Map;
import org.codelibs.fess.es.client.FessEsClient;
public interface IndexUpdateCallback {
boolean store(Map<String, Object> dataMap);
void setEsClient(FessEsClient fessEsClient);
FessEsClient getsClient();
long getDocumentSize();
long getExecuteTime();

View file

@ -121,7 +121,7 @@ public abstract class AbstractDataStoreImpl implements DataStore {
}
return value;
} catch (final Exception e) {
logger.warn("Invalid value format: " + template, e);
logger.warn("Invalid value format: " + template + " => " + paramMap, e);
return null;
}
}

View file

@ -192,12 +192,16 @@ public class CsvDataStoreImpl extends AbstractDataStoreImpl {
resultMap.putAll(paramMap);
resultMap.put("csvfile", csvFile.getAbsolutePath());
resultMap.put("csvfilename", csvFile.getName());
boolean foundValues = false;
for (int i = 0; i < list.size(); i++) {
String key = null;
String value = list.get(i);
if (value == null) {
value = StringUtil.EMPTY;
}
if (StringUtil.isNotBlank(value)) {
foundValues = true;
}
if (headerList != null && headerList.size() > i) {
key = headerList.get(i);
if (StringUtil.isNotBlank(key)) {
@ -207,6 +211,10 @@ public class CsvDataStoreImpl extends AbstractDataStoreImpl {
key = CELL_PREFIX + Integer.toString(i + 1);
resultMap.put(key, value);
}
if (!foundValues) {
logger.debug("No data in line: {}", resultMap);
continue;
}
if (logger.isDebugEnabled()) {
for (final Map.Entry<String, String> entry : resultMap.entrySet()) {
@ -230,6 +238,8 @@ public class CsvDataStoreImpl extends AbstractDataStoreImpl {
try {
loop = callback.store(dataMap);
} catch (final CrawlingAccessException e) {
logger.warn("Crawling Access Exception at : " + dataMap, e);
Throwable target = e;
if (target instanceof MultipleCrawlingAccessException) {
final Throwable[] causes = ((MultipleCrawlingAccessException) target).getCauses();
@ -251,12 +261,9 @@ public class CsvDataStoreImpl extends AbstractDataStoreImpl {
url = ((DataStoreCrawlingException) target).getUrl();
} else {
url = csvFile.getAbsolutePath() + ":" + csvReader.getLineNumber();
}
final FailureUrlService failureUrlService = SingletonLaContainer.getComponent(FailureUrlService.class);
failureUrlService.store(dataConfig, errorName, url, target);
logger.warn("Crawling Access Exception at : " + dataMap, e);
} catch (final Exception e) {
final String url = csvFile.getAbsolutePath() + ":" + csvReader.getLineNumber();
final FailureUrlService failureUrlService = SingletonLaContainer.getComponent(FailureUrlService.class);

View file

@ -239,9 +239,10 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl {
deleteIdList.add(crawlingInfoHelper.generateId(dataMap));
if (deleteIdList.size() >= maxDeleteDocumentCacheSize) {
final FessEsClient fessEsClient = ComponentUtil.getElasticsearchClient();
final IndexingHelper indexingHelper = ComponentUtil.getIndexingHelper();
for (final String id : deleteIdList) {
indexingHelper.deleteDocument(indexUpdateCallback.getsClient(), id);
indexingHelper.deleteDocument(fessEsClient, id);
}
if (logger.isDebugEnabled()) {
logger.debug("Deleted " + deleteIdList);
@ -256,9 +257,10 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl {
@Override
public void commit() {
if (!deleteIdList.isEmpty()) {
final FessEsClient fessEsClient = ComponentUtil.getElasticsearchClient();
final IndexingHelper indexingHelper = ComponentUtil.getIndexingHelper();
for (final String id : deleteIdList) {
indexingHelper.deleteDocument(indexUpdateCallback.getsClient(), id);
indexingHelper.deleteDocument(fessEsClient, id);
}
if (logger.isDebugEnabled()) {
logger.debug("Deleted " + deleteIdList);
@ -267,11 +269,6 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl {
indexUpdateCallback.commit();
}
@Override
public void setEsClient(final FessEsClient fessEsClient) {
indexUpdateCallback.setEsClient(fessEsClient);
}
@Override
public long getDocumentSize() {
return indexUpdateCallback.getDocumentSize();
@ -282,9 +279,5 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl {
return indexUpdateCallback.getExecuteTime();
}
@Override
public FessEsClient getsClient() {
return indexUpdateCallback.getsClient();
}
}
}

View file

@ -35,14 +35,6 @@ import org.slf4j.LoggerFactory;
public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
private static final Logger logger = LoggerFactory.getLogger(IndexUpdateCallbackImpl.class);
protected FessEsClient fessEsClient;
public int maxDocumentCacheSize = 5;
public boolean clickCountEnabled = true;
public boolean favoriteCountEnabled = true;
protected volatile AtomicLong documentSize = new AtomicLong(0);
protected volatile long executeTime = 0;
@ -56,6 +48,7 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
public synchronized boolean store(final Map<String, Object> dataMap) {
final long startTime = System.currentTimeMillis();
final FessConfig fessConfig = ComponentUtil.getFessConfig();
final FessEsClient fessEsClient = ComponentUtil.getElasticsearchClient();
if (logger.isDebugEnabled()) {
logger.debug("Adding " + dataMap);
@ -73,11 +66,11 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
final String url = dataMap.get(fessConfig.getIndexFieldUrl()).toString();
if (clickCountEnabled) {
if (fessConfig.getIndexerClickCountEnabledAsBoolean()) {
addClickCountField(dataMap, url, fessConfig.getIndexFieldClickCount());
}
if (favoriteCountEnabled) {
if (fessConfig.getIndexerFavoriteCountEnabledAsBoolean()) {
addFavoriteCountField(dataMap, url, fessConfig.getIndexFieldFavoriteCount());
}
@ -91,14 +84,11 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
logger.debug("Added the document. " + "The number of a document cache is " + docList.size() + ".");
}
if (docList.size() >= maxDocumentCacheSize) {
if (docList.size() >= fessConfig.getIndexerDataMaxDocumentCacheSizeAsInteger().intValue()) {
indexingHelper.sendDocuments(fessEsClient, docList);
}
documentSize.getAndIncrement();
if (!docList.isEmpty()) {
indexingHelper.sendDocuments(fessEsClient, docList);
}
if (logger.isDebugEnabled()) {
logger.debug("The number of an added document is " + documentSize.get() + ".");
}
@ -111,6 +101,7 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
public void commit() {
    // Nothing buffered: skip the component lookups and the send entirely.
    if (docList.isEmpty()) {
        return;
    }
    // Hand whatever is still buffered in docList to the indexing helper,
    // using the Elasticsearch client resolved through ComponentUtil.
    final IndexingHelper helper = ComponentUtil.getIndexingHelper();
    final FessEsClient esClient = ComponentUtil.getElasticsearchClient();
    helper.sendDocuments(esClient, docList);
}
@ -143,14 +134,4 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
return executeTime;
}
@Override
public FessEsClient getsClient() {
return fessEsClient;
}
@Override
public void setEsClient(final FessEsClient fessEsClient) {
this.fessEsClient = fessEsClient;
}
}

View file

@ -30,7 +30,6 @@ import java.util.Map;
import javax.annotation.Resource;
import org.codelibs.core.CoreLibConstants;
import org.codelibs.core.beans.util.BeanUtil;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.core.misc.DynamicProperties;
import org.codelibs.fess.Constants;
@ -276,14 +275,11 @@ public class Crawler implements Serializable {
final String toStrs = (String) crawlerProperties.get(Constants.NOTIFICATION_TO_PROPERTY);
if (StringUtil.isNotBlank(toStrs)) {
final String[] toAddresses = toStrs.split(",");
final Map<String, Object> dataMap = new HashMap<String, Object>();
final Map<String, String> dataMap = new HashMap<>();
for (final Map.Entry<String, String> entry : infoMap.entrySet()) {
dataMap.put(StringUtil.decapitalize(entry.getKey()), entry.getValue());
}
if (Constants.T.equals(infoMap.get(Constants.CRAWLER_STATUS))) {
dataMap.put("success", true);
}
try {
dataMap.put("hostname", InetAddress.getLocalHost().getHostAddress());
} catch (final UnknownHostException e) {
@ -298,11 +294,40 @@ public class Crawler implements Serializable {
StreamUtil.of(toAddresses).forEach(address -> {
postcard.addTo(address);
});
BeanUtil.copyMapToBean(dataMap, postcard);
postcard.setCommitEndTime(getValueOrEmpty(dataMap, "commitEndTime"));
postcard.setCommitExecTime(getValueOrEmpty(dataMap, "commitExecTime"));
postcard.setCommitStartTime(getValueOrEmpty(dataMap, "commitStartTime"));
postcard.setCrawlerEndTime(getValueOrEmpty(dataMap, "crawlerEndTime"));
postcard.setCrawlerExecTime(getValueOrEmpty(dataMap, "crawlerExecTime"));
postcard.setCrawlerStartTime(getValueOrEmpty(dataMap, "crawlerStartTime"));
postcard.setDataCrawlEndTime(getValueOrEmpty(dataMap, "dataCrawlEndTime"));
postcard.setDataCrawlExecTime(getValueOrEmpty(dataMap, "dataCrawlExecTime"));
postcard.setDataCrawlStartTime(getValueOrEmpty(dataMap, "dataCrawlStartTime"));
postcard.setDataFsIndexSize(getValueOrEmpty(dataMap, "dataFsIndexSize"));
postcard.setDataIndexExecTime(getValueOrEmpty(dataMap, "dataIndexExecTime"));
postcard.setHostname(getValueOrEmpty(dataMap, "hostname"));
postcard.setWebFsCrawlEndTime(getValueOrEmpty(dataMap, "webFsCrawlEndTime"));
postcard.setWebFsCrawlExecTime(getValueOrEmpty(dataMap, "webFsCrawlExecTime"));
postcard.setWebFsCrawlStartTime(getValueOrEmpty(dataMap, "webFsCrawlStartTime"));
postcard.setWebFsIndexExecTime(getValueOrEmpty(dataMap, "webFsIndexExecTime"));
postcard.setWebFsIndexSize(getValueOrEmpty(dataMap, "webFsIndexSize"));
if (Constants.T.equals(infoMap.get(Constants.CRAWLER_STATUS))) {
postcard.setStatus(Constants.OK);
} else {
postcard.setStatus(Constants.FAIL);
}
});
}
}
/**
 * Looks up {@code key} in {@code dataMap} and substitutes the empty string
 * when the lookup yields {@code null} (key absent or mapped to null).
 *
 * @param dataMap the map to read from
 * @param key the key to look up
 * @return the mapped value, or {@code StringUtil.EMPTY} when it is null
 */
private String getValueOrEmpty(Map<String, String> dataMap, String key) {
    final String found = dataMap.get(key);
    return found != null ? found : StringUtil.EMPTY;
}
public int doCrawl(final Options options) {
if (logger.isInfoEnabled()) {
logger.info("Starting Crawler..");

View file

@ -28,6 +28,7 @@ import org.codelibs.core.lang.StringUtil;
import org.codelibs.core.misc.DynamicProperties;
import org.codelibs.fess.Constants;
import org.codelibs.fess.app.service.DataConfigService;
import org.codelibs.fess.app.service.FailureUrlService;
import org.codelibs.fess.ds.DataStore;
import org.codelibs.fess.ds.DataStoreFactory;
import org.codelibs.fess.ds.IndexUpdateCallback;
@ -235,6 +236,8 @@ public class DataIndexHelper implements Serializable {
dataStore.store(dataConfig, indexUpdateCallback, initParamMap);
} catch (final Exception e) {
logger.error("Failed to process a data crawling: " + dataConfig.getName(), e);
ComponentUtil.getComponent(FailureUrlService.class).store(dataConfig, e.getClass().getCanonicalName(),
dataConfig.getConfigId() + ":" + dataConfig.getName(), e);
} finally {
indexUpdateCallback.commit();
deleteOldDocs();

View file

@ -83,14 +83,8 @@ public class IndexUpdater extends Thread {
@Resource
protected IndexingHelper indexingHelper;
public int maxDocumentCacheSize = 5;
public int maxInvalidDocumentSize = 100;
protected boolean finishCrawling = false;
public long updateInterval = 60000; // 1 min
protected long executeTime;
protected long documentSize;
@ -103,16 +97,6 @@ public class IndexUpdater extends Thread {
protected List<String> finishedSessionIdList = new ArrayList<>();
public long commitMarginTime = 10000; // 10 sec (milliseconds; compared against System.currentTimeMillis())
public int maxEmptyListCount = 60; // 1hour
public boolean threadDump = false;
public boolean clickCountEnabled = true;
public boolean favoriteCountEnabled = true;
private final List<DocBoostMatcher> docBoostMatcherList = new ArrayList<>();
private final Map<String, Object> docValueMap = new HashMap<>();
@ -160,6 +144,9 @@ public class IndexUpdater extends Thread {
executeTime = 0;
documentSize = 0;
final FessConfig fessConfig = ComponentUtil.getFessConfig();
final long updateInterval = fessConfig.getIndexerWebfsUpdateIntervalAsInteger().longValue();
final int maxEmptyListCount = fessConfig.getIndexerWebfsMaxEmptyListConuntAsInteger().intValue();
final IntervalControlHelper intervalControlHelper = ComponentUtil.getIntervalControlHelper();
try {
final Consumer<SearchRequestBuilder> cb =
@ -172,10 +159,8 @@ public class IndexUpdater extends Thread {
org.codelibs.fess.crawler.Constants.OK_STATUS));
builder.setQuery(queryBuilder);
builder.setFrom(0);
if (maxDocumentCacheSize <= 0) {
maxDocumentCacheSize = 1;
}
builder.setSize(maxDocumentCacheSize);
final int maxDocumentCacheSize = fessConfig.getIndexerWebfsMaxDocumentCacheSizeAsInteger().intValue();
builder.setSize(maxDocumentCacheSize <= 0 ? 1 : maxDocumentCacheSize);
builder.addSort(EsAccessResult.CREATE_TIME, SortOrder.ASC);
};
@ -269,7 +254,7 @@ public class IndexUpdater extends Thread {
// terminate crawling
finishCrawling = true;
forceStop();
if (threadDump) {
if (fessConfig.getIndexerThreadDumpEnabledAsBoolean()) {
printThreadDump();
}
@ -304,6 +289,8 @@ public class IndexUpdater extends Thread {
private void processAccessResults(final List<Map<String, Object>> docList, final List<EsAccessResult> accessResultList,
final List<EsAccessResult> arList) {
final FessConfig fessConfig = ComponentUtil.getFessConfig();
final int maxDocumentCacheSize = fessConfig.getIndexerWebfsMaxDocumentCacheSizeAsInteger().intValue();
for (final EsAccessResult accessResult : arList) {
if (logger.isDebugEnabled()) {
logger.debug("Indexing " + accessResult.getUrl());
@ -319,9 +306,9 @@ public class IndexUpdater extends Thread {
continue;
}
final AccessResultData accessResultData = accessResult.getAccessResultData();
final AccessResultData<?> accessResultData = accessResult.getAccessResultData();
if (accessResultData != null) {
accessResult.setAccessResultData((AccessResultData) null);
accessResult.setAccessResultData(null);
try {
final Transformer transformer = SingletonLaContainer.getComponent(accessResultData.getTransformerName());
if (transformer == null) {
@ -373,11 +360,13 @@ public class IndexUpdater extends Thread {
}
protected void updateDocument(final Map<String, Object> map) {
if (clickCountEnabled) {
final FessConfig fessConfig = ComponentUtil.getFessConfig();
if (fessConfig.getIndexerClickCountEnabledAsBoolean()) {
addClickCountField(map);
}
if (favoriteCountEnabled) {
if (fessConfig.getIndexerFavoriteCountEnabledAsBoolean()) {
addFavoriteCountField(map);
}
@ -402,7 +391,6 @@ public class IndexUpdater extends Thread {
addBoostValue(map, documentBoost);
}
final FessConfig fessConfig = ComponentUtil.getFessConfig();
if (!map.containsKey(fessConfig.getIndexFieldDocId())) {
map.put(fessConfig.getIndexFieldDocId(), systemHelper.generateDocId(map));
}
@ -460,7 +448,9 @@ public class IndexUpdater extends Thread {
final long execTime = System.currentTimeMillis();
final List<EsAccessResult> arList = ((EsDataService) dataService).getAccessResultList(cb);
if (!arList.isEmpty()) {
for (final AccessResult ar : arList.toArray(new AccessResult[arList.size()])) {
final FessConfig fessConfig = ComponentUtil.getFessConfig();
final long commitMarginTime = fessConfig.getIndexerWebfsCommitMarginTimeAsInteger().longValue();
for (final AccessResult<?> ar : arList.toArray(new AccessResult[arList.size()])) {
if (ar.getCreateTime().longValue() > execTime - commitMarginTime) {
arList.remove(ar);
}

View file

@ -135,6 +135,30 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
/** The key of the configuration. e.g. text/html */
String CRAWLER_DOCUMENT_CACHE_HTML_MIMETYPES = "crawler.document.cache.html.mimetypes";
/** The key of the configuration. e.g. true */
String INDEXER_THREAD_DUMP_ENABLED = "indexer.thread.dump.enabled";
/** The key of the configuration. e.g. true */
String INDEXER_CLICK_COUNT_ENABLED = "indexer.click.count.enabled";
/** The key of the configuration. e.g. true */
String INDEXER_FAVORITE_COUNT_ENABLED = "indexer.favorite.count.enabled";
/** The key of the configuration. e.g. 10000 */
String INDEXER_WEBFS_COMMIT_MARGIN_TIME = "indexer.webfs.commit.margin.time";
/** The key of the configuration. e.g. 60 */
String INDEXER_WEBFS_MAX_EMPTY_LIST_CONUNT = "indexer.webfs.max.empty.list.conunt";
/** The key of the configuration. e.g. 60000 */
String INDEXER_WEBFS_UPDATE_INTERVAL = "indexer.webfs.update.interval";
/** The key of the configuration. e.g. 5 */
String INDEXER_WEBFS_MAX_DOCUMENT_CACHE_SIZE = "indexer.webfs.max.document.cache.size";
/** The key of the configuration. e.g. 5 */
String INDEXER_DATA_MAX_DOCUMENT_CACHE_SIZE = "indexer.data.max.document.cache.size";
/** The key of the configuration. e.g. favorite_count */
String INDEX_FIELD_favorite_count = "index.field.favorite_count";
@ -810,6 +834,125 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
*/
String getCrawlerDocumentCacheHtmlMimetypes();
/**
* Get the value for the key 'indexer.thread.dump.enabled'. <br>
* The value is, e.g. true <br>
* comment: indexer
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getIndexerThreadDumpEnabled();
/**
* Is the property for the key 'indexer.thread.dump.enabled' true? <br>
* The value is, e.g. true <br>
* comment: indexer
* @return The determination, true or false. (if not found, exception but basically no way)
*/
boolean isIndexerThreadDumpEnabled();
/**
* Get the value for the key 'indexer.click.count.enabled'. <br>
* The value is, e.g. true <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getIndexerClickCountEnabled();
/**
* Is the property for the key 'indexer.click.count.enabled' true? <br>
* The value is, e.g. true <br>
* @return The determination, true or false. (if not found, exception but basically no way)
*/
boolean isIndexerClickCountEnabled();
/**
* Get the value for the key 'indexer.favorite.count.enabled'. <br>
* The value is, e.g. true <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getIndexerFavoriteCountEnabled();
/**
* Is the property for the key 'indexer.favorite.count.enabled' true? <br>
* The value is, e.g. true <br>
* @return The determination, true or false. (if not found, exception but basically no way)
*/
boolean isIndexerFavoriteCountEnabled();
/**
* Get the value for the key 'indexer.webfs.commit.margin.time'. <br>
* The value is, e.g. 10000 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getIndexerWebfsCommitMarginTime();
/**
* Get the value for the key 'indexer.webfs.commit.margin.time' as {@link Integer}. <br>
* The value is, e.g. 10000 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
* @throws NumberFormatException When the property is not integer.
*/
Integer getIndexerWebfsCommitMarginTimeAsInteger();
/**
* Get the value for the key 'indexer.webfs.max.empty.list.conunt'. <br>
* The value is, e.g. 60 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getIndexerWebfsMaxEmptyListConunt();
/**
* Get the value for the key 'indexer.webfs.max.empty.list.conunt' as {@link Integer}. <br>
* The value is, e.g. 60 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
* @throws NumberFormatException When the property is not integer.
*/
Integer getIndexerWebfsMaxEmptyListConuntAsInteger();
/**
* Get the value for the key 'indexer.webfs.update.interval'. <br>
* The value is, e.g. 60000 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getIndexerWebfsUpdateInterval();
/**
* Get the value for the key 'indexer.webfs.update.interval' as {@link Integer}. <br>
* The value is, e.g. 60000 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
* @throws NumberFormatException When the property is not integer.
*/
Integer getIndexerWebfsUpdateIntervalAsInteger();
/**
* Get the value for the key 'indexer.webfs.max.document.cache.size'. <br>
* The value is, e.g. 5 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getIndexerWebfsMaxDocumentCacheSize();
/**
* Get the value for the key 'indexer.webfs.max.document.cache.size' as {@link Integer}. <br>
* The value is, e.g. 5 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
* @throws NumberFormatException When the property is not integer.
*/
Integer getIndexerWebfsMaxDocumentCacheSizeAsInteger();
/**
* Get the value for the key 'indexer.data.max.document.cache.size'. <br>
* The value is, e.g. 5 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getIndexerDataMaxDocumentCacheSize();
/**
* Get the value for the key 'indexer.data.max.document.cache.size' as {@link Integer}. <br>
* The value is, e.g. 5 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
* @throws NumberFormatException When the property is not integer.
*/
Integer getIndexerDataMaxDocumentCacheSizeAsInteger();
/**
* Get the value for the key 'index.field.favorite_count'. <br>
* The value is, e.g. favorite_count <br>
@ -1998,6 +2141,70 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
return get(FessConfig.CRAWLER_DOCUMENT_CACHE_HTML_MIMETYPES);
}
// ----- indexer.* accessors -----
// Each accessor below delegates to get(...), is(...) or getAsInteger(...)
// with the matching FessConfig key constant.

// 'indexer.thread.dump.enabled': raw string, then boolean view.
public String getIndexerThreadDumpEnabled() {
return get(FessConfig.INDEXER_THREAD_DUMP_ENABLED);
}
public boolean isIndexerThreadDumpEnabled() {
return is(FessConfig.INDEXER_THREAD_DUMP_ENABLED);
}
// 'indexer.click.count.enabled': raw string, then boolean view.
public String getIndexerClickCountEnabled() {
return get(FessConfig.INDEXER_CLICK_COUNT_ENABLED);
}
public boolean isIndexerClickCountEnabled() {
return is(FessConfig.INDEXER_CLICK_COUNT_ENABLED);
}
// 'indexer.favorite.count.enabled': raw string, then boolean view.
public String getIndexerFavoriteCountEnabled() {
return get(FessConfig.INDEXER_FAVORITE_COUNT_ENABLED);
}
public boolean isIndexerFavoriteCountEnabled() {
return is(FessConfig.INDEXER_FAVORITE_COUNT_ENABLED);
}
// 'indexer.webfs.commit.margin.time': raw string, then Integer view.
public String getIndexerWebfsCommitMarginTime() {
return get(FessConfig.INDEXER_WEBFS_COMMIT_MARGIN_TIME);
}
public Integer getIndexerWebfsCommitMarginTimeAsInteger() {
return getAsInteger(FessConfig.INDEXER_WEBFS_COMMIT_MARGIN_TIME);
}
// 'indexer.webfs.max.empty.list.conunt': raw string, then Integer view.
// NOTE(review): "conunt" looks like a misspelling of "count"; the key, the
// constant and these method names all share it, so renaming must be done together.
public String getIndexerWebfsMaxEmptyListConunt() {
return get(FessConfig.INDEXER_WEBFS_MAX_EMPTY_LIST_CONUNT);
}
public Integer getIndexerWebfsMaxEmptyListConuntAsInteger() {
return getAsInteger(FessConfig.INDEXER_WEBFS_MAX_EMPTY_LIST_CONUNT);
}
// 'indexer.webfs.update.interval': raw string, then Integer view.
public String getIndexerWebfsUpdateInterval() {
return get(FessConfig.INDEXER_WEBFS_UPDATE_INTERVAL);
}
public Integer getIndexerWebfsUpdateIntervalAsInteger() {
return getAsInteger(FessConfig.INDEXER_WEBFS_UPDATE_INTERVAL);
}
// 'indexer.webfs.max.document.cache.size': raw string, then Integer view.
public String getIndexerWebfsMaxDocumentCacheSize() {
return get(FessConfig.INDEXER_WEBFS_MAX_DOCUMENT_CACHE_SIZE);
}
public Integer getIndexerWebfsMaxDocumentCacheSizeAsInteger() {
return getAsInteger(FessConfig.INDEXER_WEBFS_MAX_DOCUMENT_CACHE_SIZE);
}
// 'indexer.data.max.document.cache.size': raw string, then Integer view.
public String getIndexerDataMaxDocumentCacheSize() {
return get(FessConfig.INDEXER_DATA_MAX_DOCUMENT_CACHE_SIZE);
}
public Integer getIndexerDataMaxDocumentCacheSizeAsInteger() {
return getAsInteger(FessConfig.INDEXER_DATA_MAX_DOCUMENT_CACHE_SIZE);
}
public String getIndexFieldFavoriteCount() {
return get(FessConfig.INDEX_FIELD_favorite_count);
}

View file

@ -106,4 +106,22 @@ public interface FessProp {
return StreamUtil.of(mimetypes).anyMatch(s -> s.equalsIgnoreCase(mimetype));
}
String getIndexerClickCountEnabled();
/**
 * Boolean view of {@link #getIndexerClickCountEnabled()}.
 *
 * @return {@code true} only when the configured string equals
 *         {@code Constants.TRUE}, ignoring case
 */
public default boolean getIndexerClickCountEnabledAsBoolean() {
    final String configured = getIndexerClickCountEnabled();
    return Constants.TRUE.equalsIgnoreCase(configured);
}
String getIndexerFavoriteCountEnabled();
/**
 * Boolean view of {@link #getIndexerFavoriteCountEnabled()}.
 *
 * @return {@code true} only when the configured string equals
 *         {@code Constants.TRUE}, ignoring case
 */
public default boolean getIndexerFavoriteCountEnabledAsBoolean() {
    final String configured = getIndexerFavoriteCountEnabled();
    return Constants.TRUE.equalsIgnoreCase(configured);
}
String getIndexerThreadDumpEnabled();
/**
 * Boolean view of {@link #getIndexerThreadDumpEnabled()}.
 *
 * @return {@code true} only when the configured string equals
 *         {@code Constants.TRUE}, ignoring case
 */
public default boolean getIndexerThreadDumpEnabledAsBoolean() {
    final String configured = getIndexerThreadDumpEnabled();
    return Constants.TRUE.equalsIgnoreCase(configured);
}
}

View file

@ -52,7 +52,7 @@ public class CrawlerPostcard extends LaTypicalPostcard {
protected String[] getPropertyNames() {
return new String[] { "hostname", "webFsCrawlStartTime", "webFsCrawlEndTime", "webFsCrawlExecTime", "webFsIndexExecTime",
"webFsIndexSize", "dataCrawlStartTime", "dataCrawlEndTime", "dataCrawlExecTime", "dataIndexExecTime", "dataFsIndexSize",
"commitStartTime", "commitEndTime", "commitExecTime", "crawlerStartTime", "crawlerEndTime", "crawlerExecTime" };
"commitStartTime", "commitEndTime", "commitExecTime", "crawlerStartTime", "crawlerEndTime", "crawlerExecTime", "status" };
}
// ===================================================================================
@ -252,4 +252,13 @@ public class CrawlerPostcard extends LaTypicalPostcard {
public void setCrawlerExecTime(String crawlerExecTime) {
registerVariable("crawlerExecTime", crawlerExecTime);
}
/**
* Set the value of status, used in parameter comment. <br>
* Even if empty string, treated as empty plainly. So "IF pmb != null" is false if empty.
* @param status The parameter value of status. (NotNull)
*/
public void setStatus(String status) {
registerVariable("status", status);
}
}

View file

@ -82,6 +82,16 @@ crawler.document.cache.supported.mimetypes=text/html
#,text/plain,application/xml,application/pdf,application/msword,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/vnd.ms-excel,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/vnd.ms-powerpoint,application/vnd.openxmlformats-officedocument.presentationml.presentation
crawler.document.cache.html.mimetypes=text/html
# indexer
indexer.thread.dump.enabled=true
indexer.click.count.enabled=true
indexer.favorite.count.enabled=true
indexer.webfs.commit.margin.time=10000
indexer.webfs.max.empty.list.conunt=60
indexer.webfs.update.interval=60000
indexer.webfs.max.document.cache.size=5
indexer.data.max.document.cache.size=5
# field names
index.field.favorite_count=favorite_count
index.field.click_count=click_count

View file

@ -28,9 +28,6 @@
</component>
<component name="indexUpdateCallback" class="org.codelibs.fess.ds.impl.IndexUpdateCallbackImpl" instance="prototype">
<!--
<property name="maxDocumentCacheSize">10</property>
-->
</component>
</components>

View file

@ -7,32 +7,32 @@ subject: [FESS] Crawler completed: /*pmb.hostname*/
--- Server Info ---
Host Name: /*IF pmb.hostname != null*//*pmb.hostname*//*END*//*IF pmb.hostname == null*/Unknown/*END*/
/*IF pmb.webFsIndexSize != null*/
--- Web/FileSystem Crawler ---
Start Time: /*pmb.webFsCrawlStartTime*/
End Time: /*pmb.webFsCrawlEndTime*/
Exec Time: /*pmb.webFsCrawlExecTime*/ms
--- Web/FileSystem Crawler ---/*IF pmb.webFsCrawlStartTime != null*/
Start Time: /*pmb.webFsCrawlStartTime*//*END*//*IF pmb.webFsCrawlEndTime != null*/
End Time: /*pmb.webFsCrawlEndTime*//*END*//*IF pmb.webFsCrawlExecTime != null*/
Exec Time: /*pmb.webFsCrawlExecTime*/ms/*END*/
--- Web/FileSystem Indexer ---
Exec Time: /*pmb.webFsIndexExecTime*/
Num of Doc: /*pmb.webFsIndexSize*/ docs
--- Web/FileSystem Indexer ---/*IF pmb.webFsIndexExecTime != null*/
Exec Time: /*pmb.webFsIndexExecTime*//*END*//*IF pmb.webFsIndexSize != null*/
Num of Doc: /*pmb.webFsIndexSize*/ docs/*END*/
/*END*//*IF pmb.dataFsIndexSize != null*/
--- Data Store Crawler ---
Start Time: /*pmb.dataCrawlStartTime*/
End Time: /*pmb.dataCrawlEndTime*/
Exec Time: /*pmb.dataCrawlExecTime*/ms
--- Data Store Crawler ---/*IF pmb.dataCrawlStartTime != null*/
Start Time: /*pmb.dataCrawlStartTime*//*END*//*IF pmb.dataCrawlEndTime != null*/
End Time: /*pmb.dataCrawlEndTime*//*END*//*IF pmb.dataCrawlExecTime != null*/
Exec Time: /*pmb.dataCrawlExecTime*/ms/*END*/
--- Data Store Indexer ---
Exec Time: /*pmb.dataIndexExecTime*/
Num of Doc: /*pmb.dataFsIndexSize*/ docs
--- Data Store Indexer ---/*IF pmb.dataIndexExecTime != null*/
Exec Time: /*pmb.dataIndexExecTime*//*END*//*IF pmb.dataFsIndexSize != null*/
Num of Doc: /*pmb.dataFsIndexSize*/ docs/*END*/
/*END*//*IF pmb.commitExecTime != null*/
--- Indexer(Commit) ---
Start Time: /*pmb.commitStartTime*/
End Time: /*pmb.commitEndTime*/
Exec Time: /*pmb.commitExecTime*/ms
--- Indexer(Commit) ---/*IF pmb.commitStartTime != null*/
Start Time: /*pmb.commitStartTime*//*END*//*IF pmb.commitEndTime != null*/
End Time: /*pmb.commitEndTime*//*END*//*IF pmb.commitExecTime != null*/
Exec Time: /*pmb.commitExecTime*/ms/*END*/
/*END*/
--- Total ---
Start Time: /*pmb.crawlerStartTime*/
End Time: /*pmb.crawlerEndTime*/
Exec Time: /*pmb.crawlerExecTime*/ms
Status: /*IF pmb.success != null*/Success/*END*//*IF pmb.success == null*/Fail/*END*/
--- Total ---/*IF pmb.crawlerStartTime != null*/
Start Time: /*pmb.crawlerStartTime*//*END*//*IF pmb.crawlerEndTime != null*/
End Time: /*pmb.crawlerEndTime*//*END*//*IF pmb.crawlerExecTime != null*/
Exec Time: /*pmb.crawlerExecTime*/ms/*END*/
Status: /*pmb.status*/