modify data store crawling
This commit is contained in:
parent
0d3a299e8a
commit
5adaf030a3
14 changed files with 339 additions and 105 deletions
|
@ -17,16 +17,10 @@ package org.codelibs.fess.ds;
|
|||
|
||||
import java.util.Map;
|
||||
|
||||
import org.codelibs.fess.es.client.FessEsClient;
|
||||
|
||||
public interface IndexUpdateCallback {
|
||||
|
||||
boolean store(Map<String, Object> dataMap);
|
||||
|
||||
void setEsClient(FessEsClient fessEsClient);
|
||||
|
||||
FessEsClient getsClient();
|
||||
|
||||
long getDocumentSize();
|
||||
|
||||
long getExecuteTime();
|
||||
|
|
|
@ -121,7 +121,7 @@ public abstract class AbstractDataStoreImpl implements DataStore {
|
|||
}
|
||||
return value;
|
||||
} catch (final Exception e) {
|
||||
logger.warn("Invalid value format: " + template, e);
|
||||
logger.warn("Invalid value format: " + template + " => " + paramMap, e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -192,12 +192,16 @@ public class CsvDataStoreImpl extends AbstractDataStoreImpl {
|
|||
resultMap.putAll(paramMap);
|
||||
resultMap.put("csvfile", csvFile.getAbsolutePath());
|
||||
resultMap.put("csvfilename", csvFile.getName());
|
||||
boolean foundValues = false;
|
||||
for (int i = 0; i < list.size(); i++) {
|
||||
String key = null;
|
||||
String value = list.get(i);
|
||||
if (value == null) {
|
||||
value = StringUtil.EMPTY;
|
||||
}
|
||||
if (StringUtil.isNotBlank(value)) {
|
||||
foundValues = true;
|
||||
}
|
||||
if (headerList != null && headerList.size() > i) {
|
||||
key = headerList.get(i);
|
||||
if (StringUtil.isNotBlank(key)) {
|
||||
|
@ -207,6 +211,10 @@ public class CsvDataStoreImpl extends AbstractDataStoreImpl {
|
|||
key = CELL_PREFIX + Integer.toString(i + 1);
|
||||
resultMap.put(key, value);
|
||||
}
|
||||
if (!foundValues) {
|
||||
logger.debug("No data in line: {}", resultMap);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (logger.isDebugEnabled()) {
|
||||
for (final Map.Entry<String, String> entry : resultMap.entrySet()) {
|
||||
|
@ -230,6 +238,8 @@ public class CsvDataStoreImpl extends AbstractDataStoreImpl {
|
|||
try {
|
||||
loop = callback.store(dataMap);
|
||||
} catch (final CrawlingAccessException e) {
|
||||
logger.warn("Crawling Access Exception at : " + dataMap, e);
|
||||
|
||||
Throwable target = e;
|
||||
if (target instanceof MultipleCrawlingAccessException) {
|
||||
final Throwable[] causes = ((MultipleCrawlingAccessException) target).getCauses();
|
||||
|
@ -251,12 +261,9 @@ public class CsvDataStoreImpl extends AbstractDataStoreImpl {
|
|||
url = ((DataStoreCrawlingException) target).getUrl();
|
||||
} else {
|
||||
url = csvFile.getAbsolutePath() + ":" + csvReader.getLineNumber();
|
||||
|
||||
}
|
||||
final FailureUrlService failureUrlService = SingletonLaContainer.getComponent(FailureUrlService.class);
|
||||
failureUrlService.store(dataConfig, errorName, url, target);
|
||||
|
||||
logger.warn("Crawling Access Exception at : " + dataMap, e);
|
||||
} catch (final Exception e) {
|
||||
final String url = csvFile.getAbsolutePath() + ":" + csvReader.getLineNumber();
|
||||
final FailureUrlService failureUrlService = SingletonLaContainer.getComponent(FailureUrlService.class);
|
||||
|
|
|
@ -239,9 +239,10 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl {
|
|||
deleteIdList.add(crawlingInfoHelper.generateId(dataMap));
|
||||
|
||||
if (deleteIdList.size() >= maxDeleteDocumentCacheSize) {
|
||||
final FessEsClient fessEsClient = ComponentUtil.getElasticsearchClient();
|
||||
final IndexingHelper indexingHelper = ComponentUtil.getIndexingHelper();
|
||||
for (final String id : deleteIdList) {
|
||||
indexingHelper.deleteDocument(indexUpdateCallback.getsClient(), id);
|
||||
indexingHelper.deleteDocument(fessEsClient, id);
|
||||
}
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("Deleted " + deleteIdList);
|
||||
|
@ -256,9 +257,10 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl {
|
|||
@Override
|
||||
public void commit() {
|
||||
if (!deleteIdList.isEmpty()) {
|
||||
final FessEsClient fessEsClient = ComponentUtil.getElasticsearchClient();
|
||||
final IndexingHelper indexingHelper = ComponentUtil.getIndexingHelper();
|
||||
for (final String id : deleteIdList) {
|
||||
indexingHelper.deleteDocument(indexUpdateCallback.getsClient(), id);
|
||||
indexingHelper.deleteDocument(fessEsClient, id);
|
||||
}
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("Deleted " + deleteIdList);
|
||||
|
@ -267,11 +269,6 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl {
|
|||
indexUpdateCallback.commit();
|
||||
}
|
||||
|
||||
// Passes the given client straight through to indexUpdateCallback.setEsClient.
@Override
|
||||
public void setEsClient(final FessEsClient fessEsClient) {
|
||||
indexUpdateCallback.setEsClient(fessEsClient);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getDocumentSize() {
|
||||
return indexUpdateCallback.getDocumentSize();
|
||||
|
@ -282,9 +279,5 @@ public class FileListDataStoreImpl extends CsvDataStoreImpl {
|
|||
return indexUpdateCallback.getExecuteTime();
|
||||
}
|
||||
|
||||
// Returns whatever indexUpdateCallback.getsClient() reports; no state is kept here.
@Override
|
||||
public FessEsClient getsClient() {
|
||||
return indexUpdateCallback.getsClient();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -35,14 +35,6 @@ import org.slf4j.LoggerFactory;
|
|||
public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
|
||||
private static final Logger logger = LoggerFactory.getLogger(IndexUpdateCallbackImpl.class);
|
||||
|
||||
protected FessEsClient fessEsClient;
|
||||
|
||||
public int maxDocumentCacheSize = 5;
|
||||
|
||||
public boolean clickCountEnabled = true;
|
||||
|
||||
public boolean favoriteCountEnabled = true;
|
||||
|
||||
protected volatile AtomicLong documentSize = new AtomicLong(0);
|
||||
|
||||
protected volatile long executeTime = 0;
|
||||
|
@ -56,6 +48,7 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
|
|||
public synchronized boolean store(final Map<String, Object> dataMap) {
|
||||
final long startTime = System.currentTimeMillis();
|
||||
final FessConfig fessConfig = ComponentUtil.getFessConfig();
|
||||
final FessEsClient fessEsClient = ComponentUtil.getElasticsearchClient();
|
||||
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("Adding " + dataMap);
|
||||
|
@ -73,11 +66,11 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
|
|||
|
||||
final String url = dataMap.get(fessConfig.getIndexFieldUrl()).toString();
|
||||
|
||||
if (clickCountEnabled) {
|
||||
if (fessConfig.getIndexerClickCountEnabledAsBoolean()) {
|
||||
addClickCountField(dataMap, url, fessConfig.getIndexFieldClickCount());
|
||||
}
|
||||
|
||||
if (favoriteCountEnabled) {
|
||||
if (fessConfig.getIndexerFavoriteCountEnabledAsBoolean()) {
|
||||
addFavoriteCountField(dataMap, url, fessConfig.getIndexFieldFavoriteCount());
|
||||
}
|
||||
|
||||
|
@ -91,14 +84,11 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
|
|||
logger.debug("Added the document. " + "The number of a document cache is " + docList.size() + ".");
|
||||
}
|
||||
|
||||
if (docList.size() >= maxDocumentCacheSize) {
|
||||
if (docList.size() >= fessConfig.getIndexerDataMaxDocumentCacheSizeAsInteger().intValue()) {
|
||||
indexingHelper.sendDocuments(fessEsClient, docList);
|
||||
}
|
||||
documentSize.getAndIncrement();
|
||||
|
||||
if (!docList.isEmpty()) {
|
||||
indexingHelper.sendDocuments(fessEsClient, docList);
|
||||
}
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("The number of an added document is " + documentSize.get() + ".");
|
||||
}
|
||||
|
@ -111,6 +101,7 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
|
|||
public void commit() {
|
||||
if (!docList.isEmpty()) {
|
||||
final IndexingHelper indexingHelper = ComponentUtil.getIndexingHelper();
|
||||
final FessEsClient fessEsClient = ComponentUtil.getElasticsearchClient();
|
||||
indexingHelper.sendDocuments(fessEsClient, docList);
|
||||
}
|
||||
}
|
||||
|
@ -143,14 +134,4 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
|
|||
return executeTime;
|
||||
}
|
||||
|
||||
// Exposes the FessEsClient currently held in the fessEsClient field.
@Override
|
||||
public FessEsClient getsClient() {
|
||||
return fessEsClient;
|
||||
}
|
||||
|
||||
// Stores the supplied client in this instance's fessEsClient field.
@Override
|
||||
public void setEsClient(final FessEsClient fessEsClient) {
|
||||
this.fessEsClient = fessEsClient;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -30,7 +30,6 @@ import java.util.Map;
|
|||
import javax.annotation.Resource;
|
||||
|
||||
import org.codelibs.core.CoreLibConstants;
|
||||
import org.codelibs.core.beans.util.BeanUtil;
|
||||
import org.codelibs.core.lang.StringUtil;
|
||||
import org.codelibs.core.misc.DynamicProperties;
|
||||
import org.codelibs.fess.Constants;
|
||||
|
@ -276,14 +275,11 @@ public class Crawler implements Serializable {
|
|||
final String toStrs = (String) crawlerProperties.get(Constants.NOTIFICATION_TO_PROPERTY);
|
||||
if (StringUtil.isNotBlank(toStrs)) {
|
||||
final String[] toAddresses = toStrs.split(",");
|
||||
final Map<String, Object> dataMap = new HashMap<String, Object>();
|
||||
final Map<String, String> dataMap = new HashMap<>();
|
||||
for (final Map.Entry<String, String> entry : infoMap.entrySet()) {
|
||||
dataMap.put(StringUtil.decapitalize(entry.getKey()), entry.getValue());
|
||||
}
|
||||
|
||||
if (Constants.T.equals(infoMap.get(Constants.CRAWLER_STATUS))) {
|
||||
dataMap.put("success", true);
|
||||
}
|
||||
try {
|
||||
dataMap.put("hostname", InetAddress.getLocalHost().getHostAddress());
|
||||
} catch (final UnknownHostException e) {
|
||||
|
@ -298,11 +294,40 @@ public class Crawler implements Serializable {
|
|||
StreamUtil.of(toAddresses).forEach(address -> {
|
||||
postcard.addTo(address);
|
||||
});
|
||||
BeanUtil.copyMapToBean(dataMap, postcard);
|
||||
postcard.setCommitEndTime(getValueOrEmpty(dataMap, "commitEndTime"));
|
||||
postcard.setCommitExecTime(getValueOrEmpty(dataMap, "commitExecTime"));
|
||||
postcard.setCommitStartTime(getValueOrEmpty(dataMap, "commitStartTime"));
|
||||
postcard.setCrawlerEndTime(getValueOrEmpty(dataMap, "crawlerEndTime"));
|
||||
postcard.setCrawlerExecTime(getValueOrEmpty(dataMap, "crawlerExecTime"));
|
||||
postcard.setCrawlerStartTime(getValueOrEmpty(dataMap, "crawlerStartTime"));
|
||||
postcard.setDataCrawlEndTime(getValueOrEmpty(dataMap, "dataCrawlEndTime"));
|
||||
postcard.setDataCrawlExecTime(getValueOrEmpty(dataMap, "dataCrawlExecTime"));
|
||||
postcard.setDataCrawlStartTime(getValueOrEmpty(dataMap, "dataCrawlStartTime"));
|
||||
postcard.setDataFsIndexSize(getValueOrEmpty(dataMap, "dataFsIndexSize"));
|
||||
postcard.setDataIndexExecTime(getValueOrEmpty(dataMap, "dataIndexExecTime"));
|
||||
postcard.setHostname(getValueOrEmpty(dataMap, "hostname"));
|
||||
postcard.setWebFsCrawlEndTime(getValueOrEmpty(dataMap, "webFsCrawlEndTime"));
|
||||
postcard.setWebFsCrawlExecTime(getValueOrEmpty(dataMap, "webFsCrawlExecTime"));
|
||||
postcard.setWebFsCrawlStartTime(getValueOrEmpty(dataMap, "webFsCrawlStartTime"));
|
||||
postcard.setWebFsIndexExecTime(getValueOrEmpty(dataMap, "webFsIndexExecTime"));
|
||||
postcard.setWebFsIndexSize(getValueOrEmpty(dataMap, "webFsIndexSize"));
|
||||
if (Constants.T.equals(infoMap.get(Constants.CRAWLER_STATUS))) {
|
||||
postcard.setStatus(Constants.OK);
|
||||
} else {
|
||||
postcard.setStatus(Constants.FAIL);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
private String getValueOrEmpty(Map<String, String> dataMap, String key) {
|
||||
String value = dataMap.get(key);
|
||||
if (value == null) {
|
||||
return StringUtil.EMPTY;
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
public int doCrawl(final Options options) {
|
||||
if (logger.isInfoEnabled()) {
|
||||
logger.info("Starting Crawler..");
|
||||
|
|
|
@ -28,6 +28,7 @@ import org.codelibs.core.lang.StringUtil;
|
|||
import org.codelibs.core.misc.DynamicProperties;
|
||||
import org.codelibs.fess.Constants;
|
||||
import org.codelibs.fess.app.service.DataConfigService;
|
||||
import org.codelibs.fess.app.service.FailureUrlService;
|
||||
import org.codelibs.fess.ds.DataStore;
|
||||
import org.codelibs.fess.ds.DataStoreFactory;
|
||||
import org.codelibs.fess.ds.IndexUpdateCallback;
|
||||
|
@ -235,6 +236,8 @@ public class DataIndexHelper implements Serializable {
|
|||
dataStore.store(dataConfig, indexUpdateCallback, initParamMap);
|
||||
} catch (final Exception e) {
|
||||
logger.error("Failed to process a data crawling: " + dataConfig.getName(), e);
|
||||
ComponentUtil.getComponent(FailureUrlService.class).store(dataConfig, e.getClass().getCanonicalName(),
|
||||
dataConfig.getConfigId() + ":" + dataConfig.getName(), e);
|
||||
} finally {
|
||||
indexUpdateCallback.commit();
|
||||
deleteOldDocs();
|
||||
|
|
|
@ -83,14 +83,8 @@ public class IndexUpdater extends Thread {
|
|||
@Resource
|
||||
protected IndexingHelper indexingHelper;
|
||||
|
||||
public int maxDocumentCacheSize = 5;
|
||||
|
||||
public int maxInvalidDocumentSize = 100;
|
||||
|
||||
protected boolean finishCrawling = false;
|
||||
|
||||
public long updateInterval = 60000; // 1 min
|
||||
|
||||
protected long executeTime;
|
||||
|
||||
protected long documentSize;
|
||||
|
@ -103,16 +97,6 @@ public class IndexUpdater extends Thread {
|
|||
|
||||
protected List<String> finishedSessionIdList = new ArrayList<>();
|
||||
|
||||
public long commitMarginTime = 10000; // 10 sec (value is in milliseconds; it is subtracted from a System.currentTimeMillis() timestamp)
|
||||
|
||||
public int maxEmptyListCount = 60; // 1hour
|
||||
|
||||
public boolean threadDump = false;
|
||||
|
||||
public boolean clickCountEnabled = true;
|
||||
|
||||
public boolean favoriteCountEnabled = true;
|
||||
|
||||
private final List<DocBoostMatcher> docBoostMatcherList = new ArrayList<>();
|
||||
|
||||
private final Map<String, Object> docValueMap = new HashMap<>();
|
||||
|
@ -160,6 +144,9 @@ public class IndexUpdater extends Thread {
|
|||
executeTime = 0;
|
||||
documentSize = 0;
|
||||
|
||||
final FessConfig fessConfig = ComponentUtil.getFessConfig();
|
||||
final long updateInterval = fessConfig.getIndexerWebfsUpdateIntervalAsInteger().longValue();
|
||||
final int maxEmptyListCount = fessConfig.getIndexerWebfsMaxEmptyListConuntAsInteger().intValue();
|
||||
final IntervalControlHelper intervalControlHelper = ComponentUtil.getIntervalControlHelper();
|
||||
try {
|
||||
final Consumer<SearchRequestBuilder> cb =
|
||||
|
@ -172,10 +159,8 @@ public class IndexUpdater extends Thread {
|
|||
org.codelibs.fess.crawler.Constants.OK_STATUS));
|
||||
builder.setQuery(queryBuilder);
|
||||
builder.setFrom(0);
|
||||
if (maxDocumentCacheSize <= 0) {
|
||||
maxDocumentCacheSize = 1;
|
||||
}
|
||||
builder.setSize(maxDocumentCacheSize);
|
||||
final int maxDocumentCacheSize = fessConfig.getIndexerWebfsMaxDocumentCacheSizeAsInteger().intValue();
|
||||
builder.setSize(maxDocumentCacheSize <= 0 ? 1 : maxDocumentCacheSize);
|
||||
builder.addSort(EsAccessResult.CREATE_TIME, SortOrder.ASC);
|
||||
};
|
||||
|
||||
|
@ -269,7 +254,7 @@ public class IndexUpdater extends Thread {
|
|||
// terminate crawling
|
||||
finishCrawling = true;
|
||||
forceStop();
|
||||
if (threadDump) {
|
||||
if (fessConfig.getIndexerThreadDumpEnabledAsBoolean()) {
|
||||
printThreadDump();
|
||||
}
|
||||
|
||||
|
@ -304,6 +289,8 @@ public class IndexUpdater extends Thread {
|
|||
|
||||
private void processAccessResults(final List<Map<String, Object>> docList, final List<EsAccessResult> accessResultList,
|
||||
final List<EsAccessResult> arList) {
|
||||
final FessConfig fessConfig = ComponentUtil.getFessConfig();
|
||||
final int maxDocumentCacheSize = fessConfig.getIndexerWebfsMaxDocumentCacheSizeAsInteger().intValue();
|
||||
for (final EsAccessResult accessResult : arList) {
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("Indexing " + accessResult.getUrl());
|
||||
|
@ -319,9 +306,9 @@ public class IndexUpdater extends Thread {
|
|||
continue;
|
||||
}
|
||||
|
||||
final AccessResultData accessResultData = accessResult.getAccessResultData();
|
||||
final AccessResultData<?> accessResultData = accessResult.getAccessResultData();
|
||||
if (accessResultData != null) {
|
||||
accessResult.setAccessResultData((AccessResultData) null);
|
||||
accessResult.setAccessResultData(null);
|
||||
try {
|
||||
final Transformer transformer = SingletonLaContainer.getComponent(accessResultData.getTransformerName());
|
||||
if (transformer == null) {
|
||||
|
@ -373,11 +360,13 @@ public class IndexUpdater extends Thread {
|
|||
}
|
||||
|
||||
protected void updateDocument(final Map<String, Object> map) {
|
||||
if (clickCountEnabled) {
|
||||
final FessConfig fessConfig = ComponentUtil.getFessConfig();
|
||||
|
||||
if (fessConfig.getIndexerClickCountEnabledAsBoolean()) {
|
||||
addClickCountField(map);
|
||||
}
|
||||
|
||||
if (favoriteCountEnabled) {
|
||||
if (fessConfig.getIndexerFavoriteCountEnabledAsBoolean()) {
|
||||
addFavoriteCountField(map);
|
||||
}
|
||||
|
||||
|
@ -402,7 +391,6 @@ public class IndexUpdater extends Thread {
|
|||
addBoostValue(map, documentBoost);
|
||||
}
|
||||
|
||||
final FessConfig fessConfig = ComponentUtil.getFessConfig();
|
||||
if (!map.containsKey(fessConfig.getIndexFieldDocId())) {
|
||||
map.put(fessConfig.getIndexFieldDocId(), systemHelper.generateDocId(map));
|
||||
}
|
||||
|
@ -460,7 +448,9 @@ public class IndexUpdater extends Thread {
|
|||
final long execTime = System.currentTimeMillis();
|
||||
final List<EsAccessResult> arList = ((EsDataService) dataService).getAccessResultList(cb);
|
||||
if (!arList.isEmpty()) {
|
||||
for (final AccessResult ar : arList.toArray(new AccessResult[arList.size()])) {
|
||||
final FessConfig fessConfig = ComponentUtil.getFessConfig();
|
||||
final long commitMarginTime = fessConfig.getIndexerWebfsCommitMarginTimeAsInteger().longValue();
|
||||
for (final AccessResult<?> ar : arList.toArray(new AccessResult[arList.size()])) {
|
||||
if (ar.getCreateTime().longValue() > execTime - commitMarginTime) {
|
||||
arList.remove(ar);
|
||||
}
|
||||
|
|
|
@ -135,6 +135,30 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
/** The key of the configuration. e.g. text/html */
|
||||
String CRAWLER_DOCUMENT_CACHE_HTML_MIMETYPES = "crawler.document.cache.html.mimetypes";
|
||||
|
||||
/** The key of the configuration. e.g. true */
|
||||
String INDEXER_THREAD_DUMP_ENABLED = "indexer.thread.dump.enabled";
|
||||
|
||||
/** The key of the configuration. e.g. true */
|
||||
String INDEXER_CLICK_COUNT_ENABLED = "indexer.click.count.enabled";
|
||||
|
||||
/** The key of the configuration. e.g. true */
|
||||
String INDEXER_FAVORITE_COUNT_ENABLED = "indexer.favorite.count.enabled";
|
||||
|
||||
/** The key of the configuration. e.g. 10000 */
|
||||
String INDEXER_WEBFS_COMMIT_MARGIN_TIME = "indexer.webfs.commit.margin.time";
|
||||
|
||||
/** The key of the configuration. e.g. 60 */
|
||||
String INDEXER_WEBFS_MAX_EMPTY_LIST_CONUNT = "indexer.webfs.max.empty.list.conunt";
|
||||
|
||||
/** The key of the configuration. e.g. 60000 */
|
||||
String INDEXER_WEBFS_UPDATE_INTERVAL = "indexer.webfs.update.interval";
|
||||
|
||||
/** The key of the configuration. e.g. 5 */
|
||||
String INDEXER_WEBFS_MAX_DOCUMENT_CACHE_SIZE = "indexer.webfs.max.document.cache.size";
|
||||
|
||||
/** The key of the configuration. e.g. 5 */
|
||||
String INDEXER_DATA_MAX_DOCUMENT_CACHE_SIZE = "indexer.data.max.document.cache.size";
|
||||
|
||||
/** The key of the configuration. e.g. favorite_count */
|
||||
String INDEX_FIELD_favorite_count = "index.field.favorite_count";
|
||||
|
||||
|
@ -810,6 +834,125 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
*/
|
||||
String getCrawlerDocumentCacheHtmlMimetypes();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'indexer.thread.dump.enabled'. <br>
|
||||
* The value is, e.g. true <br>
|
||||
* comment: indexer
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getIndexerThreadDumpEnabled();
|
||||
|
||||
/**
|
||||
* Is the property for the key 'indexer.thread.dump.enabled' true? <br>
|
||||
* The value is, e.g. true <br>
|
||||
* comment: indexer
|
||||
* @return The determination, true or false. (if not found, exception but basically no way)
|
||||
*/
|
||||
boolean isIndexerThreadDumpEnabled();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'indexer.click.count.enabled'. <br>
|
||||
* The value is, e.g. true <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getIndexerClickCountEnabled();
|
||||
|
||||
/**
|
||||
* Is the property for the key 'indexer.click.count.enabled' true? <br>
|
||||
* The value is, e.g. true <br>
|
||||
* @return The determination, true or false. (if not found, exception but basically no way)
|
||||
*/
|
||||
boolean isIndexerClickCountEnabled();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'indexer.favorite.count.enabled'. <br>
|
||||
* The value is, e.g. true <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getIndexerFavoriteCountEnabled();
|
||||
|
||||
/**
|
||||
* Is the property for the key 'indexer.favorite.count.enabled' true? <br>
|
||||
* The value is, e.g. true <br>
|
||||
* @return The determination, true or false. (if not found, exception but basically no way)
|
||||
*/
|
||||
boolean isIndexerFavoriteCountEnabled();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'indexer.webfs.commit.margin.time'. <br>
|
||||
* The value is, e.g. 10000 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getIndexerWebfsCommitMarginTime();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'indexer.webfs.commit.margin.time' as {@link Integer}. <br>
|
||||
* The value is, e.g. 10000 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not integer.
|
||||
*/
|
||||
Integer getIndexerWebfsCommitMarginTimeAsInteger();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'indexer.webfs.max.empty.list.conunt'. <br>
|
||||
* The value is, e.g. 60 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getIndexerWebfsMaxEmptyListConunt();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'indexer.webfs.max.empty.list.conunt' as {@link Integer}. <br>
|
||||
* The value is, e.g. 60 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not integer.
|
||||
*/
|
||||
Integer getIndexerWebfsMaxEmptyListConuntAsInteger();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'indexer.webfs.update.interval'. <br>
|
||||
* The value is, e.g. 60000 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getIndexerWebfsUpdateInterval();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'indexer.webfs.update.interval' as {@link Integer}. <br>
|
||||
* The value is, e.g. 60000 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not integer.
|
||||
*/
|
||||
Integer getIndexerWebfsUpdateIntervalAsInteger();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'indexer.webfs.max.document.cache.size'. <br>
|
||||
* The value is, e.g. 5 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getIndexerWebfsMaxDocumentCacheSize();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'indexer.webfs.max.document.cache.size' as {@link Integer}. <br>
|
||||
* The value is, e.g. 5 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not integer.
|
||||
*/
|
||||
Integer getIndexerWebfsMaxDocumentCacheSizeAsInteger();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'indexer.data.max.document.cache.size'. <br>
|
||||
* The value is, e.g. 5 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
*/
|
||||
String getIndexerDataMaxDocumentCacheSize();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'indexer.data.max.document.cache.size' as {@link Integer}. <br>
|
||||
* The value is, e.g. 5 <br>
|
||||
* @return The value of found property. (NotNull: if not found, exception but basically no way)
|
||||
* @throws NumberFormatException When the property is not integer.
|
||||
*/
|
||||
Integer getIndexerDataMaxDocumentCacheSizeAsInteger();
|
||||
|
||||
/**
|
||||
* Get the value for the key 'index.field.favorite_count'. <br>
|
||||
* The value is, e.g. favorite_count <br>
|
||||
|
@ -1998,6 +2141,70 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
|
|||
return get(FessConfig.CRAWLER_DOCUMENT_CACHE_HTML_MIMETYPES);
|
||||
}
|
||||
|
||||
// Returns the raw value that get() yields for the key 'indexer.thread.dump.enabled'.
public String getIndexerThreadDumpEnabled() {
|
||||
return get(FessConfig.INDEXER_THREAD_DUMP_ENABLED);
|
||||
}
|
||||
|
||||
// Returns the boolean that is() yields for the key 'indexer.thread.dump.enabled'.
public boolean isIndexerThreadDumpEnabled() {
|
||||
return is(FessConfig.INDEXER_THREAD_DUMP_ENABLED);
|
||||
}
|
||||
|
||||
// Returns the raw value that get() yields for the key 'indexer.click.count.enabled'.
public String getIndexerClickCountEnabled() {
|
||||
return get(FessConfig.INDEXER_CLICK_COUNT_ENABLED);
|
||||
}
|
||||
|
||||
// Returns the boolean that is() yields for the key 'indexer.click.count.enabled'.
public boolean isIndexerClickCountEnabled() {
|
||||
return is(FessConfig.INDEXER_CLICK_COUNT_ENABLED);
|
||||
}
|
||||
|
||||
// Returns the raw value that get() yields for the key 'indexer.favorite.count.enabled'.
public String getIndexerFavoriteCountEnabled() {
|
||||
return get(FessConfig.INDEXER_FAVORITE_COUNT_ENABLED);
|
||||
}
|
||||
|
||||
// Returns the boolean that is() yields for the key 'indexer.favorite.count.enabled'.
public boolean isIndexerFavoriteCountEnabled() {
|
||||
return is(FessConfig.INDEXER_FAVORITE_COUNT_ENABLED);
|
||||
}
|
||||
|
||||
// Returns the raw value that get() yields for the key 'indexer.webfs.commit.margin.time'.
public String getIndexerWebfsCommitMarginTime() {
|
||||
return get(FessConfig.INDEXER_WEBFS_COMMIT_MARGIN_TIME);
|
||||
}
|
||||
|
||||
// Returns the Integer that getAsInteger() yields for the key 'indexer.webfs.commit.margin.time'.
public Integer getIndexerWebfsCommitMarginTimeAsInteger() {
|
||||
return getAsInteger(FessConfig.INDEXER_WEBFS_COMMIT_MARGIN_TIME);
|
||||
}
|
||||
|
||||
// Returns the raw value that get() yields for the key 'indexer.webfs.max.empty.list.conunt' (key spelled as declared in the constant).
public String getIndexerWebfsMaxEmptyListConunt() {
|
||||
return get(FessConfig.INDEXER_WEBFS_MAX_EMPTY_LIST_CONUNT);
|
||||
}
|
||||
|
||||
// Returns the Integer that getAsInteger() yields for the key 'indexer.webfs.max.empty.list.conunt' (key spelled as declared in the constant).
public Integer getIndexerWebfsMaxEmptyListConuntAsInteger() {
|
||||
return getAsInteger(FessConfig.INDEXER_WEBFS_MAX_EMPTY_LIST_CONUNT);
|
||||
}
|
||||
|
||||
// Returns the raw value that get() yields for the key 'indexer.webfs.update.interval'.
public String getIndexerWebfsUpdateInterval() {
|
||||
return get(FessConfig.INDEXER_WEBFS_UPDATE_INTERVAL);
|
||||
}
|
||||
|
||||
// Returns the Integer that getAsInteger() yields for the key 'indexer.webfs.update.interval'.
public Integer getIndexerWebfsUpdateIntervalAsInteger() {
|
||||
return getAsInteger(FessConfig.INDEXER_WEBFS_UPDATE_INTERVAL);
|
||||
}
|
||||
|
||||
// Returns the raw value that get() yields for the key 'indexer.webfs.max.document.cache.size'.
public String getIndexerWebfsMaxDocumentCacheSize() {
|
||||
return get(FessConfig.INDEXER_WEBFS_MAX_DOCUMENT_CACHE_SIZE);
|
||||
}
|
||||
|
||||
// Returns the Integer that getAsInteger() yields for the key 'indexer.webfs.max.document.cache.size'.
public Integer getIndexerWebfsMaxDocumentCacheSizeAsInteger() {
|
||||
return getAsInteger(FessConfig.INDEXER_WEBFS_MAX_DOCUMENT_CACHE_SIZE);
|
||||
}
|
||||
|
||||
// Returns the raw value that get() yields for the key 'indexer.data.max.document.cache.size'.
public String getIndexerDataMaxDocumentCacheSize() {
|
||||
return get(FessConfig.INDEXER_DATA_MAX_DOCUMENT_CACHE_SIZE);
|
||||
}
|
||||
|
||||
// Returns the Integer that getAsInteger() yields for the key 'indexer.data.max.document.cache.size'.
public Integer getIndexerDataMaxDocumentCacheSizeAsInteger() {
|
||||
return getAsInteger(FessConfig.INDEXER_DATA_MAX_DOCUMENT_CACHE_SIZE);
|
||||
}
|
||||
|
||||
public String getIndexFieldFavoriteCount() {
|
||||
return get(FessConfig.INDEX_FIELD_favorite_count);
|
||||
}
|
||||
|
|
|
@ -106,4 +106,22 @@ public interface FessProp {
|
|||
return StreamUtil.of(mimetypes).anyMatch(s -> s.equalsIgnoreCase(mimetype));
|
||||
}
|
||||
|
||||
String getIndexerClickCountEnabled();
|
||||
|
||||
public default boolean getIndexerClickCountEnabledAsBoolean() {
|
||||
return Constants.TRUE.equalsIgnoreCase(getIndexerClickCountEnabled());
|
||||
}
|
||||
|
||||
String getIndexerFavoriteCountEnabled();
|
||||
|
||||
public default boolean getIndexerFavoriteCountEnabledAsBoolean() {
|
||||
return Constants.TRUE.equalsIgnoreCase(getIndexerFavoriteCountEnabled());
|
||||
}
|
||||
|
||||
String getIndexerThreadDumpEnabled();
|
||||
|
||||
public default boolean getIndexerThreadDumpEnabledAsBoolean() {
|
||||
return Constants.TRUE.equalsIgnoreCase(getIndexerThreadDumpEnabled());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -52,7 +52,7 @@ public class CrawlerPostcard extends LaTypicalPostcard {
|
|||
protected String[] getPropertyNames() {
|
||||
return new String[] { "hostname", "webFsCrawlStartTime", "webFsCrawlEndTime", "webFsCrawlExecTime", "webFsIndexExecTime",
|
||||
"webFsIndexSize", "dataCrawlStartTime", "dataCrawlEndTime", "dataCrawlExecTime", "dataIndexExecTime", "dataFsIndexSize",
|
||||
"commitStartTime", "commitEndTime", "commitExecTime", "crawlerStartTime", "crawlerEndTime", "crawlerExecTime" };
|
||||
"commitStartTime", "commitEndTime", "commitExecTime", "crawlerStartTime", "crawlerEndTime", "crawlerExecTime", "status" };
|
||||
}
|
||||
|
||||
// ===================================================================================
|
||||
|
@ -252,4 +252,13 @@ public class CrawlerPostcard extends LaTypicalPostcard {
|
|||
public void setCrawlerExecTime(String crawlerExecTime) {
|
||||
registerVariable("crawlerExecTime", crawlerExecTime);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the value of status, used in parameter comment. <br>
|
||||
* Even if empty string, treated as empty plainly. So "IF pmb != null" is false if empty.
|
||||
* @param status The parameter value of status. (NotNull)
|
||||
*/
|
||||
public void setStatus(String status) {
|
||||
registerVariable("status", status);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -82,6 +82,16 @@ crawler.document.cache.supported.mimetypes=text/html
|
|||
#,text/plain,application/xml,application/pdf,application/msword,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/vnd.ms-excel,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/vnd.ms-powerpoint,application/vnd.openxmlformats-officedocument.presentationml.presentation
|
||||
crawler.document.cache.html.mimetypes=text/html
|
||||
|
||||
# indexer
|
||||
indexer.thread.dump.enabled=true
|
||||
indexer.click.count.enabled=true
|
||||
indexer.favorite.count.enabled=true
|
||||
indexer.webfs.commit.margin.time=10000
|
||||
indexer.webfs.max.empty.list.conunt=60
|
||||
indexer.webfs.update.interval=60000
|
||||
indexer.webfs.max.document.cache.size=5
|
||||
indexer.data.max.document.cache.size=5
|
||||
|
||||
# field names
|
||||
index.field.favorite_count=favorite_count
|
||||
index.field.click_count=click_count
|
||||
|
|
|
@ -28,9 +28,6 @@
|
|||
</component>
|
||||
|
||||
<component name="indexUpdateCallback" class="org.codelibs.fess.ds.impl.IndexUpdateCallbackImpl" instance="prototype">
|
||||
<!--
|
||||
<property name="maxDocumentCacheSize">10</property>
|
||||
-->
|
||||
</component>
|
||||
|
||||
</components>
|
||||
|
|
|
@ -7,32 +7,32 @@ subject: [FESS] Crawler completed: /*pmb.hostname*/
|
|||
--- Server Info ---
|
||||
Host Name: /*IF pmb.hostname != null*//*pmb.hostname*//*END*//*IF pmb.hostname == null*/Unknown/*END*/
|
||||
/*IF pmb.webFsIndexSize != null*/
|
||||
--- Web/FileSystem Crawler ---
|
||||
Start Time: /*pmb.webFsCrawlStartTime*/
|
||||
End Time: /*pmb.webFsCrawlEndTime*/
|
||||
Exec Time: /*pmb.webFsCrawlExecTime*/ms
|
||||
--- Web/FileSystem Crawler ---/*IF pmb.webFsCrawlStartTime != null*/
|
||||
Start Time: /*pmb.webFsCrawlStartTime*//*END*//*IF pmb.webFsCrawlEndTime != null*/
|
||||
End Time: /*pmb.webFsCrawlEndTime*//*END*//*IF pmb.webFsCrawlExecTime != null*/
|
||||
Exec Time: /*pmb.webFsCrawlExecTime*/ms/*END*/
|
||||
|
||||
--- Web/FileSystem Indexer ---
|
||||
Exec Time: /*pmb.webFsIndexExecTime*/
|
||||
Num of Doc: /*pmb.webFsIndexSize*/ docs
|
||||
--- Web/FileSystem Indexer ---/*IF pmb.webFsIndexExecTime != null*/
|
||||
Exec Time: /*pmb.webFsIndexExecTime*//*END*//*IF pmb.webFsIndexSize != null*/
|
||||
Num of Doc: /*pmb.webFsIndexSize*/ docs/*END*/
|
||||
/*END*//*IF pmb.dataFsIndexSize != null*/
|
||||
--- Data Store Crawler ---
|
||||
Start Time: /*pmb.dataCrawlStartTime*/
|
||||
End Time: /*pmb.dataCrawlEndTime*/
|
||||
Exec Time: /*pmb.dataCrawlExecTime*/ms
|
||||
--- Data Store Crawler ---/*IF pmb.dataCrawlStartTime != null*/
|
||||
Start Time: /*pmb.dataCrawlStartTime*//*END*//*IF pmb.dataCrawlEndTime != null*/
|
||||
End Time: /*pmb.dataCrawlEndTime*//*END*//*IF pmb.dataCrawlExecTime != null*/
|
||||
Exec Time: /*pmb.dataCrawlExecTime*/ms/*END*/
|
||||
|
||||
--- Data Store Indexer ---
|
||||
Exec Time: /*pmb.dataIndexExecTime*/
|
||||
Num of Doc: /*pmb.dataFsIndexSize*/ docs
|
||||
--- Data Store Indexer ---/*IF pmb.dataIndexExecTime != null*/
|
||||
Exec Time: /*pmb.dataIndexExecTime*//*END*//*IF pmb.dataFsIndexSize != null*/
|
||||
Num of Doc: /*pmb.dataFsIndexSize*/ docs/*END*/
|
||||
/*END*//*IF pmb.commitExecTime != null*/
|
||||
--- Indexer(Commit) ---
|
||||
Start Time: /*pmb.commitStartTime*/
|
||||
End Time: /*pmb.commitEndTime*/
|
||||
Exec Time: /*pmb.commitExecTime*/ms
|
||||
--- Indexer(Commit) ---/*IF pmb.commitStartTime != null*/
|
||||
Start Time: /*pmb.commitStartTime*//*END*//*IF pmb.commitEndTime != null*/
|
||||
End Time: /*pmb.commitEndTime*//*END*//*IF pmb.commitExecTime != null*/
|
||||
Exec Time: /*pmb.commitExecTime*/ms/*END*/
|
||||
/*END*/
|
||||
--- Total ---
|
||||
Start Time: /*pmb.crawlerStartTime*/
|
||||
End Time: /*pmb.crawlerEndTime*/
|
||||
Exec Time: /*pmb.crawlerExecTime*/ms
|
||||
Status: /*IF pmb.success != null*/Success/*END*//*IF pmb.success == null*/Fail/*END*/
|
||||
--- Total ---/*IF pmb.crawlerStartTime != null*/
|
||||
Start Time: /*pmb.crawlerStartTime*//*END*//*IF pmb.crawlerEndTime != null*/
|
||||
End Time: /*pmb.crawlerEndTime*//*END*//*IF pmb.crawlerExecTime != null*/
|
||||
Exec Time: /*pmb.crawlerExecTime*/ms/*END*/
|
||||
Status: /*pmb.status*/
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue