diff --git a/deps.xml b/deps.xml
index b58f858ee..067dab2c9 100644
--- a/deps.xml
+++ b/deps.xml
@@ -45,27 +45,27 @@
-
+
-
-
+
+
-
+
-
-
+
+
-
+
-
-
+
+
diff --git a/src/main/java/org/codelibs/fess/Constants.java b/src/main/java/org/codelibs/fess/Constants.java
index bb482f7dc..17853a6d1 100644
--- a/src/main/java/org/codelibs/fess/Constants.java
+++ b/src/main/java/org/codelibs/fess/Constants.java
@@ -458,4 +458,5 @@ public class Constants extends CoreLibConstants {
public static final String TEXT_FRAGMENT_TYPE_HIGHLIGHT = "highlight";
+ public static final String CRAWLER_STATS_KEY = "crawler.stats.key";
}
diff --git a/src/main/java/org/codelibs/fess/ds/AbstractDataStore.java b/src/main/java/org/codelibs/fess/ds/AbstractDataStore.java
index 8b870de9b..203153912 100644
--- a/src/main/java/org/codelibs/fess/ds/AbstractDataStore.java
+++ b/src/main/java/org/codelibs/fess/ds/AbstractDataStore.java
@@ -31,6 +31,7 @@ import org.codelibs.core.lang.ThreadUtil;
import org.codelibs.core.misc.Pair;
import org.codelibs.fess.Constants;
import org.codelibs.fess.ds.callback.IndexUpdateCallback;
+import org.codelibs.fess.entity.DataStoreParams;
import org.codelibs.fess.es.config.exentity.DataConfig;
import org.codelibs.fess.helper.CrawlingInfoHelper;
import org.codelibs.fess.helper.SystemHelper;
@@ -59,7 +60,7 @@ public abstract class AbstractDataStore implements DataStore {
}
@Override
- public void store(final DataConfig config, final IndexUpdateCallback callback, final Map initParamMap) {
+ public void store(final DataConfig config, final IndexUpdateCallback callback, final DataStoreParams initParamMap) {
final CrawlingInfoHelper crawlingInfoHelper = ComponentUtil.getCrawlingInfoHelper();
final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
final Date documentExpires = crawlingInfoHelper.getDocumentExpires(config);
@@ -76,7 +77,7 @@ public abstract class AbstractDataStore implements DataStore {
final Map configScriptMap = config.getHandlerScriptMap();
initParamMap.putAll(configParamMap);
- final Map paramMap = initParamMap;
+ final DataStoreParams paramMap = initParamMap;
// default values
final Map defaultDataMap = new HashMap<>();
@@ -91,7 +92,7 @@ public abstract class AbstractDataStore implements DataStore {
defaultDataMap.put(fessConfig.getIndexFieldExpires(), documentExpires);
}
// segment
- defaultDataMap.put(fessConfig.getIndexFieldSegment(), initParamMap.get(Constants.SESSION_ID));
+ defaultDataMap.put(fessConfig.getIndexFieldSegment(), initParamMap.getAsString(Constants.SESSION_ID));
// created
defaultDataMap.put(fessConfig.getIndexFieldCreated(), systemHelper.getCurrentTime());
// boost
@@ -118,12 +119,12 @@ public abstract class AbstractDataStore implements DataStore {
defaultDataMap.put(fessConfig.getIndexFieldVirtualHost(),
stream(config.getVirtualHosts()).get(stream -> stream.filter(StringUtil::isNotBlank).collect(Collectors.toList())));
- storeData(config, callback, new ParamMap<>(paramMap), configScriptMap, defaultDataMap);
+ storeData(config, callback, paramMap.newInstance(), configScriptMap, defaultDataMap);
}
- protected String getScriptType(final Map paramMap) {
- final String value = paramMap.get(SCRIPT_TYPE);
+ protected String getScriptType(final DataStoreParams paramMap) {
+ final String value = paramMap.getAsString(SCRIPT_TYPE);
if (StringUtil.isBlank(value)) {
return Constants.DEFAULT_SCRIPT;
}
@@ -142,9 +143,9 @@ public abstract class AbstractDataStore implements DataStore {
return ComponentUtil.getScriptEngineFactory().getScriptEngine(scriptType).evaluate(template, paramMap);
}
- protected long getReadInterval(final Map paramMap) {
+ protected long getReadInterval(final DataStoreParams paramMap) {
long readInterval = 0;
- final String value = paramMap.get("readInterval");
+ final String value = paramMap.getAsString("readInterval");
if (StringUtil.isNotBlank(value)) {
try {
readInterval = Long.parseLong(value);
@@ -159,6 +160,6 @@ public abstract class AbstractDataStore implements DataStore {
ThreadUtil.sleepQuietly(interval);
}
- protected abstract void storeData(DataConfig dataConfig, IndexUpdateCallback callback, Map paramMap,
+ protected abstract void storeData(DataConfig dataConfig, IndexUpdateCallback callback, DataStoreParams paramMap,
Map scriptMap, Map defaultDataMap);
}
diff --git a/src/main/java/org/codelibs/fess/ds/DataStore.java b/src/main/java/org/codelibs/fess/ds/DataStore.java
index aea7b9c55..75449c4b9 100644
--- a/src/main/java/org/codelibs/fess/ds/DataStore.java
+++ b/src/main/java/org/codelibs/fess/ds/DataStore.java
@@ -15,14 +15,13 @@
*/
package org.codelibs.fess.ds;
-import java.util.Map;
-
import org.codelibs.fess.ds.callback.IndexUpdateCallback;
+import org.codelibs.fess.entity.DataStoreParams;
import org.codelibs.fess.es.config.exentity.DataConfig;
public interface DataStore {
- void store(DataConfig config, IndexUpdateCallback callback, Map initParamMap);
+ void store(DataConfig config, IndexUpdateCallback callback, DataStoreParams initParamMap);
void stop();
diff --git a/src/main/java/org/codelibs/fess/ds/callback/FileListIndexUpdateCallbackImpl.java b/src/main/java/org/codelibs/fess/ds/callback/FileListIndexUpdateCallbackImpl.java
index f02bde53b..c3522ace4 100644
--- a/src/main/java/org/codelibs/fess/ds/callback/FileListIndexUpdateCallbackImpl.java
+++ b/src/main/java/org/codelibs/fess/ds/callback/FileListIndexUpdateCallbackImpl.java
@@ -45,8 +45,11 @@ import org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor;
import org.codelibs.fess.crawler.rule.Rule;
import org.codelibs.fess.crawler.rule.RuleManager;
import org.codelibs.fess.crawler.transformer.Transformer;
+import org.codelibs.fess.entity.DataStoreParams;
import org.codelibs.fess.es.client.SearchEngineClient;
import org.codelibs.fess.exception.DataStoreCrawlingException;
+import org.codelibs.fess.helper.CrawlerStatsHelper;
+import org.codelibs.fess.helper.CrawlerStatsHelper.StatsKeyObject;
import org.codelibs.fess.helper.IndexingHelper;
import org.codelibs.fess.mylasta.direction.FessConfig;
import org.codelibs.fess.util.ComponentUtil;
@@ -89,7 +92,7 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
}
@Override
- public void store(final Map paramMap, final Map dataMap) {
+ public void store(final DataStoreParams paramMap, final Map dataMap) {
executor.execute(() -> {
final Object eventType = dataMap.remove(getParamValue(paramMap, "field.event_type", "event_type"));
if (getParamValue(paramMap, "event.create", "create").equals(eventType)
@@ -105,12 +108,13 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
});
}
- protected String getParamValue(final Map paramMap, final String key, final String defaultValue) {
- return paramMap.getOrDefault(key, defaultValue);
+ protected String getParamValue(final DataStoreParams paramMap, final String key, final String defaultValue) {
+ return paramMap.getAsString(key, defaultValue);
}
- protected void addDocument(final Map paramMap, final Map dataMap) {
+ protected void addDocument(final DataStoreParams paramMap, final Map dataMap) {
final FessConfig fessConfig = ComponentUtil.getFessConfig();
+ final CrawlerStatsHelper crawlerStatsHelper = ComponentUtil.getCrawlerStatsHelper();
synchronized (indexUpdateCallback) {
// required check
if (!dataMap.containsKey(fessConfig.getIndexFieldUrl()) || dataMap.get(fessConfig.getIndexFieldUrl()) == null) {
@@ -125,6 +129,8 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
return;
}
+ final StatsKeyObject keyObj = paramMap.get(Constants.CRAWLER_STATS_KEY) instanceof StatsKeyObject sko ? sko : null;
+
final long maxAccessCount = getMaxAccessCount(paramMap, dataMap);
long counter = 0;
final Deque urlQueue = new LinkedList<>();
@@ -138,16 +144,23 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
}
try {
for (int i = 0; i < maxRedirectCount; i++) {
+ if (keyObj != null) {
+ keyObj.setUrl(processingUrl);
+ }
+ crawlerStatsHelper.record(keyObj, "prepared");
processingUrl = processRequest(paramMap, localDataMap, processingUrl, client);
if (processingUrl == null) {
break;
}
counter++;
localDataMap.put(fessConfig.getIndexFieldUrl(), processingUrl);
+ crawlerStatsHelper.record(keyObj, "redirected");
}
} catch (final ChildUrlsException e) {
+ crawlerStatsHelper.record(keyObj, "child_urls");
e.getChildUrlList().stream().map(RequestData::getUrl).forEach(urlQueue::offer);
} catch (final DataStoreCrawlingException e) {
+ crawlerStatsHelper.record(keyObj, "crawling_exception");
final Throwable cause = e.getCause();
if (cause instanceof ChildUrlsException) {
((ChildUrlsException) cause).getChildUrlList().stream().map(RequestData::getUrl).forEach(urlQueue::offer);
@@ -161,7 +174,7 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
}
}
- protected long getMaxAccessCount(final Map paramMap, final Map dataMap) {
+ protected long getMaxAccessCount(final DataStoreParams paramMap, final Map dataMap) {
final Object recursive = dataMap.remove(getParamValue(paramMap, "field.recursive", "recursive"));
if (recursive == null || Constants.FALSE.equalsIgnoreCase(recursive.toString())) {
return 1L;
@@ -176,9 +189,11 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
}
}
- protected String processRequest(final Map paramMap, final Map dataMap, final String url,
+ protected String processRequest(final DataStoreParams paramMap, final Map dataMap, final String url,
final CrawlerClient client) {
final long startTime = System.currentTimeMillis();
+ final CrawlerStatsHelper crawlerStatsHelper = ComponentUtil.getCrawlerStatsHelper();
+ final StatsKeyObject keyObj = paramMap.get(Constants.CRAWLER_STATS_KEY) instanceof StatsKeyObject sko ? sko : null;
try (final ResponseData responseData = client.execute(RequestDataBuilder.newRequestData().get().url(url).build())) {
if (responseData.getRedirectLocation() != null) {
return responseData.getRedirectLocation();
@@ -187,7 +202,7 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
if (dataMap.containsKey(Constants.SESSION_ID)) {
responseData.setSessionId((String) dataMap.get(Constants.SESSION_ID));
} else {
- responseData.setSessionId(paramMap.get(Constants.CRAWLING_INFO_ID));
+ responseData.setSessionId((String) paramMap.get(Constants.CRAWLING_INFO_ID));
}
final RuleManager ruleManager = SingletonLaContainer.getComponent(RuleManager.class);
@@ -210,17 +225,19 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
throw new CrawlerSystemException("Could not create an instance from bytes.", e);
}
}
+ crawlerStatsHelper.record(keyObj, "accessed");
// remove
String[] ignoreFields;
if (paramMap.containsKey("ignore.field.names")) {
- ignoreFields = paramMap.get("ignore.field.names").split(",");
+ ignoreFields = ((String) paramMap.get("ignore.field.names")).split(",");
} else {
ignoreFields = new String[] { Constants.INDEXING_TARGET, Constants.SESSION_ID };
}
stream(ignoreFields).of(stream -> stream.map(String::trim).forEach(s -> dataMap.remove(s)));
indexUpdateCallback.store(paramMap, dataMap);
+ crawlerStatsHelper.record(keyObj, "processed");
} else {
logger.warn("The response processor is not DefaultResponseProcessor. responseProcessor: {}, Data: {}",
responseProcessor, dataMap);
@@ -235,7 +252,7 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
}
}
- protected boolean deleteDocument(final Map paramMap, final Map dataMap) {
+ protected boolean deleteDocument(final DataStoreParams paramMap, final Map dataMap) {
if (logger.isDebugEnabled()) {
logger.debug("Deleting {}", dataMap);
diff --git a/src/main/java/org/codelibs/fess/ds/callback/IndexUpdateCallback.java b/src/main/java/org/codelibs/fess/ds/callback/IndexUpdateCallback.java
index 423401c91..f5272233c 100644
--- a/src/main/java/org/codelibs/fess/ds/callback/IndexUpdateCallback.java
+++ b/src/main/java/org/codelibs/fess/ds/callback/IndexUpdateCallback.java
@@ -17,9 +17,11 @@ package org.codelibs.fess.ds.callback;
import java.util.Map;
+import org.codelibs.fess.entity.DataStoreParams;
+
public interface IndexUpdateCallback {
- void store(Map paramMap, Map dataMap);
+ void store(DataStoreParams paramMap, Map dataMap);
long getDocumentSize();
diff --git a/src/main/java/org/codelibs/fess/ds/callback/IndexUpdateCallbackImpl.java b/src/main/java/org/codelibs/fess/ds/callback/IndexUpdateCallbackImpl.java
index 3f4168459..47ea3200d 100644
--- a/src/main/java/org/codelibs/fess/ds/callback/IndexUpdateCallbackImpl.java
+++ b/src/main/java/org/codelibs/fess/ds/callback/IndexUpdateCallbackImpl.java
@@ -25,6 +25,7 @@ import javax.annotation.PostConstruct;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codelibs.core.stream.StreamUtil;
+import org.codelibs.fess.entity.DataStoreParams;
import org.codelibs.fess.es.client.SearchEngineClient;
import org.codelibs.fess.exception.DataStoreException;
import org.codelibs.fess.helper.CrawlingInfoHelper;
@@ -70,7 +71,7 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
* @see org.codelibs.fess.ds.callback.IndexUpdateCallback#store(java.util.Map)
*/
@Override
- public void store(final Map paramMap, final Map dataMap) {
+ public void store(final DataStoreParams paramMap, final Map dataMap) {
final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
systemHelper.calibrateCpuLoad();
@@ -142,7 +143,7 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
}
- protected Map ingest(final Map paramMap, final Map dataMap) {
+ protected Map ingest(final DataStoreParams paramMap, final Map dataMap) {
if (ingestFactory == null) {
return dataMap;
}
diff --git a/src/main/java/org/codelibs/fess/entity/DataStoreParams.java b/src/main/java/org/codelibs/fess/entity/DataStoreParams.java
new file mode 100644
index 000000000..be1340163
--- /dev/null
+++ b/src/main/java/org/codelibs/fess/entity/DataStoreParams.java
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2012-2022 CodeLibs Project and the Others.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+package org.codelibs.fess.entity;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/** Mutable, string-keyed parameter holder passed to {@code DataStore} and {@code IndexUpdateCallback}. */
+public class DataStoreParams {
+    protected final Map<String, Object> params;
+
+    public DataStoreParams() {
+        params = new HashMap<>();
+    }
+
+    /** Creates a parameter set backed by a shallow copy of {@code params}. */
+    protected DataStoreParams(final Map<String, ?> params) {
+        this.params = new HashMap<>(params);
+    }
+
+    /** Stores {@code value} under {@code key}, replacing any previous value. */
+    public void put(final String key, final Object value) {
+        params.put(key, value);
+    }
+
+    /** Returns the raw value stored under {@code key}, or {@code null} if there is none. */
+    public Object get(final String key) {
+        return params.get(key);
+    }
+
+    /** Returns the {@code toString()} form of the value under {@code key}, or {@code null} if absent or null. */
+    public String getAsString(final String key) {
+        final Object value = params.get(key);
+        return value != null ? value.toString() : null;
+    }
+
+    /** Like {@link #getAsString(String)}, but yields {@code defaultValue} where that method yields null. */
+    public String getAsString(final String key, final String defaultValue) {
+        final String value = getAsString(key);
+        return value != null ? value : defaultValue;
+    }
+
+    /** Returns a new instance holding a shallow copy of the current entries. */
+    public DataStoreParams newInstance() {
+        return new DataStoreParams(params);
+    }
+
+    /** Copies every entry of {@code map} into this parameter set, overwriting existing keys. */
+    public void putAll(final Map<String, ?> map) {
+        params.putAll(map);
+    }
+
+    /** Returns whether an entry exists for {@code key}, even if its value is {@code null}. */
+    public boolean containsKey(final String key) {
+        return params.containsKey(key);
+    }
+
+    /** Returns a shallow copy of the entries; modifying the returned map does not affect this object. */
+    public Map<String, Object> asMap() {
+        return new HashMap<>(params);
+    }
+}
diff --git a/src/main/java/org/codelibs/fess/helper/DataIndexHelper.java b/src/main/java/org/codelibs/fess/helper/DataIndexHelper.java
index e06fe682d..d5443a808 100644
--- a/src/main/java/org/codelibs/fess/helper/DataIndexHelper.java
+++ b/src/main/java/org/codelibs/fess/helper/DataIndexHelper.java
@@ -17,9 +17,7 @@ package org.codelibs.fess.helper;
import java.util.ArrayList;
import java.util.Collections;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@@ -30,6 +28,7 @@ import org.codelibs.fess.app.service.FailureUrlService;
import org.codelibs.fess.ds.DataStore;
import org.codelibs.fess.ds.DataStoreFactory;
import org.codelibs.fess.ds.callback.IndexUpdateCallback;
+import org.codelibs.fess.entity.DataStoreParams;
import org.codelibs.fess.es.client.SearchEngineClient;
import org.codelibs.fess.es.config.exentity.DataConfig;
import org.codelibs.fess.mylasta.direction.FessConfig;
@@ -88,7 +87,7 @@ public class DataIndexHelper {
dataCrawlingThreadList.clear();
final List dataCrawlingThreadStatusList = new ArrayList<>();
for (final DataConfig dataConfig : configList) {
- final Map initParamMap = new HashMap<>();
+ final DataStoreParams initParamMap = new DataStoreParams();
final String sid = ComponentUtil.getCrawlingConfigHelper().store(sessionId, dataConfig);
sessionIdList.add(sid);
@@ -180,7 +179,7 @@ public class DataIndexHelper {
private final IndexUpdateCallback indexUpdateCallback;
- private final Map initParamMap;
+ private final DataStoreParams initParamMap;
protected boolean finished = false;
@@ -189,7 +188,7 @@ public class DataIndexHelper {
private DataStore dataStore;
protected DataCrawlingThread(final DataConfig dataConfig, final IndexUpdateCallback indexUpdateCallback,
- final Map initParamMap) {
+ final DataStoreParams initParamMap) {
this.dataConfig = dataConfig;
this.indexUpdateCallback = indexUpdateCallback;
this.initParamMap = initParamMap;
@@ -226,10 +225,10 @@ public class DataIndexHelper {
}
private void deleteOldDocs() {
- if (Constants.FALSE.equals(initParamMap.get(DELETE_OLD_DOCS))) {
+ if (Constants.FALSE.equals(initParamMap.getAsString(DELETE_OLD_DOCS))) {
return;
}
- final String sessionId = initParamMap.get(Constants.SESSION_ID);
+ final String sessionId = initParamMap.getAsString(Constants.SESSION_ID);
if (StringUtil.isBlank(sessionId)) {
logger.warn("Invalid sessionId at {}", dataConfig);
return;
@@ -262,7 +261,7 @@ public class DataIndexHelper {
}
public String getCrawlingInfoId() {
- return initParamMap.get(Constants.CRAWLING_INFO_ID);
+ return initParamMap.getAsString(Constants.CRAWLING_INFO_ID);
}
public boolean isRunning() {
diff --git a/src/main/java/org/codelibs/fess/ingest/Ingester.java b/src/main/java/org/codelibs/fess/ingest/Ingester.java
index fb632fb02..1a0398837 100644
--- a/src/main/java/org/codelibs/fess/ingest/Ingester.java
+++ b/src/main/java/org/codelibs/fess/ingest/Ingester.java
@@ -20,6 +20,7 @@ import java.util.Map;
import org.codelibs.fess.crawler.entity.AccessResult;
import org.codelibs.fess.crawler.entity.ResponseData;
import org.codelibs.fess.crawler.entity.ResultData;
+import org.codelibs.fess.entity.DataStoreParams;
import org.codelibs.fess.util.ComponentUtil;
public abstract class Ingester {
@@ -49,7 +50,7 @@ public abstract class Ingester {
}
// datastore
- public Map process(final Map target, final Map params) {
+ public Map process(final Map target, final DataStoreParams params) {
return process(target);
}
diff --git a/src/test/java/org/codelibs/fess/ds/AbstractDataStoreTest.java b/src/test/java/org/codelibs/fess/ds/AbstractDataStoreTest.java
index 2e8b6786e..339baafdc 100644
--- a/src/test/java/org/codelibs/fess/ds/AbstractDataStoreTest.java
+++ b/src/test/java/org/codelibs/fess/ds/AbstractDataStoreTest.java
@@ -20,6 +20,7 @@ import java.util.Map;
import org.codelibs.fess.Constants;
import org.codelibs.fess.ds.callback.IndexUpdateCallback;
+import org.codelibs.fess.entity.DataStoreParams;
import org.codelibs.fess.es.config.exentity.DataConfig;
import org.codelibs.fess.exception.JobProcessingException;
import org.codelibs.fess.script.AbstractScriptEngine;
@@ -45,7 +46,7 @@ public class AbstractDataStoreTest extends UnitFessTestCase {
}
@Override
- protected void storeData(DataConfig dataConfig, IndexUpdateCallback callback, Map paramMap,
+ protected void storeData(DataConfig dataConfig, IndexUpdateCallback callback, DataStoreParams paramMap,
Map scriptMap, Map defaultDataMap) {
// TODO nothing
}