fix #2641 add DataStoreParams

This commit is contained in:
Shinsuke Sugaya 2022-04-11 21:37:37 +09:00
parent 38b2c9577b
commit 3f5b914b7a
11 changed files with 140 additions and 43 deletions

View file

@ -45,27 +45,27 @@
</unzip>
<!-- fess-ds-csv -->
<antcall target="install.plugin.jar">
<param name="repo.url" value="${maven.release.repo.url}" />
<param name="repo.url" value="${maven.snapshot.repo.url}" />
<param name="jar.groupId" value="org/codelibs/fess" />
<param name="jar.artifactId" value="fess-ds-csv" />
<param name="jar.version" value="14.0.0" />
<param name="file.version" value="14.0.0" />
<param name="jar.version" value="14.1.0-SNAPSHOT" />
<param name="file.version" value="14.1.0-20220410.221610-1" />
</antcall>
<!-- fess-ds-db -->
<antcall target="install.plugin.jar">
<param name="repo.url" value="${maven.release.repo.url}" />
<param name="repo.url" value="${maven.snapshot.repo.url}" />
<param name="jar.groupId" value="org/codelibs/fess" />
<param name="jar.artifactId" value="fess-ds-db" />
<param name="jar.version" value="14.0.0" />
<param name="file.version" value="14.0.0" />
<param name="jar.version" value="14.1.0-SNAPSHOT" />
<param name="file.version" value="14.1.0-20220410.222311-1" />
</antcall>
<!-- fess-script-groovy -->
<antcall target="install.plugin.jar">
<param name="repo.url" value="${maven.release.repo.url}" />
<param name="repo.url" value="${maven.snapshot.repo.url}" />
<param name="jar.groupId" value="org/codelibs/fess" />
<param name="jar.artifactId" value="fess-script-groovy" />
<param name="jar.version" value="14.0.0" />
<param name="file.version" value="14.0.0" />
<param name="jar.version" value="14.1.0-SNAPSHOT" />
<param name="file.version" value="14.1.0-20220411.123412-1" />
</antcall>
</target>

View file

@ -458,4 +458,5 @@ public class Constants extends CoreLibConstants {
public static final String TEXT_FRAGMENT_TYPE_HIGHLIGHT = "highlight";
public static final String CRAWLER_STATS_KEY = "crawler.stats.key";
}

View file

@ -31,6 +31,7 @@ import org.codelibs.core.lang.ThreadUtil;
import org.codelibs.core.misc.Pair;
import org.codelibs.fess.Constants;
import org.codelibs.fess.ds.callback.IndexUpdateCallback;
import org.codelibs.fess.entity.DataStoreParams;
import org.codelibs.fess.es.config.exentity.DataConfig;
import org.codelibs.fess.helper.CrawlingInfoHelper;
import org.codelibs.fess.helper.SystemHelper;
@ -59,7 +60,7 @@ public abstract class AbstractDataStore implements DataStore {
}
@Override
public void store(final DataConfig config, final IndexUpdateCallback callback, final Map<String, String> initParamMap) {
public void store(final DataConfig config, final IndexUpdateCallback callback, final DataStoreParams initParamMap) {
final CrawlingInfoHelper crawlingInfoHelper = ComponentUtil.getCrawlingInfoHelper();
final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
final Date documentExpires = crawlingInfoHelper.getDocumentExpires(config);
@ -76,7 +77,7 @@ public abstract class AbstractDataStore implements DataStore {
final Map<String, String> configScriptMap = config.getHandlerScriptMap();
initParamMap.putAll(configParamMap);
final Map<String, String> paramMap = initParamMap;
final DataStoreParams paramMap = initParamMap;
// default values
final Map<String, Object> defaultDataMap = new HashMap<>();
@ -91,7 +92,7 @@ public abstract class AbstractDataStore implements DataStore {
defaultDataMap.put(fessConfig.getIndexFieldExpires(), documentExpires);
}
// segment
defaultDataMap.put(fessConfig.getIndexFieldSegment(), initParamMap.get(Constants.SESSION_ID));
defaultDataMap.put(fessConfig.getIndexFieldSegment(), initParamMap.getAsString(Constants.SESSION_ID));
// created
defaultDataMap.put(fessConfig.getIndexFieldCreated(), systemHelper.getCurrentTime());
// boost
@ -118,12 +119,12 @@ public abstract class AbstractDataStore implements DataStore {
defaultDataMap.put(fessConfig.getIndexFieldVirtualHost(),
stream(config.getVirtualHosts()).get(stream -> stream.filter(StringUtil::isNotBlank).collect(Collectors.toList())));
storeData(config, callback, new ParamMap<>(paramMap), configScriptMap, defaultDataMap);
storeData(config, callback, paramMap.newInstance(), configScriptMap, defaultDataMap);
}
protected String getScriptType(final Map<String, String> paramMap) {
final String value = paramMap.get(SCRIPT_TYPE);
protected String getScriptType(final DataStoreParams paramMap) {
final String value = paramMap.getAsString(SCRIPT_TYPE);
if (StringUtil.isBlank(value)) {
return Constants.DEFAULT_SCRIPT;
}
@ -142,9 +143,9 @@ public abstract class AbstractDataStore implements DataStore {
return ComponentUtil.getScriptEngineFactory().getScriptEngine(scriptType).evaluate(template, paramMap);
}
protected long getReadInterval(final Map<String, String> paramMap) {
protected long getReadInterval(final DataStoreParams paramMap) {
long readInterval = 0;
final String value = paramMap.get("readInterval");
final String value = paramMap.getAsString("readInterval");
if (StringUtil.isNotBlank(value)) {
try {
readInterval = Long.parseLong(value);
@ -159,6 +160,6 @@ public abstract class AbstractDataStore implements DataStore {
ThreadUtil.sleepQuietly(interval);
}
protected abstract void storeData(DataConfig dataConfig, IndexUpdateCallback callback, Map<String, String> paramMap,
protected abstract void storeData(DataConfig dataConfig, IndexUpdateCallback callback, DataStoreParams paramMap,
Map<String, String> scriptMap, Map<String, Object> defaultDataMap);
}

View file

@ -15,14 +15,13 @@
*/
package org.codelibs.fess.ds;
import java.util.Map;
import org.codelibs.fess.ds.callback.IndexUpdateCallback;
import org.codelibs.fess.entity.DataStoreParams;
import org.codelibs.fess.es.config.exentity.DataConfig;
public interface DataStore {
void store(DataConfig config, IndexUpdateCallback callback, Map<String, String> initParamMap);
void store(DataConfig config, IndexUpdateCallback callback, DataStoreParams initParamMap);
void stop();

View file

@ -45,8 +45,11 @@ import org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor;
import org.codelibs.fess.crawler.rule.Rule;
import org.codelibs.fess.crawler.rule.RuleManager;
import org.codelibs.fess.crawler.transformer.Transformer;
import org.codelibs.fess.entity.DataStoreParams;
import org.codelibs.fess.es.client.SearchEngineClient;
import org.codelibs.fess.exception.DataStoreCrawlingException;
import org.codelibs.fess.helper.CrawlerStatsHelper;
import org.codelibs.fess.helper.CrawlerStatsHelper.StatsKeyObject;
import org.codelibs.fess.helper.IndexingHelper;
import org.codelibs.fess.mylasta.direction.FessConfig;
import org.codelibs.fess.util.ComponentUtil;
@ -89,7 +92,7 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
}
@Override
public void store(final Map<String, String> paramMap, final Map<String, Object> dataMap) {
public void store(final DataStoreParams paramMap, final Map<String, Object> dataMap) {
executor.execute(() -> {
final Object eventType = dataMap.remove(getParamValue(paramMap, "field.event_type", "event_type"));
if (getParamValue(paramMap, "event.create", "create").equals(eventType)
@ -105,12 +108,13 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
});
}
protected String getParamValue(final Map<String, String> paramMap, final String key, final String defaultValue) {
return paramMap.getOrDefault(key, defaultValue);
/**
 * Resolves a parameter as a string, with a fallback.
 *
 * <p>Delegates directly to {@code paramMap.getAsString(key, defaultValue)}.</p>
 *
 * @param paramMap parameters to read from
 * @param key name of the parameter to look up
 * @param defaultValue value passed through as the fallback
 * @return whatever {@code paramMap.getAsString(key, defaultValue)} returns
 */
protected String getParamValue(final DataStoreParams paramMap, final String key, final String defaultValue) {
return paramMap.getAsString(key, defaultValue);
}
protected void addDocument(final Map<String, String> paramMap, final Map<String, Object> dataMap) {
protected void addDocument(final DataStoreParams paramMap, final Map<String, Object> dataMap) {
final FessConfig fessConfig = ComponentUtil.getFessConfig();
final CrawlerStatsHelper crawlerStatsHelper = ComponentUtil.getCrawlerStatsHelper();
synchronized (indexUpdateCallback) {
// required check
if (!dataMap.containsKey(fessConfig.getIndexFieldUrl()) || dataMap.get(fessConfig.getIndexFieldUrl()) == null) {
@ -125,6 +129,8 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
return;
}
final StatsKeyObject keyObj = paramMap.get(Constants.CRAWLER_STATS_KEY) instanceof StatsKeyObject sko ? sko : null;
final long maxAccessCount = getMaxAccessCount(paramMap, dataMap);
long counter = 0;
final Deque<String> urlQueue = new LinkedList<>();
@ -138,16 +144,23 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
}
try {
for (int i = 0; i < maxRedirectCount; i++) {
if (keyObj != null) {
keyObj.setUrl(processingUrl);
}
crawlerStatsHelper.record(keyObj, "prepared");
processingUrl = processRequest(paramMap, localDataMap, processingUrl, client);
if (processingUrl == null) {
break;
}
counter++;
localDataMap.put(fessConfig.getIndexFieldUrl(), processingUrl);
crawlerStatsHelper.record(keyObj, "redirected");
}
} catch (final ChildUrlsException e) {
crawlerStatsHelper.record(keyObj, "child_urls");
e.getChildUrlList().stream().map(RequestData::getUrl).forEach(urlQueue::offer);
} catch (final DataStoreCrawlingException e) {
crawlerStatsHelper.record(keyObj, "crawling_exception");
final Throwable cause = e.getCause();
if (cause instanceof ChildUrlsException) {
((ChildUrlsException) cause).getChildUrlList().stream().map(RequestData::getUrl).forEach(urlQueue::offer);
@ -161,7 +174,7 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
}
}
protected long getMaxAccessCount(final Map<String, String> paramMap, final Map<String, Object> dataMap) {
protected long getMaxAccessCount(final DataStoreParams paramMap, final Map<String, Object> dataMap) {
final Object recursive = dataMap.remove(getParamValue(paramMap, "field.recursive", "recursive"));
if (recursive == null || Constants.FALSE.equalsIgnoreCase(recursive.toString())) {
return 1L;
@ -176,9 +189,11 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
}
}
protected String processRequest(final Map<String, String> paramMap, final Map<String, Object> dataMap, final String url,
protected String processRequest(final DataStoreParams paramMap, final Map<String, Object> dataMap, final String url,
final CrawlerClient client) {
final long startTime = System.currentTimeMillis();
final CrawlerStatsHelper crawlerStatsHelper = ComponentUtil.getCrawlerStatsHelper();
final StatsKeyObject keyObj = paramMap.get(Constants.CRAWLER_STATS_KEY) instanceof StatsKeyObject sko ? sko : null;
try (final ResponseData responseData = client.execute(RequestDataBuilder.newRequestData().get().url(url).build())) {
if (responseData.getRedirectLocation() != null) {
return responseData.getRedirectLocation();
@ -187,7 +202,7 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
if (dataMap.containsKey(Constants.SESSION_ID)) {
responseData.setSessionId((String) dataMap.get(Constants.SESSION_ID));
} else {
responseData.setSessionId(paramMap.get(Constants.CRAWLING_INFO_ID));
responseData.setSessionId((String) paramMap.get(Constants.CRAWLING_INFO_ID));
}
final RuleManager ruleManager = SingletonLaContainer.getComponent(RuleManager.class);
@ -210,17 +225,19 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
throw new CrawlerSystemException("Could not create an instance from bytes.", e);
}
}
crawlerStatsHelper.record(keyObj, "accessed");
// remove
String[] ignoreFields;
if (paramMap.containsKey("ignore.field.names")) {
ignoreFields = paramMap.get("ignore.field.names").split(",");
ignoreFields = ((String) paramMap.get("ignore.field.names")).split(",");
} else {
ignoreFields = new String[] { Constants.INDEXING_TARGET, Constants.SESSION_ID };
}
stream(ignoreFields).of(stream -> stream.map(String::trim).forEach(s -> dataMap.remove(s)));
indexUpdateCallback.store(paramMap, dataMap);
crawlerStatsHelper.record(keyObj, "processed");
} else {
logger.warn("The response processor is not DefaultResponseProcessor. responseProcessor: {}, Data: {}",
responseProcessor, dataMap);
@ -235,7 +252,7 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
}
}
protected boolean deleteDocument(final Map<String, String> paramMap, final Map<String, Object> dataMap) {
protected boolean deleteDocument(final DataStoreParams paramMap, final Map<String, Object> dataMap) {
if (logger.isDebugEnabled()) {
logger.debug("Deleting {}", dataMap);

View file

@ -17,9 +17,11 @@ package org.codelibs.fess.ds.callback;
import java.util.Map;
import org.codelibs.fess.entity.DataStoreParams;
public interface IndexUpdateCallback {
void store(Map<String, String> paramMap, Map<String, Object> dataMap);
void store(DataStoreParams paramMap, Map<String, Object> dataMap);
long getDocumentSize();

View file

@ -25,6 +25,7 @@ import javax.annotation.PostConstruct;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codelibs.core.stream.StreamUtil;
import org.codelibs.fess.entity.DataStoreParams;
import org.codelibs.fess.es.client.SearchEngineClient;
import org.codelibs.fess.exception.DataStoreException;
import org.codelibs.fess.helper.CrawlingInfoHelper;
@ -70,7 +71,7 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
* @see org.codelibs.fess.ds.callback.IndexUpdateCallback#store(java.util.Map)
*/
@Override
public void store(final Map<String, String> paramMap, final Map<String, Object> dataMap) {
public void store(final DataStoreParams paramMap, final Map<String, Object> dataMap) {
final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
systemHelper.calibrateCpuLoad();
@ -142,7 +143,7 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
}
protected Map<String, Object> ingest(final Map<String, String> paramMap, final Map<String, Object> dataMap) {
protected Map<String, Object> ingest(final DataStoreParams paramMap, final Map<String, Object> dataMap) {
if (ingestFactory == null) {
return dataMap;
}

View file

@ -0,0 +1,75 @@
/*
* Copyright 2012-2022 CodeLibs Project and the Others.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
package org.codelibs.fess.entity;
import java.util.HashMap;
import java.util.Map;
/**
 * Mutable, string-keyed parameter container passed to data stores and their callbacks.
 *
 * <p>Values are kept as {@link Object}s so that non-string values can travel alongside
 * plain string settings; {@link #getAsString(String)} offers a string view of any value.
 * The container is backed by a {@link HashMap} and performs no synchronization itself.</p>
 */
public class DataStoreParams {

    /** Backing storage; subclasses may access it directly. */
    protected final Map<String, Object> params;

    /** Creates an empty parameter set. */
    public DataStoreParams() {
        params = new HashMap<>();
    }

    /**
     * Creates a parameter set holding a shallow copy of the given entries.
     *
     * @param params entries to copy; later changes to this map are not reflected
     */
    protected DataStoreParams(final Map<String, Object> params) {
        this.params = new HashMap<>(params);
    }

    /**
     * Stores a value under the given key, replacing any previous value.
     *
     * @param key parameter name
     * @param value value to associate with the key (may be {@code null})
     */
    public void put(final String key, final Object value) {
        params.put(key, value);
    }

    /**
     * Returns the raw value stored under the given key.
     *
     * @param key parameter name
     * @return the stored value, or {@code null} if the key is absent or mapped to {@code null}
     */
    public Object get(final String key) {
        return params.get(key);
    }

    /**
     * Returns the value stored under the given key as a string.
     *
     * @param key parameter name
     * @return the value's {@code toString()} result (a {@link String} value is returned as is),
     *         or {@code null} if the key is absent or mapped to {@code null}
     */
    public String getAsString(final String key) {
        // Single lookup: String.toString() returns the same instance, so no
        // separate String branch is required.
        final Object value = params.get(key);
        return value != null ? value.toString() : null;
    }

    /**
     * Returns the value stored under the given key as a string, or a fallback.
     *
     * @param key parameter name
     * @param defaultValue value returned when {@link #getAsString(String)} yields {@code null}
     * @return the string form of the stored value, or {@code defaultValue}
     */
    public String getAsString(final String key, final String defaultValue) {
        final String value = getAsString(key);
        return value != null ? value : defaultValue;
    }

    /**
     * Creates an independent parameter set with the same entries.
     *
     * @return a new instance backed by a shallow copy of the current entries
     */
    public DataStoreParams newInstance() {
        return new DataStoreParams(params);
    }

    /**
     * Copies every entry of the given map into this parameter set.
     *
     * @param map entries to add; existing keys are overwritten
     */
    public void putAll(final Map<String, String> map) {
        params.putAll(map);
    }

    /**
     * Tells whether a mapping exists for the given key, including a mapping to {@code null}.
     *
     * @param key parameter name
     * @return {@code true} if the key is present
     */
    public boolean containsKey(final String key) {
        return params.containsKey(key);
    }

    /**
     * Returns a snapshot of the current entries.
     *
     * @return a new map; modifying it does not affect this parameter set
     */
    public Map<String, Object> asMap() {
        return new HashMap<>(params);
    }
}

View file

@ -17,9 +17,7 @@ package org.codelibs.fess.helper;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@ -30,6 +28,7 @@ import org.codelibs.fess.app.service.FailureUrlService;
import org.codelibs.fess.ds.DataStore;
import org.codelibs.fess.ds.DataStoreFactory;
import org.codelibs.fess.ds.callback.IndexUpdateCallback;
import org.codelibs.fess.entity.DataStoreParams;
import org.codelibs.fess.es.client.SearchEngineClient;
import org.codelibs.fess.es.config.exentity.DataConfig;
import org.codelibs.fess.mylasta.direction.FessConfig;
@ -88,7 +87,7 @@ public class DataIndexHelper {
dataCrawlingThreadList.clear();
final List<String> dataCrawlingThreadStatusList = new ArrayList<>();
for (final DataConfig dataConfig : configList) {
final Map<String, String> initParamMap = new HashMap<>();
final DataStoreParams initParamMap = new DataStoreParams();
final String sid = ComponentUtil.getCrawlingConfigHelper().store(sessionId, dataConfig);
sessionIdList.add(sid);
@ -180,7 +179,7 @@ public class DataIndexHelper {
private final IndexUpdateCallback indexUpdateCallback;
private final Map<String, String> initParamMap;
private final DataStoreParams initParamMap;
protected boolean finished = false;
@ -189,7 +188,7 @@ public class DataIndexHelper {
private DataStore dataStore;
protected DataCrawlingThread(final DataConfig dataConfig, final IndexUpdateCallback indexUpdateCallback,
final Map<String, String> initParamMap) {
final DataStoreParams initParamMap) {
this.dataConfig = dataConfig;
this.indexUpdateCallback = indexUpdateCallback;
this.initParamMap = initParamMap;
@ -226,10 +225,10 @@ public class DataIndexHelper {
}
private void deleteOldDocs() {
if (Constants.FALSE.equals(initParamMap.get(DELETE_OLD_DOCS))) {
if (Constants.FALSE.equals(initParamMap.getAsString(DELETE_OLD_DOCS))) {
return;
}
final String sessionId = initParamMap.get(Constants.SESSION_ID);
final String sessionId = initParamMap.getAsString(Constants.SESSION_ID);
if (StringUtil.isBlank(sessionId)) {
logger.warn("Invalid sessionId at {}", dataConfig);
return;
@ -262,7 +261,7 @@ public class DataIndexHelper {
}
public String getCrawlingInfoId() {
return initParamMap.get(Constants.CRAWLING_INFO_ID);
return initParamMap.getAsString(Constants.CRAWLING_INFO_ID);
}
public boolean isRunning() {

View file

@ -20,6 +20,7 @@ import java.util.Map;
import org.codelibs.fess.crawler.entity.AccessResult;
import org.codelibs.fess.crawler.entity.ResponseData;
import org.codelibs.fess.crawler.entity.ResultData;
import org.codelibs.fess.entity.DataStoreParams;
import org.codelibs.fess.util.ComponentUtil;
public abstract class Ingester {
@ -49,7 +50,7 @@ public abstract class Ingester {
}
// datastore
public Map<String, Object> process(final Map<String, Object> target, final Map<String, String> params) {
/**
 * Data store variant of {@code process}.
 *
 * <p>This implementation does not read {@code params}; it forwards to
 * {@code process(target)} and returns that result.</p>
 *
 * @param target document data to process
 * @param params data store parameters (unused in this implementation)
 * @return the result of {@code process(target)}
 */
public Map<String, Object> process(final Map<String, Object> target, final DataStoreParams params) {
return process(target);
}

View file

@ -20,6 +20,7 @@ import java.util.Map;
import org.codelibs.fess.Constants;
import org.codelibs.fess.ds.callback.IndexUpdateCallback;
import org.codelibs.fess.entity.DataStoreParams;
import org.codelibs.fess.es.config.exentity.DataConfig;
import org.codelibs.fess.exception.JobProcessingException;
import org.codelibs.fess.script.AbstractScriptEngine;
@ -45,7 +46,7 @@ public class AbstractDataStoreTest extends UnitFessTestCase {
}
@Override
protected void storeData(DataConfig dataConfig, IndexUpdateCallback callback, Map<String, String> paramMap,
protected void storeData(DataConfig dataConfig, IndexUpdateCallback callback, DataStoreParams paramMap,
Map<String, String> scriptMap, Map<String, Object> defaultDataMap) {
// TODO nothing
}