浏览代码

fix #2641 add DataStoreParams

Shinsuke Sugaya 3 年之前
父节点
当前提交
3f5b914b7a

+ 9 - 9
deps.xml

@@ -45,27 +45,27 @@
 		</unzip>
 		<!-- fess-ds-csv -->
 		<antcall target="install.plugin.jar">
-			<param name="repo.url" value="${maven.release.repo.url}" />
+			<param name="repo.url" value="${maven.snapshot.repo.url}" />
 			<param name="jar.groupId" value="org/codelibs/fess" />
 			<param name="jar.artifactId" value="fess-ds-csv" />
-			<param name="jar.version" value="14.0.0" />
-			<param name="file.version" value="14.0.0" />
+			<param name="jar.version" value="14.1.0-SNAPSHOT" />
+			<param name="file.version" value="14.1.0-20220410.221610-1" />
 		</antcall>
 		<!-- fess-ds-db -->
 		<antcall target="install.plugin.jar">
-			<param name="repo.url" value="${maven.release.repo.url}" />
+			<param name="repo.url" value="${maven.snapshot.repo.url}" />
 			<param name="jar.groupId" value="org/codelibs/fess" />
 			<param name="jar.artifactId" value="fess-ds-db" />
-			<param name="jar.version" value="14.0.0" />
-			<param name="file.version" value="14.0.0" />
+			<param name="jar.version" value="14.1.0-SNAPSHOT" />
+			<param name="file.version" value="14.1.0-20220410.222311-1" />
 		</antcall>
 		<!-- fess-script-groovy -->
 		<antcall target="install.plugin.jar">
-			<param name="repo.url" value="${maven.release.repo.url}" />
+			<param name="repo.url" value="${maven.snapshot.repo.url}" />
 			<param name="jar.groupId" value="org/codelibs/fess" />
 			<param name="jar.artifactId" value="fess-script-groovy" />
-			<param name="jar.version" value="14.0.0" />
-			<param name="file.version" value="14.0.0" />
+			<param name="jar.version" value="14.1.0-SNAPSHOT" />
+			<param name="file.version" value="14.1.0-20220411.123412-1" />
 		</antcall>
 	</target>
 

+ 1 - 0
src/main/java/org/codelibs/fess/Constants.java

@@ -458,4 +458,5 @@ public class Constants extends CoreLibConstants {
 
     public static final String TEXT_FRAGMENT_TYPE_HIGHLIGHT = "highlight";
 
+    public static final String CRAWLER_STATS_KEY = "crawler.stats.key";
 }

+ 10 - 9
src/main/java/org/codelibs/fess/ds/AbstractDataStore.java

@@ -31,6 +31,7 @@ import org.codelibs.core.lang.ThreadUtil;
 import org.codelibs.core.misc.Pair;
 import org.codelibs.fess.Constants;
 import org.codelibs.fess.ds.callback.IndexUpdateCallback;
+import org.codelibs.fess.entity.DataStoreParams;
 import org.codelibs.fess.es.config.exentity.DataConfig;
 import org.codelibs.fess.helper.CrawlingInfoHelper;
 import org.codelibs.fess.helper.SystemHelper;
@@ -59,7 +60,7 @@ public abstract class AbstractDataStore implements DataStore {
     }
 
     @Override
-    public void store(final DataConfig config, final IndexUpdateCallback callback, final Map<String, String> initParamMap) {
+    public void store(final DataConfig config, final IndexUpdateCallback callback, final DataStoreParams initParamMap) {
         final CrawlingInfoHelper crawlingInfoHelper = ComponentUtil.getCrawlingInfoHelper();
         final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
         final Date documentExpires = crawlingInfoHelper.getDocumentExpires(config);
@@ -76,7 +77,7 @@ public abstract class AbstractDataStore implements DataStore {
         final Map<String, String> configScriptMap = config.getHandlerScriptMap();
 
         initParamMap.putAll(configParamMap);
-        final Map<String, String> paramMap = initParamMap;
+        final DataStoreParams paramMap = initParamMap;
 
         // default values
         final Map<String, Object> defaultDataMap = new HashMap<>();
@@ -91,7 +92,7 @@ public abstract class AbstractDataStore implements DataStore {
             defaultDataMap.put(fessConfig.getIndexFieldExpires(), documentExpires);
         }
         // segment
-        defaultDataMap.put(fessConfig.getIndexFieldSegment(), initParamMap.get(Constants.SESSION_ID));
+        defaultDataMap.put(fessConfig.getIndexFieldSegment(), initParamMap.getAsString(Constants.SESSION_ID));
         // created
         defaultDataMap.put(fessConfig.getIndexFieldCreated(), systemHelper.getCurrentTime());
         // boost
@@ -118,12 +119,12 @@ public abstract class AbstractDataStore implements DataStore {
         defaultDataMap.put(fessConfig.getIndexFieldVirtualHost(),
                 stream(config.getVirtualHosts()).get(stream -> stream.filter(StringUtil::isNotBlank).collect(Collectors.toList())));
 
-        storeData(config, callback, new ParamMap<>(paramMap), configScriptMap, defaultDataMap);
+        storeData(config, callback, paramMap.newInstance(), configScriptMap, defaultDataMap);
 
     }
 
-    protected String getScriptType(final Map<String, String> paramMap) {
-        final String value = paramMap.get(SCRIPT_TYPE);
+    protected String getScriptType(final DataStoreParams paramMap) {
+        final String value = paramMap.getAsString(SCRIPT_TYPE);
         if (StringUtil.isBlank(value)) {
             return Constants.DEFAULT_SCRIPT;
         }
@@ -142,9 +143,9 @@ public abstract class AbstractDataStore implements DataStore {
         return ComponentUtil.getScriptEngineFactory().getScriptEngine(scriptType).evaluate(template, paramMap);
     }
 
-    protected long getReadInterval(final Map<String, String> paramMap) {
+    protected long getReadInterval(final DataStoreParams paramMap) {
         long readInterval = 0;
-        final String value = paramMap.get("readInterval");
+        final String value = paramMap.getAsString("readInterval");
         if (StringUtil.isNotBlank(value)) {
             try {
                 readInterval = Long.parseLong(value);
@@ -159,6 +160,6 @@ public abstract class AbstractDataStore implements DataStore {
         ThreadUtil.sleepQuietly(interval);
     }
 
-    protected abstract void storeData(DataConfig dataConfig, IndexUpdateCallback callback, Map<String, String> paramMap,
+    protected abstract void storeData(DataConfig dataConfig, IndexUpdateCallback callback, DataStoreParams paramMap,
             Map<String, String> scriptMap, Map<String, Object> defaultDataMap);
 }

+ 2 - 3
src/main/java/org/codelibs/fess/ds/DataStore.java

@@ -15,14 +15,13 @@
  */
 package org.codelibs.fess.ds;
 
-import java.util.Map;
-
 import org.codelibs.fess.ds.callback.IndexUpdateCallback;
+import org.codelibs.fess.entity.DataStoreParams;
 import org.codelibs.fess.es.config.exentity.DataConfig;
 
 public interface DataStore {
 
-    void store(DataConfig config, IndexUpdateCallback callback, Map<String, String> initParamMap);
+    void store(DataConfig config, IndexUpdateCallback callback, DataStoreParams initParamMap);
 
     void stop();
 

+ 26 - 9
src/main/java/org/codelibs/fess/ds/callback/FileListIndexUpdateCallbackImpl.java

@@ -45,8 +45,11 @@ import org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor;
 import org.codelibs.fess.crawler.rule.Rule;
 import org.codelibs.fess.crawler.rule.RuleManager;
 import org.codelibs.fess.crawler.transformer.Transformer;
+import org.codelibs.fess.entity.DataStoreParams;
 import org.codelibs.fess.es.client.SearchEngineClient;
 import org.codelibs.fess.exception.DataStoreCrawlingException;
+import org.codelibs.fess.helper.CrawlerStatsHelper;
+import org.codelibs.fess.helper.CrawlerStatsHelper.StatsKeyObject;
 import org.codelibs.fess.helper.IndexingHelper;
 import org.codelibs.fess.mylasta.direction.FessConfig;
 import org.codelibs.fess.util.ComponentUtil;
@@ -89,7 +92,7 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
     }
 
     @Override
-    public void store(final Map<String, String> paramMap, final Map<String, Object> dataMap) {
+    public void store(final DataStoreParams paramMap, final Map<String, Object> dataMap) {
         executor.execute(() -> {
             final Object eventType = dataMap.remove(getParamValue(paramMap, "field.event_type", "event_type"));
             if (getParamValue(paramMap, "event.create", "create").equals(eventType)
@@ -105,12 +108,13 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
         });
     }
 
-    protected String getParamValue(final Map<String, String> paramMap, final String key, final String defaultValue) {
-        return paramMap.getOrDefault(key, defaultValue);
+    protected String getParamValue(final DataStoreParams paramMap, final String key, final String defaultValue) {
+        return paramMap.getAsString(key, defaultValue);
     }
 
-    protected void addDocument(final Map<String, String> paramMap, final Map<String, Object> dataMap) {
+    protected void addDocument(final DataStoreParams paramMap, final Map<String, Object> dataMap) {
         final FessConfig fessConfig = ComponentUtil.getFessConfig();
+        final CrawlerStatsHelper crawlerStatsHelper = ComponentUtil.getCrawlerStatsHelper();
         synchronized (indexUpdateCallback) {
             // required check
             if (!dataMap.containsKey(fessConfig.getIndexFieldUrl()) || dataMap.get(fessConfig.getIndexFieldUrl()) == null) {
@@ -125,6 +129,8 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
                 return;
             }
 
+            final StatsKeyObject keyObj = paramMap.get(Constants.CRAWLER_STATS_KEY) instanceof StatsKeyObject sko ? sko : null;
+
             final long maxAccessCount = getMaxAccessCount(paramMap, dataMap);
             long counter = 0;
             final Deque<String> urlQueue = new LinkedList<>();
@@ -138,16 +144,23 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
                 }
                 try {
                     for (int i = 0; i < maxRedirectCount; i++) {
+                        if (keyObj != null) {
+                            keyObj.setUrl(processingUrl);
+                        }
+                        crawlerStatsHelper.record(keyObj, "prepared");
                         processingUrl = processRequest(paramMap, localDataMap, processingUrl, client);
                         if (processingUrl == null) {
                             break;
                         }
                         counter++;
                         localDataMap.put(fessConfig.getIndexFieldUrl(), processingUrl);
+                        crawlerStatsHelper.record(keyObj, "redirected");
                     }
                 } catch (final ChildUrlsException e) {
+                    crawlerStatsHelper.record(keyObj, "child_urls");
                     e.getChildUrlList().stream().map(RequestData::getUrl).forEach(urlQueue::offer);
                 } catch (final DataStoreCrawlingException e) {
+                    crawlerStatsHelper.record(keyObj, "crawling_exception");
                     final Throwable cause = e.getCause();
                     if (cause instanceof ChildUrlsException) {
                         ((ChildUrlsException) cause).getChildUrlList().stream().map(RequestData::getUrl).forEach(urlQueue::offer);
@@ -161,7 +174,7 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
         }
     }
 
-    protected long getMaxAccessCount(final Map<String, String> paramMap, final Map<String, Object> dataMap) {
+    protected long getMaxAccessCount(final DataStoreParams paramMap, final Map<String, Object> dataMap) {
         final Object recursive = dataMap.remove(getParamValue(paramMap, "field.recursive", "recursive"));
         if (recursive == null || Constants.FALSE.equalsIgnoreCase(recursive.toString())) {
             return 1L;
@@ -176,9 +189,11 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
         }
     }
 
-    protected String processRequest(final Map<String, String> paramMap, final Map<String, Object> dataMap, final String url,
+    protected String processRequest(final DataStoreParams paramMap, final Map<String, Object> dataMap, final String url,
             final CrawlerClient client) {
         final long startTime = System.currentTimeMillis();
+        final CrawlerStatsHelper crawlerStatsHelper = ComponentUtil.getCrawlerStatsHelper();
+        final StatsKeyObject keyObj = paramMap.get(Constants.CRAWLER_STATS_KEY) instanceof StatsKeyObject sko ? sko : null;
         try (final ResponseData responseData = client.execute(RequestDataBuilder.newRequestData().get().url(url).build())) {
             if (responseData.getRedirectLocation() != null) {
                 return responseData.getRedirectLocation();
@@ -187,7 +202,7 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
             if (dataMap.containsKey(Constants.SESSION_ID)) {
                 responseData.setSessionId((String) dataMap.get(Constants.SESSION_ID));
             } else {
-                responseData.setSessionId(paramMap.get(Constants.CRAWLING_INFO_ID));
+                responseData.setSessionId((String) paramMap.get(Constants.CRAWLING_INFO_ID));
             }
 
             final RuleManager ruleManager = SingletonLaContainer.getComponent(RuleManager.class);
@@ -210,17 +225,19 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
                             throw new CrawlerSystemException("Could not create an instance from bytes.", e);
                         }
                     }
+                    crawlerStatsHelper.record(keyObj, "accessed");
 
                     // remove
                     String[] ignoreFields;
                     if (paramMap.containsKey("ignore.field.names")) {
-                        ignoreFields = paramMap.get("ignore.field.names").split(",");
+                        ignoreFields = ((String) paramMap.get("ignore.field.names")).split(",");
                     } else {
                         ignoreFields = new String[] { Constants.INDEXING_TARGET, Constants.SESSION_ID };
                     }
                     stream(ignoreFields).of(stream -> stream.map(String::trim).forEach(s -> dataMap.remove(s)));
 
                     indexUpdateCallback.store(paramMap, dataMap);
+                    crawlerStatsHelper.record(keyObj, "processed");
                 } else {
                     logger.warn("The response processor is not DefaultResponseProcessor. responseProcessor: {}, Data: {}",
                             responseProcessor, dataMap);
@@ -235,7 +252,7 @@ public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
         }
     }
 
-    protected boolean deleteDocument(final Map<String, String> paramMap, final Map<String, Object> dataMap) {
+    protected boolean deleteDocument(final DataStoreParams paramMap, final Map<String, Object> dataMap) {
 
         if (logger.isDebugEnabled()) {
             logger.debug("Deleting {}", dataMap);

+ 3 - 1
src/main/java/org/codelibs/fess/ds/callback/IndexUpdateCallback.java

@@ -17,9 +17,11 @@ package org.codelibs.fess.ds.callback;
 
 import java.util.Map;
 
+import org.codelibs.fess.entity.DataStoreParams;
+
 public interface IndexUpdateCallback {
 
-    void store(Map<String, String> paramMap, Map<String, Object> dataMap);
+    void store(DataStoreParams paramMap, Map<String, Object> dataMap);
 
     long getDocumentSize();
 

+ 3 - 2
src/main/java/org/codelibs/fess/ds/callback/IndexUpdateCallbackImpl.java

@@ -25,6 +25,7 @@ import javax.annotation.PostConstruct;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.codelibs.core.stream.StreamUtil;
+import org.codelibs.fess.entity.DataStoreParams;
 import org.codelibs.fess.es.client.SearchEngineClient;
 import org.codelibs.fess.exception.DataStoreException;
 import org.codelibs.fess.helper.CrawlingInfoHelper;
@@ -70,7 +71,7 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
      * @see org.codelibs.fess.ds.callback.IndexUpdateCallback#store(java.util.Map)
      */
     @Override
-    public void store(final Map<String, String> paramMap, final Map<String, Object> dataMap) {
+    public void store(final DataStoreParams paramMap, final Map<String, Object> dataMap) {
         final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
         systemHelper.calibrateCpuLoad();
 
@@ -142,7 +143,7 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
 
     }
 
-    protected Map<String, Object> ingest(final Map<String, String> paramMap, final Map<String, Object> dataMap) {
+    protected Map<String, Object> ingest(final DataStoreParams paramMap, final Map<String, Object> dataMap) {
         if (ingestFactory == null) {
             return dataMap;
         }

+ 75 - 0
src/main/java/org/codelibs/fess/entity/DataStoreParams.java

@@ -0,0 +1,75 @@
+/*
+ * Copyright 2012-2022 CodeLibs Project and the Others.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+package org.codelibs.fess.entity;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/** Mutable holder of data store parameters, keyed by name; values may be any object. */
+public class DataStoreParams {
+
+    protected final Map<String, Object> params;
+
+    public DataStoreParams() {
+        params = new HashMap<>();
+    }
+
+    /** Creates an instance backed by a shallow copy of the given map. */
+    protected DataStoreParams(final Map<String, Object> params) {
+        this.params = new HashMap<>(params);
+    }
+
+    public void put(final String key, final Object value) {
+        params.put(key, value);
+    }
+
+    /** Returns the raw value stored under the key, or null when none is stored. */
+    public Object get(final String key) {
+        return params.get(key);
+    }
+
+    /** Returns the value's toString() form, or null when the key is absent or maps to null. */
+    public String getAsString(final String key) {
+        // Single lookup: String.toString() returns the same instance, so no String special case.
+        final Object value = params.get(key);
+        return value != null ? value.toString() : null;
+    }
+
+    /** Like getAsString(key), but returns defaultValue when that result would be null. */
+    public String getAsString(final String key, final String defaultValue) {
+        final String value = getAsString(key);
+        return value != null ? value : defaultValue;
+    }
+
+    /** Returns a new DataStoreParams holding a shallow copy of the current entries. */
+    public DataStoreParams newInstance() {
+        return new DataStoreParams(params);
+    }
+
+    /** Copies every entry of the given map into these parameters. */
+    public void putAll(final Map<String, String> map) {
+        params.putAll(map);
+    }
+
+    public boolean containsKey(final String key) {
+        return params.containsKey(key);
+    }
+
+    /** Returns a shallow copy of the entries; changes to it do not affect this instance. */
+    public Map<String, Object> asMap() {
+        return new HashMap<>(params);
+    }
+}

+ 7 - 8
src/main/java/org/codelibs/fess/helper/DataIndexHelper.java

@@ -17,9 +17,7 @@ package org.codelibs.fess.helper;
 
 import java.util.ArrayList;
 import java.util.Collections;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
@@ -30,6 +28,7 @@ import org.codelibs.fess.app.service.FailureUrlService;
 import org.codelibs.fess.ds.DataStore;
 import org.codelibs.fess.ds.DataStoreFactory;
 import org.codelibs.fess.ds.callback.IndexUpdateCallback;
+import org.codelibs.fess.entity.DataStoreParams;
 import org.codelibs.fess.es.client.SearchEngineClient;
 import org.codelibs.fess.es.config.exentity.DataConfig;
 import org.codelibs.fess.mylasta.direction.FessConfig;
@@ -88,7 +87,7 @@ public class DataIndexHelper {
         dataCrawlingThreadList.clear();
         final List<String> dataCrawlingThreadStatusList = new ArrayList<>();
         for (final DataConfig dataConfig : configList) {
-            final Map<String, String> initParamMap = new HashMap<>();
+            final DataStoreParams initParamMap = new DataStoreParams();
             final String sid = ComponentUtil.getCrawlingConfigHelper().store(sessionId, dataConfig);
             sessionIdList.add(sid);
 
@@ -180,7 +179,7 @@ public class DataIndexHelper {
 
         private final IndexUpdateCallback indexUpdateCallback;
 
-        private final Map<String, String> initParamMap;
+        private final DataStoreParams initParamMap;
 
         protected boolean finished = false;
 
@@ -189,7 +188,7 @@ public class DataIndexHelper {
         private DataStore dataStore;
 
         protected DataCrawlingThread(final DataConfig dataConfig, final IndexUpdateCallback indexUpdateCallback,
-                final Map<String, String> initParamMap) {
+                final DataStoreParams initParamMap) {
             this.dataConfig = dataConfig;
             this.indexUpdateCallback = indexUpdateCallback;
             this.initParamMap = initParamMap;
@@ -226,10 +225,10 @@ public class DataIndexHelper {
         }
 
         private void deleteOldDocs() {
-            if (Constants.FALSE.equals(initParamMap.get(DELETE_OLD_DOCS))) {
+            if (Constants.FALSE.equals(initParamMap.getAsString(DELETE_OLD_DOCS))) {
                 return;
             }
-            final String sessionId = initParamMap.get(Constants.SESSION_ID);
+            final String sessionId = initParamMap.getAsString(Constants.SESSION_ID);
             if (StringUtil.isBlank(sessionId)) {
                 logger.warn("Invalid sessionId at {}", dataConfig);
                 return;
@@ -262,7 +261,7 @@ public class DataIndexHelper {
         }
 
         public String getCrawlingInfoId() {
-            return initParamMap.get(Constants.CRAWLING_INFO_ID);
+            return initParamMap.getAsString(Constants.CRAWLING_INFO_ID);
         }
 
         public boolean isRunning() {

+ 2 - 1
src/main/java/org/codelibs/fess/ingest/Ingester.java

@@ -20,6 +20,7 @@ import java.util.Map;
 import org.codelibs.fess.crawler.entity.AccessResult;
 import org.codelibs.fess.crawler.entity.ResponseData;
 import org.codelibs.fess.crawler.entity.ResultData;
+import org.codelibs.fess.entity.DataStoreParams;
 import org.codelibs.fess.util.ComponentUtil;
 
 public abstract class Ingester {
@@ -49,7 +50,7 @@ public abstract class Ingester {
     }
 
     // datastore
-    public Map<String, Object> process(final Map<String, Object> target, final Map<String, String> params) {
+    public Map<String, Object> process(final Map<String, Object> target, final DataStoreParams params) {
         return process(target);
     }
 

+ 2 - 1
src/test/java/org/codelibs/fess/ds/AbstractDataStoreTest.java

@@ -20,6 +20,7 @@ import java.util.Map;
 
 import org.codelibs.fess.Constants;
 import org.codelibs.fess.ds.callback.IndexUpdateCallback;
+import org.codelibs.fess.entity.DataStoreParams;
 import org.codelibs.fess.es.config.exentity.DataConfig;
 import org.codelibs.fess.exception.JobProcessingException;
 import org.codelibs.fess.script.AbstractScriptEngine;
@@ -45,7 +46,7 @@ public class AbstractDataStoreTest extends UnitFessTestCase {
             }
 
             @Override
-            protected void storeData(DataConfig dataConfig, IndexUpdateCallback callback, Map<String, String> paramMap,
+            protected void storeData(DataConfig dataConfig, IndexUpdateCallback callback, DataStoreParams paramMap,
                     Map<String, String> scriptMap, Map<String, Object> defaultDataMap) {
                 // TODO nothing
             }