Shinsuke Sugaya пре 5 година
родитељ
комит
a569803668

+ 56 - 0
src/main/java/org/codelibs/fess/crawler/processor/FessResponseProcessor.java

@@ -0,0 +1,56 @@
+/*
+ * Copyright 2012-2020 CodeLibs Project and the Others.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+package org.codelibs.fess.crawler.processor;
+
+import javax.annotation.PostConstruct;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.codelibs.fess.crawler.entity.AccessResult;
+import org.codelibs.fess.crawler.entity.ResponseData;
+import org.codelibs.fess.crawler.entity.ResultData;
+import org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor;
+import org.codelibs.fess.ingest.IngestFactory;
+import org.codelibs.fess.ingest.Ingester;
+import org.codelibs.fess.util.ComponentUtil;
+
+/**
+ * Response processor that hands the crawled {@link ResultData} to every
+ * registered {@link Ingester} before the access result is created by
+ * {@link DefaultResponseProcessor}.
+ */
+public class FessResponseProcessor extends DefaultResponseProcessor {
+    private static final Logger logger = LogManager.getLogger(FessResponseProcessor.class);
+
+    private IngestFactory ingestFactory;
+
+    /**
+     * Obtains the ingest factory from {@link ComponentUtil} after construction.
+     */
+    @PostConstruct
+    public void init() {
+        ingestFactory = ComponentUtil.getIngestFactory();
+    }
+
+    /**
+     * Runs the ingesters on {@code resultData} and delegates to the parent
+     * implementation with the ingested result.
+     */
+    @Override
+    protected AccessResult<?> createAccessResult(final ResponseData responseData, final ResultData resultData) {
+        final ResultData ingested = ingest(responseData, resultData);
+        return super.createAccessResult(responseData, ingested);
+    }
+
+    /**
+     * Applies each ingester, in the order the factory returns them, to the
+     * result of the previous one.
+     *
+     * @param responseData the response passed unchanged to every ingester
+     * @param resultData the initial result
+     * @return the result after the last ingester that completed without throwing
+     */
+    private ResultData ingest(final ResponseData responseData, final ResultData resultData) {
+        final Ingester[] chain = ingestFactory.getIngesters();
+        ResultData current = resultData;
+        for (int i = 0; i < chain.length; i++) {
+            final Ingester step = chain[i];
+            try {
+                current = step.process(current, responseData);
+            } catch (final Exception e) {
+                // Best effort: a failing ingester is logged and skipped, the previous result is kept.
+                logger.warn("Failed to process Ingest[{}]", step.getClass().getSimpleName(), e);
+            }
+        }
+        return current;
+    }
+}

+ 18 - 1
src/main/java/org/codelibs/fess/ds/callback/IndexUpdateCallbackImpl.java

@@ -31,6 +31,8 @@ import org.codelibs.fess.helper.CrawlingInfoHelper;
 import org.codelibs.fess.helper.IndexingHelper;
 import org.codelibs.fess.helper.SearchLogHelper;
 import org.codelibs.fess.helper.SystemHelper;
+import org.codelibs.fess.ingest.IngestFactory;
+import org.codelibs.fess.ingest.Ingester;
 import org.codelibs.fess.mylasta.direction.FessConfig;
 import org.codelibs.fess.util.ComponentUtil;
 import org.codelibs.fess.util.DocList;
@@ -50,6 +52,8 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
 
     protected int maxDocumentCacheSize;
 
+    private IngestFactory ingestFactory;
+
     @PostConstruct
     public void init() {
         if (logger.isDebugEnabled()) {
@@ -57,6 +61,7 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
         }
         maxDocumentRequestSize = Long.parseLong(ComponentUtil.getFessConfig().getIndexerDataMaxDocumentRequestSize());
         maxDocumentCacheSize = ComponentUtil.getFessConfig().getIndexerDataMaxDocumentCacheSizeAsInteger();
+        ingestFactory = ComponentUtil.getIngestFactory();
     }
 
     /* (non-Javadoc)
@@ -111,7 +116,7 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
         ComponentUtil.getLanguageHelper().updateDocument(dataMap);
 
         synchronized (docList) {
-            docList.add(dataMap);
+            docList.add(ingest(paramMap, dataMap));
             final long contentSize = indexingHelper.calculateDocumentSize(dataMap);
             docList.addContentSize(contentSize);
             final long processingTime = System.currentTimeMillis() - startTime;
@@ -135,6 +140,18 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
 
     }
 
+    protected Map<String, Object> ingest(final Map<String, String> paramMap, final Map<String, Object> dataMap) {
+        Map<String, Object> target = dataMap;
+        for (final Ingester ingester : ingestFactory.getIngesters()) {
+            try {
+                target = ingester.process(target, paramMap);
+            } catch (Exception e) {
+                logger.warn("Failed to process Ingest[{}]", ingester.getClass().getSimpleName(), e);
+            }
+        }
+        return target;
+    }
+
     @Override
     public void commit() {
         synchronized (docList) {

+ 3 - 1
src/main/java/org/codelibs/fess/helper/PluginHelper.java

@@ -363,7 +363,7 @@ public class PluginHelper {
     }
 
     public enum ArtifactType {
-        DATA_STORE("fess-ds"), THEME("fess-theme"), UNKNOWN("jar");
+        DATA_STORE("fess-ds"), THEME("fess-theme"), INGEST("fess-ingest"), UNKNOWN("jar");
 
         private final String id;
 
@@ -380,6 +380,8 @@ public class PluginHelper {
                 return DATA_STORE;
             } else if (name.startsWith(THEME.getId())) {
                 return THEME;
+            } else if (name.startsWith(INGEST.getId())) {
+                return INGEST;
             }
             return UNKNOWN;
         }

+ 41 - 0
src/main/java/org/codelibs/fess/ingest/IngestFactory.java

@@ -0,0 +1,41 @@
+/*
+ * Copyright 2012-2020 CodeLibs Project and the Others.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+package org.codelibs.fess.ingest;
+
+import java.util.Arrays;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+/**
+ * Registry of {@link Ingester}s, kept sorted by ascending priority value.
+ */
+public class IngestFactory {
+    private static final Logger logger = LogManager.getLogger(IngestFactory.class);
+
+    private Ingester[] ingesters = new Ingester[0];
+
+    /**
+     * Registers an ingester and re-sorts the registry by ascending priority.
+     * <p>
+     * A new array is built and then assigned, so an array previously obtained
+     * from {@link #getIngesters()} is not modified by this call.
+     *
+     * @param ingester the ingester to register
+     */
+    public synchronized void add(final Ingester ingester) {
+        if (logger.isDebugEnabled()) {
+            logger.debug("Loaded {}", ingester.getClass().getSimpleName());
+        }
+        final Ingester[] newIngesters = Arrays.copyOf(ingesters, ingesters.length + 1);
+        newIngesters[ingesters.length] = ingester;
+        // Integer.compare rather than subtraction: "a - b" overflows when the
+        // priorities are far apart (e.g. Integer.MIN_VALUE and a positive
+        // value) and would then report the wrong ordering.
+        Arrays.sort(newIngesters, (o1, o2) -> Integer.compare(o1.priority, o2.priority));
+        ingesters = newIngesters;
+    }
+
+    /**
+     * Returns the registered ingesters in ascending priority order.
+     * <p>
+     * The internal array is returned without copying and this method is not
+     * synchronized; callers should treat the array as read-only.
+     *
+     * @return the registered ingesters, empty when none has been added
+     */
+    public Ingester[] getIngesters() {
+        return ingesters;
+    }
+}

+ 51 - 0
src/main/java/org/codelibs/fess/ingest/Ingester.java

@@ -0,0 +1,51 @@
+/*
+ * Copyright 2012-2020 CodeLibs Project and the Others.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+package org.codelibs.fess.ingest;
+
+import java.util.Map;
+
+import org.codelibs.fess.crawler.entity.ResponseData;
+import org.codelibs.fess.crawler.entity.ResultData;
+import org.codelibs.fess.crawler.transformer.Transformer;
+import org.codelibs.fess.util.ComponentUtil;
+
+/**
+ * Base class for ingesters. Both {@code process} variants return their
+ * target unchanged by default, so a subclass overrides only the one it needs.
+ */
+public abstract class Ingester {
+
+    /** Sort key used by {@link IngestFactory}; ingesters are ordered by ascending value. Defaults to 99. */
+    protected int priority = 99;
+
+    /**
+     * @return the priority of this ingester
+     */
+    public int getPriority() {
+        return this.priority;
+    }
+
+    /**
+     * @param priority the priority to use for this ingester
+     */
+    public void setPriority(final int priority) {
+        this.priority = priority;
+    }
+
+    /**
+     * Adds this ingester to the {@link IngestFactory} obtained from
+     * {@link ComponentUtil}.
+     */
+    public void register() {
+        final IngestFactory factory = ComponentUtil.getIngestFactory();
+        factory.add(this);
+    }
+
+    /**
+     * Processes a data store document.
+     *
+     * @param target the document
+     * @param params the parameters supplied by the caller
+     * @return the document to continue with; this default returns {@code target} as is
+     */
+    public Map<String, Object> process(final Map<String, Object> target, final Map<String, String> params) {
+        return target;
+    }
+
+    /**
+     * Processes the result of a web/file crawl.
+     *
+     * @param target the crawl result
+     * @param responseData the response the result was produced from
+     * @return the result to continue with; this default returns {@code target} as is
+     */
+    public ResultData process(final ResultData target, final ResponseData responseData) {
+        return target;
+    }
+
+}

+ 7 - 0
src/main/java/org/codelibs/fess/util/ComponentUtil.java

@@ -72,6 +72,7 @@ import org.codelibs.fess.helper.UserInfoHelper;
 import org.codelibs.fess.helper.ViewHelper;
 import org.codelibs.fess.helper.VirtualHostHelper;
 import org.codelibs.fess.indexer.IndexUpdater;
+import org.codelibs.fess.ingest.IngestFactory;
 import org.codelibs.fess.job.JobExecutor;
 import org.codelibs.fess.ldap.LdapManager;
 import org.codelibs.fess.mylasta.direction.FessConfig;
@@ -94,6 +95,8 @@ public final class ComponentUtil {
 
     private static Map<String, Object> componentMap = new HashMap<>();
 
+    private static final String INGEST_FACTORY = "ingestFactory";
+
     private static final String NOTIFICATION_HELPER = "notificationHelper";
 
     private static final String SEARCH_HELPER = "searchHelper";
@@ -474,6 +477,10 @@ public final class ComponentUtil {
         return getComponent(NOTIFICATION_HELPER);
     }
 
+    /**
+     * Looks up the component registered under the name {@code ingestFactory}.
+     *
+     * @return the ingest factory component
+     */
+    public static IngestFactory getIngestFactory() {
+        final IngestFactory factory = getComponent(INGEST_FACTORY);
+        return factory;
+    }
+
     public static <T> T getComponent(final Class<T> clazz) {
         try {
             return SingletonLaContainer.getComponent(clazz);

+ 4 - 4
src/main/resources/crawler/rule.xml

@@ -38,7 +38,7 @@
 	<component name="webHtmlRule" class="org.codelibs.fess.crawler.rule.impl.RegexRule" >
 		<property name="ruleId">"webHtmlRule"</property>
 		<property name="responseProcessor">
-			<component class="org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor">
+			<component class="org.codelibs.fess.crawler.processor.FessResponseProcessor">
 				<property name="transformer">fessXpathTransformer</property>
 				<property name="successfulHttpCodes">(int[])[200]</property>
 				<property name="notModifiedHttpCodes">(int[])[304]</property>
@@ -59,7 +59,7 @@
 	<component name="webFileRule" class="org.codelibs.fess.crawler.rule.impl.RegexRule" >
 		<property name="ruleId">"webFileRule"</property>
 		<property name="responseProcessor">
-			<component class="org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor">
+			<component class="org.codelibs.fess.crawler.processor.FessResponseProcessor">
 				<property name="transformer">fessFileTransformer</property>
 				<property name="successfulHttpCodes">(int[])[200]</property>
 				<property name="notModifiedHttpCodes">(int[])[304]</property>
@@ -88,7 +88,7 @@
 	<component name="fsFileRule" class="org.codelibs.fess.crawler.rule.impl.RegexRule" >
 		<property name="ruleId">"fsFileRule"</property>
 		<property name="responseProcessor">
-			<component class="org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor">
+			<component class="org.codelibs.fess.crawler.processor.FessResponseProcessor">
 				<property name="transformer">fessFileTransformer</property>
 				<property name="successfulHttpCodes">(int[])[200]</property>
 				<property name="notModifiedHttpCodes">(int[])[304]</property>
@@ -122,7 +122,7 @@
 	<component name="defaultRule" class="org.codelibs.fess.crawler.rule.impl.RegexRule" >
 		<property name="ruleId">"defaultRule"</property>
 		<property name="responseProcessor">
-			<component class="org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor">
+			<component class="org.codelibs.fess.crawler.processor.FessResponseProcessor">
 				<property name="transformer">fessStandardTransformer</property>
 				<property name="successfulHttpCodes">(int[])[200]</property>
 				<property name="notModifiedHttpCodes">(int[])[304]</property>

+ 7 - 0
src/main/resources/fess_ingest.xml

@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE components PUBLIC "-//DBFLUTE//DTD LastaDi 1.0//EN"
+	"http://dbflute.org/meta/lastadi10.dtd">
+<components>
+	<!-- Registers IngestFactory under the name "ingestFactory", the name ComponentUtil.getIngestFactory() looks up. -->
+	<component name="ingestFactory" class="org.codelibs.fess.ingest.IngestFactory">
+	</component>
+</components>

+ 1 - 0
src/main/webapp/WEB-INF/env/crawler/resources/app.xml

@@ -8,6 +8,7 @@
 
 	<include path="crawler_es.xml" />
 	<include path="fess_thumbnail.xml" />
+	<include path="fess_ingest.xml" />
 
 	<component name="labelTypeHelper" class="org.codelibs.fess.helper.LabelTypeHelper">
 	</component>

+ 49 - 0
src/test/java/org/codelibs/fess/ingest/IngestFactoryTest.java

@@ -0,0 +1,49 @@
+/*
+ * Copyright 2012-2020 CodeLibs Project and the Others.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+package org.codelibs.fess.ingest;
+
+import org.codelibs.fess.unit.UnitFessTestCase;
+
+/**
+ * Checks that {@link IngestFactory} returns ingesters ordered by priority
+ * regardless of the order in which they were added.
+ */
+public class IngestFactoryTest extends UnitFessTestCase {
+
+    /** Ingesters added in ascending priority order come back in that order. */
+    public void test_add_1() {
+        final IngestFactory factory = new IngestFactory();
+        for (int priority = 1; priority <= 3; priority++) {
+            factory.add(new TestIngester(priority));
+        }
+        assertPriorities(factory.getIngesters(), 1, 2, 3);
+    }
+
+    /** Ingesters added in descending priority order come back ascending. */
+    public void test_add_2() {
+        final IngestFactory factory = new IngestFactory();
+        for (int priority = 3; priority >= 1; priority--) {
+            factory.add(new TestIngester(priority));
+        }
+        assertPriorities(factory.getIngesters(), 1, 2, 3);
+    }
+
+    /** Asserts that the leading elements of {@code actual} carry the expected priorities, position by position. */
+    private void assertPriorities(final Ingester[] actual, final int... expected) {
+        for (int i = 0; i < expected.length; i++) {
+            assertEquals(expected[i], actual[i].getPriority());
+        }
+    }
+
+    /** Minimal ingester whose only state is its priority. */
+    private static class TestIngester extends Ingester {
+        public TestIngester(int priority) {
+            this.priority = priority;
+        }
+    }
+}