fix #2463 add fess-ingest
This commit is contained in:
parent
989a385819
commit
a569803668
10 changed files with 237 additions and 6 deletions
|
@ -0,0 +1,56 @@
|
|||
/*
|
||||
* Copyright 2012-2020 CodeLibs Project and the Others.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific language
|
||||
* governing permissions and limitations under the License.
|
||||
*/
|
||||
package org.codelibs.fess.crawler.processor;
|
||||
|
||||
import javax.annotation.PostConstruct;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.codelibs.fess.crawler.entity.AccessResult;
|
||||
import org.codelibs.fess.crawler.entity.ResponseData;
|
||||
import org.codelibs.fess.crawler.entity.ResultData;
|
||||
import org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor;
|
||||
import org.codelibs.fess.ingest.IngestFactory;
|
||||
import org.codelibs.fess.ingest.Ingester;
|
||||
import org.codelibs.fess.util.ComponentUtil;
|
||||
|
||||
public class FessResponseProcessor extends DefaultResponseProcessor {
|
||||
private static final Logger logger = LogManager.getLogger(FessResponseProcessor.class);
|
||||
|
||||
private IngestFactory ingestFactory;
|
||||
|
||||
@PostConstruct
|
||||
public void init() {
|
||||
ingestFactory = ComponentUtil.getIngestFactory();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected AccessResult<?> createAccessResult(final ResponseData responseData, final ResultData resultData) {
|
||||
return super.createAccessResult(responseData, ingest(responseData, resultData));
|
||||
}
|
||||
|
||||
private ResultData ingest(final ResponseData responseData, final ResultData resultData) {
|
||||
ResultData target = resultData;
|
||||
for (final Ingester ingester : ingestFactory.getIngesters()) {
|
||||
try {
|
||||
target = ingester.process(target, responseData);
|
||||
} catch (Exception e) {
|
||||
logger.warn("Failed to process Ingest[{}]", ingester.getClass().getSimpleName(), e);
|
||||
}
|
||||
}
|
||||
return target;
|
||||
}
|
||||
}
|
|
@ -31,6 +31,8 @@ import org.codelibs.fess.helper.CrawlingInfoHelper;
|
|||
import org.codelibs.fess.helper.IndexingHelper;
|
||||
import org.codelibs.fess.helper.SearchLogHelper;
|
||||
import org.codelibs.fess.helper.SystemHelper;
|
||||
import org.codelibs.fess.ingest.IngestFactory;
|
||||
import org.codelibs.fess.ingest.Ingester;
|
||||
import org.codelibs.fess.mylasta.direction.FessConfig;
|
||||
import org.codelibs.fess.util.ComponentUtil;
|
||||
import org.codelibs.fess.util.DocList;
|
||||
|
@ -50,6 +52,8 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
|
|||
|
||||
protected int maxDocumentCacheSize;
|
||||
|
||||
private IngestFactory ingestFactory;
|
||||
|
||||
@PostConstruct
|
||||
public void init() {
|
||||
if (logger.isDebugEnabled()) {
|
||||
|
@ -57,6 +61,7 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
|
|||
}
|
||||
maxDocumentRequestSize = Long.parseLong(ComponentUtil.getFessConfig().getIndexerDataMaxDocumentRequestSize());
|
||||
maxDocumentCacheSize = ComponentUtil.getFessConfig().getIndexerDataMaxDocumentCacheSizeAsInteger();
|
||||
ingestFactory = ComponentUtil.getIngestFactory();
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
|
@ -111,7 +116,7 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
|
|||
ComponentUtil.getLanguageHelper().updateDocument(dataMap);
|
||||
|
||||
synchronized (docList) {
|
||||
docList.add(dataMap);
|
||||
docList.add(ingest(paramMap, dataMap));
|
||||
final long contentSize = indexingHelper.calculateDocumentSize(dataMap);
|
||||
docList.addContentSize(contentSize);
|
||||
final long processingTime = System.currentTimeMillis() - startTime;
|
||||
|
@ -135,6 +140,18 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
|
|||
|
||||
}
|
||||
|
||||
protected Map<String, Object> ingest(final Map<String, String> paramMap, final Map<String, Object> dataMap) {
|
||||
Map<String, Object> target = dataMap;
|
||||
for (final Ingester ingester : ingestFactory.getIngesters()) {
|
||||
try {
|
||||
target = ingester.process(target, paramMap);
|
||||
} catch (Exception e) {
|
||||
logger.warn("Failed to process Ingest[{}]", ingester.getClass().getSimpleName(), e);
|
||||
}
|
||||
}
|
||||
return target;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void commit() {
|
||||
synchronized (docList) {
|
||||
|
|
|
@ -363,7 +363,7 @@ public class PluginHelper {
|
|||
}
|
||||
|
||||
public enum ArtifactType {
|
||||
DATA_STORE("fess-ds"), THEME("fess-theme"), UNKNOWN("jar");
|
||||
DATA_STORE("fess-ds"), THEME("fess-theme"), INGEST("fess-ingest"), UNKNOWN("jar");
|
||||
|
||||
private final String id;
|
||||
|
||||
|
@ -380,6 +380,8 @@ public class PluginHelper {
|
|||
return DATA_STORE;
|
||||
} else if (name.startsWith(THEME.getId())) {
|
||||
return THEME;
|
||||
} else if (name.startsWith(INGEST.getId())) {
|
||||
return INGEST;
|
||||
}
|
||||
return UNKNOWN;
|
||||
}
|
||||
|
|
41
src/main/java/org/codelibs/fess/ingest/IngestFactory.java
Normal file
41
src/main/java/org/codelibs/fess/ingest/IngestFactory.java
Normal file
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Copyright 2012-2020 CodeLibs Project and the Others.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific language
|
||||
* governing permissions and limitations under the License.
|
||||
*/
|
||||
package org.codelibs.fess.ingest;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
public class IngestFactory {
|
||||
private static final Logger logger = LogManager.getLogger(IngestFactory.class);
|
||||
|
||||
private Ingester[] ingesters = new Ingester[0];
|
||||
|
||||
public synchronized void add(final Ingester ingester) {
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("Loaded {}", ingester.getClass().getSimpleName());
|
||||
}
|
||||
final Ingester[] newIngesters = Arrays.copyOf(ingesters, ingesters.length + 1);
|
||||
newIngesters[ingesters.length] = ingester;
|
||||
Arrays.sort(newIngesters, (o1, o2) -> o1.priority - o2.priority);
|
||||
ingesters = newIngesters;
|
||||
}
|
||||
|
||||
public Ingester[] getIngesters() {
|
||||
return ingesters;
|
||||
}
|
||||
}
|
51
src/main/java/org/codelibs/fess/ingest/Ingester.java
Normal file
51
src/main/java/org/codelibs/fess/ingest/Ingester.java
Normal file
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Copyright 2012-2020 CodeLibs Project and the Others.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific language
|
||||
* governing permissions and limitations under the License.
|
||||
*/
|
||||
package org.codelibs.fess.ingest;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.codelibs.fess.crawler.entity.ResponseData;
|
||||
import org.codelibs.fess.crawler.entity.ResultData;
|
||||
import org.codelibs.fess.crawler.transformer.Transformer;
|
||||
import org.codelibs.fess.util.ComponentUtil;
|
||||
|
||||
public abstract class Ingester {
|
||||
|
||||
protected int priority = 99;
|
||||
|
||||
public int getPriority() {
|
||||
return priority;
|
||||
}
|
||||
|
||||
public void setPriority(final int priority) {
|
||||
this.priority = priority;
|
||||
}
|
||||
|
||||
public void register() {
|
||||
ComponentUtil.getIngestFactory().add(this);
|
||||
}
|
||||
|
||||
// datastore
|
||||
public Map<String, Object> process(final Map<String, Object> target, final Map<String, String> params) {
|
||||
return target;
|
||||
}
|
||||
|
||||
// web/file
|
||||
public ResultData process(final ResultData target, final ResponseData responseData) {
|
||||
return target;
|
||||
}
|
||||
|
||||
}
|
|
@ -72,6 +72,7 @@ import org.codelibs.fess.helper.UserInfoHelper;
|
|||
import org.codelibs.fess.helper.ViewHelper;
|
||||
import org.codelibs.fess.helper.VirtualHostHelper;
|
||||
import org.codelibs.fess.indexer.IndexUpdater;
|
||||
import org.codelibs.fess.ingest.IngestFactory;
|
||||
import org.codelibs.fess.job.JobExecutor;
|
||||
import org.codelibs.fess.ldap.LdapManager;
|
||||
import org.codelibs.fess.mylasta.direction.FessConfig;
|
||||
|
@ -94,6 +95,8 @@ public final class ComponentUtil {
|
|||
|
||||
private static Map<String, Object> componentMap = new HashMap<>();
|
||||
|
||||
private static final String INGEST_FACTORY = "ingestFactory";
|
||||
|
||||
private static final String NOTIFICATION_HELPER = "notificationHelper";
|
||||
|
||||
private static final String SEARCH_HELPER = "searchHelper";
|
||||
|
@ -474,6 +477,10 @@ public final class ComponentUtil {
|
|||
return getComponent(NOTIFICATION_HELPER);
|
||||
}
|
||||
|
||||
public static IngestFactory getIngestFactory() {
|
||||
return getComponent(INGEST_FACTORY);
|
||||
}
|
||||
|
||||
public static <T> T getComponent(final Class<T> clazz) {
|
||||
try {
|
||||
return SingletonLaContainer.getComponent(clazz);
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
<component name="webHtmlRule" class="org.codelibs.fess.crawler.rule.impl.RegexRule" >
|
||||
<property name="ruleId">"webHtmlRule"</property>
|
||||
<property name="responseProcessor">
|
||||
<component class="org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor">
|
||||
<component class="org.codelibs.fess.crawler.processor.FessResponseProcessor">
|
||||
<property name="transformer">fessXpathTransformer</property>
|
||||
<property name="successfulHttpCodes">(int[])[200]</property>
|
||||
<property name="notModifiedHttpCodes">(int[])[304]</property>
|
||||
|
@ -59,7 +59,7 @@
|
|||
<component name="webFileRule" class="org.codelibs.fess.crawler.rule.impl.RegexRule" >
|
||||
<property name="ruleId">"webFileRule"</property>
|
||||
<property name="responseProcessor">
|
||||
<component class="org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor">
|
||||
<component class="org.codelibs.fess.crawler.processor.FessResponseProcessor">
|
||||
<property name="transformer">fessFileTransformer</property>
|
||||
<property name="successfulHttpCodes">(int[])[200]</property>
|
||||
<property name="notModifiedHttpCodes">(int[])[304]</property>
|
||||
|
@ -88,7 +88,7 @@
|
|||
<component name="fsFileRule" class="org.codelibs.fess.crawler.rule.impl.RegexRule" >
|
||||
<property name="ruleId">"fsFileRule"</property>
|
||||
<property name="responseProcessor">
|
||||
<component class="org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor">
|
||||
<component class="org.codelibs.fess.crawler.processor.FessResponseProcessor">
|
||||
<property name="transformer">fessFileTransformer</property>
|
||||
<property name="successfulHttpCodes">(int[])[200]</property>
|
||||
<property name="notModifiedHttpCodes">(int[])[304]</property>
|
||||
|
@ -122,7 +122,7 @@
|
|||
<component name="defaultRule" class="org.codelibs.fess.crawler.rule.impl.RegexRule" >
|
||||
<property name="ruleId">"defaultRule"</property>
|
||||
<property name="responseProcessor">
|
||||
<component class="org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor">
|
||||
<component class="org.codelibs.fess.crawler.processor.FessResponseProcessor">
|
||||
<property name="transformer">fessStandardTransformer</property>
|
||||
<property name="successfulHttpCodes">(int[])[200]</property>
|
||||
<property name="notModifiedHttpCodes">(int[])[304]</property>
|
||||
|
|
7
src/main/resources/fess_ingest.xml
Normal file
7
src/main/resources/fess_ingest.xml
Normal file
|
@ -0,0 +1,7 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE components PUBLIC "-//DBFLUTE//DTD LastaDi 1.0//EN"
|
||||
"http://dbflute.org/meta/lastadi10.dtd">
|
||||
<components>
|
||||
<!-- Registry of ingesters; resolved by name ("ingestFactory") through ComponentUtil.getIngestFactory(). -->
<component name="ingestFactory" class="org.codelibs.fess.ingest.IngestFactory">
|
||||
</component>
|
||||
</components>
|
|
@ -8,6 +8,7 @@
|
|||
|
||||
<include path="crawler_es.xml" />
|
||||
<include path="fess_thumbnail.xml" />
|
||||
<include path="fess_ingest.xml" />
|
||||
|
||||
<component name="labelTypeHelper" class="org.codelibs.fess.helper.LabelTypeHelper">
|
||||
</component>
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
* Copyright 2012-2020 CodeLibs Project and the Others.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
* either express or implied. See the License for the specific language
|
||||
* governing permissions and limitations under the License.
|
||||
*/
|
||||
package org.codelibs.fess.ingest;
|
||||
|
||||
import org.codelibs.fess.unit.UnitFessTestCase;
|
||||
|
||||
public class IngestFactoryTest extends UnitFessTestCase {
|
||||
|
||||
public void test_add_1() {
|
||||
IngestFactory factory = new IngestFactory();
|
||||
factory.add(new TestIngester(1));
|
||||
factory.add(new TestIngester(2));
|
||||
factory.add(new TestIngester(3));
|
||||
Ingester[] ingesters = factory.getIngesters();
|
||||
assertEquals(1, ingesters[0].getPriority());
|
||||
assertEquals(2, ingesters[1].getPriority());
|
||||
assertEquals(3, ingesters[2].getPriority());
|
||||
}
|
||||
|
||||
public void test_add_2() {
|
||||
IngestFactory factory = new IngestFactory();
|
||||
factory.add(new TestIngester(3));
|
||||
factory.add(new TestIngester(2));
|
||||
factory.add(new TestIngester(1));
|
||||
Ingester[] ingesters = factory.getIngesters();
|
||||
assertEquals(1, ingesters[0].getPriority());
|
||||
assertEquals(2, ingesters[1].getPriority());
|
||||
assertEquals(3, ingesters[2].getPriority());
|
||||
}
|
||||
|
||||
private static class TestIngester extends Ingester {
|
||||
public TestIngester(int priority) {
|
||||
this.priority = priority;
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue