fix #2463 add fess-ingest

This commit is contained in:
Shinsuke Sugaya 2020-06-04 17:42:49 +09:00
parent 989a385819
commit a569803668
10 changed files with 237 additions and 6 deletions

View file

@ -0,0 +1,56 @@
/*
* Copyright 2012-2020 CodeLibs Project and the Others.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
package org.codelibs.fess.crawler.processor;
import javax.annotation.PostConstruct;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codelibs.fess.crawler.entity.AccessResult;
import org.codelibs.fess.crawler.entity.ResponseData;
import org.codelibs.fess.crawler.entity.ResultData;
import org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor;
import org.codelibs.fess.ingest.IngestFactory;
import org.codelibs.fess.ingest.Ingester;
import org.codelibs.fess.util.ComponentUtil;
/**
 * Response processor that hands every crawled result to the registered
 * {@link Ingester}s before delegating to {@link DefaultResponseProcessor}
 * to build the access result.
 */
public class FessResponseProcessor extends DefaultResponseProcessor {

    private static final Logger logger = LogManager.getLogger(FessResponseProcessor.class);

    /** Source of the ingesters to apply; assigned in {@link #init()}. */
    private IngestFactory ingestFactory;

    /**
     * {@code @PostConstruct} hook that obtains the ingest factory from
     * {@link ComponentUtil}.
     */
    @PostConstruct
    public void init() {
        this.ingestFactory = ComponentUtil.getIngestFactory();
    }

    /**
     * Creates the access result from the ingested form of {@code resultData}.
     *
     * @param responseData the response the result was produced from
     * @param resultData the result produced for that response
     * @return the access result built by the superclass from the ingested data
     */
    @Override
    protected AccessResult<?> createAccessResult(final ResponseData responseData, final ResultData resultData) {
        final ResultData ingested = ingest(responseData, resultData);
        return super.createAccessResult(responseData, ingested);
    }

    /**
     * Feeds the result through each ingester in the order the factory returns them.
     * An ingester that throws is logged and skipped; the value produced so far is
     * passed on to the next one.
     *
     * @param responseData the response handed to every ingester
     * @param resultData the initial result
     * @return the result after all ingesters have been applied
     */
    private ResultData ingest(final ResponseData responseData, final ResultData resultData) {
        final Ingester[] chain = ingestFactory.getIngesters();
        ResultData current = resultData;
        for (int i = 0; i < chain.length; i++) {
            final Ingester ingester = chain[i];
            try {
                current = ingester.process(current, responseData);
            } catch (final Exception e) {
                // Best effort: a failing ingester must not abort processing of the response.
                logger.warn("Failed to process Ingest[{}]", ingester.getClass().getSimpleName(), e);
            }
        }
        return current;
    }
}

View file

@ -31,6 +31,8 @@ import org.codelibs.fess.helper.CrawlingInfoHelper;
import org.codelibs.fess.helper.IndexingHelper;
import org.codelibs.fess.helper.SearchLogHelper;
import org.codelibs.fess.helper.SystemHelper;
import org.codelibs.fess.ingest.IngestFactory;
import org.codelibs.fess.ingest.Ingester;
import org.codelibs.fess.mylasta.direction.FessConfig;
import org.codelibs.fess.util.ComponentUtil;
import org.codelibs.fess.util.DocList;
@ -50,6 +52,8 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
protected int maxDocumentCacheSize;
private IngestFactory ingestFactory;
@PostConstruct
public void init() {
if (logger.isDebugEnabled()) {
@ -57,6 +61,7 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
}
maxDocumentRequestSize = Long.parseLong(ComponentUtil.getFessConfig().getIndexerDataMaxDocumentRequestSize());
maxDocumentCacheSize = ComponentUtil.getFessConfig().getIndexerDataMaxDocumentCacheSizeAsInteger();
ingestFactory = ComponentUtil.getIngestFactory();
}
/* (non-Javadoc)
@ -111,7 +116,7 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
ComponentUtil.getLanguageHelper().updateDocument(dataMap);
synchronized (docList) {
docList.add(dataMap);
docList.add(ingest(paramMap, dataMap));
final long contentSize = indexingHelper.calculateDocumentSize(dataMap);
docList.addContentSize(contentSize);
final long processingTime = System.currentTimeMillis() - startTime;
@ -135,6 +140,18 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
}
/**
 * Passes a document through each ingester in the order the factory returns them.
 * An ingester that throws is logged and skipped; the map produced so far is
 * handed to the next one.
 *
 * @param paramMap parameters handed to every ingester
 * @param dataMap the initial document
 * @return the document after all ingesters have been applied
 */
protected Map<String, Object> ingest(final Map<String, String> paramMap, final Map<String, Object> dataMap) {
    final Ingester[] chain = ingestFactory.getIngesters();
    Map<String, Object> current = dataMap;
    for (int i = 0; i < chain.length; i++) {
        final Ingester ingester = chain[i];
        try {
            current = ingester.process(current, paramMap);
        } catch (final Exception e) {
            // Best effort: a failing ingester must not prevent the document from being stored.
            logger.warn("Failed to process Ingest[{}]", ingester.getClass().getSimpleName(), e);
        }
    }
    return current;
}
@Override
public void commit() {
synchronized (docList) {

View file

@ -363,7 +363,7 @@ public class PluginHelper {
}
public enum ArtifactType {
DATA_STORE("fess-ds"), THEME("fess-theme"), UNKNOWN("jar");
DATA_STORE("fess-ds"), THEME("fess-theme"), INGEST("fess-ingest"), UNKNOWN("jar");
private final String id;
@ -380,6 +380,8 @@ public class PluginHelper {
return DATA_STORE;
} else if (name.startsWith(THEME.getId())) {
return THEME;
} else if (name.startsWith(INGEST.getId())) {
return INGEST;
}
return UNKNOWN;
}

View file

@ -0,0 +1,41 @@
/*
* Copyright 2012-2020 CodeLibs Project and the Others.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
package org.codelibs.fess.ingest;
import java.util.Arrays;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
/**
 * Registry of {@link Ingester}s, kept sorted by ascending priority.
 *
 * <p>The backing array is never modified in place: {@link #add(Ingester)} builds a
 * new sorted array and swaps the reference, so an array obtained from
 * {@link #getIngesters()} is not affected by later registrations.</p>
 */
public class IngestFactory {
    private static final Logger logger = LogManager.getLogger(IngestFactory.class);

    // volatile: add() replaces the reference under the monitor, but getIngesters()
    // reads it without synchronization, so readers need a visibility guarantee.
    private volatile Ingester[] ingesters = new Ingester[0];

    /**
     * Registers an ingester and re-sorts the registry by ascending priority.
     * The sort is stable, so ingesters with equal priority keep their
     * registration order.
     *
     * @param ingester the ingester to register
     */
    public synchronized void add(final Ingester ingester) {
        if (logger.isDebugEnabled()) {
            logger.debug("Loaded {}", ingester.getClass().getSimpleName());
        }
        final Ingester[] current = ingesters;
        final Ingester[] newIngesters = Arrays.copyOf(current, current.length + 1);
        newIngesters[current.length] = ingester;
        // Integer.compare instead of subtraction: "a - b" overflows for widely
        // separated priorities (e.g. Integer.MIN_VALUE vs. 1) and yields a wrong order.
        Arrays.sort(newIngesters, (o1, o2) -> Integer.compare(o1.priority, o2.priority));
        ingesters = newIngesters;
    }

    /**
     * Returns the registered ingesters in ascending priority order.
     *
     * @return the internal array (not a copy); callers should not modify it
     */
    public Ingester[] getIngesters() {
        return ingesters;
    }
}

View file

@ -0,0 +1,51 @@
/*
* Copyright 2012-2020 CodeLibs Project and the Others.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
package org.codelibs.fess.ingest;
import java.util.Map;
import org.codelibs.fess.crawler.entity.ResponseData;
import org.codelibs.fess.crawler.entity.ResultData;
import org.codelibs.fess.crawler.transformer.Transformer;
import org.codelibs.fess.util.ComponentUtil;
/**
 * Base class for ingest plugins. Both {@code process} variants return their
 * input unchanged by default; subclasses override the one(s) they need.
 */
public abstract class Ingester {

    /** Sort key used by {@link IngestFactory}; lower values come first. Defaults to 99. */
    protected int priority = 99;

    /**
     * @return the priority of this ingester
     */
    public int getPriority() {
        return this.priority;
    }

    /**
     * @param priority the new priority of this ingester
     */
    public void setPriority(final int priority) {
        this.priority = priority;
    }

    /**
     * Adds this ingester to the {@link IngestFactory} obtained from {@link ComponentUtil}.
     */
    public void register() {
        final IngestFactory factory = ComponentUtil.getIngestFactory();
        factory.add(this);
    }

    /**
     * Processes a document from a datastore.
     *
     * @param target the document to process
     * @param params the parameters accompanying the document
     * @return the processed document; this default returns {@code target} as is
     */
    public Map<String, Object> process(final Map<String, Object> target, final Map<String, String> params) {
        return target;
    }

    /**
     * Processes a result from a web/file crawl.
     *
     * @param target the result to process
     * @param responseData the response the result was produced from
     * @return the processed result; this default returns {@code target} as is
     */
    public ResultData process(final ResultData target, final ResponseData responseData) {
        return target;
    }
}

View file

@ -72,6 +72,7 @@ import org.codelibs.fess.helper.UserInfoHelper;
import org.codelibs.fess.helper.ViewHelper;
import org.codelibs.fess.helper.VirtualHostHelper;
import org.codelibs.fess.indexer.IndexUpdater;
import org.codelibs.fess.ingest.IngestFactory;
import org.codelibs.fess.job.JobExecutor;
import org.codelibs.fess.ldap.LdapManager;
import org.codelibs.fess.mylasta.direction.FessConfig;
@ -94,6 +95,8 @@ public final class ComponentUtil {
private static Map<String, Object> componentMap = new HashMap<>();
private static final String INGEST_FACTORY = "ingestFactory";
private static final String NOTIFICATION_HELPER = "notificationHelper";
private static final String SEARCH_HELPER = "searchHelper";
@ -474,6 +477,10 @@ public final class ComponentUtil {
return getComponent(NOTIFICATION_HELPER);
}
/**
 * Looks up the component registered under the name {@code "ingestFactory"}.
 *
 * @return the ingest factory component
 */
public static IngestFactory getIngestFactory() {
    final IngestFactory factory = getComponent(INGEST_FACTORY);
    return factory;
}
public static <T> T getComponent(final Class<T> clazz) {
try {
return SingletonLaContainer.getComponent(clazz);

View file

@ -38,7 +38,7 @@
<component name="webHtmlRule" class="org.codelibs.fess.crawler.rule.impl.RegexRule" >
<property name="ruleId">"webHtmlRule"</property>
<property name="responseProcessor">
<component class="org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor">
<component class="org.codelibs.fess.crawler.processor.FessResponseProcessor">
<property name="transformer">fessXpathTransformer</property>
<property name="successfulHttpCodes">(int[])[200]</property>
<property name="notModifiedHttpCodes">(int[])[304]</property>
@ -59,7 +59,7 @@
<component name="webFileRule" class="org.codelibs.fess.crawler.rule.impl.RegexRule" >
<property name="ruleId">"webFileRule"</property>
<property name="responseProcessor">
<component class="org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor">
<component class="org.codelibs.fess.crawler.processor.FessResponseProcessor">
<property name="transformer">fessFileTransformer</property>
<property name="successfulHttpCodes">(int[])[200]</property>
<property name="notModifiedHttpCodes">(int[])[304]</property>
@ -88,7 +88,7 @@
<component name="fsFileRule" class="org.codelibs.fess.crawler.rule.impl.RegexRule" >
<property name="ruleId">"fsFileRule"</property>
<property name="responseProcessor">
<component class="org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor">
<component class="org.codelibs.fess.crawler.processor.FessResponseProcessor">
<property name="transformer">fessFileTransformer</property>
<property name="successfulHttpCodes">(int[])[200]</property>
<property name="notModifiedHttpCodes">(int[])[304]</property>
@ -122,7 +122,7 @@
<component name="defaultRule" class="org.codelibs.fess.crawler.rule.impl.RegexRule" >
<property name="ruleId">"defaultRule"</property>
<property name="responseProcessor">
<component class="org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor">
<component class="org.codelibs.fess.crawler.processor.FessResponseProcessor">
<property name="transformer">fessStandardTransformer</property>
<property name="successfulHttpCodes">(int[])[200]</property>
<property name="notModifiedHttpCodes">(int[])[304]</property>

View file

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE components PUBLIC "-//DBFLUTE//DTD LastaDi 1.0//EN"
"http://dbflute.org/meta/lastadi10.dtd">
<components>
<!-- Ingester registry; looked up by the name "ingestFactory" in ComponentUtil.getIngestFactory(). -->
<component name="ingestFactory" class="org.codelibs.fess.ingest.IngestFactory">
</component>
</components>

View file

@ -8,6 +8,7 @@
<include path="crawler_es.xml" />
<include path="fess_thumbnail.xml" />
<include path="fess_ingest.xml" />
<component name="labelTypeHelper" class="org.codelibs.fess.helper.LabelTypeHelper">
</component>

View file

@ -0,0 +1,49 @@
/*
* Copyright 2012-2020 CodeLibs Project and the Others.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
package org.codelibs.fess.ingest;
import org.codelibs.fess.unit.UnitFessTestCase;
/**
 * Verifies that {@link IngestFactory#add(Ingester)} keeps the registered
 * ingesters ordered by ascending priority.
 */
public class IngestFactoryTest extends UnitFessTestCase {

    /** Ingesters registered in ascending priority order stay in that order. */
    public void test_add_1() {
        final IngestFactory ingestFactory = new IngestFactory();
        for (int priority = 1; priority <= 3; priority++) {
            ingestFactory.add(new TestIngester(priority));
        }

        assertPrioritiesOneToThree(ingestFactory.getIngesters());
    }

    /** Ingesters registered in descending priority order end up ascending. */
    public void test_add_2() {
        final IngestFactory ingestFactory = new IngestFactory();
        for (int priority = 3; priority >= 1; priority--) {
            ingestFactory.add(new TestIngester(priority));
        }

        assertPrioritiesOneToThree(ingestFactory.getIngesters());
    }

    /** Asserts that the first three entries carry the priorities 1, 2 and 3, in that order. */
    private void assertPrioritiesOneToThree(final Ingester[] sorted) {
        assertEquals(1, sorted[0].getPriority());
        assertEquals(2, sorted[1].getPriority());
        assertEquals(3, sorted[2].getPriority());
    }

    /** Minimal ingester whose only configured property is its priority. */
    private static class TestIngester extends Ingester {
        public TestIngester(int priority) {
            setPriority(priority);
        }
    }
}