소스 검색

fix #454 : add EsDataStore/EsListDataStore

Shinsuke Sugaya 9 년 전
부모
커밋
746c85cb24

+ 1 - 1
src/main/java/org/codelibs/fess/ds/impl/AbstractDataStoreImpl.java

@@ -109,7 +109,7 @@ public abstract class AbstractDataStoreImpl implements DataStore {
 
     }
 
-    protected Object convertValue(final String template, final Map<String, String> paramMap) {
+    protected <T> Object convertValue(final String template, final Map<String, T> paramMap) {
         if (StringUtil.isEmpty(template)) {
             return StringUtil.EMPTY;
         }

+ 88 - 0
src/main/java/org/codelibs/fess/ds/impl/CsvListDataStoreImpl.java

@@ -0,0 +1,88 @@
+/*
+ * Copyright 2012-2016 CodeLibs Project and the Others.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+package org.codelibs.fess.ds.impl;
+
+import java.io.File;
+import java.util.Map;
+
+import org.codelibs.fess.crawler.client.CrawlerClientFactory;
+import org.codelibs.fess.ds.DataStoreException;
+import org.codelibs.fess.ds.IndexUpdateCallback;
+import org.codelibs.fess.es.config.exentity.DataConfig;
+import org.codelibs.fess.util.ComponentUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.orangesignal.csv.CsvConfig;
+
+/**
+ * CSV based data store that routes every stored record through a
+ * {@link FileListIndexUpdateCallbackImpl} and cleans up the CSV files it has consumed.
+ */
+public class CsvListDataStoreImpl extends CsvDataStoreImpl {
+
+    private static final Logger logger = LoggerFactory.getLogger(CsvListDataStoreImpl.class);
+
+    /** When true, a CSV file is deleted once it has been processed without a DataStoreException. */
+    public boolean deleteProcessedFile = true;
+
+    /** Minimum age (in milliseconds, 1 minute by default) a CSV file must have before it is picked up. */
+    public long csvFileTimestampMargin = 60 * 1000;
+
+    /** When true, a DataStoreException raised while processing a CSV file is logged instead of rethrown. */
+    public boolean ignoreDataStoreException = true;
+
+    /**
+     * Accepts a file only when the parent class accepts it and its last-modified time is
+     * more than {@link #csvFileTimestampMargin} milliseconds in the past.
+     *
+     * @param parentFile directory containing the candidate file
+     * @param filename name of the candidate file
+     * @return true if the file should be processed
+     */
+    @Override
+    protected boolean isCsvFile(final File parentFile, final String filename) {
+        if (!super.isCsvFile(parentFile, filename)) {
+            return false;
+        }
+        final File candidate = new File(parentFile, filename);
+        final long elapsed = System.currentTimeMillis() - candidate.lastModified();
+        return elapsed > csvFileTimestampMargin;
+    }
+
+    /**
+     * Initializes the crawler client factory for the given configuration, wraps the callback
+     * in a {@link FileListIndexUpdateCallbackImpl}, delegates to the parent implementation and
+     * finally commits the wrapping callback.
+     */
+    @Override
+    protected void storeData(final DataConfig dataConfig, final IndexUpdateCallback callback, final Map<String, String> paramMap,
+            final Map<String, String> scriptMap, final Map<String, Object> defaultDataMap) {
+
+        final CrawlerClientFactory clientFactory = ComponentUtil.getCrawlerClientFactory();
+        dataConfig.initializeClientFactory(clientFactory);
+        final FileListIndexUpdateCallbackImpl wrappedCallback = new FileListIndexUpdateCallbackImpl(callback, clientFactory);
+        super.storeData(dataConfig, wrappedCallback, paramMap, scriptMap, defaultDataMap);
+        wrappedCallback.commit();
+    }
+
+    /**
+     * Processes one CSV file through the parent implementation, then deletes it when
+     * {@link #deleteProcessedFile} is set. If a DataStoreException occurs and
+     * {@link #ignoreDataStoreException} is set, the error is logged and the file is renamed
+     * with a ".txt" suffix (or deleted when the rename fails); otherwise the exception is rethrown.
+     */
+    @Override
+    protected void processCsv(final DataConfig dataConfig, final IndexUpdateCallback callback, final Map<String, String> paramMap,
+            final Map<String, String> scriptMap, final Map<String, Object> defaultDataMap, final CsvConfig csvConfig, final File csvFile,
+            final long readInterval, final String csvFileEncoding, final boolean hasHeaderLine) {
+        try {
+            super.processCsv(dataConfig, callback, paramMap, scriptMap, defaultDataMap, csvConfig, csvFile, readInterval, csvFileEncoding,
+                    hasHeaderLine);
+
+            // remove the consumed csv file
+            if (deleteProcessedFile) {
+                final boolean deleted = csvFile.delete();
+                if (!deleted) {
+                    logger.warn("Failed to delete {}", csvFile.getAbsolutePath());
+                }
+            }
+        } catch (final DataStoreException e) {
+            if (!ignoreDataStoreException) {
+                throw e;
+            }
+            logger.error("Failed to process " + csvFile.getAbsolutePath(), e);
+            // move the broken file out of the way; fall back to deleting it
+            final File renamedFile = new File(csvFile.getParent(), csvFile.getName() + ".txt");
+            if (!csvFile.renameTo(renamedFile) && !csvFile.delete()) {
+                logger.warn("Failed to delete {}", csvFile.getAbsolutePath());
+            }
+        }
+    }
+
+}

+ 248 - 0
src/main/java/org/codelibs/fess/ds/impl/EsDataStoreImpl.java

@@ -0,0 +1,248 @@
+/*
+ * Copyright 2012-2016 CodeLibs Project and the Others.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+package org.codelibs.fess.ds.impl;
+
+import java.net.InetAddress;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import org.codelibs.core.lang.StringUtil;
+import org.codelibs.fess.Constants;
+import org.codelibs.fess.app.service.FailureUrlService;
+import org.codelibs.fess.crawler.exception.CrawlingAccessException;
+import org.codelibs.fess.crawler.exception.MultipleCrawlingAccessException;
+import org.codelibs.fess.ds.DataStoreCrawlingException;
+import org.codelibs.fess.ds.DataStoreException;
+import org.codelibs.fess.ds.IndexUpdateCallback;
+import org.codelibs.fess.es.config.exentity.DataConfig;
+import org.codelibs.fess.util.ComponentUtil;
+import org.codelibs.fess.util.StreamUtil;
+import org.elasticsearch.action.bulk.BulkRequestBuilder;
+import org.elasticsearch.action.bulk.BulkResponse;
+import org.elasticsearch.action.search.SearchRequestBuilder;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.client.Client;
+import org.elasticsearch.client.transport.TransportClient;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.transport.InetSocketTransportAddress;
+import org.elasticsearch.search.SearchHit;
+import org.elasticsearch.search.SearchHits;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class EsDataStoreImpl extends AbstractDataStoreImpl {
+    /** Parameter key for the search preference; Constants.SEARCH_PREFERENCE_PRIMARY is used when absent. */
+    private static final String PREFERENCE = "preference";
+
+    /** Parameter key for the search query source; a match_all query is used when absent. */
+    private static final String QUERY = "query";
+
+    /** Parameter key for a comma separated list of fields added to the search request. */
+    private static final String FIELDS = "fields";
+
+    /** Parameter key for the size passed to the search request. */
+    private static final String SIZE = "size";
+
+    /** Parameter key for a comma separated list of document types to search. */
+    private static final String TYPE = "type";
+
+    /** Parameter key for the timeout used when waiting for search/bulk responses ("1m" when absent). */
+    private static final String TIMEOUT = "timeout";
+
+    /** Parameter key for the scroll keep-alive value ("1m" when absent). */
+    private static final String SCROLL = "scroll";
+
+    /** Parameter key for a comma separated list of indices ("_all" when absent). */
+    private static final String INDEX = "index";
+
+    /** Parameter key for the comma separated host list (host or host:port) to connect to. */
+    private static final String HOSTS = "hosts";
+
+    /** Prefix of parameters that are copied, with the prefix removed, into the client settings. */
+    private static final String SETTINGS_PREFIX = "settings.";
+
+    private static final Logger logger = LoggerFactory.getLogger(EsDataStoreImpl.class);
+
+    /**
+     * Connects to the Elasticsearch nodes listed in the "hosts" parameter and crawls them
+     * via {@link #processData}.
+     *
+     * <p>Parameters starting with "settings." are passed (without the prefix) as client
+     * settings. Each host entry is "host" (port 9300) or "host:port". Blank or malformed
+     * entries are skipped with a warning instead of being resolved, because an empty host
+     * name would otherwise resolve to the loopback address.</p>
+     *
+     * @param dataConfig data store configuration
+     * @param callback callback receiving the generated documents
+     * @param paramMap data store parameters
+     * @param scriptMap field name to script template mapping
+     * @param defaultDataMap default values copied into every document
+     */
+    @Override
+    protected void storeData(final DataConfig dataConfig, final IndexUpdateCallback callback, final Map<String, String> paramMap,
+            final Map<String, String> scriptMap, final Map<String, Object> defaultDataMap) {
+        final String hostsStr = paramMap.get(HOSTS);
+        if (StringUtil.isBlank(hostsStr)) {
+            logger.info("hosts is empty.");
+            return;
+        }
+
+        final long readInterval = getReadInterval(paramMap);
+
+        final Settings settings =
+                Settings.settingsBuilder()
+                        .put(paramMap
+                                .entrySet()
+                                .stream()
+                                .filter(e -> e.getKey().startsWith(SETTINGS_PREFIX))
+                                .collect(Collectors.toMap(e -> e.getKey().substring(SETTINGS_PREFIX.length()), e -> e.getValue())))
+                        .build();
+        logger.info("Connecting to " + hostsStr + " with [" + settings.toDelimitedString(',') + "]");
+        final InetSocketTransportAddress[] addresses = StreamUtil.of(hostsStr.split(",")).map(h -> {
+            final String[] values = h.trim().split(":");
+            // InetAddress.getByName("") resolves to loopback, so reject blank host names explicitly
+            if (values.length < 1 || values.length > 2 || StringUtil.isBlank(values[0])) {
+                logger.warn("Ignoring invalid address (expected host or host:port): " + h);
+                return null;
+            }
+            try {
+                final int port = values.length == 2 ? Integer.parseInt(values[1].trim()) : 9300;
+                return new InetSocketTransportAddress(InetAddress.getByName(values[0].trim()), port);
+            } catch (final Exception e) {
+                logger.warn("Failed to parse address: " + h, e);
+            }
+            return null;
+        }).filter(v -> v != null).toArray(n -> new InetSocketTransportAddress[n]);
+        try (Client client = TransportClient.builder().settings(settings).build().addTransportAddresses(addresses)) {
+            processData(dataConfig, callback, paramMap, scriptMap, defaultDataMap, readInterval, client);
+        }
+    }
+
+    /**
+     * Runs a scrolling search against the given client and passes every hit to the callback.
+     *
+     * <p>For each hit a result map (the parameters plus "index", "type", "id", "version",
+     * "hit" and "source") is evaluated against the script templates to build the document.
+     * Failures raised by the callback are recorded through FailureUrlService and do not stop
+     * the crawl. When "delete.processed.doc" is true, every hit handed to the callback is
+     * deleted with one bulk request per scroll page. Crawling stops as soon as the data store
+     * is no longer alive or the callback returns false.</p>
+     *
+     * @param dataConfig data store configuration used when recording failures
+     * @param callback callback receiving the generated documents
+     * @param paramMap data store parameters
+     * @param scriptMap field name to script template mapping
+     * @param defaultDataMap default values copied into every document
+     * @param readInterval pause in milliseconds after each hit (no pause when not positive)
+     * @param client Elasticsearch client to query
+     * @throws DataStoreException if searching, scrolling or bulk deletion fails
+     */
+    protected void processData(final DataConfig dataConfig, final IndexUpdateCallback callback, final Map<String, String> paramMap,
+            final Map<String, String> scriptMap, final Map<String, Object> defaultDataMap, final long readInterval, final Client client) {
+
+        final boolean deleteProcessedDoc = paramMap.getOrDefault("delete.processed.doc", Constants.FALSE).equalsIgnoreCase(Constants.TRUE);
+        final String[] indices;
+        if (paramMap.containsKey(INDEX)) {
+            indices = paramMap.get(INDEX).trim().split(",");
+        } else {
+            indices = new String[] { "_all" };
+        }
+        final String scroll = paramMap.containsKey(SCROLL) ? paramMap.get(SCROLL).trim() : "1m";
+        final String timeout = paramMap.containsKey(TIMEOUT) ? paramMap.get(TIMEOUT).trim() : "1m";
+        final SearchRequestBuilder builder = client.prepareSearch(indices);
+        if (paramMap.containsKey(TYPE)) {
+            builder.setTypes(paramMap.get(TYPE).trim().split(","));
+        }
+        if (paramMap.containsKey(SIZE)) {
+            // trimmed like every other parameter so surrounding whitespace does not break parsing
+            builder.setSize(Integer.parseInt(paramMap.get(SIZE).trim()));
+        }
+        if (paramMap.containsKey(FIELDS)) {
+            builder.addFields(paramMap.get(FIELDS).trim().split(","));
+        }
+        builder.setQuery(paramMap.containsKey(QUERY) ? paramMap.get(QUERY).trim() : "{\"query\":{\"match_all\":{}}}");
+        builder.setScroll(scroll);
+        builder.setPreference(paramMap.containsKey(PREFERENCE) ? paramMap.get(PREFERENCE).trim() : Constants.SEARCH_PREFERENCE_PRIMARY);
+        try {
+            SearchResponse response = builder.execute().actionGet(timeout);
+
+            // Declared outside the scroll loop: once the callback asks to stop, no further
+            // page is fetched (previously the flag was reset on every page).
+            boolean loop = true;
+            String scrollId = response.getScrollId();
+            while (scrollId != null) {
+                final SearchHits searchHits = response.getHits();
+                final SearchHit[] hits = searchHits.getHits();
+                if (hits.length == 0) {
+                    break;
+                }
+
+                final BulkRequestBuilder bulkRequest = deleteProcessedDoc ? client.prepareBulk() : null;
+                for (final SearchHit hit : hits) {
+                    if (!alive || !loop) {
+                        break;
+                    }
+
+                    final Map<String, Object> dataMap = new HashMap<String, Object>();
+                    dataMap.putAll(defaultDataMap);
+                    final Map<String, Object> resultMap = new LinkedHashMap<>();
+                    resultMap.putAll(paramMap);
+                    resultMap.put("index", hit.getIndex());
+                    resultMap.put("type", hit.getType());
+                    resultMap.put("id", hit.getId());
+                    resultMap.put("version", Long.valueOf(hit.getVersion()));
+                    resultMap.put("hit", hit);
+                    resultMap.put("source", hit.getSource());
+
+                    if (logger.isDebugEnabled()) {
+                        for (final Map.Entry<String, Object> entry : resultMap.entrySet()) {
+                            logger.debug(entry.getKey() + "=" + entry.getValue());
+                        }
+                    }
+
+                    for (final Map.Entry<String, String> entry : scriptMap.entrySet()) {
+                        final Object convertValue = convertValue(entry.getValue(), resultMap);
+                        if (convertValue != null) {
+                            dataMap.put(entry.getKey(), convertValue);
+                        }
+                    }
+
+                    if (logger.isDebugEnabled()) {
+                        for (final Map.Entry<String, Object> entry : dataMap.entrySet()) {
+                            logger.debug(entry.getKey() + "=" + entry.getValue());
+                        }
+                    }
+
+                    try {
+                        loop = callback.store(paramMap, dataMap);
+                    } catch (final CrawlingAccessException e) {
+                        logger.warn("Crawling Access Exception at : " + dataMap, e);
+
+                        // report the last underlying cause of a multi-cause failure
+                        Throwable target = e;
+                        if (target instanceof MultipleCrawlingAccessException) {
+                            final Throwable[] causes = ((MultipleCrawlingAccessException) target).getCauses();
+                            if (causes.length > 0) {
+                                target = causes[causes.length - 1];
+                            }
+                        }
+
+                        String errorName;
+                        final Throwable cause = target.getCause();
+                        if (cause != null) {
+                            errorName = cause.getClass().getCanonicalName();
+                        } else {
+                            errorName = target.getClass().getCanonicalName();
+                        }
+
+                        String url;
+                        if (target instanceof DataStoreCrawlingException) {
+                            url = ((DataStoreCrawlingException) target).getUrl();
+                        } else {
+                            url = hit.getIndex() + "/" + hit.getType() + "/" + hit.getId();
+                        }
+                        final FailureUrlService failureUrlService = ComponentUtil.getComponent(FailureUrlService.class);
+                        failureUrlService.store(dataConfig, errorName, url, target);
+                    } catch (final Exception e) {
+                        final String url = hit.getIndex() + "/" + hit.getType() + "/" + hit.getId();
+                        final FailureUrlService failureUrlService = ComponentUtil.getComponent(FailureUrlService.class);
+                        failureUrlService.store(dataConfig, e.getClass().getCanonicalName(), url, e);
+
+                        logger.warn("Crawling Access Exception at : " + dataMap, e);
+                    }
+
+                    // NOTE(review): hits whose store() failed or returned false are still queued
+                    // for deletion here, as in the original code - confirm this is intended.
+                    if (bulkRequest != null) {
+                        bulkRequest.add(client.prepareDelete(hit.getIndex(), hit.getType(), hit.getId()));
+                    }
+
+                    if (readInterval > 0) {
+                        sleep(readInterval);
+                    }
+                }
+
+                if (bulkRequest != null && bulkRequest.numberOfActions() > 0) {
+                    final BulkResponse bulkResponse = bulkRequest.execute().actionGet(timeout);
+                    if (bulkResponse.hasFailures()) {
+                        logger.warn(bulkResponse.buildFailureMessage());
+                    }
+                }
+
+                if (!alive || !loop) {
+                    break;
+                }
+                response = client.prepareSearchScroll(scrollId).setScroll(scroll).execute().actionGet(timeout);
+                scrollId = response.getScrollId();
+            }
+        } catch (final Exception e) {
+            throw new DataStoreException("Failed to crawl data when accessing elasticsearch.", e);
+        }
+    }
+
+}

+ 39 - 0
src/main/java/org/codelibs/fess/ds/impl/EsListDataStoreImpl.java

@@ -0,0 +1,39 @@
+/*
+ * Copyright 2012-2016 CodeLibs Project and the Others.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+package org.codelibs.fess.ds.impl;
+
+import java.util.Map;
+
+import org.codelibs.fess.crawler.client.CrawlerClientFactory;
+import org.codelibs.fess.ds.IndexUpdateCallback;
+import org.codelibs.fess.es.config.exentity.DataConfig;
+import org.codelibs.fess.util.ComponentUtil;
+
+/**
+ * Elasticsearch data store that routes every stored record through a
+ * {@link FileListIndexUpdateCallbackImpl}.
+ */
+public class EsListDataStoreImpl extends EsDataStoreImpl {
+
+    /**
+     * Initializes the crawler client factory for the given configuration, wraps the callback
+     * in a {@link FileListIndexUpdateCallbackImpl}, delegates to the parent implementation and
+     * finally commits the wrapping callback.
+     */
+    @Override
+    protected void storeData(final DataConfig dataConfig, final IndexUpdateCallback callback, final Map<String, String> paramMap,
+            final Map<String, String> scriptMap, final Map<String, Object> defaultDataMap) {
+
+        final CrawlerClientFactory clientFactory = ComponentUtil.getCrawlerClientFactory();
+        dataConfig.initializeClientFactory(clientFactory);
+        final FileListIndexUpdateCallbackImpl wrappedCallback = new FileListIndexUpdateCallbackImpl(callback, clientFactory);
+        super.storeData(dataConfig, wrappedCallback, paramMap, scriptMap, defaultDataMap);
+        wrappedCallback.commit();
+    }
+
+}

+ 2 - 0
src/main/java/org/codelibs/fess/ds/impl/FileListDataStoreImpl.java

@@ -50,6 +50,8 @@ import org.slf4j.LoggerFactory;
 
 import com.orangesignal.csv.CsvConfig;
 
+@Deprecated
+// replace with CsvListDataStoreImpl
 public class FileListDataStoreImpl extends CsvDataStoreImpl {
 
     private static final Logger logger = LoggerFactory.getLogger(FileListDataStoreImpl.class);

+ 199 - 0
src/main/java/org/codelibs/fess/ds/impl/FileListIndexUpdateCallbackImpl.java

@@ -0,0 +1,199 @@
+package org.codelibs.fess.ds.impl;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.codelibs.core.io.SerializeUtil;
+import org.codelibs.fess.Constants;
+import org.codelibs.fess.crawler.builder.RequestDataBuilder;
+import org.codelibs.fess.crawler.client.CrawlerClient;
+import org.codelibs.fess.crawler.client.CrawlerClientFactory;
+import org.codelibs.fess.crawler.entity.ResponseData;
+import org.codelibs.fess.crawler.entity.ResultData;
+import org.codelibs.fess.crawler.exception.CrawlerSystemException;
+import org.codelibs.fess.crawler.processor.ResponseProcessor;
+import org.codelibs.fess.crawler.processor.impl.DefaultResponseProcessor;
+import org.codelibs.fess.crawler.rule.Rule;
+import org.codelibs.fess.crawler.rule.RuleManager;
+import org.codelibs.fess.crawler.transformer.Transformer;
+import org.codelibs.fess.ds.DataStoreCrawlingException;
+import org.codelibs.fess.ds.IndexUpdateCallback;
+import org.codelibs.fess.es.client.FessEsClient;
+import org.codelibs.fess.helper.IndexingHelper;
+import org.codelibs.fess.mylasta.direction.FessConfig;
+import org.codelibs.fess.util.ComponentUtil;
+import org.codelibs.fess.util.StreamUtil;
+import org.lastaflute.di.core.SingletonLaContainer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class FileListIndexUpdateCallbackImpl implements IndexUpdateCallback {
+    private static final Logger logger = LoggerFactory.getLogger(FileListIndexUpdateCallbackImpl.class);
+
+    protected IndexUpdateCallback indexUpdateCallback;
+
+    protected CrawlerClientFactory crawlerClientFactory;
+
+    protected List<String> deleteIdList = new ArrayList<String>(100);
+
+    protected int maxDeleteDocumentCacheSize = 100;
+
+    /**
+     * Creates a callback that decorates another callback.
+     *
+     * @param indexUpdateCallback callback that documents are forwarded to; also used as the lock object
+     * @param crawlerClientFactory factory used to obtain a crawler client for a document URL
+     */
+    protected FileListIndexUpdateCallbackImpl(final IndexUpdateCallback indexUpdateCallback, final CrawlerClientFactory crawlerClientFactory) {
+        this.indexUpdateCallback = indexUpdateCallback;
+        this.crawlerClientFactory = crawlerClientFactory;
+    }
+
+    /**
+     * Dispatches a record according to its event type.
+     *
+     * <p>The event type is removed from the data map (field name from "field.event_type",
+     * default "event_type"). Create/modify events add the document, delete events queue its
+     * deletion; the event names can be overridden with "event.create", "event.modify" and
+     * "event.delete".</p>
+     *
+     * @param paramMap data store parameters
+     * @param dataMap record data; the event type entry is removed from it
+     * @return the result of the add/delete handling, or false for an unknown event
+     */
+    @Override
+    public boolean store(final Map<String, String> paramMap, final Map<String, Object> dataMap) {
+        final String eventTypeField = getParamValue(paramMap, "field.event_type", "event_type");
+        final Object eventType = dataMap.remove(eventTypeField);
+
+        final boolean updated = getParamValue(paramMap, "event.create", "create").equals(eventType)
+                || getParamValue(paramMap, "event.modify", "modify").equals(eventType);
+        if (updated) {
+            // created or modified file
+            return addDocument(paramMap, dataMap);
+        }
+
+        if (getParamValue(paramMap, "event.delete", "delete").equals(eventType)) {
+            // removed file
+            return deleteDocument(paramMap, dataMap);
+        }
+
+        logger.warn("unknown event: " + eventType + ", data: " + dataMap);
+        return false;
+    }
+
+    /**
+     * Looks up a parameter, falling back to a default when the key is not mapped.
+     *
+     * @param paramMap data store parameters
+     * @param key parameter name
+     * @param defaultValue value returned when the key is absent
+     * @return the mapped value or the default
+     */
+    protected String getParamValue(Map<String, String> paramMap, String key, String defaultValue) {
+        return paramMap.getOrDefault(key, defaultValue);
+    }
+
+    /**
+     * Fetches the content behind the record's URL, transforms it and forwards the merged
+     * document to the wrapped callback.
+     *
+     * <p>The whole operation runs while holding the monitor of the wrapped callback. Fields
+     * listed in "ignore.field.names" (comma separated; by default Constants.INDEXING_TARGET
+     * and Constants.SESSION_ID) are removed before the document is stored.</p>
+     *
+     * @param paramMap data store parameters
+     * @param dataMap record data; enriched with the transformed response data
+     * @return false when the URL is missing, no client or rule matches, or the rule's
+     *         response processor is not a DefaultResponseProcessor; otherwise the result
+     *         of the wrapped callback
+     * @throws DataStoreCrawlingException if fetching, transforming or storing fails
+     */
+    protected boolean addDocument(final Map<String, String> paramMap, final Map<String, Object> dataMap) {
+        final FessConfig fessConfig = ComponentUtil.getFessConfig();
+        synchronized (indexUpdateCallback) {
+            // the url field is mandatory
+            final Object urlValue = dataMap.get(fessConfig.getIndexFieldUrl());
+            if (urlValue == null) {
+                logger.warn("Could not add a doc. Invalid data: " + dataMap);
+                return false;
+            }
+
+            final String url = urlValue.toString();
+            try {
+                final CrawlerClient client = crawlerClientFactory.getClient(url);
+                if (client == null) {
+                    logger.warn("CrawlerClient is null. Data: " + dataMap);
+                    return false;
+                }
+
+                final long startTime = System.currentTimeMillis();
+                final ResponseData responseData = client.execute(RequestDataBuilder.newRequestData().get().url(url).build());
+                responseData.setExecutionTime(System.currentTimeMillis() - startTime);
+                // prefer the session id carried by the record over the crawling info id
+                final Object sessionId =
+                        dataMap.containsKey(Constants.SESSION_ID) ? dataMap.get(Constants.SESSION_ID) : paramMap
+                                .get(Constants.CRAWLING_INFO_ID);
+                responseData.setSessionId((String) sessionId);
+
+                final Rule rule = SingletonLaContainer.getComponent(RuleManager.class).getRule(responseData);
+                if (rule == null) {
+                    logger.warn("No url rule. Data: " + dataMap);
+                    return false;
+                }
+
+                responseData.setRuleId(rule.getRuleId());
+                final ResponseProcessor responseProcessor = rule.getResponseProcessor();
+                if (!(responseProcessor instanceof DefaultResponseProcessor)) {
+                    logger.warn("The response processor is not DefaultResponseProcessor. responseProcessor: " + responseProcessor
+                            + ", Data: " + dataMap);
+                    return false;
+                }
+
+                final Transformer transformer = ((DefaultResponseProcessor) responseProcessor).getTransformer();
+                final byte[] data = transformer.transform(responseData).getData();
+                if (data != null) {
+                    try {
+                        @SuppressWarnings("unchecked")
+                        final Map<String, Object> responseDataMap = (Map<String, Object>) SerializeUtil.fromBinaryToObject(data);
+                        dataMap.putAll(responseDataMap);
+                    } catch (final Exception e) {
+                        throw new CrawlerSystemException("Could not create an instance from bytes.", e);
+                    }
+                }
+
+                // drop fields that must not be indexed
+                final String[] ignoreFields =
+                        paramMap.containsKey("ignore.field.names") ? paramMap.get("ignore.field.names").split(",") : new String[] {
+                                Constants.INDEXING_TARGET, Constants.SESSION_ID };
+                StreamUtil.of(ignoreFields).map(String::trim).forEach(dataMap::remove);
+
+                return indexUpdateCallback.store(paramMap, dataMap);
+            } catch (final Exception e) {
+                throw new DataStoreCrawlingException(url, "Failed to add: " + dataMap, e);
+            }
+        }
+    }
+
+    /**
+     * Queues the document identified by the record for deletion.
+     *
+     * <p>Ids are collected in {@code deleteIdList}; once the list reaches
+     * {@code maxDeleteDocumentCacheSize} entries, all queued documents are deleted and the
+     * list is emptied.</p>
+     *
+     * @param paramMap data store parameters
+     * @param dataMap record data; must contain a non-null URL field
+     * @return false when the URL field is missing, true otherwise
+     */
+    protected boolean deleteDocument(final Map<String, String> paramMap, final Map<String, Object> dataMap) {
+
+        if (logger.isDebugEnabled()) {
+            logger.debug("Deleting " + dataMap);
+        }
+
+        final FessConfig fessConfig = ComponentUtil.getFessConfig();
+
+        // the url field is mandatory
+        if (dataMap.get(fessConfig.getIndexFieldUrl()) == null) {
+            logger.warn("Could not delete a doc. Invalid data: " + dataMap);
+            return false;
+        }
+
+        synchronized (indexUpdateCallback) {
+            final String docId = ComponentUtil.getCrawlingInfoHelper().generateId(dataMap);
+            deleteIdList.add(docId);
+
+            if (deleteIdList.size() < maxDeleteDocumentCacheSize) {
+                return true;
+            }
+
+            // cache is full: flush every queued deletion
+            final FessEsClient fessEsClient = ComponentUtil.getElasticsearchClient();
+            final IndexingHelper indexingHelper = ComponentUtil.getIndexingHelper();
+            for (final String queuedId : deleteIdList) {
+                indexingHelper.deleteDocument(fessEsClient, queuedId);
+            }
+            if (logger.isDebugEnabled()) {
+                logger.debug("Deleted " + deleteIdList);
+            }
+            deleteIdList.clear();
+        }
+        return true;
+    }
+
+    /**
+     * Deletes every document still queued for deletion, then commits the wrapped callback.
+     *
+     * <p>The queue is flushed while holding the same monitor that guards it in
+     * {@code deleteDocument}, and it is cleared afterwards so that a later commit does not
+     * delete the same ids again.</p>
+     */
+    @Override
+    public void commit() {
+        synchronized (indexUpdateCallback) {
+            if (!deleteIdList.isEmpty()) {
+                final FessEsClient fessEsClient = ComponentUtil.getElasticsearchClient();
+                final IndexingHelper indexingHelper = ComponentUtil.getIndexingHelper();
+                for (final String id : deleteIdList) {
+                    indexingHelper.deleteDocument(fessEsClient, id);
+                }
+                if (logger.isDebugEnabled()) {
+                    logger.debug("Deleted " + deleteIdList);
+                }
+                deleteIdList.clear();
+            }
+        }
+        indexUpdateCallback.commit();
+    }
+
+    /**
+     * @return the document size reported by the wrapped callback
+     */
+    @Override
+    public long getDocumentSize() {
+        return indexUpdateCallback.getDocumentSize();
+    }
+
+    /**
+     * @return the execute time reported by the wrapped callback
+     */
+    @Override
+    public long getExecuteTime() {
+        return indexUpdateCallback.getExecuteTime();
+    }
+
+    /**
+     * Sets how many deletions are queued before they are flushed.
+     *
+     * @param maxDeleteDocumentCacheSize number of queued ids that triggers a flush in deleteDocument
+     */
+    public void setMaxDeleteDocumentCacheSize(int maxDeleteDocumentCacheSize) {
+        this.maxDeleteDocumentCacheSize = maxDeleteDocumentCacheSize;
+    }
+}

+ 6 - 0
src/main/java/org/codelibs/fess/util/ComponentUtil.java

@@ -19,6 +19,7 @@ import org.apache.lucene.queryparser.classic.QueryParser;
 import org.codelibs.core.crypto.CachedCipher;
 import org.codelibs.core.misc.DynamicProperties;
 import org.codelibs.fess.api.WebApiManagerFactory;
+import org.codelibs.fess.crawler.client.CrawlerClientFactory;
 import org.codelibs.fess.crawler.entity.EsAccessResult;
 import org.codelibs.fess.crawler.extractor.ExtractorFactory;
 import org.codelibs.fess.crawler.service.DataService;
@@ -60,6 +61,7 @@ import org.lastaflute.job.JobManager;
 import org.lastaflute.web.servlet.request.RequestManager;
 
 public final class ComponentUtil {
+
     private static final String QUERY_PARSER = "queryParser";
 
     private static final String DOCUMENT_HELPER = "documentHelper";
@@ -322,6 +324,10 @@ public final class ComponentUtil {
         return getComponent(QUERY_PARSER);
     }
 
+    /**
+     * Looks up the CrawlerClientFactory component by its class.
+     *
+     * @return the component returned by {@code getComponent(CrawlerClientFactory.class)}
+     */
+    public static CrawlerClientFactory getCrawlerClientFactory() {
+        return getComponent(CrawlerClientFactory.class);
+    }
+
     public static <T> T getComponent(final Class<T> clazz) {
         try {
             return SingletonLaContainer.getComponent(clazz);

+ 8 - 0
src/main/java/org/codelibs/fess/util/StreamUtil.java

@@ -30,6 +30,14 @@ public class StreamUtil {
         }
     }
 
+    /**
+     * Splits a string around matches of a regular expression and streams the parts.
+     *
+     * @param value string to split; may be null
+     * @param regex delimiting regular expression
+     * @return a stream of the split parts, or an empty stream when value is null
+     */
+    public static Stream<String> splitOf(final String value, final String regex) {
+        if (value == null) {
+            return Stream.empty();
+        }
+        final String[] parts = value.split(regex);
+        return Arrays.stream(parts);
+    }
+
     public static <K, V> Stream<Map.Entry<K, V>> of(final Map<K, V> map) {
         if (map != null) {
             return map.entrySet().stream();

+ 15 - 3
src/main/resources/fess_ds.xml

@@ -12,8 +12,16 @@
 			<arg>csvDataStore</arg>
 		</postConstruct>
 		<postConstruct name="add">
-			<arg>"FileListDataStore"</arg>
-			<arg>fileListDataStore</arg>
+			<arg>"CsvListDataStore"</arg>
+			<arg>csvListDataStore</arg>
+		</postConstruct>
+		<postConstruct name="add">
+			<arg>"EsDataStore"</arg>
+			<arg>esDataStore</arg>
+		</postConstruct>
+		<postConstruct name="add">
+			<arg>"EsListDataStore"</arg>
+			<arg>esListDataStore</arg>
 		</postConstruct>
 	</component>
 
@@ -24,7 +32,11 @@
 		<property name="csvFileSuffixs">new String[] { ".csv", ".tsv" }</property>
 		 -->
 	</component>
-	<component name="fileListDataStore" class="org.codelibs.fess.ds.impl.FileListDataStoreImpl">
+	<component name="csvListDataStore" class="org.codelibs.fess.ds.impl.CsvListDataStoreImpl">
+	</component>
+	<component name="esDataStore" class="org.codelibs.fess.ds.impl.EsDataStoreImpl">
+	</component>
+	<component name="esListDataStore" class="org.codelibs.fess.ds.impl.EsListDataStoreImpl">
 	</component>
 
 	<component name="indexUpdateCallback" class="org.codelibs.fess.ds.impl.IndexUpdateCallbackImpl" instance="prototype">