diff --git a/src/main/java/org/codelibs/fess/es/client/FessEsClient.java b/src/main/java/org/codelibs/fess/es/client/FessEsClient.java
index f89bd693d..2b1d0a586 100644
--- a/src/main/java/org/codelibs/fess/es/client/FessEsClient.java
+++ b/src/main/java/org/codelibs/fess/es/client/FessEsClient.java
@@ -32,6 +32,7 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.function.BiConsumer;
+import java.util.function.BiFunction;
 import java.util.function.Function;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
@@ -180,6 +181,10 @@ public class FessEsClient implements Client {
 
     protected String scrollForDelete = "1m";
 
+    protected int sizeForUpdate = 100;
+
+    protected String scrollForUpdate = "1m";
+
     protected int maxConfigSyncStatusRetry = 10;
 
     protected int maxEsStatusRetry = 60;
@@ -643,6 +648,66 @@ public class FessEsClient implements Client {
         }
     }
 
+    /**
+     * Scrolls over an index and sends the update requests produced for the hits, one bulk request per scroll page.
+     *
+     * @param index   name of the index to search and update
+     * @param option  customizes the scroll search request (query, fetched fields, ...) before it is executed
+     * @param builder turns a hit into the update request to send; may return {@code null} to skip the hit
+     * @return the number of hits visited, including hits for which {@code builder} returned {@code null}
+     * @throws IllegalBehaviorStateException if a bulk response reports failures
+     */
+    public long updateByQuery(final String index, final Function<SearchRequestBuilder, SearchRequestBuilder> option,
+            final BiFunction<UpdateRequestBuilder, SearchHit, UpdateRequestBuilder> builder) {
+
+        final FessConfig fessConfig = ComponentUtil.getFessConfig();
+        SearchResponse response =
+                option.apply(
+                        client.prepareSearch(index).setScroll(scrollForUpdate).setSize(sizeForUpdate)
+                                .setPreference(Constants.SEARCH_PREFERENCE_LOCAL)).execute()
+                        .actionGet(fessConfig.getIndexScrollSearchTimeout());
+
+        long count = 0;
+        String scrollId = response.getScrollId();
+        try {
+            while (scrollId != null) {
+                final SearchHits searchHits = response.getHits();
+                final SearchHit[] hits = searchHits.getHits();
+                if (hits.length == 0) {
+                    break;
+                }
+
+                final BulkRequestBuilder bulkRequest = client.prepareBulk();
+                for (final SearchHit hit : hits) {
+                    final UpdateRequestBuilder requestBuilder =
+                            builder.apply(client.prepareUpdate().setIndex(index).setId(hit.getId()), hit);
+                    if (requestBuilder != null) {
+                        bulkRequest.add(requestBuilder);
+                    }
+                    count++;
+                }
+                // The builder may skip every hit of a page; an empty bulk request fails validation, so do not send it.
+                if (bulkRequest.numberOfActions() > 0) {
+                    final BulkResponse bulkResponse = bulkRequest.execute().actionGet(fessConfig.getIndexBulkTimeout());
+                    if (bulkResponse.hasFailures()) {
+                        throw new IllegalBehaviorStateException(bulkResponse.buildFailureMessage());
+                    }
+                }
+
+                response =
+                        client.prepareSearchScroll(scrollId).setScroll(scrollForUpdate).execute()
+                                .actionGet(fessConfig.getIndexBulkTimeout());
+                if (!scrollId.equals(response.getScrollId())) {
+                    deleteScrollContext(scrollId);
+                }
+                scrollId = response.getScrollId();
+            }
+        } finally {
+            deleteScrollContext(scrollId);
+        }
+        return count;
+    }
+
     public long deleteByQuery(final String index, final QueryBuilder queryBuilder) {
 
         final FessConfig fessConfig = ComponentUtil.getFessConfig();
@@ -1477,6 +1542,14 @@ public class FessEsClient implements Client {
         return client.prepareMultiTermVectors();
     }
 
+    public void setSizeForUpdate(final int sizeForUpdate) {
+        this.sizeForUpdate = sizeForUpdate;
+    }
+
+    public void setScrollForUpdate(final String scrollForUpdate) {
+        this.scrollForUpdate = scrollForUpdate;
+    }
+
     public void setSizeForDelete(final int sizeForDelete) {
         this.sizeForDelete = sizeForDelete;
     }
diff --git a/src/main/java/org/codelibs/fess/helper/LabelTypeHelper.java b/src/main/java/org/codelibs/fess/helper/LabelTypeHelper.java
index 648cbb950..b75c8681d 100644
--- a/src/main/java/org/codelibs/fess/helper/LabelTypeHelper.java
+++ b/src/main/java/org/codelibs/fess/helper/LabelTypeHelper.java
@@ -41,7 +41,7 @@ import org.slf4j.LoggerFactory;
 public class LabelTypeHelper {
     private static final Logger logger = LoggerFactory.getLogger(LabelTypeHelper.class);
 
-    protected volatile List<LabelTypeItem> labelTypeItemList = new ArrayList<>();
+    protected volatile List<LabelTypeItem> labelTypeItemList;
 
     protected volatile List<LabelTypePattern> labelTypePatternList;
 
@@ -53,14 +53,16 @@ public class LabelTypeHelper {
     public int update() {
         final List<LabelType> labelTypeList = ComponentUtil.getComponent(LabelTypeService.class).getLabelTypeList();
         buildLabelTypeItems(labelTypeList);
+        buildLabelTypePatternList(labelTypeList);
         return labelTypeList.size();
     }
 
     public void refresh(final List<LabelType> labelTypeList) {
         buildLabelTypeItems(labelTypeList);
+        buildLabelTypePatternList(labelTypeList);
     }
 
-    private void buildLabelTypeItems(final List<LabelType> labelTypeList) {
+    protected void buildLabelTypeItems(final List<LabelType> labelTypeList) {
         final List<LabelTypeItem> itemList = new ArrayList<>();
         for (final LabelType labelType : labelTypeList) {
             final LabelTypeItem item = new LabelTypeItem();
@@ -120,21 +122,7 @@ public class LabelTypeHelper {
         if (labelTypePatternList == null) {
             synchronized (this) {
                 if (labelTypePatternList == null) {
-                    final List<LabelType> labelTypeList = ComponentUtil.getComponent(LabelTypeService.class).getLabelTypeList();
-                    final List<LabelTypePattern> list = new ArrayList<>();
-                    for (final LabelType labelType : labelTypeList) {
-                        final String includedPaths = labelType.getIncludedPaths();
-                        final String excludedPaths = labelType.getExcludedPaths();
-                        if (StringUtil.isNotBlank(includedPaths) || StringUtil.isNotBlank(excludedPaths)) {
-                            try {
-                                list.add(new LabelTypePattern(labelType.getValue(), includedPaths, excludedPaths));
-                            } catch (final Exception e) {
-                                logger.warn("Failed to create a matching pattern of a label: " + labelType.getValue() + ", includedPaths:"
-                                        + includedPaths + ", excludedPaths:" + excludedPaths, e);
-                            }
-                        }
-                    }
-                    labelTypePatternList = list;
+                    buildLabelTypePatternList(ComponentUtil.getComponent(LabelTypeService.class).getLabelTypeList());
                 }
             }
         }
@@ -152,6 +140,30 @@ public class LabelTypeHelper {
         return valueSet;
     }
 
+    /**
+     * Rebuilds the path-matching patterns from the given label types and stores them in
+     * {@code labelTypePatternList}. Label types with neither included nor excluded paths are
+     * skipped; a label type whose pattern cannot be created is logged and skipped.
+     *
+     * @param labelTypeList label types to build patterns from
+     */
+    protected void buildLabelTypePatternList(final List<LabelType> labelTypeList) {
+        final List<LabelTypePattern> list = new ArrayList<>();
+        for (final LabelType labelType : labelTypeList) {
+            final String includedPaths = labelType.getIncludedPaths();
+            final String excludedPaths = labelType.getExcludedPaths();
+            if (StringUtil.isNotBlank(includedPaths) || StringUtil.isNotBlank(excludedPaths)) {
+                try {
+                    list.add(new LabelTypePattern(labelType.getValue(), includedPaths, excludedPaths));
+                } catch (final Exception e) {
+                    logger.warn("Failed to create a matching pattern of a label: " + labelType.getValue() + ", includedPaths:"
+                            + includedPaths + ", excludedPaths:" + excludedPaths, e);
+                }
+            }
+        }
+        labelTypePatternList = list;
+    }
+
     protected static class LabelTypeItem {
         private String label;
 
diff --git a/src/main/java/org/codelibs/fess/job/UpdateLabelJob.java b/src/main/java/org/codelibs/fess/job/UpdateLabelJob.java
new file mode 100644
index 000000000..cee45ee52
--- /dev/null
+++ b/src/main/java/org/codelibs/fess/job/UpdateLabelJob.java
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2012-2019 CodeLibs Project and the Others.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+package org.codelibs.fess.job;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.Set;
+
+import org.codelibs.core.lang.StringUtil;
+import org.codelibs.fess.es.client.FessEsClient;
+import org.codelibs.fess.helper.LabelTypeHelper;
+import org.codelibs.fess.mylasta.direction.FessConfig;
+import org.codelibs.fess.util.ComponentUtil;
+import org.codelibs.fess.util.DocumentUtil;
+import org.elasticsearch.common.xcontent.XContentFactory;
+import org.elasticsearch.index.query.QueryBuilder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Job that recomputes the label field of indexed documents from their URL.
+ *
+ * Each document's URL is matched against the label types known to {@link LabelTypeHelper}
+ * and the matched label values are written back through {@code FessEsClient#updateByQuery}.
+ */
+public class UpdateLabelJob {
+
+    private static final Logger logger = LoggerFactory.getLogger(UpdateLabelJob.class);
+
+    /** Optional query restricting the documents to update; when {@code null} no query is set on the search. */
+    protected QueryBuilder queryBuilder = null;
+
+    /**
+     * Updates the label field of the documents in the document update index.
+     *
+     * @return the number of processed documents followed by {@code " docs"} and a newline,
+     *         or the exception message and a newline when the update failed
+     */
+    public String execute() {
+        final FessEsClient fessEsClient = ComponentUtil.getFessEsClient();
+        final FessConfig fessConfig = ComponentUtil.getFessConfig();
+        final LabelTypeHelper labelTypeHelper = ComponentUtil.getLabelTypeHelper();
+
+        final StringBuilder resultBuf = new StringBuilder();
+
+        try {
+            final long count =
+                    fessEsClient.updateByQuery(
+                            fessConfig.getIndexDocumentUpdateIndex(),
+                            option -> {
+                                if (queryBuilder != null) {
+                                    option.setQuery(queryBuilder);
+                                }
+                                // Only the URL is read below, so no other source field is fetched.
+                                return option.setFetchSource(new String[] { fessConfig.getIndexFieldUrl() }, null);
+                            },
+                            (builder, hit) -> {
+                                try {
+                                    final Map<String, Object> doc = hit.getSourceAsMap();
+                                    final String url = DocumentUtil.getValue(doc, fessConfig.getIndexFieldUrl(), String.class);
+                                    if (StringUtil.isNotBlank(url)) {
+                                        final Set<String> labelSet = labelTypeHelper.getMatchedLabelValueSet(url);
+                                        return builder.setDoc(XContentFactory.jsonBuilder().startObject()
+                                                .field(fessConfig.getIndexFieldLabel(), labelSet.toArray(String[]::new)).endObject());
+                                    }
+                                } catch (final IOException e) {
+                                    logger.warn("Failed to process " + hit, e);
+                                }
+                                // Returning null makes updateByQuery skip this document.
+                                return null;
+                            });
+            resultBuf.append(count).append(" docs").append("\n");
+        } catch (final Exception e) {
+            logger.error("Could not update labels.", e);
+            resultBuf.append(e.getMessage()).append("\n");
+        }
+
+        return resultBuf.toString();
+    }
+
+    /**
+     * Restricts the job to documents matching the given query.
+     *
+     * @param queryBuilder the query to apply, or {@code null} to set no query
+     * @return this job, for chaining
+     */
+    public UpdateLabelJob query(final QueryBuilder queryBuilder) {
+        this.queryBuilder = queryBuilder;
+        return this;
+    }
+}
diff --git a/src/main/resources/fess_indices/.fess_config.scheduled_job/scheduled_job.bulk b/src/main/resources/fess_indices/.fess_config.scheduled_job/scheduled_job.bulk
index 69c3d86ba..4b012f379 100644
--- a/src/main/resources/fess_indices/.fess_config.scheduled_job/scheduled_job.bulk
+++ b/src/main/resources/fess_indices/.fess_config.scheduled_job/scheduled_job.bulk
@@ -18,3 +18,5 @@
 {"name":"Ping Elasticsearch","target":"all","cronExpression":"* * * * *","scriptType":"groovy","scriptData":"return container.getComponent(\"pingEsJob\").execute();","jobLogging":false,"crawler":false,"available":true,"sortOrder":9,"createdBy":"system","createdTime":0,"updatedBy":"system","updatedTime":0}
 {"index":{"_index":".fess_config.scheduled_job","_id":"score_booster"}}
 {"name":"Score Updater","target":"all","cronExpression":"0 * * * *","scriptType":"groovy","scriptData":"return container.getComponent(\"scoreUpdater\").execute();","jobLogging":false,"crawler":false,"available":true,"sortOrder":10,"createdBy":"system","createdTime":0,"updatedBy":"system","updatedTime":0}
+{"index":{"_index":".fess_config.scheduled_job","_id":"label_updater"}}
+{"name":"Label Updater","target":"all","cronExpression":"","scriptType":"groovy","scriptData":"return container.getComponent(\"updateLabelJob\").execute();","jobLogging":false,"crawler":false,"available":true,"sortOrder":11,"createdBy":"system","createdTime":0,"updatedBy":"system","updatedTime":0}
diff --git a/src/main/resources/fess_job.xml b/src/main/resources/fess_job.xml
index e73875d2a..4da408af9 100644
--- a/src/main/resources/fess_job.xml
+++ b/src/main/resources/fess_job.xml
@@ -20,6 +20,8 @@
+
+