fix #2137 add label updater

This commit is contained in:
Shinsuke Sugaya 2019-05-30 06:51:50 +09:00
parent a3b34aa560
commit 4080b29fc8
5 changed files with 170 additions and 17 deletions

View file

@ -32,6 +32,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
@ -180,6 +181,10 @@ public class FessEsClient implements Client {
protected String scrollForDelete = "1m";
protected int sizeForUpdate = 100;
protected String scrollForUpdate = "1m";
protected int maxConfigSyncStatusRetry = 10;
protected int maxEsStatusRetry = 60;
@ -643,6 +648,54 @@ public class FessEsClient implements Client {
}
}
/**
 * Scrolls over the documents of an index and applies a bulk update to each page of hits.
 *
 * <p>The initial search is created with {@code scrollForUpdate}, {@code sizeForUpdate} and the
 * local search preference, then handed to {@code option} so the caller can add a query,
 * source filtering, etc. For every hit, {@code builder} receives an update request that is
 * already bound to the index and the hit's id; returning {@code null} skips that document.</p>
 *
 * @param index name of the index to scan and update
 * @param option customizes the scroll search request before it is executed
 * @param builder fills in the update request for a hit, or returns {@code null} to skip it
 * @return number of hits visited (skipped hits are counted as well)
 * @throws IllegalBehaviorStateException if a bulk response reports failures
 */
public long updateByQuery(final String index, final Function<SearchRequestBuilder, SearchRequestBuilder> option,
        final BiFunction<UpdateRequestBuilder, SearchHit, UpdateRequestBuilder> builder) {
    final FessConfig fessConfig = ComponentUtil.getFessConfig();
    SearchResponse response =
            option.apply(
                    client.prepareSearch(index).setScroll(scrollForUpdate).setSize(sizeForUpdate)
                            .setPreference(Constants.SEARCH_PREFERENCE_LOCAL)).execute()
                    .actionGet(fessConfig.getIndexScrollSearchTimeout());

    // long, to match the declared return type and avoid int overflow on very large indices
    long count = 0;
    String scrollId = response.getScrollId();
    try {
        while (scrollId != null) {
            final SearchHit[] hits = response.getHits().getHits();
            if (hits.length == 0) {
                break;
            }

            final BulkRequestBuilder bulkRequest = client.prepareBulk();
            for (final SearchHit hit : hits) {
                final UpdateRequestBuilder requestBuilder =
                        builder.apply(client.prepareUpdate().setIndex(index).setId(hit.getId()), hit);
                if (requestBuilder != null) {
                    bulkRequest.add(requestBuilder);
                }
                count++;
            }

            // The builder may skip every hit of a page; an empty bulk request fails request
            // validation, so only execute it when at least one action was added.
            if (bulkRequest.numberOfActions() > 0) {
                final BulkResponse bulkResponse = bulkRequest.execute().actionGet(fessConfig.getIndexBulkTimeout());
                if (bulkResponse.hasFailures()) {
                    throw new IllegalBehaviorStateException(bulkResponse.buildFailureMessage());
                }
            }

            // Fetching the next page is a scroll search, so wait with the scroll-search timeout
            // (same as the initial request) rather than the bulk timeout.
            response =
                    client.prepareSearchScroll(scrollId).setScroll(scrollForUpdate).execute()
                            .actionGet(fessConfig.getIndexScrollSearchTimeout());
            // Release the previous scroll context if the server handed out a new id.
            if (!scrollId.equals(response.getScrollId())) {
                deleteScrollContext(scrollId);
            }
            scrollId = response.getScrollId();
        }
    } finally {
        deleteScrollContext(scrollId);
    }
    return count;
}
public long deleteByQuery(final String index, final QueryBuilder queryBuilder) {
final FessConfig fessConfig = ComponentUtil.getFessConfig();
@ -1477,6 +1530,14 @@ public class FessEsClient implements Client {
return client.prepareMultiTermVectors();
}
/**
 * Sets the page size used by {@code updateByQuery} for its scroll search.
 *
 * @param sizeForUpdate value passed to {@code setSize} on the scroll search request
 */
public void setSizeForUpdate(final int sizeForUpdate) {
this.sizeForUpdate = sizeForUpdate;
}
/**
 * Sets the scroll setting used by {@code updateByQuery}.
 *
 * @param scrollForUpdate value passed to {@code setScroll} on the initial search and on each
 *        follow-up scroll request (the field's initial value is {@code "1m"})
 */
public void setScrollForUpdate(final String scrollForUpdate) {
this.scrollForUpdate = scrollForUpdate;
}
/**
 * Sets the {@code sizeForDelete} field.
 *
 * NOTE(review): presumably the scroll page size used by deleteByQuery, by analogy with
 * sizeForUpdate/updateByQuery -- confirm against the deleteByQuery body.
 *
 * @param sizeForDelete new value of the field
 */
public void setSizeForDelete(final int sizeForDelete) {
this.sizeForDelete = sizeForDelete;
}

View file

@ -41,7 +41,7 @@ import org.slf4j.LoggerFactory;
public class LabelTypeHelper {
private static final Logger logger = LoggerFactory.getLogger(LabelTypeHelper.class);
protected volatile List<LabelTypeItem> labelTypeItemList = new ArrayList<>();
protected volatile List<LabelTypeItem> labelTypeItemList;
protected volatile List<LabelTypePattern> labelTypePatternList;
@ -53,14 +53,16 @@ public class LabelTypeHelper {
/**
 * Reloads the label types from {@link LabelTypeService} and rebuilds both the label items
 * and the label path patterns from them.
 *
 * @return number of label types that were loaded
 */
public int update() {
    final LabelTypeService labelTypeService = ComponentUtil.getComponent(LabelTypeService.class);
    final List<LabelType> labelTypes = labelTypeService.getLabelTypeList();

    buildLabelTypeItems(labelTypes);
    buildLabelTypePatternList(labelTypes);

    return labelTypes.size();
}
/**
 * Rebuilds this helper's state from an already loaded list of label types, by delegating to
 * {@code buildLabelTypeItems} and {@code buildLabelTypePatternList}.
 *
 * @param labelTypeList label types to build the items and path patterns from
 */
public void refresh(final List<LabelType> labelTypeList) {
buildLabelTypeItems(labelTypeList);
buildLabelTypePatternList(labelTypeList);
}
private void buildLabelTypeItems(final List<LabelType> labelTypeList) {
protected void buildLabelTypeItems(final List<LabelType> labelTypeList) {
final List<LabelTypeItem> itemList = new ArrayList<>();
for (final LabelType labelType : labelTypeList) {
final LabelTypeItem item = new LabelTypeItem();
@ -120,21 +122,7 @@ public class LabelTypeHelper {
if (labelTypePatternList == null) {
synchronized (this) {
if (labelTypePatternList == null) {
final List<LabelType> labelTypeList = ComponentUtil.getComponent(LabelTypeService.class).getLabelTypeList();
final List<LabelTypePattern> list = new ArrayList<>();
for (final LabelType labelType : labelTypeList) {
final String includedPaths = labelType.getIncludedPaths();
final String excludedPaths = labelType.getExcludedPaths();
if (StringUtil.isNotBlank(includedPaths) || StringUtil.isNotBlank(excludedPaths)) {
try {
list.add(new LabelTypePattern(labelType.getValue(), includedPaths, excludedPaths));
} catch (final Exception e) {
logger.warn("Failed to create a matching pattern of a label: " + labelType.getValue() + ", includedPaths:"
+ includedPaths + ", excludedPaths:" + excludedPaths, e);
}
}
}
labelTypePatternList = list;
buildLabelTypePatternList(ComponentUtil.getComponent(LabelTypeService.class).getLabelTypeList());
}
}
}
@ -152,6 +140,23 @@ public class LabelTypeHelper {
return valueSet;
}
/**
 * Builds the list of path-matching patterns and publishes it to {@code labelTypePatternList}.
 *
 * <p>Only label types that define included or excluded paths produce a pattern. A label whose
 * pattern cannot be created is logged and left out; the remaining labels are still processed.</p>
 *
 * @param labelTypeList label types to derive the patterns from
 */
protected void buildLabelTypePatternList(final List<LabelType> labelTypeList) {
    final List<LabelTypePattern> patterns = new ArrayList<>();
    for (final LabelType labelType : labelTypeList) {
        final String included = labelType.getIncludedPaths();
        final String excluded = labelType.getExcludedPaths();
        // A label without any path condition has nothing to match against.
        if (!StringUtil.isNotBlank(included) && !StringUtil.isNotBlank(excluded)) {
            continue;
        }
        try {
            final LabelTypePattern pattern = new LabelTypePattern(labelType.getValue(), included, excluded);
            patterns.add(pattern);
        } catch (final Exception e) {
            logger.warn("Failed to create a matching pattern of a label: " + labelType.getValue() + ", includedPaths:"
                    + included + ", excludedPaths:" + excluded, e);
        }
    }
    // Assign the finished list in one step so the field never refers to a partially built list.
    labelTypePatternList = patterns;
}
protected static class LabelTypeItem {
private String label;

View file

@ -0,0 +1,83 @@
/*
* Copyright 2012-2019 CodeLibs Project and the Others.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
package org.codelibs.fess.job;
import java.io.IOException;
import java.util.Map;
import java.util.Set;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.fess.es.client.FessEsClient;
import org.codelibs.fess.helper.LabelTypeHelper;
import org.codelibs.fess.mylasta.direction.FessConfig;
import org.codelibs.fess.util.ComponentUtil;
import org.codelibs.fess.util.DocumentUtil;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.QueryBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Job that recomputes the label field of indexed documents from their URLs.
 *
 * <p>Documents are scanned through {@code FessEsClient#updateByQuery}, fetching only the URL
 * field. Each document with a non-blank URL has its label field replaced by the label values
 * that {@code LabelTypeHelper#getMatchedLabelValueSet} returns for that URL.</p>
 */
public class UpdateLabelJob {

    private static final Logger logger = LoggerFactory.getLogger(UpdateLabelJob.class);

    /** Optional query restricting the scanned documents; when {@code null} no query is set on the search. */
    protected QueryBuilder queryBuilder;

    /**
     * Runs the label update.
     *
     * @return the count reported by {@code updateByQuery} followed by {@code " docs"} and a
     *         newline, or the exception message followed by a newline if the update failed
     */
    public String execute() {
        final FessEsClient fessEsClient = ComponentUtil.getFessEsClient();
        final FessConfig fessConfig = ComponentUtil.getFessConfig();
        final LabelTypeHelper labelTypeHelper = ComponentUtil.getLabelTypeHelper();

        final StringBuilder report = new StringBuilder();
        try {
            final long processed = fessEsClient.updateByQuery(fessConfig.getIndexDocumentUpdateIndex(), searchRequest -> {
                if (queryBuilder != null) {
                    searchRequest.setQuery(queryBuilder);
                }
                // Only the URL is needed to decide which labels apply.
                return searchRequest.setFetchSource(new String[] { fessConfig.getIndexFieldUrl() }, null);
            }, (updateRequest, hit) -> {
                try {
                    final Map<String, Object> source = hit.getSourceAsMap();
                    final String url = DocumentUtil.getValue(source, fessConfig.getIndexFieldUrl(), String.class);
                    if (!StringUtil.isNotBlank(url)) {
                        // No URL to match against: returning null leaves this document untouched.
                        return null;
                    }
                    final Set<String> labels = labelTypeHelper.getMatchedLabelValueSet(url);
                    return updateRequest.setDoc(XContentFactory.jsonBuilder().startObject()
                            .field(fessConfig.getIndexFieldLabel(), labels.toArray(String[]::new)).endObject());
                } catch (final IOException e) {
                    logger.warn("Failed to process " + hit, e);
                    return null;
                }
            });
            report.append(processed).append(" docs").append("\n");
        } catch (final Exception e) {
            logger.error("Could not update labels.", e);
            report.append(e.getMessage()).append("\n");
        }
        return report.toString();
    }

    /**
     * Restricts the job to documents matching the given query.
     *
     * @param queryBuilder query to apply to the scan, or {@code null} to set none
     * @return this job, for call chaining
     */
    public UpdateLabelJob query(final QueryBuilder queryBuilder) {
        this.queryBuilder = queryBuilder;
        return this;
    }
}

View file

@ -18,3 +18,5 @@
{"name":"Ping Elasticsearch","target":"all","cronExpression":"* * * * *","scriptType":"groovy","scriptData":"return container.getComponent(\"pingEsJob\").execute();","jobLogging":false,"crawler":false,"available":true,"sortOrder":9,"createdBy":"system","createdTime":0,"updatedBy":"system","updatedTime":0}
{"index":{"_index":".fess_config.scheduled_job","_id":"score_booster"}}
{"name":"Score Updater","target":"all","cronExpression":"0 * * * *","scriptType":"groovy","scriptData":"return container.getComponent(\"scoreUpdater\").execute();","jobLogging":false,"crawler":false,"available":true,"sortOrder":10,"createdBy":"system","createdTime":0,"updatedBy":"system","updatedTime":0}
{"index":{"_index":".fess_config.scheduled_job","_id":"label_updater"}}
{"name":"Label Updater","target":"all","cronExpression":"","scriptType":"groovy","scriptData":"return container.getComponent(\"updateLabelJob\").execute();","jobLogging":false,"crawler":false,"available":true,"sortOrder":11,"createdBy":"system","createdTime":0,"updatedBy":"system","updatedTime":0}

View file

@ -20,6 +20,8 @@
</component>
<component name="purgeThumbnailJob" class="org.codelibs.fess.job.PurgeThumbnailJob" instance="prototype">
</component>
<component name="updateLabelJob" class="org.codelibs.fess.job.UpdateLabelJob" instance="prototype">
</component>
<component name="pingEsJob" class="org.codelibs.fess.job.PingEsJob" instance="prototype">
</component>
</components>