improve keymatch

This commit is contained in:
Shinsuke Sugaya 2015-11-19 20:07:02 +09:00
parent f476d87d6b
commit a212c1eef3
9 changed files with 125 additions and 83 deletions

View file

@ -330,4 +330,6 @@ public class Constants extends CoreLibConstants {
public static final String ES_API_ACCESS_TOKEN = "esApiAccessToken";
public static final String ADMIN_PACKAGE = "org.codelibs.fess.app.web.admin";
public static final String DEFAULT_FIELD = "_default";
}

View file

@ -27,6 +27,7 @@ import org.codelibs.fess.app.pager.KeyMatchPager;
import org.codelibs.fess.es.config.cbean.KeyMatchCB;
import org.codelibs.fess.es.config.exbhv.KeyMatchBhv;
import org.codelibs.fess.es.config.exentity.KeyMatch;
import org.codelibs.fess.mylasta.direction.FessConfig;
import org.dbflute.cbean.result.PagingResultBean;
import org.dbflute.optional.OptionalEntity;
@ -37,9 +38,8 @@ public class KeyMatchService implements Serializable {
@Resource
protected KeyMatchBhv keyMatchBhv;
public KeyMatchService() {
super();
}
@Resource
protected FessConfig fessConfig;
public List<KeyMatch> getKeyMatchList(final KeyMatchPager keyMatchPager) {
@ -113,6 +113,7 @@ public class KeyMatchService implements Serializable {
/**
 * Selects key-match entries with a match-all query.
 *
 * The configured {@code page.keymatch.max.fetch.size} value is handed to
 * {@code fetchFirst} on the condition bean.
 *
 * @return the list returned by {@code keyMatchBhv.selectList}
 */
public List<KeyMatch> getAvailableKeyMatchList() {
    return keyMatchBhv.selectList(conditionBean -> {
        conditionBean.query().matchAll();
        conditionBean.fetchFirst(fessConfig.getPageKeymatchMaxFetchSizeAsInteger());
    });
}

View file

@ -16,6 +16,7 @@
package org.codelibs.fess.entity;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@ -44,6 +45,8 @@ public class QueryContext {
private Map<String, List<String>> fieldLogMap = null;
private boolean disableRoleQuery = false;
@SuppressWarnings("unchecked")
public QueryContext(final String queryString, final boolean isQuery) {
this.queryString = queryString;
@ -114,6 +117,13 @@ public class QueryContext {
list.add(text);
}
/**
 * Returns the entries recorded in the field log under {@link Constants#DEFAULT_FIELD}.
 *
 * @return the list stored for the default field, or an empty list when the
 *         field log is not available or holds no entry for that field
 */
public List<String> getDefaultKeyword() {
    // No field log on this context: nothing was recorded.
    if (fieldLogMap == null) {
        return Collections.emptyList();
    }
    return fieldLogMap.getOrDefault(Constants.DEFAULT_FIELD, Collections.emptyList());
}
public void addHighlightedQuery(String text) {
if (highlightedQuerySet != null) {
highlightedQuerySet.add(text);
@ -124,4 +134,11 @@ public class QueryContext {
return queryString;
}
/**
 * Tells whether the role query is still enabled for this context.
 *
 * @return {@code false} once {@link #skipRoleQuery()} has been called, {@code true} otherwise
 */
public boolean roleQueryEnabled() {
    return !this.disableRoleQuery;
}
/**
 * Disables the role query for this context; afterwards
 * {@link #roleQueryEnabled()} returns {@code false}.
 */
public void skipRoleQuery() {
    this.disableRoleQuery = true;
}
}

View file

@ -760,6 +760,7 @@ public class FessEsClient implements Client {
private int size = Constants.DEFAULT_PAGE_SIZE;
private GeoInfo geoInfo;
private FacetInfo facetInfo;
private boolean administrativeAccess = false;
public static SearchConditionBuilder builder(final SearchRequestBuilder searchRequestBuilder) {
return new SearchConditionBuilder(searchRequestBuilder);
@ -775,6 +776,7 @@ public class FessEsClient implements Client {
}
/**
 * Flags this search condition as administrative access.
 *
 * When the flag is set, the query context is told to skip the role query
 * while the query is being built.
 *
 * @return this builder, for call chaining
 */
public SearchConditionBuilder administrativeAccess() {
    this.administrativeAccess = true;
    return this;
}
@ -815,6 +817,9 @@ public class FessEsClient implements Client {
}
final QueryContext queryContext = queryHelper.build(query, context -> {
if (administrativeAccess) {
context.skipRoleQuery();
}
// geo
if (geoInfo != null && geoInfo.isAvailable()) {
context.addQuery(boolQuery -> {

View file

@ -15,27 +15,32 @@
*/
package org.codelibs.fess.helper;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import javax.annotation.PostConstruct;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.core.misc.Pair;
import org.codelibs.fess.app.service.KeyMatchService;
import org.codelibs.fess.es.client.FessEsClient;
import org.codelibs.fess.es.client.FessEsClient.SearchConditionBuilder;
import org.codelibs.fess.es.config.exentity.KeyMatch;
import org.codelibs.fess.mylasta.direction.FessConfig;
import org.codelibs.fess.util.ComponentUtil;
import org.codelibs.fess.util.DocumentUtil;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder;
import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilder;
import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders;
import org.lastaflute.di.core.SingletonLaContainer;
public class KeyMatchHelper {
protected volatile Map<String, String[]> keyMatchQueryMap = Collections.emptyMap();
protected ThreadLocal<List<String>> searchWordList = new ThreadLocal<>();
protected volatile Map<String, Pair<QueryBuilder, ScoreFunctionBuilder>> keyMatchQueryMap = Collections.emptyMap();
protected long reloadInterval = 1000L;
@ -51,29 +56,32 @@ public class KeyMatchHelper {
protected void reload(final long interval) {
final FessConfig fessConfig = ComponentUtil.getFessConfig();
final KeyMatchService keyMatchService = SingletonLaContainer.getComponent(KeyMatchService.class);
final List<KeyMatch> list = keyMatchService.getAvailableKeyMatchList();
final Map<String, String[]> keyMatchQueryMap = new HashMap<String, String[]>(list.size());
for (final KeyMatch keyMatch : list) {
final List<Map<String, Object>> documentList = getDocumentList(keyMatch);
final List<String> docIdList = new ArrayList<String>();
for (final Map<String, Object> map : documentList) {
final String docId = (String) map.get(fessConfig.getIndexFieldDocId());
if (StringUtil.isNotBlank(docId)) {
docIdList.add(fessConfig.getIndexFieldDocId() + ":" + docId + "^" + keyMatch.getBoost());
}
}
if (!docIdList.isEmpty()) {
keyMatchQueryMap.put(keyMatch.getTerm(), docIdList.toArray(new String[docIdList.size()]));
}
final Map<String, Pair<QueryBuilder, ScoreFunctionBuilder>> keyMatchQueryMap = new HashMap<>();
keyMatchService
.getAvailableKeyMatchList()
.stream()
.forEach(
keyMatch -> {
BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
getDocumentList(keyMatch).stream().map(doc -> {
return DocumentUtil.getValue(doc, fessConfig.getIndexFieldDocId(), String.class);
}).forEach(docId -> {
boolQuery.should(QueryBuilders.termQuery(fessConfig.getIndexFieldDocId(), docId));
});
if (reloadInterval > 0) {
try {
Thread.sleep(reloadInterval);
} catch (final InterruptedException e) {
// ignore
}
}
}
if (boolQuery.hasClauses()) {
keyMatchQueryMap.put(toLowerCase(keyMatch.getTerm()),
new Pair<>(boolQuery, ScoreFunctionBuilders.weightFactorFunction(keyMatch.getBoost())));
}
if (reloadInterval > 0) {
try {
Thread.sleep(reloadInterval);
} catch (final InterruptedException e) {
// ignore
}
}
});
this.keyMatchQueryMap = keyMatchQueryMap;
}
@ -89,28 +97,6 @@ public class KeyMatchHelper {
return documentList;
}
public void clear() {
searchWordList.remove();
}
public void addSearchWord(final String word) {
final String[] values = keyMatchQueryMap.get(word);
if (values != null) {
List<String> list = searchWordList.get();
if (list == null) {
list = new ArrayList<>();
searchWordList.set(list);
}
for (final String value : values) {
list.add(value);
}
}
}
public List<String> getDocIdQueryList() {
return searchWordList.get();
}
/**
 * Returns the interval used by {@code reload}, which passes it to
 * {@code Thread.sleep} (i.e. milliseconds) when it is greater than zero.
 *
 * @return the current reload interval
 */
public long getReloadInterval() {
    return this.reloadInterval;
}
@ -118,4 +104,18 @@ public class KeyMatchHelper {
/**
 * Sets the interval used by {@code reload}, which passes it to
 * {@code Thread.sleep} (i.e. milliseconds) when it is greater than zero.
 *
 * @param reloadInterval the new interval; values of zero or less disable the sleep
 */
public void setReloadInterval(final long reloadInterval) {
    // Plain field write; the value is picked up by the next reload pass.
    this.reloadInterval = reloadInterval;
}
/**
 * Adds the registered key-match filter and score function to the given
 * function-score query for every keyword that has a key-match entry.
 *
 * Keywords are looked up case-insensitively: both the registered terms and
 * the incoming keywords are lower-cased with {@link Locale#ROOT}.
 *
 * @param keywordList the keywords to look up; {@code null} or empty adds nothing
 * @param functionScoreQuery the function-score query that receives the matching entries
 */
public void buildQuery(List<String> keywordList, FunctionScoreQueryBuilder functionScoreQuery) {
    if (keywordList == null || keywordList.isEmpty()) {
        return;
    }
    // keyMatchQueryMap is volatile and replaced wholesale by reload(). Read it once so that
    // all keywords of a single query are resolved against the same generation of the map.
    final Map<String, Pair<QueryBuilder, ScoreFunctionBuilder>> queryMap = keyMatchQueryMap;
    for (final String keyword : keywordList) {
        final Pair<QueryBuilder, ScoreFunctionBuilder> pair = queryMap.get(toLowerCase(keyword));
        if (pair != null) {
            functionScoreQuery.add(pair.getFirst(), pair.getSecond());
        }
    }
}
/**
 * Lower-cases a term in a locale-independent way.
 *
 * @param term the term to convert; may be {@code null}
 * @return the term lower-cased with {@link Locale#ROOT}, or {@code null} when the term is {@code null}
 */
private String toLowerCase(String term) {
    if (term == null) {
        return null;
    }
    return term.toLowerCase(Locale.ROOT);
}
}

View file

@ -73,8 +73,6 @@ public class QueryHelper implements Serializable {
protected static final long serialVersionUID = 1L;
private static final String DEFAULT_FIELD = "_default";
protected static final String SCORE_FIELD = "score";
protected static final String INURL_FIELD = "inurl";
@ -224,22 +222,17 @@ public class QueryHelper implements Serializable {
final QueryContext queryContext = new QueryContext(q, true);
buildBaseQuery(queryContext, context);
buildBoostQuery(queryContext);
buildRoleQuery(queryContext);
if (keyMatchHelper != null) {
final List<String> docIdQueryList = keyMatchHelper.getDocIdQueryList();
if (docIdQueryList != null && !docIdQueryList.isEmpty()) {
queryContext.addQuery(boolQuery -> {
for (final String docIdQuery : docIdQueryList) {
// TODO id query?
boolQuery.should(QueryBuilders.queryStringQuery(docIdQuery));
}
});
}
if (!queryContext.hasSorts() && defaultSortBuilders != null) {
queryContext.addSorts(defaultSortBuilders);
}
return queryContext;
}
if (roleQueryHelper != null) {
protected void buildRoleQuery(final QueryContext queryContext) {
if (roleQueryHelper != null && queryContext.roleQueryEnabled()) {
final Set<String> roleSet = roleQueryHelper.build();
if (!roleSet.isEmpty()) {
queryContext.addQuery(boolQuery -> {
@ -251,16 +244,14 @@ public class QueryHelper implements Serializable {
});
}
}
if (!queryContext.hasSorts() && defaultSortBuilders != null) {
queryContext.addSorts(defaultSortBuilders);
}
return queryContext;
}
private void buildBoostQuery(final QueryContext queryContext) {
/**
 * Registers the function-score part of the query on the given context.
 *
 * The configured index boost field is always added as a field-value-factor
 * function. When a key-match helper is present, it is additionally asked to
 * contribute entries for the context's default-field keywords.
 *
 * @param queryContext the query context that receives the function-score setup
 */
protected void buildBoostQuery(final QueryContext queryContext) {
queryContext.addFunctionScore(functionScoreQuery -> {
functionScoreQuery.add(ScoreFunctionBuilders.fieldValueFactorFunction(fessConfig.getIndexFieldBoost()));
// Key-match support is optional: only delegate when a helper is available.
if (keyMatchHelper != null) {
keyMatchHelper.buildQuery(queryContext.getDefaultKeyword(), functionScoreQuery);
}
});
}
@ -282,7 +273,7 @@ public class QueryHelper implements Serializable {
}
protected QueryParser getQueryParser() {
return new ExtendableQueryParser(DEFAULT_FIELD, new WhitespaceAnalyzer());
return new ExtendableQueryParser(Constants.DEFAULT_FIELD, new WhitespaceAnalyzer());
}
protected QueryBuilder convertQuery(final QueryContext context, final Query query) {
@ -329,7 +320,7 @@ public class QueryHelper implements Serializable {
protected QueryBuilder convertWildcardQuery(final QueryContext context, final WildcardQuery wildcardQuery) {
final String field = wildcardQuery.getField();
if (DEFAULT_FIELD.equals(field)) {
if (Constants.DEFAULT_FIELD.equals(field)) {
context.addFieldLog(field, wildcardQuery.getTerm().text());
return buildDefaultQueryBuilder(f -> QueryBuilders.wildcardQuery(f, wildcardQuery.getTerm().text()));
} else if (isSearchField(field)) {
@ -337,7 +328,7 @@ public class QueryHelper implements Serializable {
return QueryBuilders.wildcardQuery(field, wildcardQuery.getTerm().text()).boost(wildcardQuery.getBoost());
} else {
final String origQuery = wildcardQuery.getTerm().toString();
context.addFieldLog(DEFAULT_FIELD, origQuery);
context.addFieldLog(Constants.DEFAULT_FIELD, origQuery);
context.addHighlightedQuery(origQuery);
return buildDefaultQueryBuilder(f -> QueryBuilders.wildcardQuery(f, origQuery));
}
@ -345,7 +336,7 @@ public class QueryHelper implements Serializable {
protected QueryBuilder convertPrefixQuery(final QueryContext context, final PrefixQuery prefixQuery) {
final String field = prefixQuery.getField();
if (DEFAULT_FIELD.equals(field)) {
if (Constants.DEFAULT_FIELD.equals(field)) {
context.addFieldLog(field, prefixQuery.getPrefix().text());
return buildDefaultQueryBuilder(f -> QueryBuilders.prefixQuery(f, prefixQuery.getPrefix().text()));
} else if (isSearchField(field)) {
@ -353,7 +344,7 @@ public class QueryHelper implements Serializable {
return QueryBuilders.prefixQuery(field, prefixQuery.getPrefix().text()).boost(prefixQuery.getBoost());
} else {
final String origQuery = prefixQuery.getPrefix().toString();
context.addFieldLog(DEFAULT_FIELD, origQuery);
context.addFieldLog(Constants.DEFAULT_FIELD, origQuery);
context.addHighlightedQuery(origQuery);
return buildDefaultQueryBuilder(f -> QueryBuilders.prefixQuery(f, origQuery));
}
@ -363,7 +354,7 @@ public class QueryHelper implements Serializable {
final Term term = fuzzyQuery.getTerm();
final String field = term.field();
// TODO fuzzy value
if (DEFAULT_FIELD.equals(field)) {
if (Constants.DEFAULT_FIELD.equals(field)) {
context.addFieldLog(field, term.text());
return buildDefaultQueryBuilder(f -> QueryBuilders.fuzzyQuery(f, term.text()).fuzziness(
Fuzziness.fromEdits(fuzzyQuery.getMaxEdits())));
@ -373,7 +364,7 @@ public class QueryHelper implements Serializable {
.fuzziness(Fuzziness.fromEdits(fuzzyQuery.getMaxEdits()));
} else {
final String origQuery = fuzzyQuery.toString();
context.addFieldLog(DEFAULT_FIELD, origQuery);
context.addFieldLog(Constants.DEFAULT_FIELD, origQuery);
context.addHighlightedQuery(origQuery);
return buildDefaultQueryBuilder(f -> QueryBuilders.fuzzyQuery(f, origQuery).fuzziness(
Fuzziness.fromEdits(fuzzyQuery.getMaxEdits())));
@ -405,7 +396,7 @@ public class QueryHelper implements Serializable {
return rangeQuery;
} else {
final String origQuery = termRangeQuery.toString();
context.addFieldLog(DEFAULT_FIELD, origQuery);
context.addFieldLog(Constants.DEFAULT_FIELD, origQuery);
context.addHighlightedQuery(origQuery);
return buildDefaultQueryBuilder(f -> QueryBuilders.matchPhraseQuery(f, origQuery));
}
@ -414,7 +405,7 @@ public class QueryHelper implements Serializable {
protected QueryBuilder convertTermQuery(final QueryContext context, final TermQuery termQuery) {
final String field = termQuery.getTerm().field();
final String text = termQuery.getTerm().text();
if (DEFAULT_FIELD.equals(field)) {
if (Constants.DEFAULT_FIELD.equals(field)) {
context.addFieldLog(field, text);
context.addHighlightedQuery(text);
return buildDefaultQueryBuilder(f -> QueryBuilders.matchPhraseQuery(f, text));
@ -449,7 +440,7 @@ public class QueryHelper implements Serializable {
return QueryBuilders.matchPhraseQuery(field, text).boost(termQuery.getBoost());
} else {
final String origQuery = termQuery.toString();
context.addFieldLog(DEFAULT_FIELD, origQuery);
context.addFieldLog(Constants.DEFAULT_FIELD, origQuery);
context.addHighlightedQuery(origQuery);
return buildDefaultQueryBuilder(f -> QueryBuilders.matchPhraseQuery(f, origQuery));
}

View file

@ -82,7 +82,6 @@ public class SuggestJob {
try {
executeSuggestCreater();
ComponentUtil.getKeyMatchHelper().update();
} catch (final FessSystemException e) {
throw e;
} catch (final Exception e) {

View file

@ -139,6 +139,9 @@ public interface FessConfig extends FessEnv {
/** The key of the configuration. e.g. 1000 */
String PAGE_DOCBOOST_MAX_FETCH_SIZE = "page.docboost.max.fetch.size";
/** The key of the configuration. e.g. 1000 */
String PAGE_KEYMATCH_MAX_FETCH_SIZE = "page.keymatch.max.fetch.size";
/** The key of the configuration. e.g. 1000 */
String PAGE_ROLE_MAX_FETCH_SIZE = "page.role.max.fetch.size";
@ -608,6 +611,21 @@ public interface FessConfig extends FessEnv {
*/
Integer getPageDocboostMaxFetchSizeAsInteger();
/**
* Get the value for the key 'page.keymatch.max.fetch.size'. <br>
* The value is, e.g. 1000 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getPageKeymatchMaxFetchSize();
/**
* Get the value for the key 'page.keymatch.max.fetch.size' as {@link Integer}. <br>
* The value is, e.g. 1000 <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
* @throws NumberFormatException When the property is not integer.
*/
Integer getPageKeymatchMaxFetchSizeAsInteger();
/**
* Get the value for the key 'page.role.max.fetch.size'. <br>
* The value is, e.g. 1000 <br>
@ -1071,6 +1089,14 @@ public interface FessConfig extends FessEnv {
return getAsInteger(FessConfig.PAGE_DOCBOOST_MAX_FETCH_SIZE);
}
public String getPageKeymatchMaxFetchSize() {
return get(FessConfig.PAGE_KEYMATCH_MAX_FETCH_SIZE);
}
public Integer getPageKeymatchMaxFetchSizeAsInteger() {
return getAsInteger(FessConfig.PAGE_KEYMATCH_MAX_FETCH_SIZE);
}
public String getPageRoleMaxFetchSize() {
return get(FessConfig.PAGE_ROLE_MAX_FETCH_SIZE);
}

View file

@ -85,6 +85,7 @@ paging.page.range.fill.limit = true
# max page size
page.docboost.max.fetch.size=1000
page.keymatch.max.fetch.size=1000
page.role.max.fetch.size=1000
page.group.max.fetch.size=1000