fix #2832: Auto-switch the search operator to OR when the hit count is at or below the threshold

This commit is contained in:
Shinsuke Sugaya 2024-07-22 15:09:52 +09:00
parent e0f2314c75
commit 9de34ffd1a
5 changed files with 57 additions and 2 deletions

View file

@ -250,6 +250,8 @@ public class Constants extends CoreLibConstants {
// Matches one reserved character (! ( ) { } [ ] " ~ \ : or a Unicode space separator),
// or the two-character sequences && and ||.
public static final Pattern LUCENE_RANGE_FIELD_RESERVED_PATTERN = Pattern.compile("([!\\(\\){}\\[\\]\"~\\\\:\\p{Zs}]|(&&)|(\\|\\|))");
// Request attribute name under which a per-request default query operator (e.g. "OR") is stored.
public static final String DEFAULT_QUERY_OPERATOR = "fess.DefaultQueryOperator";
// Name used for the search-log access type (usage is not visible in this section).
public static final String SEARCH_LOG_ACCESS_TYPE = "searchLogAccessType";
// Value "json" for the search-log access type (usage is not visible in this section).
public static final String SEARCH_LOG_ACCESS_TYPE_JSON = "json";

View file

@ -43,6 +43,7 @@ import org.codelibs.fess.exception.InvalidQueryException;
import org.codelibs.fess.mylasta.action.FessUserBean;
import org.codelibs.fess.mylasta.direction.FessConfig;
import org.codelibs.fess.query.QueryFieldConfig;
import org.codelibs.fess.rank.fusion.RankFusionProcessor;
import org.codelibs.fess.util.BooleanFunction;
import org.codelibs.fess.util.ComponentUtil;
import org.codelibs.fess.util.QueryResponseList;
@ -160,7 +161,22 @@ public class SearchHelper {
/**
 * Runs the search through the rank fusion processor and, when the result has too few hits,
 * runs it once more with the default query operator switched to OR.
 *
 * <p>The second search happens only when the first result is a {@link QueryResponseList} whose
 * total record count is less than or equal to {@code getQueryOrsearchMinHitCountAsInteger()}
 * and a current request is available to carry the operator override. If no request is
 * available, the first result is returned unchanged.</p>
 *
 * @param query the query string passed to the rank fusion processor
 * @param params the search request parameters
 * @param userBean the optional user performing the search
 * @return the documents of the first search, or those of the OR search when it was triggered
 */
protected List<Map<String, Object>> searchInternal(final String query, final SearchRequestParams params,
        final OptionalThing<FessUserBean> userBean) {
    final RankFusionProcessor rankFusionProcessor = ComponentUtil.getRankFusionProcessor();
    final List<Map<String, Object>> documentItems = rankFusionProcessor.search(query, params, userBean);
    if (documentItems instanceof QueryResponseList queryResponseList) {
        final FessConfig fessConfig = ComponentUtil.getFessConfig();
        if (queryResponseList.getAllRecordCount() <= fessConfig.getQueryOrsearchMinHitCountAsInteger()) {
            return LaRequestUtil.getOptionalRequest().map(request -> {
                // The override is passed through a request attribute.
                // NOTE(review): the attribute is not cleared afterwards, so it stays on the request
                // for anything that reads it later - confirm this is intended.
                request.setAttribute(Constants.DEFAULT_QUERY_OPERATOR, "OR");
                if (logger.isDebugEnabled()) {
                    logger.debug("The number of hits is {}<={}. Searching again with OR operator.",
                            queryResponseList.getAllRecordCount(), fessConfig.getQueryOrsearchMinHitCountAsInteger());
                }
                return rankFusionProcessor.search(query, params, userBean);
            }).orElse(queryResponseList);
        }
    }
    return documentItems;
}
public long scrollSearch(final SearchRequestParams params, final BooleanFunction<Map<String, Object>> cursor,

View file

@ -856,6 +856,9 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
/** The key of the configuration. e.g. true */
String QUERY_REPLACE_TERM_WITH_PREFIX_QUERY = "query.replace.term.with.prefix.query";
/** The key of the configuration holding the hit-count threshold for the OR-operator search. e.g. 0 */
String QUERY_ORSEARCH_MIN_HIT_COUNT = "query.orsearch.min.hit.count";
/** The key of the configuration. e.g. u0021u002Cu002Eu003Fu0589u061Fu06D4u0700u0701u0702u0964u104Au104Bu1362u1367u1368u166Eu1803u1809u203Cu203Du2047u2048u2049u3002uFE52uFE57uFF01uFF0EuFF1FuFF61 */
String QUERY_HIGHLIGHT_TERMINAL_CHARS = "query.highlight.terminal.chars";
@ -4400,6 +4403,21 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
*/
boolean isQueryReplaceTermWithPrefixQuery();
/**
 * Get the value for the key 'query.orsearch.min.hit.count'. <br>
 * The value is, e.g. 0 <br>
 * @return The value of the found property. (NotNull: an exception is thrown if the key is not found, which basically does not happen)
 */
String getQueryOrsearchMinHitCount();
/**
 * Get the value for the key 'query.orsearch.min.hit.count' as {@link Integer}. <br>
 * The value is, e.g. 0 <br>
 * @return The value of the found property. (NotNull: an exception is thrown if the key is not found, which basically does not happen)
 * @throws NumberFormatException When the property is not an integer.
 */
Integer getQueryOrsearchMinHitCountAsInteger();
/**
* Get the value for the key 'query.highlight.terminal.chars'. <br>
* The value is, e.g. u0021u002Cu002Eu003Fu0589u061Fu06D4u0700u0701u0702u0964u104Au104Bu1362u1367u1368u166Eu1803u1809u203Cu203Du2047u2048u2049u3002uFE52uFE57uFF01uFF0EuFF1FuFF61 <br>
@ -9121,6 +9139,14 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
return is(FessConfig.QUERY_REPLACE_TERM_WITH_PREFIX_QUERY);
}
/**
 * Looks up the property stored under {@code FessConfig.QUERY_ORSEARCH_MIN_HIT_COUNT}.
 *
 * @return the string obtained from {@code get} for that key
 */
public String getQueryOrsearchMinHitCount() {
    final String minHitCount = get(FessConfig.QUERY_ORSEARCH_MIN_HIT_COUNT);
    return minHitCount;
}
/**
 * Looks up the property stored under {@code FessConfig.QUERY_ORSEARCH_MIN_HIT_COUNT} as an integer.
 *
 * @return the value obtained from {@code getAsInteger} for that key
 */
public Integer getQueryOrsearchMinHitCountAsInteger() {
    final Integer minHitCount = getAsInteger(FessConfig.QUERY_ORSEARCH_MIN_HIT_COUNT);
    return minHitCount;
}
/**
 * Looks up the property stored under {@code FessConfig.QUERY_HIGHLIGHT_TERMINAL_CHARS}.
 *
 * @return the string obtained from {@code get} for that key
 */
public String getQueryHighlightTerminalChars() {
    final String terminalChars = get(FessConfig.QUERY_HIGHLIGHT_TERMINAL_CHARS);
    return terminalChars;
}
@ -11183,6 +11209,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
defaultMap.put(FessConfig.QUERY_GEO_FIELDS, "location");
defaultMap.put(FessConfig.QUERY_BROWSER_LANG_PARAMETER_NAME, "browser_lang");
defaultMap.put(FessConfig.QUERY_REPLACE_TERM_WITH_PREFIX_QUERY, "true");
defaultMap.put(FessConfig.QUERY_ORSEARCH_MIN_HIT_COUNT, "0");
defaultMap.put(FessConfig.QUERY_HIGHLIGHT_TERMINAL_CHARS,
"u0021u002Cu002Eu003Fu0589u061Fu06D4u0700u0701u0702u0964u104Au104Bu1362u1367u1368u166Eu1803u1809u203Cu203Du2047u2048u2049u3002uFE52uFE57uFF01uFF0EuFF1FuFF61");
defaultMap.put(FessConfig.QUERY_HIGHLIGHT_FRAGMENT_SIZE, "60");

View file

@ -29,6 +29,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.codelibs.fess.Constants;
import org.codelibs.fess.exception.QueryParseException;
import org.lastaflute.web.util.LaRequestUtil;
import jakarta.annotation.PostConstruct;
@ -58,7 +59,15 @@ public class QueryParser {
/**
 * Creates a Lucene query parser configured with this component's default field, analyzer and
 * leading-wildcard setting.
 *
 * <p>The default operator is {@code defaultOperator}, unless the current request carries a
 * {@code String} attribute named {@link Constants#DEFAULT_QUERY_OPERATOR}. In that case the
 * attribute value is resolved with {@code Operator.valueOf} and used instead. Any exception
 * raised by {@code Operator.valueOf} for an unrecognised name is propagated to the caller.</p>
 *
 * @return the configured query parser
 */
protected org.apache.lucene.queryparser.classic.QueryParser createQueryParser() {
    final LuceneQueryParser parser = new LuceneQueryParser(defaultField, analyzer);
    parser.setAllowLeadingWildcard(allowLeadingWildcard);
    // Start from the configured operator; it is replaced below only for a per-request override.
    parser.setDefaultOperator(defaultOperator);
    LaRequestUtil.getOptionalRequest().ifPresent(req -> {
        if (req.getAttribute(Constants.DEFAULT_QUERY_OPERATOR) instanceof String op) {
            parser.setDefaultOperator(Operator.valueOf(op));
        }
    });
    return parser;
}

View file

@ -443,6 +443,7 @@ query.track.total.hits=10000
query.geo.fields=location
query.browser.lang.parameter.name=browser_lang
query.replace.term.with.prefix.query=true
query.orsearch.min.hit.count=0
query.highlight.terminal.chars=u0021u002Cu002Eu003Fu0589u061Fu06D4u0700u0701u0702u0964u104Au104Bu1362u1367u1368u166Eu1803u1809u203Cu203Du2047u2048u2049u3002uFE52uFE57uFF01uFF0EuFF1FuFF61
query.highlight.fragment.size=60
query.highlight.number.of.fragments=2