Do not add fuzzy query for terms with double-quotes

By default, for single-phrase Term Queries, Fess adds a fuzzy query
(with a low boost value) to the default query builder, causing results with
similar phrases to appear as well.
(For example, searching for "plawright" without quotes will also return results
 containing "playwright".)

However, there is no mechanism for the user to indicate that they only want
results with their specified term (that is, without fuzzy query results).

This patch allows users to exclude fuzzy results by surrounding the exact-search
terms with double quotes, or by using phrase search on the Advanced search page.
This commit is contained in:
hieu1.hoangtrung 2023-10-20 17:11:27 +07:00
parent 2bc92694bc
commit de2809286b
2 changed files with 14 additions and 1 deletions

View file

@ -164,7 +164,12 @@ public class TermQueryCommand extends QueryCommand {
final BoolQueryBuilder boolQuery =
buildDefaultQueryBuilder(fessConfig, context, (f, b) -> buildMatchPhraseQuery(f, text).boost(b * boost));
final Integer fuzzyMinLength = fessConfig.getQueryBoostFuzzyMinLengthAsInteger();
if (fuzzyMinLength >= 0 && text.length() >= fuzzyMinLength) {
// Do not add fuzzy queries if the single-word query is surrounded by double quotes
final String exactWordMatchLookupRegex = ".*\"" + text + "\".*";
final boolean queryIsExactMatch = context.getQueryString().matches(exactWordMatchLookupRegex);
if (!queryIsExactMatch && fuzzyMinLength >= 0 && text.length() >= fuzzyMinLength) {
boolQuery.should(QueryBuilders.fuzzyQuery(fessConfig.getIndexFieldTitle(), text)
.boost(fessConfig.getQueryBoostFuzzyTitleAsDecimal().floatValue())
.prefixLength(fessConfig.getQueryBoostFuzzyTitlePrefixLengthAsInteger())

View file

@ -83,6 +83,14 @@ public class TermQueryCommandTest extends UnitFessTestCase {
"{\"prefix\":{\"site\":{\"value\":\"aaa\",\"boost\":1.0}}}", //
"site:aaa");
// assertion for fuzzy search
assertQueryBuilder(BoolQueryBuilder.class,
"{\"bool\":{\"should\":[{\"match_phrase\":{\"title\":{\"query\":\"helloworld\",\"slop\":0,\"zero_terms_query\":\"NONE\",\"boost\":0.5}}},{\"match_phrase\":{\"content\":{\"query\":\"helloworld\",\"slop\":0,\"zero_terms_query\":\"NONE\",\"boost\":0.05}}},{\"fuzzy\":{\"title\":{\"value\":\"helloworld\",\"fuzziness\":\"AUTO\",\"prefix_length\":0,\"max_expansions\":10,\"transpositions\":true,\"boost\":0.01}}},{\"fuzzy\":{\"content\":{\"value\":\"helloworld\",\"fuzziness\":\"AUTO\",\"prefix_length\":0,\"max_expansions\":10,\"transpositions\":true,\"boost\":0.005}}}],\"adjust_pure_negative\":true,\"boost\":1.0}}",
"helloworld");
assertQueryBuilder(BoolQueryBuilder.class,
"{\"bool\":{\"should\":[{\"match_phrase\":{\"title\":{\"query\":\"helloworld\",\"slop\":0,\"zero_terms_query\":\"NONE\",\"boost\":0.5}}},{\"match_phrase\":{\"content\":{\"query\":\"helloworld\",\"slop\":0,\"zero_terms_query\":\"NONE\",\"boost\":0.05}}}],\"adjust_pure_negative\":true,\"boost\":1.0}}",
"\"helloworld\"");
assertQueryBuilder("{\"timestamp\":{\"order\":\"asc\"}}", "sort:timestamp");
assertQueryBuilder("{\"timestamp\":{\"order\":\"asc\"}}", "sort:timestamp.asc");
assertQueryBuilder("{\"timestamp\":{\"order\":\"desc\"}}", "sort:timestamp.desc");