Browse Source

fix #2503 add fuzzy query

Shinsuke Sugaya 4 năm trước cách đây
mục cha
commit
cd534c65c4

+ 18 - 2
src/main/java/org/codelibs/fess/helper/QueryHelper.java

@@ -607,7 +607,7 @@ public class QueryHelper {
         } else if (Constants.DEFAULT_FIELD.equals(field)) {
         } else if (Constants.DEFAULT_FIELD.equals(field)) {
             context.addFieldLog(field, text);
             context.addFieldLog(field, text);
             context.addHighlightedQuery(text);
             context.addHighlightedQuery(text);
-            return buildDefaultQueryBuilder((f, b) -> buildMatchPhraseQuery(f, text).boost(b * boost));
+            return buildDefaultTermQueryBuilder(boost, text);
         } else if ("sort".equals(field)) {
         } else if ("sort".equals(field)) {
             split(text, ",").of(stream -> stream.filter(StringUtil::isNotBlank).forEach(t -> {
             split(text, ",").of(stream -> stream.filter(StringUtil::isNotBlank).forEach(t -> {
                 final String[] values = t.split("\\.");
                 final String[] values = t.split("\\.");
@@ -684,7 +684,23 @@ public class QueryHelper {
         return false;
         return false;
     }
     }
 
 
-    protected QueryBuilder buildDefaultQueryBuilder(final DefaultQueryBuilderFunction builder) {
+    protected QueryBuilder buildDefaultTermQueryBuilder(final float boost, final String text) {
+        final BoolQueryBuilder boolQuery = buildDefaultQueryBuilder((f, b) -> buildMatchPhraseQuery(f, text).boost(b * boost));
+        final FessConfig fessConfig = ComponentUtil.getFessConfig();
+        if (text.length() >= fessConfig.getQueryBoostFuzzyMinLengthAsInteger()) {
+            boolQuery.should(QueryBuilders.fuzzyQuery(fessConfig.getIndexFieldTitle(), text)
+                    .boost(fessConfig.getQueryBoostFuzzyTitleAsDecimal().floatValue())
+                    .fuzziness(Fuzziness.build(fessConfig.getQueryBoostFuzzyTitleFuzziness()))
+                    .maxExpansions(fessConfig.getQueryBoostFuzzyTitleExpansionsAsInteger()));
+            boolQuery.should(QueryBuilders.fuzzyQuery(fessConfig.getIndexFieldContent(), text)
+                    .boost(fessConfig.getQueryBoostFuzzyContentAsDecimal().floatValue())
+                    .fuzziness(Fuzziness.build(fessConfig.getQueryBoostFuzzyContentFuzziness()))
+                    .maxExpansions(fessConfig.getQueryBoostFuzzyContentExpansionsAsInteger()));
+        }
+        return boolQuery;
+    }
+
+    protected BoolQueryBuilder buildDefaultQueryBuilder(final DefaultQueryBuilderFunction builder) {
         final BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
         final BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
         final FessConfig fessConfig = ComponentUtil.getFessConfig();
         final FessConfig fessConfig = ComponentUtil.getFessConfig();
         boolQuery.should(builder.apply(fessConfig.getIndexFieldTitle(), fessConfig.getQueryBoostTitleAsDecimal().floatValue()));
         boolQuery.should(builder.apply(fessConfig.getIndexFieldTitle(), fessConfig.getQueryBoostTitleAsDecimal().floatValue()));

+ 165 - 0
src/main/java/org/codelibs/fess/mylasta/direction/FessConfig.java

@@ -944,6 +944,27 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
     /** The key of the configuration. e.g. -1.0 */
     /** The key of the configuration. e.g. -1.0 */
     String QUERY_BOOST_important_content_LANG = "query.boost.important_content.lang";
     String QUERY_BOOST_important_content_LANG = "query.boost.important_content.lang";
 
 
+    /** The key of the configuration. e.g. 4 */
+    String QUERY_BOOST_FUZZY_MIN_LENGTH = "query.boost.fuzzy.min.length";
+
+    /** The key of the configuration. e.g. 0.01 */
+    String QUERY_BOOST_FUZZY_TITLE = "query.boost.fuzzy.title";
+
+    /** The key of the configuration. e.g. AUTO */
+    String QUERY_BOOST_FUZZY_TITLE_FUZZINESS = "query.boost.fuzzy.title.fuzziness";
+
+    /** The key of the configuration. e.g. 10 */
+    String QUERY_BOOST_FUZZY_TITLE_EXPANSIONS = "query.boost.fuzzy.title.expansions";
+
+    /** The key of the configuration. e.g. 0.005 */
+    String QUERY_BOOST_FUZZY_CONTENT = "query.boost.fuzzy.content";
+
+    /** The key of the configuration. e.g. AUTO */
+    String QUERY_BOOST_FUZZY_CONTENT_FUZZINESS = "query.boost.fuzzy.content.fuzziness";
+
+    /** The key of the configuration. e.g. 10 */
+    String QUERY_BOOST_FUZZY_CONTENT_EXPANSIONS = "query.boost.fuzzy.content.expansions";
+
     /** The key of the configuration. e.g. label */
     /** The key of the configuration. e.g. label */
     String QUERY_FACET_FIELDS = "query.facet.fields";
     String QUERY_FACET_FIELDS = "query.facet.fields";
 
 
@@ -4357,6 +4378,95 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
      */
      */
     java.math.BigDecimal getQueryBoostImportantContentLangAsDecimal();
     java.math.BigDecimal getQueryBoostImportantContentLangAsDecimal();
 
 
+    /**
+     * Get the value for the key 'query.boost.fuzzy.min.length'. <br>
+     * The value is, e.g. 4 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getQueryBoostFuzzyMinLength();
+
+    /**
+     * Get the value for the key 'query.boost.fuzzy.min.length' as {@link Integer}. <br>
+     * The value is, e.g. 4 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     * @throws NumberFormatException When the property is not integer.
+     */
+    Integer getQueryBoostFuzzyMinLengthAsInteger();
+
+    /**
+     * Get the value for the key 'query.boost.fuzzy.title'. <br>
+     * The value is, e.g. 0.01 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getQueryBoostFuzzyTitle();
+
+    /**
+     * Get the value for the key 'query.boost.fuzzy.title' as {@link java.math.BigDecimal}. <br>
+     * The value is, e.g. 0.01 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     * @throws NumberFormatException When the property is not decimal.
+     */
+    java.math.BigDecimal getQueryBoostFuzzyTitleAsDecimal();
+
+    /**
+     * Get the value for the key 'query.boost.fuzzy.title.fuzziness'. <br>
+     * The value is, e.g. AUTO <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getQueryBoostFuzzyTitleFuzziness();
+
+    /**
+     * Get the value for the key 'query.boost.fuzzy.title.expansions'. <br>
+     * The value is, e.g. 10 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getQueryBoostFuzzyTitleExpansions();
+
+    /**
+     * Get the value for the key 'query.boost.fuzzy.title.expansions' as {@link Integer}. <br>
+     * The value is, e.g. 10 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     * @throws NumberFormatException When the property is not integer.
+     */
+    Integer getQueryBoostFuzzyTitleExpansionsAsInteger();
+
+    /**
+     * Get the value for the key 'query.boost.fuzzy.content'. <br>
+     * The value is, e.g. 0.005 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getQueryBoostFuzzyContent();
+
+    /**
+     * Get the value for the key 'query.boost.fuzzy.content' as {@link java.math.BigDecimal}. <br>
+     * The value is, e.g. 0.005 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     * @throws NumberFormatException When the property is not decimal.
+     */
+    java.math.BigDecimal getQueryBoostFuzzyContentAsDecimal();
+
+    /**
+     * Get the value for the key 'query.boost.fuzzy.content.fuzziness'. <br>
+     * The value is, e.g. AUTO <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getQueryBoostFuzzyContentFuzziness();
+
+    /**
+     * Get the value for the key 'query.boost.fuzzy.content.expansions'. <br>
+     * The value is, e.g. 10 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     */
+    String getQueryBoostFuzzyContentExpansions();
+
+    /**
+     * Get the value for the key 'query.boost.fuzzy.content.expansions' as {@link Integer}. <br>
+     * The value is, e.g. 10 <br>
+     * @return The value of found property. (NotNull: if not found, exception but basically no way)
+     * @throws NumberFormatException When the property is not integer.
+     */
+    Integer getQueryBoostFuzzyContentExpansionsAsInteger();
+
     /**
     /**
      * Get the value for the key 'query.facet.fields'. <br>
      * Get the value for the key 'query.facet.fields'. <br>
      * The value is, e.g. label <br>
      * The value is, e.g. label <br>
@@ -8039,6 +8149,54 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             return getAsDecimal(FessConfig.QUERY_BOOST_important_content_LANG);
             return getAsDecimal(FessConfig.QUERY_BOOST_important_content_LANG);
         }
         }
 
 
+        public String getQueryBoostFuzzyMinLength() {
+            return get(FessConfig.QUERY_BOOST_FUZZY_MIN_LENGTH);
+        }
+
+        public Integer getQueryBoostFuzzyMinLengthAsInteger() {
+            return getAsInteger(FessConfig.QUERY_BOOST_FUZZY_MIN_LENGTH);
+        }
+
+        public String getQueryBoostFuzzyTitle() {
+            return get(FessConfig.QUERY_BOOST_FUZZY_TITLE);
+        }
+
+        public java.math.BigDecimal getQueryBoostFuzzyTitleAsDecimal() {
+            return getAsDecimal(FessConfig.QUERY_BOOST_FUZZY_TITLE);
+        }
+
+        public String getQueryBoostFuzzyTitleFuzziness() {
+            return get(FessConfig.QUERY_BOOST_FUZZY_TITLE_FUZZINESS);
+        }
+
+        public String getQueryBoostFuzzyTitleExpansions() {
+            return get(FessConfig.QUERY_BOOST_FUZZY_TITLE_EXPANSIONS);
+        }
+
+        public Integer getQueryBoostFuzzyTitleExpansionsAsInteger() {
+            return getAsInteger(FessConfig.QUERY_BOOST_FUZZY_TITLE_EXPANSIONS);
+        }
+
+        public String getQueryBoostFuzzyContent() {
+            return get(FessConfig.QUERY_BOOST_FUZZY_CONTENT);
+        }
+
+        public java.math.BigDecimal getQueryBoostFuzzyContentAsDecimal() {
+            return getAsDecimal(FessConfig.QUERY_BOOST_FUZZY_CONTENT);
+        }
+
+        public String getQueryBoostFuzzyContentFuzziness() {
+            return get(FessConfig.QUERY_BOOST_FUZZY_CONTENT_FUZZINESS);
+        }
+
+        public String getQueryBoostFuzzyContentExpansions() {
+            return get(FessConfig.QUERY_BOOST_FUZZY_CONTENT_EXPANSIONS);
+        }
+
+        public Integer getQueryBoostFuzzyContentExpansionsAsInteger() {
+            return getAsInteger(FessConfig.QUERY_BOOST_FUZZY_CONTENT_EXPANSIONS);
+        }
+
         public String getQueryFacetFields() {
         public String getQueryFacetFields() {
             return get(FessConfig.QUERY_FACET_FIELDS);
             return get(FessConfig.QUERY_FACET_FIELDS);
         }
         }
@@ -9538,6 +9696,13 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
             defaultMap.put(FessConfig.QUERY_BOOST_CONTENT_LANG, "0.1");
             defaultMap.put(FessConfig.QUERY_BOOST_CONTENT_LANG, "0.1");
             defaultMap.put(FessConfig.QUERY_BOOST_important_content, "-1.0");
             defaultMap.put(FessConfig.QUERY_BOOST_important_content, "-1.0");
             defaultMap.put(FessConfig.QUERY_BOOST_important_content_LANG, "-1.0");
             defaultMap.put(FessConfig.QUERY_BOOST_important_content_LANG, "-1.0");
+            defaultMap.put(FessConfig.QUERY_BOOST_FUZZY_MIN_LENGTH, "4");
+            defaultMap.put(FessConfig.QUERY_BOOST_FUZZY_TITLE, "0.01");
+            defaultMap.put(FessConfig.QUERY_BOOST_FUZZY_TITLE_FUZZINESS, "AUTO");
+            defaultMap.put(FessConfig.QUERY_BOOST_FUZZY_TITLE_EXPANSIONS, "10");
+            defaultMap.put(FessConfig.QUERY_BOOST_FUZZY_CONTENT, "0.005");
+            defaultMap.put(FessConfig.QUERY_BOOST_FUZZY_CONTENT_FUZZINESS, "AUTO");
+            defaultMap.put(FessConfig.QUERY_BOOST_FUZZY_CONTENT_EXPANSIONS, "10");
             defaultMap.put(FessConfig.QUERY_FACET_FIELDS, "label");
             defaultMap.put(FessConfig.QUERY_FACET_FIELDS, "label");
             defaultMap.put(FessConfig.QUERY_FACET_FIELDS_SIZE, "100");
             defaultMap.put(FessConfig.QUERY_FACET_FIELDS_SIZE, "100");
             defaultMap.put(FessConfig.QUERY_FACET_FIELDS_min_doc_count, "1");
             defaultMap.put(FessConfig.QUERY_FACET_FIELDS_min_doc_count, "1");

+ 7 - 0
src/main/resources/fess_config.properties

@@ -512,6 +512,13 @@ query.boost.content=0.05
 query.boost.content.lang=0.1
 query.boost.content.lang=0.1
 query.boost.important_content=-1.0
 query.boost.important_content=-1.0
 query.boost.important_content.lang=-1.0
 query.boost.important_content.lang=-1.0
+query.boost.fuzzy.min.length=4
+query.boost.fuzzy.title=0.01
+query.boost.fuzzy.title.fuzziness=AUTO
+query.boost.fuzzy.title.expansions=10
+query.boost.fuzzy.content=0.005
+query.boost.fuzzy.content.fuzziness=AUTO
+query.boost.fuzzy.content.expansions=10
 
 
 # facet
 # facet
 query.facet.fields=label
 query.facet.fields=label

+ 8 - 4
src/test/java/org/codelibs/fess/helper/QueryHelperTest.java

@@ -61,8 +61,8 @@ public class QueryHelperTest extends UnitFessTestCase {
     }
     }
 
 
     public void test_build() {
     public void test_build() {
-        float titleBoost = 0.01f;
-        float contentBoost = 0.005f;
+        float titleBoost = 0.5f;
+        float contentBoost = 0.05f;
 
 
         assertQuery(functionScoreQuery(simpleQuery("QUERY", titleBoost, contentBoost)), buildQuery("QUERY"));
         assertQuery(functionScoreQuery(simpleQuery("QUERY", titleBoost, contentBoost)), buildQuery("QUERY"));
         assertQuery(functionScoreQuery(simpleQuery("QUERY", titleBoost, contentBoost)), buildQuery(" QUERY"));
         assertQuery(functionScoreQuery(simpleQuery("QUERY", titleBoost, contentBoost)), buildQuery(" QUERY"));
@@ -163,8 +163,12 @@ public class QueryHelperTest extends UnitFessTestCase {
     }
     }
 
 
     private QueryBuilder simpleQuery(String query, float titleBoost, float contentBoost) {
     private QueryBuilder simpleQuery(String query, float titleBoost, float contentBoost) {
-        return QueryBuilders.boolQuery().should(QueryBuilders.matchPhraseQuery("title", query).boost(titleBoost))
-                .should(QueryBuilders.matchPhraseQuery("content", query).boost(contentBoost));
+        return QueryBuilders.boolQuery()//
+                .should(QueryBuilders.matchPhraseQuery("title", query).boost(titleBoost))//
+                .should(QueryBuilders.matchPhraseQuery("content", query).boost(contentBoost))//
+                .should(QueryBuilders.fuzzyQuery("title", query).boost(0.01f).maxExpansions(10))//
+                .should(QueryBuilders.fuzzyQuery("content", query).boost(0.005f).maxExpansions(10))//
+        ;
     }
     }
 
 
     private QueryBuilder functionScoreQuery(QueryBuilder queryBuilder) {
     private QueryBuilder functionScoreQuery(QueryBuilder queryBuilder) {