From ee5ca6bcf6bbe2d78f296227d6b94df175034345 Mon Sep 17 00:00:00 2001 From: Daoud Clarke Date: Sun, 27 Feb 2022 21:24:16 +0000 Subject: [PATCH] Experiment with score variations (best is simple weighted domain score) --- mwmbl/tinysearchengine/rank.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mwmbl/tinysearchengine/rank.py b/mwmbl/tinysearchengine/rank.py index d1b204a..bc54a7f 100644 --- a/mwmbl/tinysearchengine/rank.py +++ b/mwmbl/tinysearchengine/rank.py @@ -14,7 +14,7 @@ from mwmbl.tinysearchengine.indexer import TinyIndex, Document logger = getLogger(__name__) -SCORE_THRESHOLD = 0.25 +SCORE_THRESHOLD = 0.0 def _get_query_regex(terms, is_complete): @@ -49,9 +49,9 @@ def _score_result(terms, result: Document, is_complete: bool, max_score: float): total_possible_match_length = sum(len(x) for x in terms) match_score = (match_length + 1. / last_match_char) / (total_possible_match_length + 1) - # score = 0.1 * domain_score + 0.9 - # score = (0.01 + 0.99*match_score) * (0.01 + 0.99*(result.score / max_score)) - score = 0.1 * match_score + 0.9 * (result.score / max_score) + score = 0.01 * domain_score + 0.99 * match_score + # score = (0.1 + 0.9*match_score) * (0.1 + 0.9*(result.score / max_score)) + # score = 0.01 * match_score + 0.99 * (result.score / max_score) return score