Restrict URL completions to HTTPS results, and strip the `https://` prefix and any trailing `/`

This commit is contained in:
Daoud Clarke 2022-08-11 22:23:14 +01:00
parent 23e47e963b
commit eda7870788

View file

@ -16,6 +16,7 @@ SCORE_THRESHOLD = 0.0
# NOTE(review): the three values below look like ranking/scoring parameters, but
# their uses are outside this view — confirm before documenting them further.
LENGTH_PENALTY = 0.04
MATCH_EXPONENT = 2
DOMAIN_SCORE_SMOOTHING = 50
# URL scheme prefix. The `complete` method only suggests result URLs that start
# with this prefix, and strips it from the suggestion it returns.
HTTPS_STRING = 'https://'
def _get_query_regex(terms, is_complete, is_url):
@ -151,7 +152,8 @@ class Ranker:
def complete(self, q: str):
    """Build the completion response for the query ``q``.

    Returns a two-element list ``[q, suggestions]`` where ``suggestions`` is
    at most one URL suggestion followed by the completed queries.

    The URL suggestion is taken from the first of the top five results whose
    URL starts with ``HTTPS_STRING`` and contains every term as a substring.
    It is returned without the ``HTTPS_STRING`` prefix and without trailing
    ``/`` characters.

    Each completed query is the original terms with the last term replaced
    by one of the completions, joined with single spaces.
    """
    ordered_results, terms, completions = self.get_results(q)

    # Drop completions that are identical to the last term.
    filtered_completions = [c for c in completions if c != terms[-1]]

    # Only the five best results are considered, and at most one URL is kept.
    urls = [item.url[len(HTTPS_STRING):].rstrip('/') for item in ordered_results[:5]
            if item.url.startswith(HTTPS_STRING) and all(term in item.url for term in terms)][:1]

    completed = [' '.join(terms[:-1] + [t]) for t in filtered_completions]
    return [q, urls + completed]