Sfoglia il codice sorgente

Restrict URL completions to https and strip the 'https://' prefix and the trailing '/'

Daoud Clarke 2 anni fa
parent
commit
eda7870788
1 file modificato con 3 aggiunte e 1 eliminazione
  1. mwmbl/tinysearchengine/rank.py (+3 -1)

+ 3 - 1
mwmbl/tinysearchengine/rank.py

@@ -16,6 +16,7 @@ SCORE_THRESHOLD = 0.0
 LENGTH_PENALTY = 0.04
 MATCH_EXPONENT = 2
 DOMAIN_SCORE_SMOOTHING = 50
+HTTPS_STRING = 'https://'
 
 
 def _get_query_regex(terms, is_complete, is_url):
@@ -151,7 +152,8 @@ class Ranker:
     def complete(self, q: str):
         ordered_results, terms, completions = self.get_results(q)
         filtered_completions = [c for c in completions if c != terms[-1]]
-        urls = [item.url for item in ordered_results[:5] if all(term in item.url for term in terms)][:1]
+        urls = [item.url[len(HTTPS_STRING):].rstrip('/') for item in ordered_results[:5]
+                if item.url.startswith(HTTPS_STRING) and all(term in item.url for term in terms)][:1]
         completed = [' '.join(terms[:-1] + [t]) for t in filtered_completions]
         return [q, urls + completed]