Explorar o código

Restrict to https and strip the prefix and / on the end

Daoud Clarke hai 2 anos
pai
achega
eda7870788
Modificouse 1 ficheiro con 3 adicións e 1 borrado
  1. 3 1
      mwmbl/tinysearchengine/rank.py

+ 3 - 1
mwmbl/tinysearchengine/rank.py

@@ -16,6 +16,7 @@ SCORE_THRESHOLD = 0.0
 LENGTH_PENALTY = 0.04
 LENGTH_PENALTY = 0.04
 MATCH_EXPONENT = 2
 MATCH_EXPONENT = 2
 DOMAIN_SCORE_SMOOTHING = 50
 DOMAIN_SCORE_SMOOTHING = 50
+HTTPS_STRING = 'https://'
 
 
 
 
 def _get_query_regex(terms, is_complete, is_url):
 def _get_query_regex(terms, is_complete, is_url):
@@ -151,7 +152,8 @@ class Ranker:
     def complete(self, q: str):
     def complete(self, q: str):
         ordered_results, terms, completions = self.get_results(q)
         ordered_results, terms, completions = self.get_results(q)
         filtered_completions = [c for c in completions if c != terms[-1]]
         filtered_completions = [c for c in completions if c != terms[-1]]
-        urls = [item.url for item in ordered_results[:5] if all(term in item.url for term in terms)][:1]
+        urls = [item.url[len(HTTPS_STRING):].rstrip('/') for item in ordered_results[:5]
+                if item.url.startswith(HTTPS_STRING) and all(term in item.url for term in terms)][:1]
         completed = [' '.join(terms[:-1] + [t]) for t in filtered_completions]
         completed = [' '.join(terms[:-1] + [t]) for t in filtered_completions]
         return [q, urls + completed]
         return [q, urls + completed]