瀏覽代碼

Limit number of chars used in query

Daoud Clarke 4 年之前
父節點
當前提交
0578f41a73
共有 3 個文件被更改,包括 8 次插入和 5 次刪除
  1. +1 -1
      create_app.py
  2. +3 -3
      index.py
  3. +4 -1
      performance.py

+ 1 - 1
create_app.py

@@ -23,7 +23,7 @@ def create(tiny_index: TinyIndex):
 
     def order_results(query, results: List[Document]):
         ordered_results = sorted(results, key=lambda result: Levenshtein.distance(query, result.title))
-        print("Order results", query, ordered_results, sep='\n')
+        # print("Order results", query, ordered_results, sep='\n')
         return ordered_results
 
     @app.get("/complete")

+ 3 - 3
index.py

@@ -70,7 +70,7 @@ class TinyIndexBase:
         page = self.get_page(index)
         if page is None:
             return []
-        print("REtrieve", self.index_path, page)
+        # print("REtrieve", self.index_path, page)
         return self.convert_items(page)
 
     def _get_key_page_index(self, key):
@@ -128,7 +128,7 @@ class TinyIndexer(TinyIndexBase):
     #             self._index_document(document, token)
 
     def index(self, key: str, value):
-        print("Index", value)
+        # print("Index", value)
         assert type(value) == self.item_type, f"Can only index the specified type" \
                                               f" ({self.item_type.__name__})"
         page_index = self._get_key_page_index(key)
@@ -136,7 +136,7 @@ class TinyIndexer(TinyIndexBase):
         if current_page is None:
             current_page = []
         value_tuple = astuple(value)
-        print("Value tuple", value_tuple)
+        # print("Value tuple", value_tuple)
         current_page.append(value_tuple)
         try:
             # print("Page", current_page)

+ 4 - 1
performance.py

@@ -20,6 +20,8 @@ TEST_NUM_PAGES = 1024
 TEST_DATA_PATH = os.path.join(DATA_DIR, 'test-urls.zstd')
 RECALL_AT_K = 3
 
+NUM_QUERY_CHARS = 10
+
 
 def get_test_pages():
     serializer = ZstdJsonSerializer()
@@ -40,7 +42,8 @@ def query_test():
     hits = 0
     count = 0
     for title, url in titles_and_urls:
-        result = client.get('/complete', params={'q': title})
+        query = title[:NUM_QUERY_CHARS]
+        result = client.get('/complete', params={'q': query})
         assert result.status_code == 200
         data = result.json()