From 0578f41a7343cf66c111708957addba12294989a Mon Sep 17 00:00:00 2001 From: Daoud Clarke Date: Fri, 11 Jun 2021 21:43:12 +0100 Subject: [PATCH] Limit number of chars used in query --- create_app.py | 2 +- index.py | 6 +++--- performance.py | 5 ++++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/create_app.py b/create_app.py index 0e256ce..625228b 100644 --- a/create_app.py +++ b/create_app.py @@ -23,7 +23,7 @@ def create(tiny_index: TinyIndex): def order_results(query, results: List[Document]): ordered_results = sorted(results, key=lambda result: Levenshtein.distance(query, result.title)) - print("Order results", query, ordered_results, sep='\n') + # print("Order results", query, ordered_results, sep='\n') return ordered_results @app.get("/complete") diff --git a/index.py b/index.py index b1ef6ef..9f02694 100644 --- a/index.py +++ b/index.py @@ -70,7 +70,7 @@ class TinyIndexBase: page = self.get_page(index) if page is None: return [] - print("REtrieve", self.index_path, page) + # print("REtrieve", self.index_path, page) return self.convert_items(page) def _get_key_page_index(self, key): @@ -128,7 +128,7 @@ class TinyIndexer(TinyIndexBase): # self._index_document(document, token) def index(self, key: str, value): - print("Index", value) + # print("Index", value) assert type(value) == self.item_type, f"Can only index the specified type" \ f" ({self.item_type.__name__})" page_index = self._get_key_page_index(key) @@ -136,7 +136,7 @@ class TinyIndexer(TinyIndexBase): if current_page is None: current_page = [] value_tuple = astuple(value) - print("Value tuple", value_tuple) + # print("Value tuple", value_tuple) current_page.append(value_tuple) try: # print("Page", current_page) diff --git a/performance.py b/performance.py index 9f03b88..d1463cd 100644 --- a/performance.py +++ b/performance.py @@ -20,6 +20,8 @@ TEST_NUM_PAGES = 1024 TEST_DATA_PATH = os.path.join(DATA_DIR, 'test-urls.zstd') RECALL_AT_K = 3 +NUM_QUERY_CHARS = 10 + def get_test_pages(): serializer = ZstdJsonSerializer() @@ -40,7 +42,8 @@ def query_test(): hits = 0 count = 0 for title, url in titles_and_urls: - result = client.get('/complete', params={'q': title}) + query = title[:NUM_QUERY_CHARS] + result = client.get('/complete', params={'q': query}) assert result.status_code == 200 data = result.json()