Remove debug print code

This commit is contained in:
Daoud Clarke 2021-12-26 08:47:33 +00:00
parent 794af00bfb
commit 8cfb8b7a44
3 changed files with 10 additions and 43 deletions

View file

@ -6,6 +6,16 @@ authors = ["Daoud Clarke <daoud.clarke@gmail.com>"]
[tool.poetry.dependencies]
python = "^3.9"
pandas = "^1.3.4"
zstandard = "^0.16.0"
mmh3 = "^3.0.0"
fastapi = "^0.70.1"
uvicorn = "^0.16.0"
# [tool.poetry.dependencies.en_core_web_sm]
# url= "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.2.0/en_core_web_sm-3.2.0-py3-none-any.whl"
[tool.poetry.dev-dependencies]
# botocore = "^1.23.20"
# boto3 = "^1.20.20"
# ujson = "^4.3.0"
@ -14,20 +24,10 @@ python = "^3.9"
# beautifulsoup4 = "^4.10.0"
# lxml = "^4.6.4"
# jusText = "^3.0.0"
pandas = "^1.3.4"
# pyspark = "^3.2.0"
# langdetect = "^1.0.9"
zstandard = "^0.16.0"
# spacy = "^3.2.1"
mmh3 = "^3.0.0"
fastapi = "^0.70.1"
# Levenshtein = "^0.16.0"
uvicorn = "^0.16.0"
# [tool.poetry.dependencies.en_core_web_sm]
# url= "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.2.0/en_core_web_sm-3.2.0-py3-none-any.whl"
[tool.poetry.dev-dependencies]
[build-system]
requires = ["poetry-core>=1.0.0"]

View file

@ -1,24 +0,0 @@
[metadata]
name = tiny-search-engine-daoudc
version = 0.0.1
author = Daoud Clarke
author_email = daoud.clarke@gmail.com
description = Tiny Search Engine
long_description = file: README.md
long_description_content_type = text/markdown
# url = https://github.com/pypa/sampleproject
# project_urls =
# Bug Tracker = https://github.com/pypa/sampleproject/issues
# classifiers =
# Programming Language :: Python :: 3
# License :: OSI Approved :: MIT License
# Operating System :: OS Independent
[options]
package_dir =
= src
packages = find:
python_requires = >=3.9
[options.packages.find]
where = src

View file

@ -63,16 +63,13 @@ def create(tiny_index: TinyIndex):
last_match_char = match.span()[1]
seen_matches.add(value)
# num_words = len(re.findall(r'\b\w+\b', result_string))
total_possible_match_length = sum(len(x) for x in terms)
score = (match_length + 1./last_match_char) / (total_possible_match_length + 1)
# print("Score result", match_length, last_match_char, score, result.title)
return score
def order_results(terms: list[str], results: list[Document]):
    """Rank ``results`` against ``terms`` and drop the weak matches.

    Every result is scored with ``score_result``. The (score, result) pairs
    are sorted by descending score, and only the results whose score is
    strictly greater than ``SCORE_THRESHOLD`` are returned, best first.
    """
    scored = [(score_result(terms, candidate), candidate) for candidate in results]
    # Sort on the score alone so that results themselves are never compared.
    scored.sort(key=itemgetter(0), reverse=True)
    return [candidate for score, candidate in scored if score > SCORE_THRESHOLD]
@ -82,21 +79,15 @@ def create(tiny_index: TinyIndex):
results = [item.title.replace("\n", "") + '' +
item.url.replace("\n", "") for item in ordered_results]
if len(results) == 0:
# print("No results")
return []
# print("Results", results)
return [q, results]
# TODO: why is the 'leek and potato soup' result not returned for the query 'potato soup'?
def get_results(q):
terms = [x.lower() for x in q.replace('.', ' ').split()]
# completed = complete_term(terms[-1])
# terms = terms[:-1] + [completed]
pages = []
seen_items = set()
for term in terms:
items = tiny_index.retrieve(term)
print("Items", items)
if items is not None:
for item in items:
if term in item.title.lower() or term in item.extract.lower():