Remove debug print code
This commit is contained in:
parent
794af00bfb
commit
8cfb8b7a44
3 changed files with 10 additions and 43 deletions
|
@ -6,6 +6,16 @@ authors = ["Daoud Clarke <daoud.clarke@gmail.com>"]
|
|||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.9"
|
||||
pandas = "^1.3.4"
|
||||
zstandard = "^0.16.0"
|
||||
mmh3 = "^3.0.0"
|
||||
fastapi = "^0.70.1"
|
||||
uvicorn = "^0.16.0"
|
||||
|
||||
# [tool.poetry.dependencies.en_core_web_sm]
|
||||
# url= "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.2.0/en_core_web_sm-3.2.0-py3-none-any.whl"
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
# botocore = "^1.23.20"
|
||||
# boto3 = "^1.20.20"
|
||||
# ujson = "^4.3.0"
|
||||
|
@ -14,20 +24,10 @@ python = "^3.9"
|
|||
# beautifulsoup4 = "^4.10.0"
|
||||
# lxml = "^4.6.4"
|
||||
# jusText = "^3.0.0"
|
||||
pandas = "^1.3.4"
|
||||
# pyspark = "^3.2.0"
|
||||
# langdetect = "^1.0.9"
|
||||
zstandard = "^0.16.0"
|
||||
# spacy = "^3.2.1"
|
||||
mmh3 = "^3.0.0"
|
||||
fastapi = "^0.70.1"
|
||||
# Levenshtein = "^0.16.0"
|
||||
uvicorn = "^0.16.0"
|
||||
|
||||
# [tool.poetry.dependencies.en_core_web_sm]
|
||||
# url= "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.2.0/en_core_web_sm-3.2.0-py3-none-any.whl"
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core>=1.0.0"]
|
||||
|
|
setup.cfg (deleted, 24 lines removed)
|
@ -1,24 +0,0 @@
|
|||
[metadata]
|
||||
name = tiny-search-engine-daoudc
|
||||
version = 0.0.1
|
||||
author = Daoud Clarke
|
||||
author_email = daoud.clarke@gmail.com
|
||||
description = Tiny Search Engine
|
||||
long_description = file: README.md
|
||||
long_description_content_type = text/markdown
|
||||
# url = https://github.com/pypa/sampleproject
|
||||
# project_urls =
|
||||
# Bug Tracker = https://github.com/pypa/sampleproject/issues
|
||||
# classifiers =
|
||||
# Programming Language :: Python :: 3
|
||||
# License :: OSI Approved :: MIT License
|
||||
# Operating System :: OS Independent
|
||||
|
||||
[options]
|
||||
package_dir =
|
||||
= src
|
||||
packages = find:
|
||||
python_requires = >=3.9
|
||||
|
||||
[options.packages.find]
|
||||
where = src
|
|
@ -63,16 +63,13 @@ def create(tiny_index: TinyIndex):
|
|||
last_match_char = match.span()[1]
|
||||
seen_matches.add(value)
|
||||
|
||||
# num_words = len(re.findall(r'\b\w+\b', result_string))
|
||||
total_possible_match_length = sum(len(x) for x in terms)
|
||||
score = (match_length + 1./last_match_char) / (total_possible_match_length + 1)
|
||||
# print("Score result", match_length, last_match_char, score, result.title)
|
||||
return score
|
||||
|
||||
def order_results(terms: list[str], results: list[Document]):
|
||||
results_and_scores = [(score_result(terms, result), result) for result in results]
|
||||
ordered_results = sorted(results_and_scores, key=itemgetter(0), reverse=True)
|
||||
# print("Ordered results", ordered_results)
|
||||
filtered_results = [result for score, result in ordered_results if score > SCORE_THRESHOLD]
|
||||
return filtered_results
|
||||
|
||||
|
@ -82,21 +79,15 @@ def create(tiny_index: TinyIndex):
|
|||
results = [item.title.replace("\n", "") + ' — ' +
|
||||
item.url.replace("\n", "") for item in ordered_results]
|
||||
if len(results) == 0:
|
||||
# print("No results")
|
||||
return []
|
||||
# print("Results", results)
|
||||
return [q, results]
|
||||
|
||||
# TODO: why does 'leek and potato soup' result not get returned for 'potato soup' query?
|
||||
def get_results(q):
|
||||
terms = [x.lower() for x in q.replace('.', ' ').split()]
|
||||
# completed = complete_term(terms[-1])
|
||||
# terms = terms[:-1] + [completed]
|
||||
pages = []
|
||||
seen_items = set()
|
||||
for term in terms:
|
||||
items = tiny_index.retrieve(term)
|
||||
print("Items", items)
|
||||
if items is not None:
|
||||
for item in items:
|
||||
if term in item.title.lower() or term in item.extract.lower():
|
||||
|
|
Loading…
Reference in a new issue