diff --git a/app.py b/app.py index 0986fae..909030f 100644 --- a/app.py +++ b/app.py @@ -1,3 +1,5 @@ +import uvicorn + import create_app from index import TinyIndex, PAGE_SIZE, NUM_PAGES, Document @@ -5,3 +7,7 @@ from paths import INDEX_PATH tiny_index = TinyIndex(Document, INDEX_PATH, NUM_PAGES, PAGE_SIZE) app = create_app.create(tiny_index) + + +if __name__ == "__main__": + uvicorn.run("app:app", host="127.0.0.1", port=8000, log_level="info") diff --git a/create_app.py b/create_app.py index 625228b..1aafff8 100644 --- a/create_app.py +++ b/create_app.py @@ -1,10 +1,8 @@ -import sqlite3 -from functools import lru_cache from typing import List import Levenshtein from fastapi import FastAPI -from starlette.responses import RedirectResponse, FileResponse +from starlette.responses import RedirectResponse, FileResponse, HTMLResponse from starlette.staticfiles import StaticFiles from index import TinyIndex, Document @@ -15,11 +13,17 @@ def create(tiny_index: TinyIndex): @app.get("/search") def search(s: str): - if '—' in s: - url = s.split('—')[1].strip() - else: - url = f'https://www.google.com/search?q={s}' - return RedirectResponse(url) + results = get_results(s) + doc = "" + for result in results: + doc += f'
\n' + return HTMLResponse(doc) + + # if '—' in s: + # url = s.split('—')[1].strip() + # else: + # url = f'https://www.google.com/search?q={s}' + # return RedirectResponse(url) def order_results(query, results: List[Document]): ordered_results = sorted(results, key=lambda result: Levenshtein.distance(query, result.title)) @@ -28,18 +32,7 @@ def create(tiny_index: TinyIndex): @app.get("/complete") def complete(q: str): - terms = [x.lower() for x in q.replace('.', ' ').split()] - - # completed = complete_term(terms[-1]) - # terms = terms[:-1] + [completed] - - pages = [] - for term in terms: - items = tiny_index.retrieve(term) - if items is not None: - pages += [item for item in items if term in item.title.lower()] - - ordered_results = order_results(q, pages) + ordered_results = get_results(q) results = [item.title.replace("\n", "") + ' — ' + item.url.replace("\n", "") for item in ordered_results] if len(results) == 0: @@ -48,6 +41,18 @@ def create(tiny_index: TinyIndex): # print("Results", results) return [q, results] + def get_results(q): + terms = [x.lower() for x in q.replace('.', ' ').split()] + # completed = complete_term(terms[-1]) + # terms = terms[:-1] + [completed] + pages = [] + for term in terms: + items = tiny_index.retrieve(term) + if items is not None: + pages += [item for item in items if term in item.title.lower()] + ordered_results = order_results(q, pages) + return ordered_results + @app.get('/') def index(): return FileResponse('static/index.html') diff --git a/poetry.lock b/poetry.lock index b7b2398..afbf06c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,3 +1,31 @@ +[[package]] +name = "anyio" +version = "3.4.0" +description = "High level compatibility layer for multiple asynchronous event loop implementations" +category = "main" +optional = false +python-versions = ">=3.6.2" + +[package.dependencies] +idna = ">=2.8" +sniffio = ">=1.1" + +[package.extras] +doc = ["sphinx-rtd-theme", "sphinx-autodoc-typehints (>=1.2.0)"] +test = ["coverage[toml] (>=4.5)", "hypothesis (>=4.0)", "pytest (>=6.0)", "pytest-mock (>=3.6.1)", "trustme", "contextlib2", "uvloop (<0.15)", "mock (>=4)", "uvloop (>=0.15)"] +trio = ["trio (>=0.16)"] + +[[package]] +name = "asgiref" +version = "3.4.1" +description = "ASGI specs, helper code, and adapters" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.extras] +tests = ["pytest", "pytest-asyncio", "mypy (>=0.800)"] + [[package]] name = "beautifulsoup4" version = "4.10.0" @@ -136,6 +164,32 @@ spacy = ">=3.2.0,<3.3.0" type = "url" url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.2.0/en_core_web_sm-3.2.0-py3-none-any.whl" +[[package]] +name = "fastapi" +version = "0.70.1" +description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" +category = "main" +optional = false +python-versions = ">=3.6.1" + +[package.dependencies] +pydantic = ">=1.6.2,<1.7 || >1.7,<1.7.1 || >1.7.1,<1.7.2 || >1.7.2,<1.7.3 || >1.7.3,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0" +starlette = "0.16.0" + +[package.extras] +all = ["requests (>=2.24.0,<3.0.0)", "jinja2 (>=2.11.2,<4.0.0)", "python-multipart (>=0.0.5,<0.0.6)", "itsdangerous (>=1.1.0,<3.0.0)", "pyyaml (>=5.3.1,<6.0.0)", "ujson (>=4.0.1,<5.0.0)", "orjson (>=3.2.1,<4.0.0)", "email_validator (>=1.1.1,<2.0.0)", "uvicorn[standard] (>=0.12.0,<0.16.0)"] +dev = ["python-jose[cryptography] (>=3.3.0,<4.0.0)", "passlib[bcrypt] (>=1.7.2,<2.0.0)", "autoflake (>=1.4.0,<2.0.0)", "flake8 (>=3.8.3,<4.0.0)", "uvicorn[standard] (>=0.12.0,<0.16.0)"] +doc = ["mkdocs (>=1.1.2,<2.0.0)", "mkdocs-material (>=7.1.9,<8.0.0)", "mdx-include (>=1.4.1,<2.0.0)", "mkdocs-markdownextradata-plugin (>=0.1.7,<0.3.0)", "typer-cli (>=0.0.12,<0.0.13)", "pyyaml (>=5.3.1,<6.0.0)"] +test = ["pytest (>=6.2.4,<7.0.0)", "pytest-cov (>=2.12.0,<4.0.0)", "mypy (==0.910)", "flake8 (>=3.8.3,<4.0.0)", "black (==21.9b0)", "isort (>=5.0.6,<6.0.0)", "requests (>=2.24.0,<3.0.0)", "httpx (>=0.14.0,<0.19.0)", "email_validator (>=1.1.1,<2.0.0)", "sqlalchemy (>=1.3.18,<1.5.0)", "peewee (>=3.13.3,<4.0.0)", "databases[sqlite] (>=0.3.2,<0.6.0)", "orjson (>=3.2.1,<4.0.0)", "ujson (>=4.0.1,<5.0.0)", "python-multipart (>=0.0.5,<0.0.6)", "flask (>=1.1.2,<3.0.0)", "anyio[trio] (>=3.2.1,<4.0.0)", "types-ujson (==0.1.1)", "types-orjson (==3.6.0)", "types-dataclasses (==0.1.7)"] + +[[package]] +name = "h11" +version = "0.12.0" +description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +category = "main" +optional = false +python-versions = ">=3.6" + [[package]] name = "idna" version = "3.3" @@ -199,6 +253,17 @@ python-versions = "*" [package.dependencies] six = "*" +[[package]] +name = "levenshtein" +version = "0.16.0" +description = "Python extension for computing string edit distances and similarities." +category = "main" +optional = false +python-versions = ">=3.5" + +[package.dependencies] +rapidfuzz = ">=1.8.2,<1.9" + [[package]] name = "lxml" version = "4.6.4" @@ -385,6 +450,17 @@ category = "main" optional = false python-versions = "*" +[[package]] +name = "rapidfuzz" +version = "1.8.3" +description = "rapid fuzzy string matching" +category = "main" +optional = false +python-versions = ">=2.7" + +[package.extras] +full = ["numpy"] + [[package]] name = "requests" version = "2.26.0" @@ -442,6 +518,14 @@ s3 = ["boto3"] test = ["boto3", "google-cloud-storage", "azure-storage-blob", "azure-common", "azure-core", "requests", "moto[server] (==1.3.14)", "pathlib2", "responses", "paramiko", "parameterizedtestcase", "pytest", "pytest-rerunfailures"] webhdfs = ["requests"] +[[package]] +name = "sniffio" +version = "1.2.0" +description = "Sniff out which async library your code is running under" +category = "main" +optional = false +python-versions = ">=3.5" + [[package]] name = "soupsieve" version = "2.3.1" @@ -531,6 +615,20 @@ python-versions = ">=3.6" [package.dependencies] catalogue = ">=2.0.3,<2.1.0" +[[package]] +name = "starlette" +version = "0.16.0" +description = "The little ASGI library that shines." +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +anyio = ">=3.0.0,<4" + +[package.extras] +full = ["itsdangerous", "jinja2", "python-multipart", "pyyaml", "requests", "graphene"] + [[package]] name = "thinc" version = "8.0.13" @@ -631,6 +729,22 @@ brotli = ["brotlipy (>=0.6.0)"] secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] +[[package]] +name = "uvicorn" +version = "0.16.0" +description = "The lightning-fast ASGI server." +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +asgiref = ">=3.4.0" +click = ">=7.0" +h11 = ">=0.8" + +[package.extras] +standard = ["httptools (>=0.2.0,<0.4.0)", "watchgod (>=0.6)", "python-dotenv (>=0.13)", "PyYAML (>=5.1)", "websockets (>=9.1)", "websockets (>=10.0)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "colorama (>=0.4)"] + [[package]] name = "warcio" version = "1.7.4" @@ -667,9 +781,17 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "1.1" python-versions = "^3.9" -content-hash = "8e573b5968296b81e95cfe0308ad10a5a5e2f80e2a9020a2478d61ae751c4d0c" +content-hash = "0c25a6a61ebd393d094509d22979d4a08438fa4712d18f39801c6bca91895fc6" [metadata.files] +anyio = [ + {file = "anyio-3.4.0-py3-none-any.whl", hash = "sha256:2855a9423524abcdd652d942f8932fda1735210f77a6b392eafd9ff34d3fe020"}, + {file = "anyio-3.4.0.tar.gz", hash = "sha256:24adc69309fb5779bc1e06158e143e0b6d2c56b302a3ac3de3083c705a6ed39d"}, +] +asgiref = [ + {file = "asgiref-3.4.1-py3-none-any.whl", hash = "sha256:ffc141aa908e6f175673e7b1b3b7af4fdb0ecb738fc5c8b88f69f055c2415214"}, + {file = "asgiref-3.4.1.tar.gz", hash = "sha256:4ef1ab46b484e3c706329cedeff284a5d40824200638503f5768edb6de7d58e9"}, +] beautifulsoup4 = [ {file = "beautifulsoup4-4.10.0-py3-none-any.whl", hash = "sha256:9a315ce70049920ea4572a4055bc4bd700c940521d36fc858205ad4fcde149bf"}, {file = "beautifulsoup4-4.10.0.tar.gz", hash = "sha256:c23ad23c521d818955a4151a67d81580319d4bf548d3d49f4223ae041ff98891"}, @@ -791,6 +913,14 @@ cymem = [ {file = "cymem-2.0.6.tar.gz", hash = "sha256:169725b5816959d34de2545b33fee6a8021a6e08818794a426c5a4f981f17e5e"}, ] en-core-web-sm = [] +fastapi = [ + {file = "fastapi-0.70.1-py3-none-any.whl", hash = "sha256:5367226c7bcd7bfb2e17edaf225fd9a983095b1372281e9a3eb661336fb93748"}, + {file = "fastapi-0.70.1.tar.gz", hash = "sha256:21d03979b5336375c66fa5d1f3126c6beca650d5d2166fbb78345a30d33c8d06"}, +] +h11 = [ + {file = "h11-0.12.0-py3-none-any.whl", hash = "sha256:36a3cb8c0a032f56e2da7084577878a035d3b61d104230d4bd49c0c6b555a9c6"}, + {file = "h11-0.12.0.tar.gz", hash = "sha256:47222cb6067e4a307d535814917cd98fd0a57b6788ce715755fa2b6c28b56042"}, +] idna = [ {file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"}, {file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"}, @@ -815,6 +945,60 @@ langdetect = [ {file = "langdetect-1.0.9-py2-none-any.whl", hash = "sha256:7cbc0746252f19e76f77c0b1690aadf01963be835ef0cd4b56dddf2a8f1dfc2a"}, {file = "langdetect-1.0.9.tar.gz", hash = "sha256:cbc1fef89f8d062739774bd51eda3da3274006b3661d199c2655f6b3f6d605a0"}, ] +levenshtein = [ + {file = "Levenshtein-0.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:02a977be78bc1ab6e58ba594e98ef8b5c27b7f301f3ac408cb12bcf23cc67fec"}, + {file = "Levenshtein-0.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:485cd2b940ae740c65b8b3964600f3d4bd64e9362fd01a90ee8105c7348595a1"}, + {file = "Levenshtein-0.16.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0780aec6f770e25cf280393e2f003dcca9d6e72487cdd3501e8d84957a429b6d"}, + {file = "Levenshtein-0.16.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:300336f879a15a635adb6c64626bdcbfb8b4167abd0f9664ebd124eace2890e8"}, + {file = "Levenshtein-0.16.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:cb3d3fd721541ca561a58cdc3a8fc7b548e6d7f9c076bc7dc5f9881a7c5625c0"}, + {file = "Levenshtein-0.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e67a54e3cae2a9a8258bedb7fa7fde75ad5f58546820f3cf03f9546e8553847d"}, + {file = "Levenshtein-0.16.0-cp310-cp310-win32.whl", hash = "sha256:f5e1fbbae1e2744dd94471193a7adf1713bb75281de880d2a5caf06b954529a0"}, + {file = "Levenshtein-0.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:1c8c10e67b1e14636a21b94e5e8412b02684f9cda6e33d670be87730b256e091"}, + {file = "Levenshtein-0.16.0-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:a9d0f5f61d4cb55351aa92d2815b985e4f8fbdae257a08352e43d81ce68416be"}, + {file = "Levenshtein-0.16.0-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:b3130721ccf4817384f22ea335605dc49d65ee7c78d7dd33dc09fe55d3212f35"}, + {file = "Levenshtein-0.16.0-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:1cfdf50304b3d8454ce0bb662e3c7810997d599fc5f232ed3ad4effd4a3505b5"}, + {file = "Levenshtein-0.16.0-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:1f3ef34555a3f5fe92ad12073a241d6b921a44f03e2a50ee733926df56582a55"}, + {file = "Levenshtein-0.16.0-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:89d5e91722ca1efa0fa7fc5483d264df09237d472bda0a782667211bf9fdc5cd"}, + {file = "Levenshtein-0.16.0-cp35-cp35m-win32.whl", hash = "sha256:fdd09d21a5a4793c320a852a99e4484b716cf54beaf622004f9ebdbed673c591"}, + {file = "Levenshtein-0.16.0-cp35-cp35m-win_amd64.whl", hash = "sha256:9901ea03c7aec5ca497a16c91a4aa8cff4c5dd72eac9cf457e66cde307a3e91e"}, + {file = "Levenshtein-0.16.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:d8cac37456ef4d1fa23b83958bfed2bf09f9228e62fa06ec24bb7f76da129e95"}, + {file = "Levenshtein-0.16.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cc997fd24cd67572d8ed719f306896477cafb0fd2d13ac3ff9d3cffa3be9803"}, + {file = "Levenshtein-0.16.0-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2fff380979dc2e7bda5f8a43b76263bdfb289371b38099b5927be0bace1b1fda"}, + {file = "Levenshtein-0.16.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc3b2ee8c7c33e23d18574ba95c2b7448e08c8ef4b6675a27cf9d1077fc4eda5"}, + {file = "Levenshtein-0.16.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:d54286dad923ab2b3954794c9cb6a10e123ad67597a86d4d383fe753e8fee314"}, + {file = "Levenshtein-0.16.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b651c2336eab31dee2f2f422ea619e5d3b452ba4fe6b10aeea205935b02fb51b"}, + {file = "Levenshtein-0.16.0-cp36-cp36m-win32.whl", hash = "sha256:e79e0de22c1497f9de874394bb1c0b59bcbfd18a873e0de94188718ef8399155"}, + {file = "Levenshtein-0.16.0-cp36-cp36m-win_amd64.whl", hash = "sha256:e5beda373da7405468b66eccf58206767ce4c241cebb55b21a2c3261436c6646"}, + {file = "Levenshtein-0.16.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:bac8762ccaa5cdb062310b49b21ec912beaf8968a4da3fd561cc3baa227754a8"}, + {file = "Levenshtein-0.16.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e7dac968360ec2ac9c903d249c67628f1aa66c6db20bd7ac9eb6fa820eada94"}, + {file = "Levenshtein-0.16.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6fff6bbe6b0333572e3a46dd0eb36e02b487605af143bb3946033b1d28d6c4d6"}, + {file = "Levenshtein-0.16.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4043399b08c0c334874c2e3d394ff282cabeb6c70e6ba76a0b2ba63b2c876dc8"}, + {file = "Levenshtein-0.16.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:022a4b5f7c8384beeaa693788c00a43e42ce5ee5ed7d41ec7d6248ce50b73830"}, + {file = "Levenshtein-0.16.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:36a664c6cc598ae3069f96cbfbc1ae2c530e86ccf5044d5de6178be21f1d5783"}, + {file = "Levenshtein-0.16.0-cp37-cp37m-win32.whl", hash = "sha256:3a4657a304397e90845953d6afb001b7357e426b2bd8c2006f59da2d590a96b5"}, + {file = "Levenshtein-0.16.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d0460d0f6de5f748f8344f03db2f333be96c5ff366f3ab3e2df9e63baa6916a4"}, + {file = "Levenshtein-0.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9b49f7723bd65caad02e42c0530125700463cd3d3ae91557e7afc4f6c4edeee4"}, + {file = "Levenshtein-0.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f40ea2504e41eaa4b94c35c91bef3f4579c9cfcf5de8a69126a6d9da899245c0"}, + {file = "Levenshtein-0.16.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:47faac65d16b24792d30137f2ec82ecf01d788da9ae60918ae6743ef94f7e2a9"}, + {file = "Levenshtein-0.16.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8dca8e805a24c08a98333cc5445eb78e00a9f093a0ce7d87c34b1ec5cbf0e14c"}, + {file = "Levenshtein-0.16.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:a04e991b4a05c7d64939686554e6825e01478603505784ccb684b2f78c5ca26d"}, + {file = "Levenshtein-0.16.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:fd3cd4d1fd751a8d862bbdbabfee0c5214c5ba32b6188458df7f73513da2718a"}, + {file = "Levenshtein-0.16.0-cp38-cp38-win32.whl", hash = "sha256:dedac87e0942276d73168b5ff7df142b00097301b7865d9b185a6ac152fdf15d"}, + {file = "Levenshtein-0.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:56700e5c3a1290ab2105bf95be994d2c56c9fd09cdf81e241a4470c4dc64a1c4"}, + {file = "Levenshtein-0.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5cf1bf0408e20020ccc134c1125f740814d37d14e3a59359c890a6665ac79e29"}, + {file = "Levenshtein-0.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b348844b191c6974cd61744223cb637c99d9854f2e9659b3b214134c6125915"}, + {file = "Levenshtein-0.16.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6fe0a68f117961c59bbe970f6000a528e755c7a945c46c21ab17e643fd25d502"}, + {file = "Levenshtein-0.16.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c706b1e89302f4292b37685da66fb19b9b69dbb8f1c786228a554317821bde93"}, + {file = "Levenshtein-0.16.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:495648499b47dbc963e619126e39026163b48fd2760b0dc9102bb8126c936cfd"}, + {file = "Levenshtein-0.16.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:970f0e9b9ddda30b9581ae8cc203b3ecfaac59c8e24ba2efa998601e7a970936"}, + {file = "Levenshtein-0.16.0-cp39-cp39-win32.whl", hash = "sha256:e4befca089b463b46403523006fc1edc79ec5c594dd104ebb26b4607fd796557"}, + {file = "Levenshtein-0.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:a05a34de68e67669e4803992441a7f2b59a38f432c6c4c8309be59949a57c874"}, + {file = "Levenshtein-0.16.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a37e56c1018dbdae18a9a32d2b333eaa8b8ebbf2bb2bf28ab5a20fdca828c501"}, + {file = "Levenshtein-0.16.0-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:a1d5e7a9ec327588046d18788a453b8a42f9bab64345750516c394a8cd62f67c"}, + {file = "Levenshtein-0.16.0-pp37-pypy37_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:558b10e0fd1bc81571ea2c131d31e7df2d2b1997326770067228a84e32ef0f9a"}, + {file = "Levenshtein-0.16.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:a5181a65a838f434d65bf3e9368eb82f688db15e28686742b55a8b22bcae2267"}, + {file = "Levenshtein-0.16.0.tar.gz", hash = "sha256:bb38dc5fc67bbe31574b64ea55c4b44d549340024601887d5302d5a6723f205a"}, +] lxml = [ {file = "lxml-4.6.4-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:bbf2dc330bd44bfc0254ab37677ec60f7c7ecea55ad8ba1b8b2ea7bf20c265f5"}, {file = "lxml-4.6.4-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b667c51682fe9b9788c69465956baa8b6999531876ccedcafc895c74ad716cd8"}, @@ -1124,6 +1308,62 @@ pytz = [ {file = "pytz-2021.3-py2.py3-none-any.whl", hash = "sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c"}, {file = "pytz-2021.3.tar.gz", hash = "sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326"}, ] +rapidfuzz = [ + {file = "rapidfuzz-1.8.3-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:0aa566e46bf1bf8e98e7a009fb0119c6601aece029af2e9566cfdf7662526c20"}, + {file = "rapidfuzz-1.8.3-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:6854b2399fa39dbf480a55fe359e1012590b29e683035645dd8d56c8d367ca9b"}, + {file = "rapidfuzz-1.8.3-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:7f79d77e2d20d2042c7fa00c07e979e28d684d875e5a523a51c06e8b1a2f579c"}, + {file = "rapidfuzz-1.8.3-cp27-cp27m-win32.whl", hash = "sha256:b896fc68897611354d78285262e475e387f539cef85d11983c0c06c7aa0ac20c"}, + {file = "rapidfuzz-1.8.3-cp27-cp27m-win_amd64.whl", hash = "sha256:39ec5cec3f9054a1176906972b4d900b5ed314d25dab709156d1e9b7f957de11"}, + {file = "rapidfuzz-1.8.3-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:c69e0bbbfb6e4add79fe6919dea7e6936401c7708ed76280223a954dfb8a3277"}, + {file = "rapidfuzz-1.8.3-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:1e0c30a631fac14469d18d19190ef8b53d97a95aceecb0ffa103d13a76d7bbac"}, + {file = "rapidfuzz-1.8.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3d15cb1176d77962ef9af567aa3d33459930f290a0bf06355ac7b6d3bfb001aa"}, + {file = "rapidfuzz-1.8.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e77f537bc28af69de0066e09191be746600f3b51c1d1c820b3e82c9e1b0152bd"}, + {file = "rapidfuzz-1.8.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8fa4ef5f82762274558a7afe2037b016aee2c81b3d5d2c749a25771875013091"}, + {file = "rapidfuzz-1.8.3-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:4a83917d28f23d87f6ad1c6c201ff8385bd5dfd37d5da9c4cb5967e9e3a431da"}, + {file = "rapidfuzz-1.8.3-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:8a477f5d75aef642e14f0051fe5e7315730dff4df4f6c02e2ddb046d3ba94791"}, + {file = "rapidfuzz-1.8.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e3c7d39c97414952ce687db2ef7966612511d23561222c04cb226e9871d0cdb"}, + {file = "rapidfuzz-1.8.3-cp310-cp310-win32.whl", hash = "sha256:0ed81b389274736675a7815b8f65b0492be65548cf03b5cc81687c66188ff9dd"}, + {file = "rapidfuzz-1.8.3-cp310-cp310-win_amd64.whl", hash = "sha256:ced1bf333f228c4fd31db8d55185366b090755c5c634c51afadf3c4a079fe1fc"}, + {file = "rapidfuzz-1.8.3-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:50c25c058616b9c3a3b5814db1560e9ecbdeec3d987e51b641dc3bc261c55bbc"}, + {file = "rapidfuzz-1.8.3-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:87056d8106cb3f118b5fcc4a7c8ab77e40dcf7e5b5904a83a344d8a916feefd4"}, + {file = "rapidfuzz-1.8.3-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:0a255c6bd346dbb3c0fc96daf5ed10473a2568365ab76de7d779732d3a304af2"}, + {file = "rapidfuzz-1.8.3-cp35-cp35m-win32.whl", hash = "sha256:60168de30ea1280884a2ebf83ff028966c670b0c56840095939b987e3a372aaa"}, + {file = "rapidfuzz-1.8.3-cp35-cp35m-win_amd64.whl", hash = "sha256:d7f9cd0836689a6a928c79005108475c9e95cf9ea3ec850b54017f49a3cc961d"}, + {file = "rapidfuzz-1.8.3-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:31082c7cf4ba405c054d149cb04e32f68cfb13c736d09354dab81aa60d553194"}, + {file = "rapidfuzz-1.8.3-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:ee1149e2038e6ea6065a439b14e2f7a6939d3bc9fb19fa9d4e32161f678ca555"}, + {file = "rapidfuzz-1.8.3-cp36-cp36m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:205cf9193aa1cc6c368e1a744a35e205f152ca2f63f516802ed9322764ece04a"}, + {file = "rapidfuzz-1.8.3-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2985f9694f2732a968f8af8cb7e4ab0325a7d80d9e8fd29f3b2b4621da6ccef8"}, + {file = "rapidfuzz-1.8.3-cp36-cp36m-win32.whl", hash = "sha256:4bf7a88deade25cb91eff36f79e40b174b6dc1fdb467e50a3aca65ab8a951431"}, + {file = "rapidfuzz-1.8.3-cp36-cp36m-win_amd64.whl", hash = "sha256:8a443341fbc171df6eed302fcf1adf4975045565988edeaee4302636c0a7e6c1"}, + {file = "rapidfuzz-1.8.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:70efbce4e2c80f091ae5f7040c6afe4f6e04836a2b0d27ab554fd6fb56b46ed5"}, + {file = "rapidfuzz-1.8.3-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:5954565dbc0a376971c4b8a65f698d8f12226b9e275ce1bef7874c2fc5a3a433"}, + {file = "rapidfuzz-1.8.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a0b820f3279253d9deae6bed82c699d43903a2676208ac4d849f54a00919c473"}, + {file = "rapidfuzz-1.8.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b3435b497bc78e548977a671b91f9655c20045dbbeee6ca4ce5219cac1411682"}, + {file = "rapidfuzz-1.8.3-cp37-cp37m-win32.whl", hash = "sha256:2fe0e9272e35a1f98fdbeef16f2e969e29a9226f187f540febfc064d82878668"}, + {file = "rapidfuzz-1.8.3-cp37-cp37m-win_amd64.whl", hash = "sha256:1df3455ffed5cdcc28b6e2b53dfc3ec068b298dceb3782e2e654f50ab16b2e34"}, + {file = "rapidfuzz-1.8.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:d856c8ab95df935636108868e0579a1d78f68222d79fd35853e6d8ba54ced617"}, + {file = "rapidfuzz-1.8.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3fcab917846c1c28fec36e8cd22c1a072cfb5ce5a297c6bda2017c01e309a892"}, + {file = "rapidfuzz-1.8.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6571adf845b4e464a3b748de0b1cdd4acc66c01a0e9fd51e5d43cbf0d4a85524"}, + {file = "rapidfuzz-1.8.3-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:05d21cb420848838d6c2c2816181325ab1ae3109bfbe45df863635ef8f159714"}, + {file = "rapidfuzz-1.8.3-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:dfc18bb38085e1b4a4dd2fe99b17770dcbf286408510477ff542fbdd0ffbe017"}, + {file = "rapidfuzz-1.8.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:496ac913d5917838d92965873b3d9540be44619a9693123f6fb0d1074f1c63b5"}, + {file = "rapidfuzz-1.8.3-cp38-cp38-win32.whl", hash = "sha256:e0013c270b8c097a90b92b6a4664e410cfb2195b2573431b651634a28c13ee6f"}, + {file = "rapidfuzz-1.8.3-cp38-cp38-win_amd64.whl", hash = "sha256:124cfe1a3cfd0fa5069873aaa8933df50d9c0a1a0db126739aa3a129e09024da"}, + {file = "rapidfuzz-1.8.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:a08453c1f5a6b25e4cc61b99e0601adbb1daed3a360b1270abf24625d83d52f0"}, + {file = "rapidfuzz-1.8.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3d5a4befaa266fc75c5d9bd414029dc89a19ad0ad475ac527f5505119647a914"}, + {file = "rapidfuzz-1.8.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8da386372e1bf7579c7a32c28a263bc417b14fbc66c6c1df76baf30d6efa98ae"}, + {file = "rapidfuzz-1.8.3-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:29a4ea3a779dd1c8fafdff241f3737c079d7905a1c33beab306e2179bb9bd6ff"}, + {file = "rapidfuzz-1.8.3-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:0f9310f3d651aa50d4cb023de727bf3f8a96a76082ca3478a01d7a63109e3fd3"}, + {file = "rapidfuzz-1.8.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:915ed93f12f551b0670d8f0d5949c660e533046e9efdfb49016de6c2ddca793c"}, + {file = "rapidfuzz-1.8.3-cp39-cp39-win32.whl", hash = "sha256:55c1772561900bf08fc15efa359f971723785d8b42419c4ea18eacd001bad5fc"}, + {file = "rapidfuzz-1.8.3-cp39-cp39-win_amd64.whl", hash = "sha256:911fb926f0237b67b6f566c4e1b029dd38888675228ad9e1613b2f8deb94d8a3"}, + {file = "rapidfuzz-1.8.3-pp27-pypy_73-macosx_10_9_x86_64.whl", hash = "sha256:4fc3f4430ca680bc576a789914d029fa1f332cd5836ca954ef8e12b11fd48801"}, + {file = "rapidfuzz-1.8.3-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:5d45e00b29594e4a785f413869a43815bc29d977c940410255ea51adca61644d"}, + {file = "rapidfuzz-1.8.3-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:d11a69e5a33cbcb665d03f63f77d46bd2d4f4e8fc10f48e734d2880bba0b3ab7"}, + {file = "rapidfuzz-1.8.3-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:d9205db2beda1b83fbfaf968039fbbd05f1c278c6e13782c699ef1ad4d2c43af"}, + {file = "rapidfuzz-1.8.3-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:cee3f7daab7813314d61c6d81ba32fdd3c75f7cf6910cc630c76905195c4a2a4"}, + {file = "rapidfuzz-1.8.3.tar.gz", hash = "sha256:e85fa8110dc1271b7f193f225e5c6c63be81c3cf1a48648d01ed5d55955fbc4c"}, +] requests = [ {file = "requests-2.26.0-py2.py3-none-any.whl", hash = "sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24"}, {file = "requests-2.26.0.tar.gz", hash = "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"}, @@ -1140,6 +1380,10 @@ smart-open = [ {file = "smart_open-5.2.1-py3-none-any.whl", hash = "sha256:71d14489da58b60ce12fc3ecb823facc59a8b23cd1b58edb97175640350d3a62"}, {file = "smart_open-5.2.1.tar.gz", hash = "sha256:75abf758717a92a8f53aa96953f0c245c8cedf8e1e4184903db3659b419d4c17"}, ] +sniffio = [ + {file = "sniffio-1.2.0-py3-none-any.whl", hash = "sha256:471b71698eac1c2112a40ce2752bb2f4a4814c22a54a3eed3676bc0f5ca9f663"}, + {file = "sniffio-1.2.0.tar.gz", hash = "sha256:c4666eecec1d3f50960c6bdf61ab7bc350648da6c126e3cf6898d8cd4ddcd3de"}, +] soupsieve = [ {file = "soupsieve-2.3.1-py3-none-any.whl", hash = "sha256:1a3cca2617c6b38c0343ed661b1fa5de5637f257d4fe22bd9f1338010a1efefb"}, {file = "soupsieve-2.3.1.tar.gz", hash = "sha256:b8d49b1cd4f037c7082a9683dfa1801aa2597fb11c3a1155b7a5b94829b4f1f9"}, @@ -1188,6 +1432,10 @@ srsly = [ {file = "srsly-2.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:090072830cf2d5bd6765705a02463f586db8a586805d1c31a72080f971d311b5"}, {file = "srsly-2.4.2.tar.gz", hash = "sha256:2aba252292767875086adf4e4380e27b024d73655456f796f8e07eb3a4dfacc0"}, ] +starlette = [ + {file = "starlette-0.16.0-py3-none-any.whl", hash = "sha256:38eb24bf705a2c317e15868e384c1b8a12ca396e5a3c3a003db7e667c43f939f"}, + {file = "starlette-0.16.0.tar.gz", hash = "sha256:e1904b5d0007aee24bdd3c43994be9b3b729f4f58e740200de1d623f8c3a8870"}, +] thinc = [ {file = "thinc-8.0.13-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f818b9f012169a11beb3561c43dc52080588e50cf495733e492efab8b9b4135e"}, {file = "thinc-8.0.13-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f520daf45b7f42a04363852df43be1b423ae42d9327709d74f6c3279b3f73778"}, @@ -1268,6 +1516,10 @@ urllib3 = [ {file = "urllib3-1.26.7-py2.py3-none-any.whl", hash = "sha256:c4fdf4019605b6e5423637e01bc9fe4daef873709a7973e195ceba0a62bbc844"}, {file = "urllib3-1.26.7.tar.gz", hash = "sha256:4987c65554f7a2dbf30c18fd48778ef124af6fab771a377103da0585e2336ece"}, ] +uvicorn = [ + {file = "uvicorn-0.16.0-py3-none-any.whl", hash = "sha256:d8c839231f270adaa6d338d525e2652a0b4a5f4c2430b5c4ef6ae4d11776b0d2"}, + {file = "uvicorn-0.16.0.tar.gz", hash = "sha256:eacb66afa65e0648fcbce5e746b135d09722231ffffc61883d4fac2b62fbea8d"}, +] warcio = [ {file = "warcio-1.7.4-py2.py3-none-any.whl", hash = "sha256:ced1a162d76434d56abd81b37ac152821d1a11e1db835ead5d649f58068c2203"}, {file = "warcio-1.7.4.tar.gz", hash = "sha256:e1889dad9ecac654de5b0973247f335a55827b1b14a8203772d18c749143ea51"}, diff --git a/pyproject.toml b/pyproject.toml index 2aacbbf..746924a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,9 @@ langdetect = "^1.0.9" zstandard = "^0.16.0" spacy = "^3.2.1" mmh3 = "^3.0.0" +fastapi = "^0.70.1" +Levenshtein = "^0.16.0" +uvicorn = "^0.16.0" [tool.poetry.dependencies.en_core_web_sm] url= "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.2.0/en_core_web_sm-3.2.0-py3-none-any.whl"