added config and binary/entrypoint for mwmbl.tinysearchengine
- using pydantic to validate the config - added a default bootstrap config at config/tinysearchengine.yaml - refactored app.py to include parsing CLI argument using argparse - refactored app.py to use fewer global variables - added "mwmbl-tinysearchengine" binary/entrypoint in pyproject.toml - updated Dockerfile to work with these changes and added comments to it
This commit is contained in:
parent
da8797f5ef
commit
a72a08a7d9
7 changed files with 168 additions and 20 deletions
30
Dockerfile
30
Dockerfile
|
@ -13,22 +13,30 @@ ENV PIP_DEFAULT_TIMEOUT=100 \
|
|||
PIP_NO_CACHE_DIR=1 \
|
||||
POETRY_VERSION=1.1.12
|
||||
|
||||
# RUN apk add --no-cache gcc libffi-dev musl-dev postgresql-dev
|
||||
RUN pip install "poetry==$POETRY_VERSION"
|
||||
# Create a /venv directory & environment.
|
||||
# This directory will be copied into the final stage of docker build.
|
||||
RUN python -m venv /venv
|
||||
|
||||
COPY pyproject.toml poetry.lock ./
|
||||
RUN poetry export -f requirements.txt | /venv/bin/pip install -r /dev/stdin
|
||||
# Copy only the necessary files to build/install the python package
|
||||
COPY pyproject.toml poetry.lock /app/
|
||||
COPY mwmbl /app/mwmbl
|
||||
|
||||
COPY . .
|
||||
RUN poetry build && /venv/bin/pip install dist/*.whl
|
||||
# Working directory is /app
|
||||
# Use pip to install the mwmbl python package
|
||||
# PEP 518, PEP 517 and others have allowed for a standardized python packaging API, which allows
|
||||
# pip to be able to install poetry packages.
|
||||
RUN /venv/bin/pip install pip --upgrade && \
|
||||
/venv/bin/pip install .
|
||||
|
||||
FROM base as final
|
||||
|
||||
#RUN apk add --no-cache libffi libpq
|
||||
# Copy only the required /venv directory from the builder image that contains mwmbl and its dependencies
|
||||
COPY --from=builder /venv /venv
|
||||
COPY data /data
|
||||
#COPY docker-entrypoint.sh wsgi.py ./
|
||||
#CMD ["./docker-entrypoint.sh"]
|
||||
|
||||
CMD ["/venv/bin/python", "-m", "mwmbl.tinysearchengine.app", "/data/index.tinysearch"]
|
||||
# Working directory is /app
|
||||
# Copying data and config into /app so that relative (default) paths in the config work
|
||||
COPY data /app/data
|
||||
COPY config /app/config
|
||||
|
||||
# Using the mwmbl-tinysearchengine binary/entrypoint which comes packaged with mwmbl
|
||||
CMD ["/venv/bin/mwmbl-tinysearchengine", "--config", "config/tinysearchengine.yaml"]
|
||||
|
|
10
README.md
10
README.md
|
@ -115,9 +115,13 @@ author (email address is in the git commit history).
|
|||
Development
|
||||
===========
|
||||
|
||||
Using Docker:
|
||||
|
||||
### Using Docker
|
||||
1. Create a new folder called `data` in the root of the repository
|
||||
2. Download the [index file](https://storage.googleapis.com/mwmbl/index.tinysearch) and place it in the new data folder
|
||||
3. Run `$ docker build . -t mwmbl`
|
||||
4. Run `$ docker run -p 8080:8080 mwmbl`
|
||||
4. Run `$ docker run -p 8080:8080 mwmbl`
|
||||
|
||||
### Local Testing
|
||||
1. Create and activate a python (3.9) environment using any tool you like, e.g. poetry, venv, conda etc.
|
||||
2. Run `$ pip install .`
|
||||
3. Run `$ mwmbl-tinysearchengine --config config/tinysearchengine.yaml`
|
12
config/tinysearchengine.yaml
Normal file
12
config/tinysearchengine.yaml
Normal file
|
@ -0,0 +1,12 @@
|
|||
# Config for bootstrapping tinysearchengine.
|
||||
# Follows the schema/model defined by mwmbl.tinysearchengine.config.ConfigModel
|
||||
|
||||
server_config:
|
||||
host: "0.0.0.0"
|
||||
port: 8080
|
||||
log_level: "info"
|
||||
|
||||
index_config:
|
||||
index_path: data/index.tinysearch
|
||||
num_pages: 25600
|
||||
page_size: 4096
|
|
@ -1,17 +1,54 @@
|
|||
import logging
|
||||
import sys
|
||||
|
||||
from typing import Optional
|
||||
import argparse
|
||||
from fastapi import FastAPI
|
||||
import uvicorn
|
||||
|
||||
from mwmbl.tinysearchengine import create_app
|
||||
from mwmbl.tinysearchengine.indexer import TinyIndex, NUM_PAGES, PAGE_SIZE, Document
|
||||
from mwmbl.tinysearchengine.config import parse_config_file
|
||||
|
||||
logging.basicConfig()
|
||||
|
||||
app: Optional[FastAPI] = None
|
||||
|
||||
|
||||
def setup_args(argv=None):
    """Parse the command-line arguments for mwmbl-tinysearchengine.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default), argparse falls back to reading sys.argv[1:], which is
            what the CLI entrypoint relies on.

    Returns:
        The argparse namespace; its ``config`` attribute holds the path to
        tinysearchengine's yaml config (the option is required).
    """
    parser = argparse.ArgumentParser(description="mwmbl-tinysearchengine")
    parser.add_argument("--config", help="Path to tinysearchengine's yaml config.", required=True)
    return parser.parse_args(argv)
|
||||
|
||||
|
||||
def main():
    """Main entrypoint for tinysearchengine.

    * Parses CLI args
    * Parses and validates config
    * Initializes TinyIndex
    * Populates the global app (FastAPI) variable
    * Runs the uvicorn server on that app using the server config params
    """
    args = setup_args()
    config = parse_config_file(config_filename=args.config)

    # Initialize TinyIndex using index config params
    tiny_index = TinyIndex(
        item_factory=Document,
        **config.index_config.dict()
    )

    # Keep the module-level app populated for anything that imports it
    global app
    app = create_app.create(tiny_index)

    # Pass the app instance itself rather than the "module:attr" import
    # string. When this file is executed as __main__ (python -m ...), the
    # import string makes uvicorn import a second copy of the module in
    # which main() never ran and app is still None.
    uvicorn.run(
        app,
        **config.server_config.dict()
    )
|
||||
|
||||
index_path = sys.argv[1]
|
||||
tiny_index = TinyIndex(Document, index_path, NUM_PAGES, PAGE_SIZE)
|
||||
app = create_app.create(tiny_index)
|
||||
|
||||
if __name__ == "__main__":
|
||||
uvicorn.run("mwmbl.tinysearchengine.app:app", host="0.0.0.0", port=8080, log_level="info")
|
||||
main()
|
||||
|
|
39
mwmbl/tinysearchengine/config.py
Normal file
39
mwmbl/tinysearchengine/config.py
Normal file
|
@ -0,0 +1,39 @@
|
|||
import pathlib
|
||||
import yaml
|
||||
from pydantic import BaseModel, StrictInt, StrictStr, Field
|
||||
|
||||
|
||||
class ServerConfigModel(BaseModel):
    """Settings for the web server.

    mwmbl.tinysearchengine.app.main unpacks this model's dict as keyword
    arguments to uvicorn.run, so the field names must match uvicorn's
    parameter names.
    """
    # Interface to bind to; the default listens on all interfaces.
    host: StrictStr = "0.0.0.0"
    # TCP port to listen on.
    port: StrictInt = 8080
    # Log level name handed to uvicorn.
    log_level: StrictStr = "info"
|
||||
|
||||
|
||||
class IndexConfigModel(BaseModel):
    """Settings for the search index.

    mwmbl.tinysearchengine.app.main unpacks this model's dict as keyword
    arguments to TinyIndex, so the field names must match TinyIndex's
    parameter names.
    """
    # Path to the index file; the default is relative to the working directory.
    index_path: StrictStr = "data/index.tinysearch"
    # Passed to TinyIndex as num_pages.
    num_pages: StrictInt = 25600
    # Passed to TinyIndex as page_size.
    page_size: StrictInt = 4096
|
||||
|
||||
|
||||
class ConfigModel(BaseModel):
    """Top-level schema of the tinysearchengine yaml config.

    Each section is optional: when it is omitted, default_factory builds
    the section model with its default values.
    """
    # Web server section.
    server_config: ServerConfigModel = Field(default_factory=ServerConfigModel)
    # Search index section.
    index_config: IndexConfigModel = Field(default_factory=IndexConfigModel)
|
||||
|
||||
|
||||
def parse_config_file(config_filename: str) -> ConfigModel:
    """Parse a yaml config file and return it as a ConfigModel.

    Args:
        config_filename: Path to the yaml config file.

    Returns:
        A ConfigModel built from the file's top-level mapping. An empty
        file yields a ConfigModel made entirely of defaults.

    Raises:
        ValueError: If config_filename is not an existing file, or if the
            file's top-level yaml value is not a mapping.
    """
    if not pathlib.Path(config_filename).is_file():
        raise ValueError(
            f"config_filename: {config_filename} is not a file. Please check if it exists."
        )

    with open(config_filename) as f:
        # safe_load only builds plain yaml types (dict, list, str, int, ...).
        # yaml.load with yaml.Loader can instantiate arbitrary Python objects
        # named in the file, which a config file never needs.
        config = yaml.safe_load(f)

    # An empty yaml document loads as None; treat it as "use all defaults"
    if config is None:
        config = {}

    if not isinstance(config, dict):
        raise ValueError(
            f"config_filename: {config_filename} must contain a yaml mapping at the top level."
        )

    return ConfigModel(**config)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke check: run from the repository root with
    # "python -m mwmbl.tinysearchengine.config" to print the parsed default config.
    print(parse_config_file(config_filename="config/tinysearchengine.yaml").dict())
|
45
poetry.lock
generated
45
poetry.lock
generated
|
@ -460,6 +460,14 @@ category = "main"
|
|||
optional = false
|
||||
python-versions = "*"
|
||||
|
||||
[[package]]
|
||||
name = "pyyaml"
|
||||
version = "6.0"
|
||||
description = "YAML parser and emitter for Python"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
|
||||
[[package]]
|
||||
name = "rapidfuzz"
|
||||
version = "1.8.3"
|
||||
|
@ -794,7 +802,7 @@ indexer = ["botocore", "boto3", "ujson", "warcio", "idna", "beautifulsoup4", "lx
|
|||
[metadata]
|
||||
lock-version = "1.1"
|
||||
python-versions = "^3.9"
|
||||
content-hash = "ed117b25eca3bb999bb98376c8c4e1ff1e9288a9c3bb35c8c0b1e9233ba0b5cc"
|
||||
content-hash = "b45f9def8dcadfaa6ce23560b51bdee7f81c335598f6cc84d11fd3d596e3da5b"
|
||||
|
||||
[metadata.files]
|
||||
anyio = [
|
||||
|
@ -1359,6 +1367,41 @@ pytz = [
|
|||
{file = "pytz-2021.3-py2.py3-none-any.whl", hash = "sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c"},
|
||||
{file = "pytz-2021.3.tar.gz", hash = "sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326"},
|
||||
]
|
||||
pyyaml = [
|
||||
{file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"},
|
||||
{file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"},
|
||||
{file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc"},
|
||||
{file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b"},
|
||||
{file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"},
|
||||
{file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"},
|
||||
{file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"},
|
||||
{file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"},
|
||||
{file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"},
|
||||
{file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"},
|
||||
{file = "PyYAML-6.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4"},
|
||||
{file = "PyYAML-6.0-cp36-cp36m-win32.whl", hash = "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293"},
|
||||
{file = "PyYAML-6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57"},
|
||||
{file = "PyYAML-6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c"},
|
||||
{file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0"},
|
||||
{file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4"},
|
||||
{file = "PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9"},
|
||||
{file = "PyYAML-6.0-cp37-cp37m-win32.whl", hash = "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737"},
|
||||
{file = "PyYAML-6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d"},
|
||||
{file = "PyYAML-6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b"},
|
||||
{file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba"},
|
||||
{file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34"},
|
||||
{file = "PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287"},
|
||||
{file = "PyYAML-6.0-cp38-cp38-win32.whl", hash = "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78"},
|
||||
{file = "PyYAML-6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07"},
|
||||
{file = "PyYAML-6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b"},
|
||||
{file = "PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174"},
|
||||
{file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803"},
|
||||
{file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3"},
|
||||
{file = "PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0"},
|
||||
{file = "PyYAML-6.0-cp39-cp39-win32.whl", hash = "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb"},
|
||||
{file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"},
|
||||
{file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"},
|
||||
]
|
||||
rapidfuzz = [
|
||||
{file = "rapidfuzz-1.8.3-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:0aa566e46bf1bf8e98e7a009fb0119c6601aece029af2e9566cfdf7662526c20"},
|
||||
{file = "rapidfuzz-1.8.3-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:6854b2399fa39dbf480a55fe359e1012590b29e683035645dd8d56c8d367ca9b"},
|
||||
|
|
|
@ -11,6 +11,8 @@ zstandard = "^0.16.0"
|
|||
mmh3 = "^3.0.0"
|
||||
fastapi = "^0.70.1"
|
||||
uvicorn = "^0.16.0"
|
||||
numpy = "==1.21.1"
|
||||
pyyaml = "==6.0"
|
||||
# Optional dependencies do not get installed by default. Look under tool.poetry.extras section
|
||||
# to see which extras to use.
|
||||
botocore = {version= "==1.23.20", optional = true}
|
||||
|
@ -66,3 +68,6 @@ indexer = [
|
|||
[build-system]
|
||||
requires = ["poetry-core>=1.0.0"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.poetry.scripts]
|
||||
mwmbl-tinysearchengine = "mwmbl.tinysearchengine.app:main"
|
||||
|
|
Loading…
Reference in a new issue