From 04a33a134bb06264142c13ab9187bbc0239a0415 Mon Sep 17 00:00:00 2001 From: Daoud Clarke Date: Tue, 22 Feb 2022 22:27:02 +0000 Subject: [PATCH] Fixes to mwmbl API for changes to the index --- config/tinysearchengine.yaml | 12 ---------- mwmbl/tinysearchengine/app.py | 34 ++++++++++----------------- mwmbl/tinysearchengine/config.py | 40 -------------------------------- 3 files changed, 12 insertions(+), 74 deletions(-) delete mode 100644 config/tinysearchengine.yaml delete mode 100644 mwmbl/tinysearchengine/config.py diff --git a/config/tinysearchengine.yaml b/config/tinysearchengine.yaml deleted file mode 100644 index 570acc3..0000000 --- a/config/tinysearchengine.yaml +++ /dev/null @@ -1,12 +0,0 @@ -# Config for bootstrapping tinysearchengine. -# Follows the schema/model defined by mwmbl.tinysearchengine.config.ConfigModel - -server_config: - host: "0.0.0.0" - port: 8080 - log_level: "info" - -index_config: - index_path: data/index.tinysearch - num_pages: 76800 - page_size: 4096 diff --git a/mwmbl/tinysearchengine/app.py b/mwmbl/tinysearchengine/app.py index dda7702..85078e3 100644 --- a/mwmbl/tinysearchengine/app.py +++ b/mwmbl/tinysearchengine/app.py @@ -1,13 +1,12 @@ -import logging import argparse +import logging import pandas as pd import uvicorn from mwmbl.tinysearchengine import create_app from mwmbl.tinysearchengine.completer import Completer -from mwmbl.tinysearchengine.indexer import TinyIndex, NUM_PAGES, PAGE_SIZE, Document -from mwmbl.tinysearchengine.config import parse_config_file +from mwmbl.tinysearchengine.indexer import TinyIndex, Document from mwmbl.tinysearchengine.rank import Ranker logging.basicConfig() @@ -16,7 +15,8 @@ logging.basicConfig() def setup_args(): """Read all the args.""" parser = argparse.ArgumentParser(description="mwmbl-tinysearchengine") - parser.add_argument("--config", help="Path to tinysearchengine's yaml config.", required=True) + parser.add_argument("--index", help="Path to the tinysearchengine index file", required=True) + parser.add_argument("--terms", help="Path to the tinysearchengine terms CSV file", required=True) args = parser.parse_args() return args @@ -30,30 +30,20 @@ def main(): * Initialize a FastAPI app instance * Starts uvicorn server using app instance """ - config, tiny_index = get_config_and_index() + args = setup_args() # Load term data - terms = pd.read_csv(config.terms_path) + terms = pd.read_csv(args.terms) completer = Completer(terms) - ranker = Ranker(tiny_index, completer) + with TinyIndex(item_factory=Document, index_path=args.index) as tiny_index: + ranker = Ranker(tiny_index, completer) - # Initialize FastApi instance - app = create_app.create(ranker) + # Initialize FastApi instance + app = create_app.create(ranker) - # Initialize uvicorn server using global app instance and server config params - uvicorn.run(app, **config.server_config.dict()) - - -def get_config_and_index(): - args = setup_args() - config = parse_config_file(config_filename=args.config) - # Initialize TinyIndex using index config params - tiny_index = TinyIndex( - item_factory=Document, - **config.index_config.dict() - ) - return config, tiny_index + # Initialize uvicorn server using global app instance and server config params + uvicorn.run(app, host="0.0.0.0", port=8080) if __name__ == "__main__": diff --git a/mwmbl/tinysearchengine/config.py b/mwmbl/tinysearchengine/config.py deleted file mode 100644 index 2fd6f54..0000000 --- a/mwmbl/tinysearchengine/config.py +++ /dev/null @@ -1,40 +0,0 @@ -import pathlib -import yaml -from pydantic import BaseModel, StrictInt, StrictStr, Field - - -class ServerConfigModel(BaseModel): - host: StrictStr = "0.0.0.0" - port: StrictInt = 8080 - log_level: StrictStr = "info" - - -class IndexConfigModel(BaseModel): - index_path: StrictStr = "data/index.tinysearch" - num_pages: StrictInt = 25600 - page_size: StrictInt = 4096 - - -class ConfigModel(BaseModel): - server_config: ServerConfigModel = Field(default_factory=ServerConfigModel) - index_config: IndexConfigModel = Field(default_factory=IndexConfigModel) - terms_path: StrictStr = "data/mwmbl-crawl-terms.csv" - - -def parse_config_file(config_filename: str) -> ConfigModel: - """Parse config dictionary and return ConfigModel.""" - if not pathlib.Path(config_filename).is_file(): - raise ValueError( - f"config_filename: {config_filename} is not a file. Please check if it exists." - ) - - with open(config_filename) as f: - config = yaml.load(f, yaml.Loader) - - return ConfigModel(**config) - - -if __name__ == "__main__": - # Call this from the root of the repo using "python -m mwmbl.tinysearchengine.config" - config_model = parse_config_file(config_filename="config/tinysearchengine.yaml") - print(config_model.dict())