Fixes to mwmbl API for changes to the index
This commit is contained in:
parent
ae3b334a7f
commit
04a33a134b
3 changed files with 12 additions and 74 deletions
|
@ -1,12 +0,0 @@
|
|||
# Config for bootstrapping tinysearchengine.
# Follows the schema/model defined by mwmbl.tinysearchengine.config.ConfigModel

# Keys below are nested under their section so they map onto the
# server_config / index_config sub-models rather than onto top-level fields.
server_config:
  host: "0.0.0.0"
  port: 8080
  log_level: "info"

index_config:
  index_path: data/index.tinysearch
  num_pages: 76800
  page_size: 4096
|
|
@ -1,13 +1,12 @@
|
|||
import logging
|
||||
import argparse
|
||||
import logging
|
||||
|
||||
import pandas as pd
|
||||
import uvicorn
|
||||
|
||||
from mwmbl.tinysearchengine import create_app
|
||||
from mwmbl.tinysearchengine.completer import Completer
|
||||
from mwmbl.tinysearchengine.indexer import TinyIndex, NUM_PAGES, PAGE_SIZE, Document
|
||||
from mwmbl.tinysearchengine.config import parse_config_file
|
||||
from mwmbl.tinysearchengine.indexer import TinyIndex, Document
|
||||
from mwmbl.tinysearchengine.rank import Ranker
|
||||
|
||||
logging.basicConfig()
|
||||
|
@ -16,7 +15,8 @@ logging.basicConfig()
|
|||
def setup_args(argv=None):
    """Parse the command-line arguments for the tinysearchengine server.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process arguments, so existing
            zero-argument callers behave exactly as before.

    Returns:
        The ``argparse.Namespace`` holding ``config``, ``index`` and ``terms``.
    """
    parser = argparse.ArgumentParser(description="mwmbl-tinysearchengine")
    parser.add_argument("--config", help="Path to tinysearchengine's yaml config.", required=True)
    parser.add_argument("--index", help="Path to the tinysearchengine index file", required=True)
    parser.add_argument("--terms", help="Path to the tinysearchengine terms CSV file", required=True)
    return parser.parse_args(argv)
|
||||
|
||||
|
@ -30,30 +30,20 @@ def main():
|
|||
* Initialize a FastAPI app instance
|
||||
* Starts uvicorn server using app instance
|
||||
"""
|
||||
config, tiny_index = get_config_and_index()
|
||||
args = setup_args()
|
||||
|
||||
# Load term data
|
||||
terms = pd.read_csv(config.terms_path)
|
||||
terms = pd.read_csv(args.terms)
|
||||
completer = Completer(terms)
|
||||
|
||||
ranker = Ranker(tiny_index, completer)
|
||||
with TinyIndex(item_factory=Document, index_path=args.index) as tiny_index:
|
||||
ranker = Ranker(tiny_index, completer)
|
||||
|
||||
# Initialize FastApi instance
|
||||
app = create_app.create(ranker)
|
||||
# Initialize FastApi instance
|
||||
app = create_app.create(ranker)
|
||||
|
||||
# Initialize uvicorn server using global app instance and server config params
|
||||
uvicorn.run(app, **config.server_config.dict())
|
||||
|
||||
|
||||
def get_config_and_index():
    """Build the parsed config and the index it describes.

    Reads the command-line arguments via ``setup_args``, parses the file
    named by ``--config`` and constructs a ``TinyIndex`` of ``Document``
    items from the ``index_config`` section.

    Returns:
        A ``(config, tiny_index)`` tuple.
    """
    cli_args = setup_args()
    config = parse_config_file(config_filename=cli_args.config)
    # The index_config fields are forwarded unchanged as TinyIndex keyword arguments.
    index_kwargs = config.index_config.dict()
    return config, TinyIndex(item_factory=Document, **index_kwargs)
|
||||
# Initialize uvicorn server using global app instance and server config params
|
||||
uvicorn.run(app, host="0.0.0.0", port=8080)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -1,40 +0,0 @@
|
|||
import pathlib
|
||||
import yaml
|
||||
from pydantic import BaseModel, StrictInt, StrictStr, Field
|
||||
|
||||
|
||||
class ServerConfigModel(BaseModel):
    """Server section of the config; every field is strictly typed with a default."""

    host: StrictStr = "0.0.0.0"
    port: StrictInt = 8080
    log_level: StrictStr = "info"
|
||||
|
||||
|
||||
class IndexConfigModel(BaseModel):
    """Index section of the config; every field is strictly typed with a default."""

    index_path: StrictStr = "data/index.tinysearch"
    num_pages: StrictInt = 25600
    page_size: StrictInt = 4096
|
||||
|
||||
|
||||
class ConfigModel(BaseModel):
    """Top-level config: server and index sections plus the terms file path."""

    # Sub-models use default_factory so a missing section falls back to its defaults.
    server_config: ServerConfigModel = Field(default_factory=ServerConfigModel)
    index_config: IndexConfigModel = Field(default_factory=IndexConfigModel)
    terms_path: StrictStr = "data/mwmbl-crawl-terms.csv"
|
||||
|
||||
|
||||
def parse_config_file(config_filename: str) -> ConfigModel:
    """Parse a YAML config file and return it as a ConfigModel.

    Args:
        config_filename: Path to the YAML configuration file.

    Returns:
        A ConfigModel built from the file's top-level mapping. An empty file
        yields a ConfigModel made entirely of defaults.

    Raises:
        ValueError: If config_filename does not point to an existing file.
    """
    if not pathlib.Path(config_filename).is_file():
        raise ValueError(
            f"config_filename: {config_filename} is not a file. Please check if it exists."
        )

    with open(config_filename) as f:
        # NOTE(review): yaml.Loader can construct arbitrary Python objects, so
        # this must only be pointed at trusted files; consider yaml.safe_load.
        config = yaml.load(f, yaml.Loader)

    # An empty (or comment-only) document loads as None, which cannot be
    # unpacked with ** below; fall back to an empty mapping so defaults apply.
    if config is None:
        config = {}

    return ConfigModel(**config)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Call this from the root of the repo using "python -m mwmbl.tinysearchengine.config"
    parsed = parse_config_file(config_filename="config/tinysearchengine.yaml")
    print(parsed.dict())
|
Loading…
Add table
Reference in a new issue