소스 검색

Fixes to mwmbl API for changes to the index

Daoud Clarke 3 년 전
부모
커밋
04a33a134b
3개의 변경된 파일, 12개의 추가 및 74개의 삭제
  1. +0 -12
      config/tinysearchengine.yaml
  2. +12 -22
      mwmbl/tinysearchengine/app.py
  3. +0 -40
      mwmbl/tinysearchengine/config.py

+ 0 - 12
config/tinysearchengine.yaml

@@ -1,12 +0,0 @@
-# Config for bootstrapping tinysearchengine.
-# Follows the schema/model defined by mwmbl.tinysearchengine.config.ConfigModel
-
-server_config:
-  host: "0.0.0.0"
-  port: 8080
-  log_level: "info"
-
-index_config:
-  index_path: data/index.tinysearch
-  num_pages: 76800
-  page_size: 4096

+ 12 - 22
mwmbl/tinysearchengine/app.py

@@ -1,13 +1,12 @@
-import logging
 import argparse
+import logging
 
 import pandas as pd
 import uvicorn
 
 from mwmbl.tinysearchengine import create_app
 from mwmbl.tinysearchengine.completer import Completer
-from mwmbl.tinysearchengine.indexer import TinyIndex, NUM_PAGES, PAGE_SIZE, Document
-from mwmbl.tinysearchengine.config import parse_config_file
+from mwmbl.tinysearchengine.indexer import TinyIndex, Document
 from mwmbl.tinysearchengine.rank import Ranker
 
 logging.basicConfig()
@@ -16,7 +15,8 @@ logging.basicConfig()
 def setup_args():
     """Read all the args."""
     parser = argparse.ArgumentParser(description="mwmbl-tinysearchengine")
-    parser.add_argument("--config", help="Path to tinysearchengine's yaml config.", required=True)
+    parser.add_argument("--index", help="Path to the tinysearchengine index file", required=True)
+    parser.add_argument("--terms", help="Path to the tinysearchengine terms CSV file", required=True)
     args = parser.parse_args()
     return args
 
@@ -30,30 +30,20 @@ def main():
     * Initialize a FastAPI app instance
     * Starts uvicorn server using app instance
     """
-    config, tiny_index = get_config_and_index()
+    args = setup_args()
 
     # Load term data
-    terms = pd.read_csv(config.terms_path)
+    terms = pd.read_csv(args.terms)
     completer = Completer(terms)
 
-    ranker = Ranker(tiny_index, completer)
+    with TinyIndex(item_factory=Document, index_path=args.index) as tiny_index:
+        ranker = Ranker(tiny_index, completer)
 
-    # Initialize FastApi instance
-    app = create_app.create(ranker)
+        # Initialize FastApi instance
+        app = create_app.create(ranker)
 
-    # Initialize uvicorn server using global app instance and server config params
-    uvicorn.run(app, **config.server_config.dict())
-
-
-def get_config_and_index():
-    args = setup_args()
-    config = parse_config_file(config_filename=args.config)
-    # Initialize TinyIndex using index config params
-    tiny_index = TinyIndex(
-        item_factory=Document,
-        **config.index_config.dict()
-    )
-    return config, tiny_index
+        # Initialize uvicorn server using global app instance and server config params
+        uvicorn.run(app, host="0.0.0.0", port=8080)
 
 
 if __name__ == "__main__":

+ 0 - 40
mwmbl/tinysearchengine/config.py

@@ -1,40 +0,0 @@
-import pathlib
-import yaml
-from pydantic import BaseModel, StrictInt, StrictStr, Field
-
-
-class ServerConfigModel(BaseModel):
-    host: StrictStr = "0.0.0.0"
-    port: StrictInt = 8080
-    log_level: StrictStr = "info"
-
-
-class IndexConfigModel(BaseModel):
-    index_path: StrictStr = "data/index.tinysearch"
-    num_pages: StrictInt = 25600
-    page_size: StrictInt = 4096
-
-
-class ConfigModel(BaseModel):
-    server_config: ServerConfigModel = Field(default_factory=ServerConfigModel)
-    index_config: IndexConfigModel = Field(default_factory=IndexConfigModel)
-    terms_path: StrictStr = "data/mwmbl-crawl-terms.csv"
-
-
-def parse_config_file(config_filename: str) -> ConfigModel:
-    """Parse config dictionary and return ConfigModel."""
-    if not pathlib.Path(config_filename).is_file():
-        raise ValueError(
-            f"config_filename: {config_filename} is not a file. Please check if it exists."
-        )
-
-    with open(config_filename) as f:
-        config = yaml.load(f, yaml.Loader)
-
-    return ConfigModel(**config)
-
-
-if __name__ == "__main__":
-    # Call this from the root of the repo using "python -m mwmbl.tinysearchengine.config"
-    config_model = parse_config_file(config_filename="config/tinysearchengine.yaml")
-    print(config_model.dict())