|
@@ -3,6 +3,7 @@ import logging
|
|
import os
|
|
import os
|
|
import sys
|
|
import sys
|
|
from multiprocessing import Process
|
|
from multiprocessing import Process
|
|
|
|
+from pathlib import Path
|
|
|
|
|
|
import uvicorn
|
|
import uvicorn
|
|
from fastapi import FastAPI
|
|
from fastapi import FastAPI
|
|
@@ -10,6 +11,7 @@ from fastapi import FastAPI
|
|
from mwmbl import background
|
|
from mwmbl import background
|
|
from mwmbl.indexer import historical, retrieve, preprocess, update_pages
|
|
from mwmbl.indexer import historical, retrieve, preprocess, update_pages
|
|
from mwmbl.crawler.app import router as crawler_router
|
|
from mwmbl.crawler.app import router as crawler_router
|
|
|
|
+from mwmbl.indexer.paths import INDEX_NAME
|
|
from mwmbl.tinysearchengine import search
|
|
from mwmbl.tinysearchengine import search
|
|
from mwmbl.tinysearchengine.completer import Completer
|
|
from mwmbl.tinysearchengine.completer import Completer
|
|
from mwmbl.tinysearchengine.indexer import TinyIndex, Document, NUM_PAGES, PAGE_SIZE
|
|
from mwmbl.tinysearchengine.indexer import TinyIndex, Document, NUM_PAGES, PAGE_SIZE
|
|
@@ -20,7 +22,7 @@ logging.basicConfig(stream=sys.stdout, level=logging.INFO)
|
|
|
|
|
|
def setup_args():
|
|
def setup_args():
|
|
parser = argparse.ArgumentParser(description="mwmbl-tinysearchengine")
|
|
parser = argparse.ArgumentParser(description="mwmbl-tinysearchengine")
|
|
- parser.add_argument("--index", help="Path to the tinysearchengine index file", default="/app/storage/index.tinysearch")
|
|
|
|
|
|
+ parser.add_argument("--data", help="Path to the tinysearchengine index file", default="/app/storage/")
|
|
args = parser.parse_args()
|
|
args = parser.parse_args()
|
|
return args
|
|
return args
|
|
|
|
|
|
@@ -28,8 +30,9 @@ def setup_args():
|
|
def run():
|
|
def run():
|
|
args = setup_args()
|
|
args = setup_args()
|
|
|
|
|
|
|
|
+ index_path = Path(args.data) / INDEX_NAME
|
|
try:
|
|
try:
|
|
- existing_index = TinyIndex(item_factory=Document, index_path=args.index)
|
|
|
|
|
|
+ existing_index = TinyIndex(item_factory=Document, index_path=index_path)
|
|
if existing_index.page_size != PAGE_SIZE or existing_index.num_pages != NUM_PAGES:
|
|
if existing_index.page_size != PAGE_SIZE or existing_index.num_pages != NUM_PAGES:
|
|
print(f"Existing index page sizes ({existing_index.page_size}) and number of pages "
|
|
print(f"Existing index page sizes ({existing_index.page_size}) and number of pages "
|
|
f"({existing_index.num_pages}) does not match - removing.")
|
|
f"({existing_index.num_pages}) does not match - removing.")
|
|
@@ -42,15 +45,11 @@ def run():
|
|
print("Creating a new index")
|
|
print("Creating a new index")
|
|
TinyIndex.create(item_factory=Document, index_path=args.index, num_pages=NUM_PAGES, page_size=PAGE_SIZE)
|
|
TinyIndex.create(item_factory=Document, index_path=args.index, num_pages=NUM_PAGES, page_size=PAGE_SIZE)
|
|
|
|
|
|
- Process(target=background.run, args=(args.index,)).start()
|
|
|
|
- # Process(target=historical.run).start()
|
|
|
|
- # Process(target=retrieve.run).start()
|
|
|
|
- # Process(target=preprocess.run, args=(args.index,)).start()
|
|
|
|
- # Process(target=update_pages.run, args=(args.index,)).start()
|
|
|
|
|
|
+ Process(target=background.run, args=(args.data,)).start()
|
|
|
|
|
|
completer = Completer()
|
|
completer = Completer()
|
|
|
|
|
|
- with TinyIndex(item_factory=Document, index_path=args.index) as tiny_index:
|
|
|
|
|
|
+ with TinyIndex(item_factory=Document, index_path=index_path) as tiny_index:
|
|
ranker = HeuristicRanker(tiny_index, completer)
|
|
ranker = HeuristicRanker(tiny_index, completer)
|
|
|
|
|
|
# Initialize FastApi instance
|
|
# Initialize FastApi instance
|