Start background processes

parent b6fd27352b
commit a1d6fd8bb1

3 changed files with 24 additions and 99 deletions
app/apps.py (new file, +22)

@@ -0,0 +1,22 @@
+import os
+from multiprocessing import Process, Queue
+
+from django.apps import AppConfig
+
+from app import settings
+from app.api import queued_batches
+from mwmbl import background
+from mwmbl.indexer.update_urls import update_urls_continuously
+from mwmbl.url_queue import update_queue_continuously
+
+
+class MwmblConfig(AppConfig):
+    name = "app"
+    verbose_name = "Mwmbl Application"
+
+    def ready(self):
+        if os.environ.get('RUN_MAIN') and settings.RUN_BACKGROUND_PROCESSES:
+            new_item_queue = Queue()
+            Process(target=background.run, args=(settings.DATA_PATH,)).start()
+            Process(target=update_queue_continuously, args=(new_item_queue, queued_batches,)).start()
+            Process(target=update_urls_continuously, args=(settings.DATA_PATH, new_item_queue)).start()
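Django's runserver autoreloader runs the project in two processes (a watcher and a serving child) and sets RUN_MAIN only in the child, so the RUN_MAIN check above keeps ready() from spawning each worker twice. The three targets are long-lived loops fed by multiprocessing queues; a minimal sketch of that shape, hypothetical rather than the real body of update_queue_continuously:

# Hypothetical sketch only -- not the real mwmbl implementation. It shows
# the queue-draining loop shape that the Process(target=...) wiring above
# assumes: block on an input queue, pass work to an output queue, forever.
from multiprocessing import Queue
from queue import Empty

def drain_continuously(input_queue: Queue, output_queue: Queue) -> None:
    while True:
        try:
            item = input_queue.get(timeout=10)  # wake periodically to stay responsive
        except Empty:
            continue
        output_queue.put(item)  # hand the item to the next pipeline stage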
app/settings.py

@@ -37,6 +37,7 @@ INSTALLED_APPS = [
     'django.contrib.sessions',
     'django.contrib.messages',
     'django.contrib.staticfiles',
+    'app',
 ]

 MIDDLEWARE = [
@@ -125,5 +126,4 @@ DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
 # ===================== Custom Settings =========================

 DATA_PATH = "./devdata"
-
-
+RUN_BACKGROUND_PROCESSES = True
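RUN_BACKGROUND_PROCESSES replaces the old --background CLI flag (see the deleted entry point below), and DATA_PATH stands in for --data. A hedged variant, not part of this commit: reading the flag from the environment so deployments can toggle it without editing settings.py.

# Hypothetical alternative, not in this commit: derive the flag from an
# environment variable so production and development can differ.
import os

RUN_BACKGROUND_PROCESSES = os.environ.get("RUN_BACKGROUND_PROCESSES", "true").lower() == "true"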
mwmbl/main.py (deleted, -97)

@@ -1,97 +0,0 @@
-import argparse
-import logging
-import sys
-from multiprocessing import Process, Queue
-from pathlib import Path
-
-import uvicorn
-from fastapi import FastAPI
-from starlette.middleware.cors import CORSMiddleware
-
-from mwmbl import background
-from mwmbl.crawler import app as crawler
-from mwmbl.indexer.batch_cache import BatchCache
-from mwmbl.indexer.paths import INDEX_NAME, BATCH_DIR_NAME
-from mwmbl.platform import user
-from mwmbl.indexer.update_urls import update_urls_continuously
-from mwmbl.tinysearchengine import search
-from mwmbl.tinysearchengine.completer import Completer
-from mwmbl.tinysearchengine.indexer import TinyIndex, Document, PAGE_SIZE
-from mwmbl.tinysearchengine.rank import HeuristicRanker
-from mwmbl.url_queue import update_queue_continuously
-
-FORMAT = '%(levelname)s %(name)s %(asctime)s %(message)s'
-logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=FORMAT)
-
-
-MODEL_PATH = Path(__file__).parent / 'resources' / 'model.pickle'
-
-
-def setup_args():
-    parser = argparse.ArgumentParser(description="Mwmbl API server and background task processor")
-    parser.add_argument("--num-pages", type=int, help="Number of pages of memory (4096 bytes) to use for the index", default=2560)
-    parser.add_argument("--data", help="Path to the data folder for storing index and cached batches", default="./devdata")
-    parser.add_argument("--port", type=int, help="Port for the server to listen at", default=5000)
-    parser.add_argument("--background", help="Enable running the background tasks to process batches",
-                        action='store_true')
-    args = parser.parse_args()
-    return args
-
-
-def run():
-    args = setup_args()
-
-    index_path = Path(args.data) / INDEX_NAME
-    try:
-        existing_index = TinyIndex(item_factory=Document, index_path=index_path)
-        if existing_index.page_size != PAGE_SIZE or existing_index.num_pages != args.num_pages:
-            raise ValueError(f"Existing index page sizes ({existing_index.page_size}) or number of pages "
-                             f"({existing_index.num_pages}) do not match")
-    except FileNotFoundError:
-        print("Creating a new index")
-        TinyIndex.create(item_factory=Document, index_path=index_path, num_pages=args.num_pages, page_size=PAGE_SIZE)
-
-    new_item_queue = Queue()
-    queued_batches = Queue()
-    # curation_queue = Queue()
-
-    if args.background:
-        Process(target=background.run, args=(args.data,)).start()
-        Process(target=update_queue_continuously, args=(new_item_queue, queued_batches,)).start()
-        Process(target=update_urls_continuously, args=(args.data, new_item_queue)).start()
-
-    completer = Completer()
-
-    with TinyIndex(item_factory=Document, index_path=index_path) as tiny_index:
-        ranker = HeuristicRanker(tiny_index, completer)
-        # model = pickle.load(open(MODEL_PATH, 'rb'))
-        # ranker = LTRRanker(model, tiny_index, completer)
-
-        # Initialize FastApi instance
-        app = FastAPI()
-
-        # Try disabling since this is handled by nginx
-        # app.add_middleware(
-        #     CORSMiddleware,
-        #     allow_origins=["*"],
-        #     allow_credentials=True,
-        #     allow_methods=["*"],
-        #     allow_headers=["*"],
-        # )
-
-        search_router = search.create_router(ranker)
-        app.include_router(search_router)
-
-        batch_cache = BatchCache(Path(args.data) / BATCH_DIR_NAME)
-        crawler_router = crawler.create_router(batch_cache, queued_batches)
-        app.include_router(crawler_router)
-
-        user_router = user.create_router(index_path)
-        app.include_router(user_router)
-
-        # Initialize uvicorn server using global app instance and server config params
-        uvicorn.run(app, host="0.0.0.0", port=args.port)
-
-
-if __name__ == "__main__":
-    run()
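With the FastAPI entry point deleted, its responsibilities move into the Django app: --background becomes RUN_BACKGROUND_PROCESSES, --data becomes DATA_PATH, and ready() starts the same three worker processes. What ready() does not pick up is the index bootstrap above (the TinyIndex existence check and TinyIndex.create call); a minimal sketch of that logic, reusing the same TinyIndex API but parameterized on a data path rather than argparse, in case it needs a new home:

# Sketch of the index bootstrap from the deleted run(), with the old
# --num-pages default (2560) hard-coded as an assumption. Where this
# should now live is not answered by this commit.
from pathlib import Path

from mwmbl.indexer.paths import INDEX_NAME
from mwmbl.tinysearchengine.indexer import TinyIndex, Document, PAGE_SIZE

NUM_PAGES = 2560  # former --num-pages default

def ensure_index(data_path: str) -> Path:
    index_path = Path(data_path) / INDEX_NAME
    try:
        existing_index = TinyIndex(item_factory=Document, index_path=index_path)
        if existing_index.page_size != PAGE_SIZE or existing_index.num_pages != NUM_PAGES:
            raise ValueError("Existing index page size or page count does not match")
    except FileNotFoundError:
        TinyIndex.create(item_factory=Document, index_path=index_path,
                         num_pages=NUM_PAGES, page_size=PAGE_SIZE)
    return index_path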