Start processing historical data on startup

This commit is contained in:
Daoud Clarke 2022-06-19 08:56:55 +01:00
parent d400950689
commit 16c2692099
2 changed files with 5 additions and 1 deletion

View file

@@ -24,7 +24,6 @@ def run(index_path):
get_batch_time = datetime.now()
print("Get batch time", get_batch_time - start)
batch = HashedBatch.parse_obj(batch_dict)
print("Batch", batch)
create_historical_batch(batch)
create_historical_time = datetime.now()
print("Create historical time", create_historical_time - get_batch_time)

View file

@@ -1,11 +1,13 @@
import argparse
import logging
from multiprocessing import Process
import pandas as pd
import uvicorn
from fastapi import FastAPI
from starlette.middleware.cors import CORSMiddleware
from mwmbl.crawler import historical
from mwmbl.crawler.app import router as crawler_router
from mwmbl.tinysearchengine import search
from mwmbl.tinysearchengine.completer import Completer
@@ -38,6 +40,9 @@ def run():
except FileExistsError:
print("Index already exists")
historical_batch_process = Process(target=historical.run, args=(args.index,))
historical_batch_process.start()
completer = Completer()
with TinyIndex(item_factory=Document, index_path=args.index) as tiny_index: