Allow more tries so that popular terms can be indexed

Daoud Clarke 2022-07-18 23:42:09 +01:00
parent cfca015efe
commit a097ec9fbe
2 changed files with 11 additions and 12 deletions

@@ -13,16 +13,16 @@ logger = getLogger(__name__)
 def run(index_path: str):
-    # historical.run()
+    historical.run()
     while True:
-        # try:
-        #     retrieve_batches()
-        # except Exception:
-        #     logger.exception("Error retrieving batches")
-        # try:
-        #     run_preprocessing(index_path)
-        # except Exception:
-        #     logger.exception("Error preprocessing")
+        try:
+            retrieve_batches()
+        except Exception:
+            logger.exception("Error retrieving batches")
+        try:
+            run_preprocessing(index_path)
+        except Exception:
+            logger.exception("Error preprocessing")
         try:
             run_update(index_path)
         except Exception:
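
The first hunk turns the previously commented-out stages back on. The pattern is a resilient background loop: each stage runs inside its own try/except, failures are logged with logger.exception (which records the traceback), and the loop carries on so that one broken stage cannot stop batch retrieval, preprocessing, or index updates. Below is a minimal, self-contained sketch of that pattern; the stage bodies, the final "Error updating" message, and the pause between passes are assumptions, since the diff is truncated after the last except.

import time
from logging import getLogger

logger = getLogger(__name__)


def retrieve_batches():
    """Placeholder: fetch newly crawled batches."""


def run_preprocessing(index_path: str):
    """Placeholder: prepare queued documents for indexing."""


def run_update(index_path: str):
    """Placeholder: write queued documents into the index."""


def run(index_path: str):
    while True:
        # Each stage gets its own try/except: a failure is logged with its
        # traceback and the remaining stages still run on this pass.
        try:
            retrieve_batches()
        except Exception:
            logger.exception("Error retrieving batches")
        try:
            run_preprocessing(index_path)
        except Exception:
            logger.exception("Error preprocessing")
        try:
            run_update(index_path)
        except Exception:
            logger.exception("Error updating")  # assumed message; not shown in the diff
        time.sleep(10)  # assumed pause between passes; the diff does not show one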

@@ -17,14 +17,13 @@ def run_update(index_path):
     with TinyIndex(Document, index_path, 'w') as indexer:
         with Database() as db:
             index_db = IndexDatabase(db.connection)
-            # pages_to_process = index_db.get_queued_pages()
-            pages_to_process = [278107]
+            pages_to_process = index_db.get_queued_pages()
             print(f"Got {len(pages_to_process)} pages to process")
             for i in pages_to_process:
                 documents = index_db.get_queued_documents_for_page(i)
                 print(f"Documents queued for page {i}: {len(documents)}")
                 if len(documents) > 0:
-                    for j in range(3):
+                    for j in range(20):
                         try:
                             indexer.add_to_page(i, documents)
                             break
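
The second hunk switches run_update back to processing every queued page, instead of the single hard-coded test page 278107, and raises the retry count from 3 to 20, giving pages with many queued documents, typical of popular terms, more chances to be indexed before they are given up on. The diff does not show what happens between attempts; the sketch below assumes the indexer signals a full page with an exception and that trimming the batch and retrying is acceptable. PageFullError and add_with_retries are hypothetical names used for illustration, not mwmbl's actual API.

class PageFullError(Exception):
    """Hypothetical: raised when a page cannot hold the documents offered."""


def add_with_retries(indexer, page_index, documents, max_tries=20):
    """Try to add a batch of documents to one index page.

    On each failure, drop the lower-ranked half of the batch and retry, so a
    popular term still gets its best documents indexed. The trimming strategy
    and the exception type are illustrative assumptions, not taken from this
    commit.
    """
    for _ in range(max_tries):
        try:
            indexer.add_to_page(page_index, documents)
            return True
        except PageFullError:
            # Keep only the first (presumed best-ranked) half of the batch.
            documents = documents[: len(documents) // 2]
            if not documents:
                break
    return False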