Allow more tries so that popular terms can be indexed
This commit is contained in:
parent
cfca015efe
commit
a097ec9fbe
2 changed files with 11 additions and 12 deletions
|
@@ -13,16 +13,16 @@ logger = getLogger(__name__)
|
|||
|
||||
|
||||
def run(index_path: str):
|
||||
# historical.run()
|
||||
historical.run()
|
||||
while True:
|
||||
# try:
|
||||
# retrieve_batches()
|
||||
# except Exception:
|
||||
# logger.exception("Error retrieving batches")
|
||||
# try:
|
||||
# run_preprocessing(index_path)
|
||||
# except Exception:
|
||||
# logger.exception("Error preprocessing")
|
||||
try:
|
||||
retrieve_batches()
|
||||
except Exception:
|
||||
logger.exception("Error retrieving batches")
|
||||
try:
|
||||
run_preprocessing(index_path)
|
||||
except Exception:
|
||||
logger.exception("Error preprocessing")
|
||||
try:
|
||||
run_update(index_path)
|
||||
except Exception:
|
||||
|
|
|
@@ -17,14 +17,13 @@ def run_update(index_path):
|
|||
with TinyIndex(Document, index_path, 'w') as indexer:
|
||||
with Database() as db:
|
||||
index_db = IndexDatabase(db.connection)
|
||||
# pages_to_process = index_db.get_queued_pages()
|
||||
pages_to_process = [278107]
|
||||
pages_to_process = index_db.get_queued_pages()
|
||||
print(f"Got {len(pages_to_process)} pages to process")
|
||||
for i in pages_to_process:
|
||||
documents = index_db.get_queued_documents_for_page(i)
|
||||
print(f"Documents queued for page {i}: {len(documents)}")
|
||||
if len(documents) > 0:
|
||||
for j in range(3):
|
||||
for j in range(20):
|
||||
try:
|
||||
indexer.add_to_page(i, documents)
|
||||
break
|
||||
|
|
Loading…
Add table
Reference in a new issue