Allow more tries so that popular terms can be indexed

2022-07-18 23:42:09 +01:00 · 2022-07-18 23:42:09 +01:00 · a097ec9fbe
commit a097ec9fbe
parent cfca015efe
2 changed files with 11 additions and 12 deletions
--- a/mwmbl/background.py
+++ b/mwmbl/background.py
@@ -13,16 +13,16 @@ logger = getLogger(__name__)


 def run(index_path: str):
-    # historical.run()
+    historical.run()
    while True:
-        # try:
-        #     retrieve_batches()
-        # except Exception:
-        #     logger.exception("Error retrieving batches")
-        # try:
-        #     run_preprocessing(index_path)
-        # except Exception:
-        #     logger.exception("Error preprocessing")
+        try:
+            retrieve_batches()
+        except Exception:
+            logger.exception("Error retrieving batches")
+        try:
+            run_preprocessing(index_path)
+        except Exception:
+            logger.exception("Error preprocessing")
        try:
            run_update(index_path)
        except Exception:
--- a/mwmbl/indexer/update_pages.py
+++ b/mwmbl/indexer/update_pages.py
@@ -17,14 +17,13 @@ def run_update(index_path):
    with TinyIndex(Document, index_path, 'w') as indexer:
        with Database() as db:
            index_db = IndexDatabase(db.connection)
-            # pages_to_process = index_db.get_queued_pages()
-            pages_to_process = [278107]
+            pages_to_process = index_db.get_queued_pages()
            print(f"Got {len(pages_to_process)} pages to process")
            for i in pages_to_process:
                documents = index_db.get_queued_documents_for_page(i)
                print(f"Documents queued for page {i}: {len(documents)}")
                if len(documents) > 0:
-                    for j in range(3):
+                    for j in range(20):
                        try:
                            indexer.add_to_page(i, documents)
                            break