3 years ago · 62ba9ddc7e
--- a/mwmbl/crawler/urls.py
+++ b/mwmbl/crawler/urls.py
@@ -2,6 +2,7 @@
 
				 Database storing info on URLs
			
 
				 """
			
 
				 import os
			
 
				+import random
			
 
				 from dataclasses import dataclass
			
 
				 from datetime import datetime, timedelta
			
 
				 from enum import Enum
			
@@ -14,7 +15,7 @@ from psycopg2.extras import execute_values
 
				 # Client has one hour to crawl a URL that has been assigned to them, or it will be reassigned
			
 
				 from mwmbl.database import Database
			
 
				 
			
 
				-REASSIGN_MIN_HOURS = 1
			
 
				+REASSIGN_MIN_HOURS = 5
			
 
				 BATCH_SIZE = 100
			
 
				 
			
 
				 
			
@@ -120,6 +121,9 @@ class URLDatabase:
 
				                 execute_values(cursor, insert_sql, data)
			
 
				 
			
 
				     def get_new_batch_for_user(self, user_id_hash: str):
			
 
				+        timeout_seconds = random.randint(10, 20)
			
 
				+        timeout_sql = f"SET statement_timeout = '{timeout_seconds}s'"
			
 
				+
			
 
				         sql = f"""
			
 
				         UPDATE urls SET status = {URLStatus.ASSIGNED.value}, user_id_hash = %(user_id_hash)s, updated = %(now)s
			
 
				         WHERE url IN (
			
@@ -137,6 +141,7 @@ class URLDatabase:
 
				         now = datetime.utcnow()
			
 
				         min_updated_date = now - timedelta(hours=REASSIGN_MIN_HOURS)
			
 
				         with self.connection.cursor() as cursor:
			
 
				+            cursor.execute(timeout_sql)
			
 
				             cursor.execute(sql, {'user_id_hash': user_id_hash, 'min_updated_date': min_updated_date, 'now': now})
			
 
				             results = cursor.fetchall()