Use a randomised timeout for getting a new batch
This commit is contained in:
parent a54e093cf1
commit 62ba9ddc7e
1 changed file with 6 additions and 1 deletion
@@ -2,6 +2,7 @@
 Database storing info on URLs
 """
 import os
+import random
 from dataclasses import dataclass
 from datetime import datetime, timedelta
 from enum import Enum
@@ -14,7 +15,7 @@ from psycopg2.extras import execute_values
 # Client has one hour to crawl a URL that has been assigned to them, or it will be reassigned
 from mwmbl.database import Database
 
-REASSIGN_MIN_HOURS = 1
+REASSIGN_MIN_HOURS = 5
 BATCH_SIZE = 100
 
 
@@ -120,6 +121,9 @@ class URLDatabase:
         execute_values(cursor, insert_sql, data)
 
     def get_new_batch_for_user(self, user_id_hash: str):
+        timeout_seconds = random.randint(10, 20)
+        timeout_sql = f"SET statement_timeout = '{timeout_seconds}s'"
+
         sql = f"""
         UPDATE urls SET status = {URLStatus.ASSIGNED.value}, user_id_hash = %(user_id_hash)s, updated = %(now)s
         WHERE url IN (
@@ -137,6 +141,7 @@ class URLDatabase:
         now = datetime.utcnow()
         min_updated_date = now - timedelta(hours=REASSIGN_MIN_HOURS)
         with self.connection.cursor() as cursor:
+            cursor.execute(timeout_sql)
            cursor.execute(sql, {'user_id_hash': user_id_hash, 'min_updated_date': min_updated_date, 'now': now})
            results = cursor.fetchall()
 
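
For context: the change above picks a random statement timeout between 10 and 20 seconds and applies it to the database session immediately before the UPDATE that assigns a batch of URLs to a client, so a slow batch query is cancelled by Postgres rather than running indefinitely. A likely motivation, though the commit message does not spell it out, is that concurrent clients hitting a slow query then give up at staggered times rather than all at once. The snippet below is a minimal standalone sketch of that pattern, not the mwmbl code itself; the DSN, table, and query are placeholders.

import random

import psycopg2
from psycopg2 import errors


def fetch_batch_with_random_timeout(connection):
    """Sketch of the pattern in this commit: randomise the Postgres
    statement_timeout before running a potentially slow batch query."""
    timeout_seconds = random.randint(10, 20)

    with connection.cursor() as cursor:
        # statement_timeout is a session-level setting: any later statement on
        # this connection is cancelled if it runs longer than the limit.
        cursor.execute(f"SET statement_timeout = '{timeout_seconds}s'")
        try:
            # Placeholder query standing in for the real batch-assignment UPDATE.
            cursor.execute("SELECT url FROM urls WHERE status = 0 LIMIT 100")
            return [row[0] for row in cursor.fetchall()]
        except errors.QueryCanceled:
            # psycopg2 raises QueryCanceled when Postgres cancels on timeout.
            connection.rollback()
            return []


if __name__ == "__main__":
    conn = psycopg2.connect("dbname=example user=example")  # placeholder DSN
    print(fetch_batch_with_random_timeout(conn))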