Delete unused SQL

This commit is contained in:
Daoud Clarke 2022-07-30 09:27:44 +01:00
parent c52faeaddc
commit e79f1ce10b
3 changed files with 2 additions and 31 deletions

View file

@@ -4,7 +4,7 @@ from mwmbl.crawler.app import get_batches_for_date
from mwmbl.database import Database
from mwmbl.indexer.indexdb import BatchInfo, BatchStatus, IndexDatabase
-DAYS = 10
+DAYS = 20
def run():

View file

@@ -36,7 +36,7 @@ def run(batch_cache: BatchCache, index_path: str):
index_db = IndexDatabase(db.connection)
logger.info("Getting local batches")
-batches = index_db.get_batches_by_status(BatchStatus.LOCAL, 1000)
+batches = index_db.get_batches_by_status(BatchStatus.LOCAL, 10000)
logger.info(f"Got {len(batches)} batch urls")
if len(batches) == 0:
return

View file

@@ -13,11 +13,6 @@ class BatchStatus(Enum):
INDEXED = 2
-class DocumentStatus(Enum):
-NEW = 0
-PREPROCESSING = 1
@dataclass
class BatchInfo:
url: str
@@ -38,32 +33,8 @@ class IndexDatabase:
)
"""
-documents_sql = """
-CREATE TABLE IF NOT EXISTS documents (
-url VARCHAR PRIMARY KEY,
-title VARCHAR NOT NULL,
-extract VARCHAR NOT NULL,
-score FLOAT NOT NULL,
-status INT NOT NULL
-)
-"""
-document_pages_sql = """
-CREATE TABLE IF NOT EXISTS document_pages (
-url VARCHAR NOT NULL,
-page INT NOT NULL
-)
-"""
-document_pages_index_sql = """
-CREATE INDEX IF NOT EXISTS document_pages_page_index ON document_pages (page)
-"""
with self.connection.cursor() as cursor:
cursor.execute(batches_sql)
-cursor.execute(documents_sql)
-cursor.execute(document_pages_sql)
-cursor.execute(document_pages_index_sql)
def record_batches(self, batch_infos: list[BatchInfo]):
sql = """