Delete unused SQL
This commit is contained in:
parent
c52faeaddc
commit
e79f1ce10b
3 changed files with 2 additions and 31 deletions
|
@@ -4,7 +4,7 @@ from mwmbl.crawler.app import get_batches_for_date
|
|||
from mwmbl.database import Database
|
||||
from mwmbl.indexer.indexdb import BatchInfo, BatchStatus, IndexDatabase
|
||||
|
||||
DAYS = 10
|
||||
DAYS = 20
|
||||
|
||||
|
||||
def run():
|
||||
|
|
|
@@ -36,7 +36,7 @@ def run(batch_cache: BatchCache, index_path: str):
|
|||
index_db = IndexDatabase(db.connection)
|
||||
|
||||
logger.info("Getting local batches")
|
||||
batches = index_db.get_batches_by_status(BatchStatus.LOCAL, 1000)
|
||||
batches = index_db.get_batches_by_status(BatchStatus.LOCAL, 10000)
|
||||
logger.info(f"Got {len(batches)} batch urls")
|
||||
if len(batches) == 0:
|
||||
return
|
||||
|
|
|
@@ -13,11 +13,6 @@ class BatchStatus(Enum):
|
|||
INDEXED = 2
|
||||
|
||||
|
||||
class DocumentStatus(Enum):
|
||||
NEW = 0
|
||||
PREPROCESSING = 1
|
||||
|
||||
|
||||
@dataclass
|
||||
class BatchInfo:
|
||||
url: str
|
||||
|
@@ -38,32 +33,8 @@ class IndexDatabase:
|
|||
)
|
||||
"""
|
||||
|
||||
documents_sql = """
|
||||
CREATE TABLE IF NOT EXISTS documents (
|
||||
url VARCHAR PRIMARY KEY,
|
||||
title VARCHAR NOT NULL,
|
||||
extract VARCHAR NOT NULL,
|
||||
score FLOAT NOT NULL,
|
||||
status INT NOT NULL
|
||||
)
|
||||
"""
|
||||
|
||||
document_pages_sql = """
|
||||
CREATE TABLE IF NOT EXISTS document_pages (
|
||||
url VARCHAR NOT NULL,
|
||||
page INT NOT NULL
|
||||
)
|
||||
"""
|
||||
|
||||
document_pages_index_sql = """
|
||||
CREATE INDEX IF NOT EXISTS document_pages_page_index ON document_pages (page)
|
||||
"""
|
||||
|
||||
with self.connection.cursor() as cursor:
|
||||
cursor.execute(batches_sql)
|
||||
cursor.execute(documents_sql)
|
||||
cursor.execute(document_pages_sql)
|
||||
cursor.execute(document_pages_index_sql)
|
||||
|
||||
def record_batches(self, batch_infos: list[BatchInfo]):
|
||||
sql = """
|
||||
|
|
Loading…
Reference in a new issue