Use a more specific exception (PageError) so we don't silently discard exceptions we shouldn't
This commit is contained in:
parent
ce9f52267a
commit
a471bc2437
2 changed files with 8 additions and 4 deletions
|
@ -6,7 +6,7 @@ from time import sleep
|
|||
|
||||
from mwmbl.database import Database
|
||||
from mwmbl.indexer.indexdb import IndexDatabase
|
||||
from mwmbl.tinysearchengine.indexer import TinyIndex, Document
|
||||
from mwmbl.tinysearchengine.indexer import TinyIndex, Document, PageError
|
||||
|
||||
|
||||
def run_update(index_path):
|
||||
|
@ -27,7 +27,7 @@ def run_update(index_path):
|
|||
try:
|
||||
indexer.add_to_page(i, documents)
|
||||
break
|
||||
except ValueError:
|
||||
except PageError:
|
||||
documents = documents[:len(documents)//2]
|
||||
if len(documents) == 0:
|
||||
print("No more space")
|
||||
|
|
|
@ -36,6 +36,10 @@ class TokenizedDocument(Document):
|
|||
T = TypeVar('T')
|
||||
|
||||
|
||||
class PageError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
@dataclass
|
||||
class TinyIndexMetadata:
|
||||
version: int
|
||||
|
@ -68,7 +72,7 @@ def _get_page_data(compressor, page_size, data):
|
|||
def _pad_to_page_size(data: bytes, page_size: int):
|
||||
page_length = len(data)
|
||||
if page_length > page_size:
|
||||
raise ValueError(f"Data is too big ({page_length}) for page size ({page_size})")
|
||||
raise PageError(f"Data is too big ({page_length}) for page size ({page_size})")
|
||||
padding = b'\x00' * (page_size - page_length)
|
||||
page_data = data + padding
|
||||
return page_data
|
||||
|
@ -142,7 +146,7 @@ class TinyIndex(Generic[T]):
|
|||
page_index = self.get_key_page_index(key)
|
||||
try:
|
||||
self.add_to_page(page_index, [value])
|
||||
except ValueError:
|
||||
except PageError:
|
||||
pass
|
||||
|
||||
def add_to_page(self, page_index: int, values: list[T]):
|
||||
|
|
Loading…
Add table
Reference in a new issue