diff --git a/mwmbl/indexer/update_pages.py b/mwmbl/indexer/update_pages.py index a11a216..d3e7afd 100644 --- a/mwmbl/indexer/update_pages.py +++ b/mwmbl/indexer/update_pages.py @@ -6,7 +6,7 @@ from time import sleep from mwmbl.database import Database from mwmbl.indexer.indexdb import IndexDatabase -from mwmbl.tinysearchengine.indexer import TinyIndex, Document +from mwmbl.tinysearchengine.indexer import TinyIndex, Document, PageError def run_update(index_path): @@ -27,7 +27,7 @@ def run_update(index_path): try: indexer.add_to_page(i, documents) break - except ValueError: + except PageError: documents = documents[:len(documents)//2] if len(documents) == 0: print("No more space") diff --git a/mwmbl/tinysearchengine/indexer.py b/mwmbl/tinysearchengine/indexer.py index 4c9bfef..a9aa9f4 100644 --- a/mwmbl/tinysearchengine/indexer.py +++ b/mwmbl/tinysearchengine/indexer.py @@ -36,6 +36,10 @@ class TokenizedDocument(Document): T = TypeVar('T') +class PageError(Exception): + pass + + @dataclass class TinyIndexMetadata: version: int @@ -68,7 +72,7 @@ def _get_page_data(compressor, page_size, data): def _pad_to_page_size(data: bytes, page_size: int): page_length = len(data) if page_length > page_size: - raise ValueError(f"Data is too big ({page_length}) for page size ({page_size})") + raise PageError(f"Data is too big ({page_length}) for page size ({page_size})") padding = b'\x00' * (page_size - page_length) page_data = data + padding return page_data @@ -142,7 +146,7 @@ class TinyIndex(Generic[T]): page_index = self.get_key_page_index(key) try: self.add_to_page(page_index, [value]) - except ValueError: + except PageError: pass def add_to_page(self, page_index: int, values: list[T]):