Bladeren bron

Use a more specific exception in case we're discarding ones we shouldn't

Daoud Clarke 3 jaren geleden
bovenliggende
commit
a471bc2437
2 gewijzigde bestanden met toevoegingen van 8 en 4 verwijderingen
  1. 2 2
      mwmbl/indexer/update_pages.py
  2. 6 2
      mwmbl/tinysearchengine/indexer.py

+ 2 - 2
mwmbl/indexer/update_pages.py

@@ -6,7 +6,7 @@ from time import sleep
 
 from mwmbl.database import Database
 from mwmbl.indexer.indexdb import IndexDatabase
-from mwmbl.tinysearchengine.indexer import TinyIndex, Document
+from mwmbl.tinysearchengine.indexer import TinyIndex, Document, PageError
 
 
 def run_update(index_path):
@@ -27,7 +27,7 @@ def run_update(index_path):
                         try:
                             indexer.add_to_page(i, documents)
                             break
-                        except ValueError:
+                        except PageError:
                             documents = documents[:len(documents)//2]
                             if len(documents) == 0:
                                 print("No more space")

+ 6 - 2
mwmbl/tinysearchengine/indexer.py

@@ -36,6 +36,10 @@ class TokenizedDocument(Document):
 T = TypeVar('T')
 
 
+class PageError(Exception):
+    pass
+
+
 @dataclass
 class TinyIndexMetadata:
     version: int
@@ -68,7 +72,7 @@ def _get_page_data(compressor, page_size, data):
 def _pad_to_page_size(data: bytes, page_size: int):
     page_length = len(data)
     if page_length > page_size:
-        raise ValueError(f"Data is too big ({page_length}) for page size ({page_size})")
+        raise PageError(f"Data is too big ({page_length}) for page size ({page_size})")
     padding = b'\x00' * (page_size - page_length)
     page_data = data + padding
     return page_data
@@ -142,7 +146,7 @@ class TinyIndex(Generic[T]):
         page_index = self.get_key_page_index(key)
         try:
             self.add_to_page(page_index, [value])
-        except ValueError:
+        except PageError:
             pass
 
     def add_to_page(self, page_index: int, values: list[T]):