From a471bc2437ded7aea51b2cef9a7abec4c03db1fb Mon Sep 17 00:00:00 2001
From: Daoud Clarke <daoud.clarke@gmail.com>
Date: Mon, 18 Jul 2022 22:05:24 +0100
Subject: [PATCH] Use a more specific exception in case we're discarding ones
 we shouldn't

---
 mwmbl/indexer/update_pages.py     | 4 ++--
 mwmbl/tinysearchengine/indexer.py | 8 ++++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/mwmbl/indexer/update_pages.py b/mwmbl/indexer/update_pages.py
index a11a216..d3e7afd 100644
--- a/mwmbl/indexer/update_pages.py
+++ b/mwmbl/indexer/update_pages.py
@@ -6,7 +6,7 @@ from time import sleep
 
 from mwmbl.database import Database
 from mwmbl.indexer.indexdb import IndexDatabase
-from mwmbl.tinysearchengine.indexer import TinyIndex, Document
+from mwmbl.tinysearchengine.indexer import TinyIndex, Document, PageError
 
 
 def run_update(index_path):
@@ -27,7 +27,7 @@ def run_update(index_path):
                         try:
                             indexer.add_to_page(i, documents)
                             break
-                        except ValueError:
+                        except PageError:
                             documents = documents[:len(documents)//2]
                             if len(documents) == 0:
                                 print("No more space")
diff --git a/mwmbl/tinysearchengine/indexer.py b/mwmbl/tinysearchengine/indexer.py
index 4c9bfef..a9aa9f4 100644
--- a/mwmbl/tinysearchengine/indexer.py
+++ b/mwmbl/tinysearchengine/indexer.py
@@ -36,6 +36,10 @@ class TokenizedDocument(Document):
 T = TypeVar('T')
 
 
+class PageError(Exception):
+    pass
+
+
 @dataclass
 class TinyIndexMetadata:
     version: int
@@ -68,7 +72,7 @@ def _get_page_data(compressor, page_size, data):
 def _pad_to_page_size(data: bytes, page_size: int):
     page_length = len(data)
     if page_length > page_size:
-        raise ValueError(f"Data is too big ({page_length}) for page size ({page_size})")
+        raise PageError(f"Data is too big ({page_length}) for page size ({page_size})")
     padding = b'\x00' * (page_size - page_length)
     page_data = data + padding
     return page_data
@@ -142,7 +146,7 @@ class TinyIndex(Generic[T]):
         page_index = self.get_key_page_index(key)
         try:
             self.add_to_page(page_index, [value])
-        except ValueError:
+        except PageError:
             pass
 
     def add_to_page(self, page_index: int, values: list[T]):