浏览代码

Catch corrupt data

Daoud Clarke 3 年之前
父节点
当前提交
09a9390c92
共有 1 个文件被更改,包括 6 次插入和 2 次删除
  1. 6 2
      mwmbl/tinysearchengine/indexer.py

+ 6 - 2
mwmbl/tinysearchengine/indexer.py

@@ -7,7 +7,7 @@ from mmap import mmap, PROT_READ, PROT_WRITE
 from typing import TypeVar, Generic, Callable, List
 
 import mmh3
-from zstandard import ZstdDecompressor, ZstdCompressor
+from zstandard import ZstdDecompressor, ZstdCompressor, ZstdError
 
 VERSION = 1
 METADATA_CONSTANT = b'mwmbl-tiny-search'
@@ -128,7 +128,11 @@ class TinyIndex(Generic[T]):
 
     def _get_page_tuples(self, i):
         page_data = self.mmap[i * self.page_size:(i + 1) * self.page_size]
-        decompressed_data = self.decompressor.decompress(page_data)
+        try:
+            decompressed_data = self.decompressor.decompress(page_data)
+        except ZstdError:
+            logger.exception(f"Error decompressing page data, content: {page_data}")
+            return []
         # logger.debug(f"Decompressed data: {decompressed_data}")
         return json.loads(decompressed_data.decode('utf8'))