ソースを参照

LibCompress: Use prefix tables to decode Huffman codes up to 8 bits long

Huffman codes have a useful property in that they are prefix codes. That
is, a set of bits representing a Huffman-coded symbol is never a prefix
of another symbol. This allows us to create a table where each index in
the table is an integer whose prefix is the entry's corresponding Huffman
code.

With Deflate, we can have codes up to 16 bits in length, which would require
a prefix table with 2^16 entries. So instead of creating a table to fit all
possible codes, we use a cutoff of 8-bit codes. Codes longer than 8 bits
fall back to the binary search method.

Using the "enwik8" file as a test (100MB uncompressed, commonly used in
benchmarks: https://www.mattmahoney.net/dc/enwik8.zip), decompression
time decreases from 3.527s to 2.585s on Linux.
Timothy Flynn 2 年 前
コミット
5aaefe4e62

+ 53 - 11
Userland/Libraries/LibCompress/Deflate.cpp

@@ -63,14 +63,26 @@ Optional<CanonicalCode> CanonicalCode::from_bytes(ReadonlyBytes bytes)
             last_non_zero = i;
         }
     }
+
     if (non_zero_symbols == 1) { // special case - only 1 symbol
-        code.m_symbol_codes.append(0b10);
-        code.m_symbol_values.append(last_non_zero);
+        code.m_prefix_table[0] = PrefixTableEntry { static_cast<u16>(last_non_zero), 1u };
+        code.m_prefix_table[1] = code.m_prefix_table[0];
+        code.m_max_prefixed_code_length = 1;
+
         code.m_bit_codes[last_non_zero] = 0;
         code.m_bit_code_lengths[last_non_zero] = 1;
+
         return code;
     }
 
+    struct PrefixCode {
+        u16 symbol_code { 0 };
+        u16 symbol_value { 0 };
+        u16 code_length { 0 };
+    };
+    Array<PrefixCode, 1 << CanonicalCode::max_allowed_prefixed_code_length> prefix_codes;
+    size_t number_of_prefix_codes = 0;
+
     auto next_code = 0;
     for (size_t code_length = 1; code_length <= 15; ++code_length) {
         next_code <<= 1;
@@ -83,8 +95,18 @@ Optional<CanonicalCode> CanonicalCode::from_bytes(ReadonlyBytes bytes)
             if (next_code > start_bit)
                 return {};
 
-            code.m_symbol_codes.append(start_bit | next_code);
-            code.m_symbol_values.append(symbol);
+            if (code_length <= CanonicalCode::max_allowed_prefixed_code_length) {
+                auto& prefix_code = prefix_codes[number_of_prefix_codes++];
+                prefix_code.symbol_code = next_code;
+                prefix_code.symbol_value = symbol;
+                prefix_code.code_length = code_length;
+
+                code.m_max_prefixed_code_length = code_length;
+            } else {
+                code.m_symbol_codes.append(start_bit | next_code);
+                code.m_symbol_values.append(symbol);
+            }
+
             code.m_bit_codes[symbol] = fast_reverse16(start_bit | next_code, code_length); // DEFLATE writes huffman encoded symbols as lsb-first
             code.m_bit_code_lengths[symbol] = code_length;
 
@@ -96,24 +118,44 @@ Optional<CanonicalCode> CanonicalCode::from_bytes(ReadonlyBytes bytes)
         return {};
     }
 
+    for (auto [symbol_code, symbol_value, code_length] : prefix_codes) {
+        if (code_length == 0 || code_length > CanonicalCode::max_allowed_prefixed_code_length)
+            break;
+
+        auto shift = code.m_max_prefixed_code_length - code_length;
+        symbol_code <<= shift;
+
+        for (size_t j = 0; j < (1u << shift); ++j) {
+            auto index = fast_reverse16(symbol_code + j, code.m_max_prefixed_code_length);
+            code.m_prefix_table[index] = PrefixTableEntry { symbol_value, code_length };
+        }
+    }
+
     return code;
 }
 
 ErrorOr<u32> CanonicalCode::read_symbol(LittleEndianInputBitStream& stream) const
 {
-    u32 code_bits = 1;
+    auto prefix = TRY(stream.peek_bits<size_t>(m_max_prefixed_code_length));
 
-    for (;;) {
-        code_bits = code_bits << 1 | TRY(stream.read_bit());
-        if (code_bits >= (1 << 16))
-            return Error::from_string_literal("Symbol exceeds maximum symbol number");
+    if (auto [symbol_value, code_length] = m_prefix_table[prefix]; code_length != 0) {
+        stream.discard_previously_peeked_bits(code_length);
+        return symbol_value;
+    }
+
+    auto code_bits = TRY(stream.read_bits<u16>(m_max_prefixed_code_length));
+    code_bits = fast_reverse16(code_bits, m_max_prefixed_code_length);
+    code_bits |= 1 << m_max_prefixed_code_length;
 
-        // FIXME: This is very inefficient and could greatly be improved by implementing this
-        //        algorithm: https://www.hanshq.net/zip.html#huffdec
+    for (size_t i = m_max_prefixed_code_length; i < 16; ++i) {
         size_t index;
         if (binary_search(m_symbol_codes.span(), code_bits, &index))
             return m_symbol_values[index];
+
+        code_bits = code_bits << 1 | TRY(stream.read_bit());
     }
+
+    return Error::from_string_literal("Symbol exceeds maximum symbol number");
 }
 
 ErrorOr<void> CanonicalCode::write_symbol(LittleEndianOutputBitStream& stream, u32 symbol) const

+ 10 - 0
Userland/Libraries/LibCompress/Deflate.h

@@ -30,10 +30,20 @@ public:
     static Optional<CanonicalCode> from_bytes(ReadonlyBytes);
 
 private:
+    static constexpr size_t max_allowed_prefixed_code_length = 8;
+
+    struct PrefixTableEntry {
+        u16 symbol_value { 0 };
+        u16 code_length { 0 };
+    };
+
     // Decompression - indexed by code
     Vector<u16> m_symbol_codes;
     Vector<u16> m_symbol_values;
 
+    Array<PrefixTableEntry, 1 << max_allowed_prefixed_code_length> m_prefix_table {};
+    size_t m_max_prefixed_code_length { 0 };
+
     // Compression - indexed by symbol
     Array<u16, 288> m_bit_codes {}; // deflate uses a maximum of 288 symbols (maximum of 32 for distances)
     Array<u16, 288> m_bit_code_lengths {};