2 年前 · 5aaefe4e62
--- a/Userland/Libraries/LibCompress/Deflate.cpp
+++ b/Userland/Libraries/LibCompress/Deflate.cpp
@@ -63,14 +63,26 @@ Optional<CanonicalCode> CanonicalCode::from_bytes(ReadonlyBytes bytes)
 
				             last_non_zero = i;
			
 
				         }
			
 
				     }
			
 
				+
			
 
				     if (non_zero_symbols == 1) { // special case - only 1 symbol
			
 
				-        code.m_symbol_codes.append(0b10);
			
 
				-        code.m_symbol_values.append(last_non_zero);
			
 
				+        code.m_prefix_table[0] = PrefixTableEntry { static_cast<u16>(last_non_zero), 1u };
			
 
				+        code.m_prefix_table[1] = code.m_prefix_table[0];
			
 
				+        code.m_max_prefixed_code_length = 1;
			
 
				+
			
 
				         code.m_bit_codes[last_non_zero] = 0;
			
 
				         code.m_bit_code_lengths[last_non_zero] = 1;
			
 
				+
			
 
				         return code;
			
 
				     }
			
 
				 
			
 
				+    struct PrefixCode {
			
 
				+        u16 symbol_code { 0 };
			
 
				+        u16 symbol_value { 0 };
			
 
				+        u16 code_length { 0 };
			
 
				+    };
			
 
				+    Array<PrefixCode, 1 << CanonicalCode::max_allowed_prefixed_code_length> prefix_codes;
			
 
				+    size_t number_of_prefix_codes = 0;
			
 
				+
			
 
				     auto next_code = 0;
			
 
				     for (size_t code_length = 1; code_length <= 15; ++code_length) {
			
 
				         next_code <<= 1;
			
@@ -83,8 +95,18 @@ Optional<CanonicalCode> CanonicalCode::from_bytes(ReadonlyBytes bytes)
 
				             if (next_code > start_bit)
			
 
				                 return {};
			
 
				 
			
 
				-            code.m_symbol_codes.append(start_bit | next_code);
			
 
				-            code.m_symbol_values.append(symbol);
			
 
				+            if (code_length <= CanonicalCode::max_allowed_prefixed_code_length) {
			
 
				+                auto& prefix_code = prefix_codes[number_of_prefix_codes++];
			
 
				+                prefix_code.symbol_code = next_code;
			
 
				+                prefix_code.symbol_value = symbol;
			
 
				+                prefix_code.code_length = code_length;
			
 
				+
			
 
				+                code.m_max_prefixed_code_length = code_length;
			
 
				+            } else {
			
 
				+                code.m_symbol_codes.append(start_bit | next_code);
			
 
				+                code.m_symbol_values.append(symbol);
			
 
				+            }
			
 
				+
			
 
				             code.m_bit_codes[symbol] = fast_reverse16(start_bit | next_code, code_length); // DEFLATE writes huffman encoded symbols as lsb-first
			
 
				             code.m_bit_code_lengths[symbol] = code_length;
			
 
				 
			
@@ -96,24 +118,44 @@ Optional<CanonicalCode> CanonicalCode::from_bytes(ReadonlyBytes bytes)
 
				         return {};
			
 
				     }
			
 
				 
			
 
				+    for (auto [symbol_code, symbol_value, code_length] : prefix_codes) {
			
 
				+        if (code_length == 0 || code_length > CanonicalCode::max_allowed_prefixed_code_length)
			
 
				+            break;
			
 
				+
			
 
				+        auto shift = code.m_max_prefixed_code_length - code_length;
			
 
				+        symbol_code <<= shift;
			
 
				+
			
 
				+        for (size_t j = 0; j < (1u << shift); ++j) {
			
 
				+            auto index = fast_reverse16(symbol_code + j, code.m_max_prefixed_code_length);
			
 
				+            code.m_prefix_table[index] = PrefixTableEntry { symbol_value, code_length };
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				     return code;
			
 
				 }
			
 
				 
			
 
				 ErrorOr<u32> CanonicalCode::read_symbol(LittleEndianInputBitStream& stream) const
			
 
				 {
			
 
				-    u32 code_bits = 1;
			
 
				+    auto prefix = TRY(stream.peek_bits<size_t>(m_max_prefixed_code_length)); // Fast path: look ahead m_max_prefixed_code_length bits and use them as an index into m_prefix_table.
			
 
				 
			
 
				-    for (;;) {
			
 
				-        code_bits = code_bits << 1 | TRY(stream.read_bit());
			
 
				-        if (code_bits >= (1 << 16))
			
 
				-            return Error::from_string_literal("Symbol exceeds maximum symbol number");
			
 
				+    if (auto [symbol_value, code_length] = m_prefix_table[prefix]; code_length != 0) { // code_length == 0 is the default-initialized (unpopulated) entry, i.e. a table miss.
			
 
				+        stream.discard_previously_peeked_bits(code_length); // Consume only this symbol's code_length bits of the look-ahead.
			
 
				+        return symbol_value;
			
 
				+    }
			
 
				+
			
 
				+    auto code_bits = TRY(stream.read_bits<u16>(m_max_prefixed_code_length)); // Slow path: consume the whole prefix that was only peeked above.
			
 
				+    code_bits = fast_reverse16(code_bits, m_max_prefixed_code_length); // Reverse the prefix; m_symbol_codes holds un-reversed codes while the stream is LSB-first (see from_bytes).
			
 
				+    code_bits |= 1 << m_max_prefixed_code_length; // Add a marker bit above the prefix - presumably mirrors the start_bit set on entries of m_symbol_codes; confirm in from_bytes.
			
 
				 
			
 
				-        // FIXME: This is very inefficient and could greatly be improved by implementing this
			
 
				-        //        algorithm: https://www.hanshq.net/zip.html#huffdec
			
 
				+    for (size_t i = m_max_prefixed_code_length; i < 16; ++i) { // Lengthen the candidate code one bit per iteration; i counts the code bits gathered so far (fewer than 16).
			
 
				         size_t index;
			
 
				         if (binary_search(m_symbol_codes.span(), code_bits, &index))
			
 
				             return m_symbol_values[index];
			
 
				+
			
 
				+        code_bits = code_bits << 1 | TRY(stream.read_bit()); // No match at this length: append the next stream bit as the new lowest bit.
			
 
				     }
			
 
				+
			
 
				+    return Error::from_string_literal("Symbol exceeds maximum symbol number"); // Reached once the loop gives up without a match in m_symbol_codes.
			
 
				 }
			
 
				 
			
 
				 ErrorOr<void> CanonicalCode::write_symbol(LittleEndianOutputBitStream& stream, u32 symbol) const
			
--- a/Userland/Libraries/LibCompress/Deflate.h
+++ b/Userland/Libraries/LibCompress/Deflate.h
@@ -30,10 +30,20 @@ public:
 
				     static Optional<CanonicalCode> from_bytes(ReadonlyBytes);
			
 
				 
			
 
				 private:
			
 
				+    static constexpr size_t max_allowed_prefixed_code_length = 8; // Codes up to this many bits are decoded through m_prefix_table, which has 1 << this many entries.
			
 
				+
			
 
				+    struct PrefixTableEntry { // One slot of the prefix lookup table consulted by read_symbol().
			
 
				+        u16 symbol_value { 0 }; // Symbol that read_symbol() returns when this slot is hit.
			
 
				+        u16 code_length { 0 }; // Number of bits read_symbol() consumes on a hit; 0 (the default) marks an unpopulated slot.
			
 
				+    };
			
 
				+
			
 
				     // Decompression - indexed by code
			
 
				     Vector<u16> m_symbol_codes;
			
 
				     Vector<u16> m_symbol_values;
			
 
				 
			
 
				+    Array<PrefixTableEntry, 1 << max_allowed_prefixed_code_length> m_prefix_table {}; // Indexed by the next m_max_prefixed_code_length bits peeked from the input stream.
			
 
				+    size_t m_max_prefixed_code_length { 0 }; // Index width, in bits, of m_prefix_table for this code; assigned in from_bytes().
			
 
				+
			
 
				     // Compression - indexed by symbol
			
 
				     Array<u16, 288> m_bit_codes {}; // deflate uses a maximum of 288 symbols (maximum of 32 for distances)
			
 
				     Array<u16, 288> m_bit_code_lengths {};