Pārlūkot izejas kodu

LibCompress: Handle literal-only LZ77 streams in DeflateCompressor

Highly incompressible data can sometimes produce an LZ77 stream with no
back-references, in which case no distance Huffman code is created (as
none is needed). VERIFY that the distance code exists only when a
back-reference actually has to be written to the stream.
Idan Horowitz 4 gadi atpakaļ
vecāks
revīzija
7e587a615e

+ 6 - 6
Userland/Libraries/LibCompress/Deflate.cpp

@@ -792,21 +792,22 @@ size_t DeflateCompressor::dynamic_block_length(const Array<u8, max_huffman_liter
     return length + huffman_block_length(literal_bit_lengths, distance_bit_lengths);
 }
 
-void DeflateCompressor::write_huffman(const CanonicalCode& literal_code, const CanonicalCode& distance_code)
+void DeflateCompressor::write_huffman(const CanonicalCode& literal_code, const Optional<CanonicalCode>& distance_code)
 {
+    auto has_distances = distance_code.has_value();
     for (size_t i = 0; i < m_pending_symbol_size; i++) {
         if (m_symbol_buffer[i].distance == 0) {
             literal_code.write_symbol(m_output_stream, m_symbol_buffer[i].literal);
             continue;
         }
-
+        VERIFY(has_distances);
         auto symbol = length_to_symbol[m_symbol_buffer[i].length];
         literal_code.write_symbol(m_output_stream, symbol);
         // Emit extra bits if needed
         m_output_stream.write_bits(m_symbol_buffer[i].length - packed_length_symbols[symbol - 257].base_length, packed_length_symbols[symbol - 257].extra_bits);
 
         auto base_distance = distance_to_base(m_symbol_buffer[i].distance);
-        distance_code.write_symbol(m_output_stream, base_distance);
+        distance_code.value().write_symbol(m_output_stream, base_distance);
         // Emit extra bits if needed
         m_output_stream.write_bits(m_symbol_buffer[i].distance - packed_distances[base_distance].base_distance, packed_distances[base_distance].extra_bits);
     }
@@ -880,7 +881,7 @@ size_t DeflateCompressor::encode_block_lengths(const Array<u8, max_huffman_liter
     return encode_huffman_lengths(all_lengths, lengths_count, encoded_lengths);
 }
 
-void DeflateCompressor::write_dynamic_huffman(const CanonicalCode& literal_code, size_t literal_code_count, const CanonicalCode& distance_code, size_t distance_code_count, const Array<u8, 19>& code_lengths_bit_lengths, size_t code_length_count, const Array<code_length_symbol, max_huffman_literals + max_huffman_distances>& encoded_lengths, size_t encoded_lengths_count)
+void DeflateCompressor::write_dynamic_huffman(const CanonicalCode& literal_code, size_t literal_code_count, const Optional<CanonicalCode>& distance_code, size_t distance_code_count, const Array<u8, 19>& code_lengths_bit_lengths, size_t code_length_count, const Array<code_length_symbol, max_huffman_literals + max_huffman_distances>& encoded_lengths, size_t encoded_lengths_count)
 {
     m_output_stream.write_bits(literal_code_count - 257, 5);
     m_output_stream.write_bits(distance_code_count - 1, 5);
@@ -991,8 +992,7 @@ void DeflateCompressor::flush()
         auto literal_code = CanonicalCode::from_bytes(dynamic_literal_bit_lengths);
         VERIFY(literal_code.has_value());
         auto distance_code = CanonicalCode::from_bytes(dynamic_distance_bit_lengths);
-        VERIFY(distance_code.has_value());
-        write_dynamic_huffman(literal_code.value(), literal_code_count, distance_code.value(), distance_code_count, code_lengths_bit_lengths, code_lengths_count, encoded_lengths, encoded_lengths_count);
+        write_dynamic_huffman(literal_code.value(), literal_code_count, distance_code, distance_code_count, code_lengths_bit_lengths, code_lengths_count, encoded_lengths, encoded_lengths_count);
     }
     if (m_finished)
         m_output_stream.align_to_byte_boundary();

+ 2 - 2
Userland/Libraries/LibCompress/Deflate.h

@@ -190,10 +190,10 @@ private:
     template<size_t Size>
     static void generate_huffman_lengths(Array<u8, Size>& lengths, const Array<u16, Size>& frequencies, size_t max_bit_length, u16 frequency_cap = UINT16_MAX);
     size_t huffman_block_length(const Array<u8, max_huffman_literals>& literal_bit_lengths, const Array<u8, max_huffman_distances>& distance_bit_lengths);
-    void write_huffman(const CanonicalCode& literal_code, const CanonicalCode& distance_code);
+    void write_huffman(const CanonicalCode& literal_code, const Optional<CanonicalCode>& distance_code);
     static size_t encode_huffman_lengths(const Array<u8, max_huffman_literals + max_huffman_distances>& lengths, size_t lengths_count, Array<code_length_symbol, max_huffman_literals + max_huffman_distances>& encoded_lengths);
     size_t encode_block_lengths(const Array<u8, max_huffman_literals>& literal_bit_lengths, const Array<u8, max_huffman_distances>& distance_bit_lengths, Array<code_length_symbol, max_huffman_literals + max_huffman_distances>& encoded_lengths, size_t& literal_code_count, size_t& distance_code_count);
-    void write_dynamic_huffman(const CanonicalCode& literal_code, size_t literal_code_count, const CanonicalCode& distance_code, size_t distance_code_count, const Array<u8, 19>& code_lengths_bit_lengths, size_t code_length_count, const Array<code_length_symbol, max_huffman_literals + max_huffman_distances>& encoded_lengths, size_t encoded_lengths_count);
+    void write_dynamic_huffman(const CanonicalCode& literal_code, size_t literal_code_count, const Optional<CanonicalCode>& distance_code, size_t distance_code_count, const Array<u8, 19>& code_lengths_bit_lengths, size_t code_length_count, const Array<code_length_symbol, max_huffman_literals + max_huffman_distances>& encoded_lengths, size_t encoded_lengths_count);
 
     size_t uncompressed_block_length();
     size_t fixed_block_length();

+ 8 - 0
Userland/Utilities/test-compress.cpp

@@ -168,6 +168,14 @@ TEST_CASE(deflate_round_trip_compress_large)
     EXPECT(uncompressed.value() == original);
 }
 
+TEST_CASE(deflate_compress_literals)
+{
+    // This byte array is known to not produce any back references with our lz77 implementation even at the highest compression settings
+    Array<u8, 0x13> test { 0, 0, 0, 0, 0x72, 0, 0, 0xee, 0, 0, 0, 0x26, 0, 0, 0, 0x28, 0, 0, 0x72 };
+    auto compressed = Compress::DeflateCompressor::compress_all(test, Compress::DeflateCompressor::CompressionLevel::GOOD);
+    EXPECT(compressed.has_value());
+}
+
 TEST_CASE(zlib_decompress_simple)
 {
     const Array<u8, 40> compressed {