浏览代码

LibCompress: Implement DEFLATE compression

This commit adds a fully functional DEFLATE compression
implementation that can be used to implement compression
for higher level formats like gzip, zlib or zip.

A large part of this commit is based on Hans Wennborg's
great article about the DEFLATE and zip specifications:
https://www.hanshq.net/zip.html
Idan Horowitz 4 年之前
父节点
当前提交
bcbfa7db62

+ 600 - 17
Userland/Libraries/LibCompress/Deflate.cpp

@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2020, the SerenityOS developers
+ * Copyright (c) 2021, Idan Horowitz <idan.horowitz@gmail.com>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -26,8 +27,10 @@
 
 #include <AK/Array.h>
 #include <AK/Assertions.h>
+#include <AK/BinaryHeap.h>
 #include <AK/BinarySearch.h>
 #include <AK/MemoryStream.h>
+#include <string.h>
 
 #include <LibCompress/Deflate.h>
 
@@ -41,13 +44,7 @@ const CanonicalCode& CanonicalCode::fixed_literal_codes()
     if (initialized)
         return code;
 
-    Array<u8, 288> data;
-    data.span().slice(0, 144 - 0).fill(8);
-    data.span().slice(144, 256 - 144).fill(9);
-    data.span().slice(256, 280 - 256).fill(7);
-    data.span().slice(280, 288 - 280).fill(8);
-
-    code = CanonicalCode::from_bytes(data).value();
+    code = CanonicalCode::from_bytes(fixed_literal_bit_lengths).value();
     initialized = true;
 
     return code;
@@ -61,10 +58,7 @@ const CanonicalCode& CanonicalCode::fixed_distance_codes()
     if (initialized)
         return code;
 
-    Array<u8, 32> data;
-    data.span().fill(5);
-
-    code = CanonicalCode::from_bytes(data).value();
+    code = CanonicalCode::from_bytes(fixed_distance_bit_lengths).value();
     initialized = true;
 
     return code;
@@ -76,6 +70,22 @@ Optional<CanonicalCode> CanonicalCode::from_bytes(ReadonlyBytes bytes)
 
     CanonicalCode code;
 
+    auto non_zero_symbols = 0;
+    auto last_non_zero = -1;
+    for (size_t i = 0; i < bytes.size(); i++) {
+        if (bytes[i] != 0) {
+            non_zero_symbols++;
+            last_non_zero = i;
+        }
+    }
+    if (non_zero_symbols == 1) { // special case - only 1 symbol
+        code.m_symbol_codes.append(0b10);
+        code.m_symbol_values.append(last_non_zero);
+        code.m_bit_codes[last_non_zero] = 0;
+        code.m_bit_code_lengths[last_non_zero] = 1;
+        return code;
+    }
+
     auto next_code = 0;
     for (size_t code_length = 1; code_length <= 15; ++code_length) {
         next_code <<= 1;
@@ -90,6 +100,8 @@ Optional<CanonicalCode> CanonicalCode::from_bytes(ReadonlyBytes bytes)
 
             code.m_symbol_codes.append(start_bit | next_code);
             code.m_symbol_values.append(symbol);
+            code.m_bit_codes[symbol] = fast_reverse16(start_bit | next_code, code_length); // DEFLATE writes huffman encoded symbols as lsb-first
+            code.m_bit_code_lengths[symbol] = code_length;
 
             next_code++;
         }
@@ -118,6 +130,11 @@ u32 CanonicalCode::read_symbol(InputBitStream& stream) const
     }
 }
 
+// Writes the precomputed code of `symbol` to `stream`, using as many bits as were recorded for that symbol.
+void CanonicalCode::write_symbol(OutputBitStream& stream, u32 symbol) const
+{
+    const auto code = m_bit_codes[symbol];
+    const auto bit_count = m_bit_code_lengths[symbol];
+    stream.write_bits(code, bit_count);
+}
+
 DeflateDecompressor::CompressedBlock::CompressedBlock(DeflateDecompressor& decompressor, CanonicalCode literal_codes, Optional<CanonicalCode> distance_codes)
     : m_decompressor(decompressor)
     , m_literal_codes(literal_codes)
@@ -366,8 +383,7 @@ void DeflateDecompressor::decode_codes(CanonicalCode& literal_code, Optional<Can
 
     u8 code_lengths_code_lengths[19] = { 0 };
     for (size_t i = 0; i < code_length_count; ++i) {
-        static const size_t indices[] { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
-        code_lengths_code_lengths[indices[i]] = m_input_stream.read_bits(3);
+        code_lengths_code_lengths[code_lengths_code_lengths_order[i]] = m_input_stream.read_bits(3);
     }
 
     // Now we can extract the code that was used to encode the code lengths of the code that was used to
@@ -386,21 +402,21 @@ void DeflateDecompressor::decode_codes(CanonicalCode& literal_code, Optional<Can
     while (code_lengths.size() < literal_code_count + distance_code_count) {
         auto symbol = code_length_code.read_symbol(m_input_stream);
 
-        if (symbol <= 15) {
+        if (symbol < DeflateSpecialCodeLengths::COPY) {
             code_lengths.append(static_cast<u8>(symbol));
             continue;
-        } else if (symbol == 17) {
+        } else if (symbol == DeflateSpecialCodeLengths::ZEROS) {
             auto nrepeat = 3 + m_input_stream.read_bits(3);
             for (size_t j = 0; j < nrepeat; ++j)
                 code_lengths.append(0);
             continue;
-        } else if (symbol == 18) {
+        } else if (symbol == DeflateSpecialCodeLengths::LONG_ZEROS) {
             auto nrepeat = 11 + m_input_stream.read_bits(7);
             for (size_t j = 0; j < nrepeat; ++j)
                 code_lengths.append(0);
             continue;
         } else {
-            VERIFY(symbol == 16);
+            VERIFY(symbol == DeflateSpecialCodeLengths::COPY);
 
             if (code_lengths.is_empty()) {
                 set_fatal_error();
@@ -448,4 +464,571 @@ void DeflateDecompressor::decode_codes(CanonicalCode& literal_code, Optional<Can
     distance_code = distance_code_result.value();
 }
 
+// Creates a compressor that writes to `stream`, tuned by the constants table entry selected by `compression_level`.
+// Both frequency tables start out zeroed.
+DeflateCompressor::DeflateCompressor(OutputStream& stream, CompressionLevel compression_level)
+    : m_compression_level(compression_level)
+    // index with the constructor argument rather than the member, so this does not depend on member declaration order
+    , m_compression_constants(compression_constants[static_cast<int>(compression_level)])
+    , m_output_stream(stream)
+{
+    m_symbol_frequencies.fill(0);
+    m_distance_frequencies.fill(0);
+}
+
+DeflateCompressor::~DeflateCompressor()
+{
+    VERIFY(m_finished); // m_finished is set by final_flush(), so it must have been called before destruction
+}
+
+// Copies `bytes` into the pending block, calling flush() every time the pending block reaches block_size.
+// Returns the number of bytes consumed.
+size_t DeflateCompressor::write(ReadonlyBytes bytes)
+{
+    VERIFY(!m_finished);
+
+    // Iterative on purpose: the recursive version nested one call per block_size bytes of input
+    size_t total_written = 0;
+    while (bytes.size() != 0) {
+        auto n_written = bytes.copy_trimmed_to(pending_block().slice(m_pending_block_size));
+        m_pending_block_size += n_written;
+
+        if (m_pending_block_size == block_size)
+            flush();
+
+        // the pending block is full and flush() did not drain it: report a short write instead of spinning forever
+        if (n_written == 0 && m_pending_block_size == block_size)
+            break;
+
+        total_written += n_written;
+        bytes = bytes.slice(n_written);
+    }
+
+    return total_written;
+}
+
+bool DeflateCompressor::write_or_error(ReadonlyBytes bytes)
+{
+    if (write(bytes) < bytes.size()) {
+        set_fatal_error();
+        return false;
+    }
+
+    return true;
+}
+
+// Knuth's multiplicative hash on 4 bytes
+// Reads bytes[0..3] and returns the top hash_bits bits of the 32 bit product.
+u16 DeflateCompressor::hash_sequence(const u8* bytes)
+{
+    constexpr const u32 knuth_constant = 2654435761; // shares no common factors with 2^32
+    // Widen every byte to u32 before shifting: a u8 promotes to (signed) int, and shifting that by 24 reaches the sign bit
+    const u32 sequence = static_cast<u32>(bytes[0])
+        | (static_cast<u32>(bytes[1]) << 8)
+        | (static_cast<u32>(bytes[2]) << 16)
+        | (static_cast<u32>(bytes[3]) << 24);
+    return (sequence * knuth_constant) >> (32 - hash_bits);
+}
+
+// Measures how far the bytes at `start` and `candidate` in the rolling window agree.
+// Returns 0 unless the match is longer than previous_match_length; otherwise its length, capped at maximum_match_length.
+size_t DeflateCompressor::compare_match_candidate(size_t start, size_t candidate, size_t previous_match_length, size_t maximum_match_length)
+{
+    VERIFY(previous_match_length < maximum_match_length);
+
+    // Reject early unless the first (previous_match_length + 1) bytes agree; walk from the far end as it is the likeliest to differ
+    size_t remaining = previous_match_length + 1;
+    while (remaining > 0) {
+        const auto offset = remaining - 1;
+        if (m_rolling_window[start + offset] != m_rolling_window[candidate + offset])
+            return 0;
+        remaining--;
+    }
+
+    // The required prefix agrees, now extend forward to find the real length
+    size_t length = previous_match_length + 1;
+    for (; length < maximum_match_length; length++) {
+        if (m_rolling_window[start + length] != m_rolling_window[candidate + length])
+            break;
+    }
+
+    VERIFY(length > previous_match_length);
+    VERIFY(length <= maximum_match_length);
+    return length;
+}
+
+// Walks the hash chain of `hash` looking for an earlier position whose bytes match those at `start` for longer than previous_match_length.
+// Returns the best length found, or 0 if nothing better was found; match_position is only written when a match is found.
+size_t DeflateCompressor::find_back_match(size_t start, u16 hash, size_t previous_match_length, size_t maximum_match_length, size_t& match_position)
+{
+    auto chain_budget = m_compression_constants.max_chain;
+    if (previous_match_length == 0)
+        previous_match_length = min_match_length - 1; // only matches of at least min_match_length are interesting
+    if (previous_match_length >= maximum_match_length)
+        return 0; // a maximum length match cannot be improved
+    if (previous_match_length >= m_compression_constants.max_lazy_length)
+        return 0; // the previous match is already good enough, do not spend another full search on it
+    if (previous_match_length >= m_compression_constants.good_match_length)
+        chain_budget /= 4; // the previous match is pretty good, so only do a shorter search
+
+    auto found_any = false;
+    for (auto candidate = m_hash_head[hash]; chain_budget != 0; candidate = m_hash_prev[candidate % window_size]) {
+        chain_budget--;
+
+        if (candidate == empty_slot)
+            break; // the chain has no more candidates
+
+        VERIFY(candidate < start);
+        if (start - candidate > window_size)
+            break; // outside the window
+
+        const auto length = compare_match_candidate(start, candidate, previous_match_length, maximum_match_length);
+        if (length == 0)
+            continue; // not an improvement, move on to the next candidate in the chain
+
+        found_any = true;
+        match_position = candidate;
+        previous_match_length = length;
+
+        if (length == maximum_match_length)
+            return length; // it cannot get any better than this
+    }
+
+    // previous_match_length now holds the longest match seen, if there was one
+    return found_any ? previous_match_length : 0;
+}
+
+// Looks up the distance code of a back reference distance: distances up to 256 index distance_to_base_lo directly,
+// larger ones index distance_to_base_hi in steps of 128.
+ALWAYS_INLINE u8 DeflateCompressor::distance_to_base(u16 distance)
+{
+    const auto zero_based = distance - 1;
+    if (distance <= 256)
+        return distance_to_base_lo[zero_based];
+    return distance_to_base_hi[zero_based >> 7];
+}
+
+// Computes huffman code lengths for `frequencies`, using at most max_bit_length bits per symbol.
+// Symbols with a frequency of 0 get a length of 0. With fewer than 2 used symbols, each used symbol gets a length of 1.
+// If the tree comes out deeper than max_bit_length, the frequencies are capped at half the previous cap and the tree is rebuilt.
+template<size_t Size>
+void DeflateCompressor::generate_huffman_lengths(Array<u8, Size>& lengths, const Array<u16, Size>& frequencies, size_t max_bit_length)
+{
+    VERIFY((1u << max_bit_length) >= Size);
+    u16 heap_keys[Size]; // Used for O(n) heap construction
+    u16 heap_values[Size];
+
+    // Parent links of the tree nodes: internal nodes use ids below Size (the root is 1), leaves use ids starting at Size.
+    // The two ranges must not overlap, otherwise a leaf and an internal node would share a slot.
+    u16 huffman_links[Size * 2 + 1] = { 0 };
+    u16 frequency_cap = UINT16_MAX;
+try_again:
+    size_t non_zero_freqs = 0;
+    for (size_t i = 0; i < Size; i++) {
+        auto frequency = frequencies[i];
+        if (frequency == 0)
+            continue;
+
+        if (frequency > frequency_cap) {
+            frequency = frequency_cap;
+        }
+
+        heap_keys[non_zero_freqs] = frequency;               // sort symbols by frequency
+        heap_values[non_zero_freqs] = Size + non_zero_freqs; // huffman_links "links"
+        non_zero_freqs++;
+    }
+
+    // special case for only 1 used symbol
+    if (non_zero_freqs < 2) {
+        for (size_t i = 0; i < Size; i++)
+            lengths[i] = (frequencies[i] == 0) ? 0 : 1;
+        return;
+    }
+
+    BinaryHeap<u16, u16, Size> heap { heap_keys, heap_values, non_zero_freqs };
+
+    // build the huffman tree - binary heap is used for efficient frequency comparisons
+    while (heap.size() > 1) {
+        u16 lowest_frequency = heap.peek_min_key();
+        u16 lowest_link = heap.pop_min();
+        u16 second_lowest_frequency = heap.peek_min_key();
+        u16 second_lowest_link = heap.pop_min();
+
+        // Two nodes were just popped, so this yields ids non_zero_freqs - 1 down to 1 (the root).
+        // Numbering from heap.size() + 2 instead would hand out id Size when every symbol is used, clobbering the first leaf's slot.
+        u16 new_link = heap.size() + 1;
+
+        heap.insert(lowest_frequency + second_lowest_frequency, new_link);
+
+        huffman_links[lowest_link] = new_link;
+        huffman_links[second_lowest_link] = new_link;
+    }
+
+    non_zero_freqs = 0;
+    for (size_t i = 0; i < Size; i++) {
+        if (frequencies[i] == 0) {
+            lengths[i] = 0;
+            continue;
+        }
+
+        u16 link = huffman_links[Size + non_zero_freqs];
+        non_zero_freqs++;
+
+        // the code length of a symbol is the number of links between its leaf and the root
+        size_t bit_length = 1;
+        while (link != 1) {
+            bit_length++;
+            link = huffman_links[link];
+        }
+
+        if (bit_length > max_bit_length) {
+            VERIFY(frequency_cap != 1);
+            frequency_cap /= 2;
+            goto try_again; // FIXME: gotos are ugly, but i cant think of a good way to flatten this
+        }
+
+        lengths[i] = bit_length;
+    }
+}
+
+void DeflateCompressor::lz77_compress_block() // converts the pending block into literals and back references in m_symbol_buffer, counting their frequencies
+{
+    for (auto& slot : m_hash_head) { // initialize chained hash table
+        slot = empty_slot;
+    }
+
+    auto insert_hash = [&](auto pos, auto hash) { // makes pos the newest entry of the chain for hash
+        auto window_pos = pos % window_size;
+        m_hash_prev[window_pos] = m_hash_head[hash]; // the old chain head becomes the entry following this one
+        m_hash_head[hash] = window_pos;
+    };
+
+    auto emit_literal = [&](auto literal) { // appends a literal to the symbol buffer; distance 0 marks an entry as a literal
+        VERIFY(m_pending_symbol_size <= block_size + 1);
+        auto index = m_pending_symbol_size++;
+        m_symbol_buffer[index].distance = 0;
+        m_symbol_buffer[index].literal = literal;
+        m_symbol_frequencies[literal]++;
+    };
+
+    auto emit_back_reference = [&](auto distance, auto length) { // appends a back reference and counts its length symbol and distance code
+        VERIFY(m_pending_symbol_size <= block_size + 1);
+        auto index = m_pending_symbol_size++;
+        m_symbol_buffer[index].distance = distance;
+        m_symbol_buffer[index].length = length;
+        m_symbol_frequencies[length_to_symbol[length]]++;
+        m_distance_frequencies[distance_to_base(distance)]++;
+    };
+
+    size_t previous_match_length = 0; // length of the match found at the previous position that has not been emitted yet, 0 if none
+    size_t previous_match_position = 0;
+
+    VERIFY(m_compression_constants.great_match_length <= max_match_length);
+
+    // our block starts at block_size and is m_pending_block_size in length
+    auto block_end = block_size + m_pending_block_size;
+    size_t current_position;
+    for (current_position = block_size; current_position < block_end - min_match_length + 1; current_position++) { // stops where fewer than min_match_length bytes remain
+        auto hash = hash_sequence(&m_rolling_window[current_position]);
+        size_t match_position; // only meaningful when find_back_match returns a non-zero length
+        auto match_length = find_back_match(current_position, hash, previous_match_length,
+            min(m_compression_constants.great_match_length, block_end - current_position), match_position);
+
+        insert_hash(current_position, hash);
+
+        // if the previous match is as good as the new match, just use it
+        if (previous_match_length != 0 && previous_match_length >= match_length) {
+            emit_back_reference((current_position - 1) - previous_match_position, previous_match_length); // the previous match started one byte before current_position
+
+            // insert hashes for the remaining bytes covered by this match, then skip past them
+            for (size_t j = current_position + 1; j < min(current_position - 1 + previous_match_length, block_end - min_match_length + 1); j++) {
+                insert_hash(j, hash_sequence(&m_rolling_window[j]));
+            }
+            current_position = (current_position - 1) + previous_match_length - 1; // the loop increment then lands on the first byte after the match
+            previous_match_length = 0;
+            continue;
+        }
+
+        if (match_length == 0) {
+            VERIFY(previous_match_length == 0);
+            emit_literal(m_rolling_window[current_position]);
+            continue;
+        }
+
+        // if this is a lazy match, and the new match is better than the old one, output previous as literal
+        if (previous_match_length != 0) {
+            emit_literal(m_rolling_window[current_position - 1]);
+        }
+
+        previous_match_length = match_length; // defer the decision: the next position may yield a longer match
+        previous_match_position = match_position;
+    }
+
+    // clean up leftover lazy match
+    if (previous_match_length != 0) {
+        emit_back_reference((current_position - 1) - previous_match_position, previous_match_length);
+        current_position = (current_position - 1) + previous_match_length;
+    }
+
+    // output remaining literals
+    while (current_position < block_end) {
+        emit_literal(m_rolling_window[current_position++]);
+    }
+}
+
+// Returns how many bits the counted symbols of the current block take up when encoded with the given code lengths,
+// including the extra bits of back reference lengths and distances.
+size_t DeflateCompressor::huffman_block_length(const Array<u8, max_huffman_literals>& literal_bit_lengths, const Array<u8, max_huffman_distances>& distance_bit_lengths)
+{
+    size_t total_bits = 0;
+
+    // symbols below 257 have no extra bits
+    for (size_t symbol = 0; symbol < 257; symbol++)
+        total_bits += literal_bit_lengths[symbol] * m_symbol_frequencies[symbol];
+
+    // back reference length symbols
+    for (size_t symbol = 257; symbol < 286; symbol++) {
+        const auto frequency = m_symbol_frequencies[symbol];
+        total_bits += literal_bit_lengths[symbol] * frequency;
+        total_bits += packed_length_symbols[symbol - 257].extra_bits * frequency;
+    }
+
+    // distance codes
+    for (size_t code = 0; code < 30; code++) {
+        const auto frequency = m_distance_frequencies[code];
+        total_bits += distance_bit_lengths[code] * frequency;
+        total_bits += packed_distances[code].extra_bits * frequency;
+    }
+
+    return total_bits;
+}
+
+// Returns the size in bits the pending block would take if it were written as an uncompressed block.
+size_t DeflateCompressor::uncompressed_block_length()
+{
+    // Bits needed to reach the next byte boundary after the block header. The outer modulo yields 0 (rather than 8)
+    // when the header already ends on a byte boundary.
+    // NOTE(review): flush() calls this after it has written the final-block bit; if bit_offset() already counts that bit,
+    // only 2 header bits are still outstanding here - confirm against OutputBitStream.
+    auto padding = (8 - ((m_output_stream.bit_offset() + 3) % 8)) % 8;
+    // 3 bit block header + align to byte + 2 * 16 bit length fields + block contents
+    return 3 + padding + (2 * 16) + m_pending_block_size * 8;
+}
+
+// Returns the size in bits of the current block when encoded with the fixed huffman code lengths.
+size_t DeflateCompressor::fixed_block_length()
+{
+    constexpr size_t block_header_bits = 3;
+    const auto contents_bits = huffman_block_length(fixed_literal_bit_lengths, fixed_distance_bit_lengths);
+    return block_header_bits + contents_bits;
+}
+
+// Returns the size in bits of the current block when encoded with the given dynamic huffman code lengths,
+// including the header fields and the encoded code length tables.
+size_t DeflateCompressor::dynamic_block_length(const Array<u8, max_huffman_literals>& literal_bit_lengths, const Array<u8, max_huffman_distances>& distance_bit_lengths, const Array<u8, 19>& code_lengths_bit_lengths, const Array<u16, 19>& code_lengths_frequencies, size_t code_lengths_count)
+{
+    // block header + literal code count + distance code count + code length count
+    // (explicitly size_t: a deduced int accumulator would be narrowed into by the size_t terms added below)
+    size_t length = 3 + 5 + 5 + 4;
+
+    // 3 bits per code_length
+    length += 3 * code_lengths_count;
+
+    for (size_t symbol = 0; symbol < code_lengths_frequencies.size(); symbol++) {
+        const auto frequency = code_lengths_frequencies[symbol];
+        length += code_lengths_bit_lengths[symbol] * frequency;
+
+        // the run symbols are each followed by extra bits holding the repeat count
+        switch (symbol) {
+        case DeflateSpecialCodeLengths::COPY:
+            length += 2 * frequency;
+            break;
+        case DeflateSpecialCodeLengths::ZEROS:
+            length += 3 * frequency;
+            break;
+        case DeflateSpecialCodeLengths::LONG_ZEROS:
+            length += 7 * frequency;
+            break;
+        default:
+            break;
+        }
+    }
+
+    return length + huffman_block_length(literal_bit_lengths, distance_bit_lengths);
+}
+
+// Writes every entry of the symbol buffer to the output stream using the given literal and distance codes.
+// Entries with a distance of 0 are literals, all others are back references (length symbol, extra bits, distance code, extra bits).
+void DeflateCompressor::write_huffman(const CanonicalCode& literal_code, const CanonicalCode& distance_code)
+{
+    for (size_t index = 0; index < m_pending_symbol_size; index++) {
+        const auto& entry = m_symbol_buffer[index];
+
+        if (entry.distance != 0) {
+            // length symbol, followed by the offset from that symbol's base length
+            const auto length_symbol = length_to_symbol[entry.length];
+            const auto& length_info = packed_length_symbols[length_symbol - 257];
+            literal_code.write_symbol(m_output_stream, length_symbol);
+            m_output_stream.write_bits(entry.length - length_info.base_length, length_info.extra_bits);
+
+            // distance code, followed by the offset from that code's base distance
+            const auto distance_symbol = distance_to_base(entry.distance);
+            const auto& distance_info = packed_distances[distance_symbol];
+            distance_code.write_symbol(m_output_stream, distance_symbol);
+            m_output_stream.write_bits(entry.distance - distance_info.base_distance, distance_info.extra_bits);
+        } else {
+            literal_code.write_symbol(m_output_stream, entry.literal);
+        }
+    }
+}
+
+// Run-length encodes the first lengths_count entries of `lengths` into `encoded_lengths` and returns the number of entries written.
+// Runs of 3 to 138 zeros become ZEROS / LONG_ZEROS entries, and 3 to 6 repetitions following a non-zero length become a COPY entry.
+size_t DeflateCompressor::encode_huffman_lengths(const Array<u8, max_huffman_literals + max_huffman_distances>& lengths, size_t lengths_count, Array<code_length_symbol, max_huffman_literals + max_huffman_distances>& encoded_lengths)
+{
+    // counts how many consecutive entries starting at `from` equal `value`, looking at no more than max_run entries
+    const auto run_length = [&](size_t from, size_t max_run, u8 value) {
+        size_t run = 0;
+        while (run < max_run && from + run < lengths_count && lengths[from + run] == value)
+            run++;
+        return run;
+    };
+
+    size_t emitted = 0;
+    for (size_t position = 0; position < lengths_count;) {
+        const auto current = lengths[position];
+
+        if (current != 0) {
+            // a non-zero length is always written out once, only its repetitions can be folded into a COPY
+            encoded_lengths[emitted++].symbol = current;
+            position++;
+
+            const auto repeats = run_length(position, 6, current);
+            if (repeats >= 3) {
+                encoded_lengths[emitted].symbol = DeflateSpecialCodeLengths::COPY;
+                encoded_lengths[emitted++].count = repeats;
+                position += repeats;
+            }
+            continue;
+        }
+
+        const auto zeros = run_length(position, 138, 0);
+        if (zeros < 3) { // below minimum repeated zero count
+            encoded_lengths[emitted++].symbol = 0;
+            position++;
+            continue;
+        }
+
+        // runs of up to 10 zeros use ZEROS, longer ones LONG_ZEROS
+        encoded_lengths[emitted].symbol = (zeros <= 10) ? DeflateSpecialCodeLengths::ZEROS : DeflateSpecialCodeLengths::LONG_ZEROS;
+        encoded_lengths[emitted++].count = zeros;
+        position += zeros;
+    }
+    return emitted;
+}
+
+// Trims trailing zero lengths off both tables, reports the resulting counts through literal_code_count / distance_code_count,
+// and run-length encodes the concatenation of both trimmed tables into encoded_lengths. Returns the number of encoded entries.
+size_t DeflateCompressor::encode_block_lengths(const Array<u8, max_huffman_literals>& literal_bit_lengths, const Array<u8, max_huffman_distances>& distance_bit_lengths, Array<code_length_symbol, max_huffman_literals + max_huffman_distances>& encoded_lengths, size_t& literal_code_count, size_t& distance_code_count)
+{
+    literal_code_count = max_huffman_literals;
+    distance_code_count = max_huffman_distances;
+
+    VERIFY(literal_bit_lengths[256] != 0); // Make sure at least the EndOfBlock marker is present
+    size_t used_literals = max_huffman_literals;
+    while (literal_bit_lengths[used_literals - 1] == 0)
+        used_literals--;
+
+    // Drop trailing zero lengths, keeping at least one
+    size_t used_distances = max_huffman_distances;
+    while (used_distances > 1 && distance_bit_lengths[used_distances - 1] == 0)
+        used_distances--;
+
+    literal_code_count = used_literals;
+    distance_code_count = used_distances;
+
+    // literal lengths first, distance lengths right behind them
+    Array<u8, max_huffman_literals + max_huffman_distances> combined {};
+    for (size_t i = 0; i < used_literals; i++)
+        combined[i] = literal_bit_lengths[i];
+    for (size_t i = 0; i < used_distances; i++)
+        combined[used_literals + i] = distance_bit_lengths[i];
+
+    return encode_huffman_lengths(combined, used_literals + used_distances, encoded_lengths);
+}
+
+// Writes the body of a dynamic huffman block: the three count fields, the code length code lengths (in
+// code_lengths_code_lengths_order), the encoded literal / distance code lengths and finally the block's symbols.
+void DeflateCompressor::write_dynamic_huffman(const CanonicalCode& literal_code, size_t literal_code_count, const CanonicalCode& distance_code, size_t distance_code_count, const Array<u8, 19>& code_lengths_bit_lengths, size_t code_length_count, const Array<code_length_symbol, max_huffman_literals + max_huffman_distances>& encoded_lengths, size_t encoded_lengths_count)
+{
+    // each count is stored relative to its minimum value
+    m_output_stream.write_bits(literal_code_count - 257, 5);
+    m_output_stream.write_bits(distance_code_count - 1, 5);
+    m_output_stream.write_bits(code_length_count - 4, 4);
+
+    for (size_t index = 0; index < code_length_count; index++) {
+        const auto symbol = code_lengths_code_lengths_order[index];
+        m_output_stream.write_bits(code_lengths_bit_lengths[symbol], 3);
+    }
+
+    auto code_lengths_code = CanonicalCode::from_bytes(code_lengths_bit_lengths);
+    VERIFY(code_lengths_code.has_value());
+    for (size_t index = 0; index < encoded_lengths_count; index++) {
+        const auto& entry = encoded_lengths[index];
+        code_lengths_code->write_symbol(m_output_stream, entry.symbol);
+
+        // the run symbols carry their repeat count in extra bits, relative to the shortest run they can express
+        switch (entry.symbol) {
+        case DeflateSpecialCodeLengths::COPY:
+            m_output_stream.write_bits(entry.count - 3, 2);
+            break;
+        case DeflateSpecialCodeLengths::ZEROS:
+            m_output_stream.write_bits(entry.count - 3, 3);
+            break;
+        case DeflateSpecialCodeLengths::LONG_ZEROS:
+            m_output_stream.write_bits(entry.count - 11, 7);
+            break;
+        default:
+            break;
+        }
+    }
+
+    write_huffman(literal_code, distance_code);
+}
+
+void DeflateCompressor::flush() // writes the pending block to the output stream as one deflate block, using whichever of the three encodings is estimated smallest
+{
+    if (m_output_stream.handle_any_error()) { // the output stream reported an error: mark this stream as failed and leave the pending block as it is
+        set_fatal_error();
+        return;
+    }
+
+    m_output_stream.write_bit(m_finished); // first header bit: set only when called through final_flush()
+
+    // if this is just an empty block to signify the end of the deflate stream use the smallest block possible (10 bits total)
+    if (m_pending_block_size == 0) {
+        VERIFY(m_finished);                       // we shouldn't be writing empty blocks unless this is the final one
+        m_output_stream.write_bits(0b01, 2);      // fixed huffman codes
+        m_output_stream.write_bits(0b0000000, 7); // end of block symbol
+        m_output_stream.align_to_byte_boundary();
+        return;
+    }
+
+    auto write_uncompressed = [&]() { // block type, byte alignment, length, inverted length, then the raw bytes
+        m_output_stream.write_bits(0b00, 2); // no compression
+        m_output_stream.align_to_byte_boundary();
+        LittleEndian<u16> len = m_pending_block_size;
+        m_output_stream << len;
+        LittleEndian<u16> nlen = ~m_pending_block_size;
+        m_output_stream << nlen;
+        m_output_stream.write_or_error(pending_block().slice(0, m_pending_block_size));
+    };
+
+    if (m_compression_level == CompressionLevel::STORE) { // disabled compression fast path
+        write_uncompressed();
+        m_pending_block_size = 0;
+        return;
+    }
+
+    // The following implementation of lz77 compression and huffman encoding is based on the reference implementation by Hans Wennborg https://www.hanshq.net/zip.html
+
+    // this reads from the pending block and writes to m_symbol_buffer
+    lz77_compress_block();
+
+    // insert EndOfBlock marker to the symbol buffer
+    m_symbol_buffer[m_pending_symbol_size].distance = 0;
+    m_symbol_buffer[m_pending_symbol_size++].literal = 256;
+    m_symbol_frequencies[256]++;
+
+    // generate optimal dynamic huffman code lengths
+    Array<u8, max_huffman_literals> dynamic_literal_bit_lengths {};
+    Array<u8, max_huffman_distances> dynamic_distance_bit_lengths {};
+    generate_huffman_lengths(dynamic_literal_bit_lengths, m_symbol_frequencies, 15); // deflate data huffman can use up to 15 bits per symbol
+    generate_huffman_lengths(dynamic_distance_bit_lengths, m_distance_frequencies, 15);
+
+    // encode literal and distance lengths together in deflate format
+    Array<code_length_symbol, max_huffman_literals + max_huffman_distances> encoded_lengths {};
+    size_t literal_code_count; // set by encode_block_lengths
+    size_t distance_code_count; // set by encode_block_lengths
+    auto encoded_lengths_count = encode_block_lengths(dynamic_literal_bit_lengths, dynamic_distance_bit_lengths, encoded_lengths, literal_code_count, distance_code_count);
+
+    // count code length frequencies
+    Array<u16, 19> code_lengths_frequencies { 0 };
+    for (size_t i = 0; i < encoded_lengths_count; i++) {
+        code_lengths_frequencies[encoded_lengths[i].symbol]++;
+    }
+    // generate optimal huffman code lengths for the code lengths
+    Array<u8, 19> code_lengths_bit_lengths {};
+    generate_huffman_lengths(code_lengths_bit_lengths, code_lengths_frequencies, 7); // deflate code length huffman can use up to 7 bits per symbol
+    // calculate actual code length code lengths count (without trailing zeros)
+    auto code_lengths_count = code_lengths_bit_lengths.size();
+    while (code_lengths_bit_lengths[code_lengths_code_lengths_order[code_lengths_count - 1]] == 0) // trailing is relative to the order in which they are written out
+        code_lengths_count--;
+
+    auto uncompressed_size = uncompressed_block_length(); // all three sizes are in bits
+    auto fixed_huffman_size = fixed_block_length();
+    auto dynamic_huffman_size = dynamic_block_length(dynamic_literal_bit_lengths, dynamic_distance_bit_lengths, code_lengths_bit_lengths, code_lengths_frequencies, code_lengths_count);
+
+    // If the compression somehow didnt reduce the size enough, just write out the block uncompressed as it allows for much faster decompression
+    if (uncompressed_size <= min(fixed_huffman_size, dynamic_huffman_size)) {
+        write_uncompressed();
+    } else if (fixed_huffman_size <= dynamic_huffman_size) { // If the fixed and dynamic huffman codes come out the same size, prefer the fixed version, as it takes less time to decode
+        m_output_stream.write_bits(0b01, 2);                 // fixed huffman codes
+        write_huffman(CanonicalCode::fixed_literal_codes(), CanonicalCode::fixed_distance_codes());
+    } else {
+        m_output_stream.write_bits(0b10, 2); // dynamic huffman codes
+        auto literal_code = CanonicalCode::from_bytes(dynamic_literal_bit_lengths);
+        VERIFY(literal_code.has_value());
+        auto distance_code = CanonicalCode::from_bytes(dynamic_distance_bit_lengths);
+        VERIFY(distance_code.has_value());
+        write_dynamic_huffman(literal_code.value(), literal_code_count, distance_code.value(), distance_code_count, code_lengths_bit_lengths, code_lengths_count, encoded_lengths, encoded_lengths_count);
+    }
+    if (m_finished) // the final block is padded to a whole byte
+        m_output_stream.align_to_byte_boundary();
+
+    // reset all block specific members
+    m_pending_block_size = 0;
+    m_pending_symbol_size = 0;
+    m_symbol_frequencies.fill(0);
+    m_distance_frequencies.fill(0);
+    // On the final block this copy will potentially produce an invalid search window, but since its the final block we dont care
+    pending_block().copy_trimmed_to({ m_rolling_window, block_size });
+}
+
+void DeflateCompressor::final_flush() // writes whatever is still pending as the final block; may only be called once
+{
+    VERIFY(!m_finished);
+    m_finished = true; // must be set before flush(), which writes it out as the final-block bit
+    flush();
+}
+
+Optional<ByteBuffer> DeflateCompressor::compress_all(const ReadonlyBytes& bytes, CompressionLevel compression_level)
+{
+    DuplexMemoryStream output_stream;
+    DeflateCompressor deflate_stream { output_stream, compression_level };
+
+    deflate_stream.write_or_error(bytes);
+
+    deflate_stream.final_flush();
+
+    if (deflate_stream.handle_any_error())
+        return {};
+
+    return output_stream.copy_into_contiguous_buffer();
+}
+
 }

+ 111 - 1
Userland/Libraries/LibCompress/Deflate.h

@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2020, the SerenityOS developers
+ * Copyright (c) 2021, Idan Horowitz <idan.horowitz@gmail.com>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -31,6 +32,7 @@
 #include <AK/CircularDuplexStream.h>
 #include <AK/Endian.h>
 #include <AK/Vector.h>
+#include <LibCompress/DeflateTables.h>
 
 namespace Compress {
 
@@ -38,6 +40,7 @@ class CanonicalCode {
 public:
     CanonicalCode() = default;
     u32 read_symbol(InputBitStream&) const;
+    void write_symbol(OutputBitStream&, u32) const;
 
     static const CanonicalCode& fixed_literal_codes();
     static const CanonicalCode& fixed_distance_codes();
@@ -45,8 +48,13 @@ public:
     static Optional<CanonicalCode> from_bytes(ReadonlyBytes);
 
 private:
+    // Decompression - indexed by code
     Vector<u32> m_symbol_codes;
     Vector<u32> m_symbol_values;
+
+    // Compression - indexed by symbol
+    Array<u32, 288> m_bit_codes {}; // deflate uses a maximum of 288 symbols (maximum of 32 for distances)
+    Array<u32, 288> m_bit_code_lengths {};
 };
 
 class DeflateDecompressor final : public InputStream {
@@ -111,7 +119,109 @@ private:
     };
 
     InputBitStream m_input_stream;
-    CircularDuplexStream<32 * 1024> m_output_stream;
+    CircularDuplexStream<32 * KiB> m_output_stream;
+};
+
+enum DeflateSpecialCodeLengths : u32 { // RFC 1951 - 3.2.7: run-length symbols of the code length alphabet (symbols 0-15 are literal code lengths)
+    COPY = 16,      // repeat the previous code length 3-6 times
+    ZEROS = 17,     // repeat a code length of 0 for 3-10 times
+    LONG_ZEROS = 18 // repeat a code length of 0 for 11-138 times
+};
+
+class DeflateCompressor final : public OutputStream { // OutputStream that DEFLATE-compresses the written bytes into the OutputStream passed to the constructor
+public:
+    static constexpr size_t block_size = 32 * KiB - 1; // TODO: this can theoretically be increased to 64 KiB - 2
+    static constexpr size_t window_size = block_size * 2; // room for two blocks, see m_rolling_window and pending_block()
+    static constexpr size_t hash_bits = 15;               // m_hash_head has 1 << hash_bits entries
+    static constexpr size_t max_huffman_literals = 288;   // size of the literal/length tables, see m_symbol_frequencies
+    static constexpr size_t max_huffman_distances = 32;   // size of the distance tables, see m_distance_frequencies
+    static constexpr size_t min_match_length = 4;   // matches smaller than these are not worth the size of the back reference
+    static constexpr size_t max_match_length = 258; // matches longer than these cannot be encoded using huffman codes
+    static constexpr u16 empty_slot = UINT16_MAX; // NOTE(review): presumably marks unused m_hash_head / m_hash_prev entries - confirm in Deflate.cpp
+
+    struct CompressionConstants {
+        size_t good_match_length;  // Once we find a match of at least this length (a good enough match) we reduce max_chain to lower processing time
+        size_t max_lazy_length;    // If the match is at least this long we don't defer matching to the next byte (which takes time) as it's good enough
+        size_t great_match_length; // Once we find a match of at least this length (a great match) we can just stop searching for longer ones
+        size_t max_chain;          // We only check the actual length of the max_chain closest matches
+    };
+
+    // These constants were shamelessly "borrowed" from zlib. There is one row per CompressionLevel, in enum order (NOTE(review): presumably indexed by the level's integer value - confirm in the constructor)
+    static constexpr CompressionConstants compression_constants[] = {
+        { 0, 0, 0, 0 },
+        { 4, 4, 8, 4 },
+        { 8, 16, 128, 128 },
+        { 32, 258, 258, 4096 },
+        { max_match_length, max_match_length, max_match_length, 1 << hash_bits } // disable all limits
+    };
+
+    enum class CompressionLevel : int {
+        STORE = 0,
+        FAST,
+        GOOD,
+        GREAT,
+        BEST // WARNING: this one can take an unreasonable amount of time!
+    };
+
+    DeflateCompressor(OutputStream&, CompressionLevel = CompressionLevel::GOOD);
+    ~DeflateCompressor();
+
+    size_t write(ReadonlyBytes) override;
+    bool write_or_error(ReadonlyBytes) override;
+    void final_flush(); // ends the stream: asserts that the compressor was not finished before, marks it as finished and flushes one last time
+
+    static Optional<ByteBuffer> compress_all(const ReadonlyBytes& bytes, CompressionLevel = CompressionLevel::GOOD); // one-shot helper, returns an empty Optional if the compressor reported an error
+
+private:
+    Bytes pending_block() { return { m_rolling_window + block_size, block_size }; } // the upper half of m_rolling_window
+
+    // LZ77 Compression
+    static u16 hash_sequence(const u8* bytes);
+    size_t compare_match_candidate(size_t start, size_t candidate, size_t prev_match_length, size_t max_match_length);
+    size_t find_back_match(size_t start, u16 hash, size_t previous_match_length, size_t max_match_length, size_t& match_position);
+    void lz77_compress_block();
+
+    // Huffman Coding
+    struct code_length_symbol {
+        u8 symbol;
+        u8 count; // used for special symbols 16-18
+    };
+    static u8 distance_to_base(u16 distance);
+    template<size_t Size>
+    static void generate_huffman_lengths(Array<u8, Size>& lengths, const Array<u16, Size>& frequencies, size_t max_bit_length);
+    size_t huffman_block_length(const Array<u8, max_huffman_literals>& literal_bit_lengths, const Array<u8, max_huffman_distances>& distance_bit_lengths);
+    void write_huffman(const CanonicalCode& literal_code, const CanonicalCode& distance_code);
+    static size_t encode_huffman_lengths(const Array<u8, max_huffman_literals + max_huffman_distances>& lengths, size_t lengths_count, Array<code_length_symbol, max_huffman_literals + max_huffman_distances>& encoded_lengths);
+    size_t encode_block_lengths(const Array<u8, max_huffman_literals>& literal_bit_lengths, const Array<u8, max_huffman_distances>& distance_bit_lengths, Array<code_length_symbol, max_huffman_literals + max_huffman_distances>& encoded_lengths, size_t& literal_code_count, size_t& distance_code_count);
+    void write_dynamic_huffman(const CanonicalCode& literal_code, size_t literal_code_count, const CanonicalCode& distance_code, size_t distance_code_count, const Array<u8, 19>& code_lengths_bit_lengths, size_t code_length_count, const Array<code_length_symbol, max_huffman_literals + max_huffman_distances>& encoded_lengths, size_t encoded_lengths_count);
+
+    size_t uncompressed_block_length();
+    size_t fixed_block_length();
+    size_t dynamic_block_length(const Array<u8, max_huffman_literals>& literal_bit_lengths, const Array<u8, max_huffman_distances>& distance_bit_lengths, const Array<u8, 19>& code_lengths_bit_lengths, const Array<u16, 19>& code_lengths_frequencies, size_t code_lengths_count);
+    void flush();
+
+    bool m_finished { false }; // set by final_flush()
+    CompressionLevel m_compression_level;
+    CompressionConstants m_compression_constants;
+    OutputBitStream m_output_stream;
+
+    u8 m_rolling_window[window_size]; // upper half: the pending block (see pending_block()), lower half: flush() copies the pending block here to serve as the search window
+    size_t m_pending_block_size { 0 };
+
+    struct [[gnu::packed]] {
+        u16 distance; // back reference distance (0 if this entry is a literal)
+        union {
+            u16 literal; // literal byte or end of block symbol
+            u16 length;  // back reference length (if distance != 0)
+        };
+    } m_symbol_buffer[block_size + 1]; // NOTE(review): the extra entry is presumably reserved for the end of block symbol - confirm in lz77_compress_block()
+    size_t m_pending_symbol_size { 0 };
+    Array<u16, max_huffman_literals> m_symbol_frequencies;    // there are 286 valid symbol values (symbols 286-287 never occur)
+    Array<u16, max_huffman_distances> m_distance_frequencies; // there are 30 valid distance values (distances 30-31 never occur)
+
+    // LZ77 Chained hash table
+    u16 m_hash_head[1 << hash_bits];
+    u16 m_hash_prev[window_size];
 };
 
 }

+ 210 - 0
Userland/Libraries/LibCompress/DeflateTables.h

@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2021, Idan Horowitz <idan.horowitz@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+namespace Compress {
+
+// RFC 1951 - 3.2.5: the length symbols (257-285) of the literal/length alphabet, ordered by ascending base length
+static constexpr struct {
+    u16 symbol;      // symbol value in the literal/length alphabet
+    u16 base_length; // smallest match length encoded by this symbol
+    u16 extra_bits;  // number of extra bits that follow the symbol to select the exact length
+} packed_length_symbols[29] = {
+    { 257, 3, 0 },
+    { 258, 4, 0 },
+    { 259, 5, 0 },
+    { 260, 6, 0 },
+    { 261, 7, 0 },
+    { 262, 8, 0 },
+    { 263, 9, 0 },
+    { 264, 10, 0 },
+    { 265, 11, 1 },
+    { 266, 13, 1 },
+    { 267, 15, 1 },
+    { 268, 17, 1 },
+    { 269, 19, 2 },
+    { 270, 23, 2 },
+    { 271, 27, 2 },
+    { 272, 31, 2 },
+    { 273, 35, 3 },
+    { 274, 43, 3 },
+    { 275, 51, 3 },
+    { 276, 59, 3 },
+    { 277, 67, 4 },
+    { 278, 83, 4 },
+    { 279, 99, 4 },
+    { 280, 115, 4 },
+    { 281, 131, 5 },
+    { 282, 163, 5 },
+    { 283, 195, 5 },
+    { 284, 227, 5 },
+    { 285, 258, 0 } // the maximum length gets a dedicated symbol without extra bits
+};
+
+// RFC 1951 - 3.2.5: the distance symbols (0-29), ordered by ascending base distance, followed by one sentinel entry
+static constexpr struct {
+    u16 symbol;        // symbol value in the distance alphabet
+    u16 base_distance; // smallest back reference distance encoded by this symbol
+    u16 extra_bits;    // number of extra bits that follow the symbol to select the exact distance
+} packed_distances[31] = {
+    { 0, 1, 0 },
+    { 1, 2, 0 },
+    { 2, 3, 0 },
+    { 3, 4, 0 },
+    { 4, 5, 1 },
+    { 5, 7, 1 },
+    { 6, 9, 2 },
+    { 7, 13, 2 },
+    { 8, 17, 3 },
+    { 9, 25, 3 },
+    { 10, 33, 4 },
+    { 11, 49, 4 },
+    { 12, 65, 5 },
+    { 13, 97, 5 },
+    { 14, 129, 6 },
+    { 15, 193, 6 },
+    { 16, 257, 7 },
+    { 17, 385, 7 },
+    { 18, 513, 8 },
+    { 19, 769, 8 },
+    { 20, 1025, 9 },
+    { 21, 1537, 9 },
+    { 22, 2049, 10 },
+    { 23, 3073, 10 },
+    { 24, 4097, 11 },
+    { 25, 6145, 11 },
+    { 26, 8193, 12 },
+    { 27, 12289, 12 },
+    { 28, 16385, 13 },
+    { 29, 24577, 13 },
+    { 30, 32 * KiB + 1, 0 }, // signifies end: not a real symbol, its base distance is one past the largest distance so that the table generators can always compare against the next entry
+};
+
+// RFC 1951 - 3.2.6: bit lengths of the fixed literal/length code, stored as ranges - every symbol from base_value up to (excluding) the next entry's base_value uses `bits` bits
+static constexpr struct {
+    u16 base_value; // first symbol of the range
+    u16 bits;       // code bit length shared by all symbols of the range
+} fixed_literal_bits[5] = {
+    { 0, 8 },
+    { 144, 9 },
+    { 256, 7 },
+    { 280, 8 },
+    { 288, 0 } // signifies end
+};
+
+// RFC 1951 - 3.2.7: the order in which the code lengths of the code length alphabet are stored in the header of a dynamic huffman block
+static constexpr size_t code_lengths_code_lengths_order[] { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
+
+// Builds the compile time lookup table that maps a match length to its length symbol (see packed_length_symbols).
+// Entries 0-2 are set to UINT16_MAX, entries 3-258 hold the symbol whose range contains that length.
+static consteval Array<u16, 259> generate_length_to_symbol()
+{
+    Array<u16, 259> table = { UINT16_MAX, UINT16_MAX, UINT16_MAX }; // there are 256 valid lengths (3-258) + 3 invalid lengths (0-2)
+    size_t row = 0;
+    for (size_t length = 3; length <= 258; ++length) {
+        // Move on to the next row as soon as its base length has been reached
+        if (packed_length_symbols[row + 1].base_length == length)
+            ++row;
+        table[length] = packed_length_symbols[row].symbol;
+    }
+    return table;
+}
+static constexpr auto length_to_symbol = generate_length_to_symbol();
+
+// Builds the compile time lookup table for short distances: entry (distance - 1) holds the distance symbol
+// (see packed_distances) for every distance in the range 1-256.
+static consteval Array<u16, 256> generate_distance_to_base_lo()
+{
+    Array<u16, 256> table;
+    size_t row = 0;
+    for (size_t distance = 1; distance < 257; ++distance) {
+        // Move on to the next row as soon as its base distance has been reached
+        if (packed_distances[row + 1].base_distance == distance)
+            ++row;
+        table[distance - 1] = packed_distances[row].symbol;
+    }
+    return table;
+}
+static constexpr auto distance_to_base_lo = generate_distance_to_base_lo();
+// Builds the compile time lookup table for long distances (257 - 32 KiB): entry ((distance - 1) >> 7) holds the
+// distance symbol (see packed_distances) of that distance, i.e. every entry covers 128 consecutive distances.
+// The first two entries would belong to distances of at most 256 and are set to UINT16_MAX.
+static consteval Array<u16, 256> generate_distance_to_base_hi()
+{
+    Array<u16, 256> table = { UINT16_MAX, UINT16_MAX };
+    size_t row = 16; // the row with base distance 257
+    for (size_t distance = 257; distance < 32 * KiB + 1; ++distance) {
+        // Move on to the next row as soon as its base distance has been reached
+        if (packed_distances[row + 1].base_distance == distance)
+            ++row;
+        const size_t slot = (distance - 1) >> 7;
+        table[slot] = packed_distances[row].symbol;
+    }
+    return table;
+}
+static constexpr auto distance_to_base_hi = generate_distance_to_base_hi();
+
+// Expands the ranges of fixed_literal_bits into one bit length per literal/length symbol (288 entries).
+static consteval Array<u8, 288> generate_fixed_literal_bit_lengths()
+{
+    Array<u8, 288> lengths;
+    for (size_t range = 0; range < 4; ++range) {
+        // Every range ends where the next one begins, the last one is terminated by the end marker
+        const auto first_symbol = fixed_literal_bits[range].base_value;
+        const auto symbol_count = fixed_literal_bits[range + 1].base_value - first_symbol;
+        lengths.span().slice(first_symbol, symbol_count).fill(fixed_literal_bits[range].bits);
+    }
+    return lengths;
+}
+static constexpr auto fixed_literal_bit_lengths = generate_fixed_literal_bit_lengths();
+
+// Builds the bit lengths of the fixed distance code: all 32 entries are 5 bits long.
+static consteval Array<u8, 32> generate_fixed_distance_bit_lengths()
+{
+    Array<u8, 32> lengths;
+    lengths.span().fill(5);
+    return lengths;
+}
+static constexpr auto fixed_distance_bit_lengths = generate_fixed_distance_bit_lengths();
+
+// Returns `value` with the order of its 8 bits reversed (bit 0 becomes bit 7 and so on).
+static consteval u8 reverse8(u8 value)
+{
+    u8 mirrored = 0;
+    for (size_t bit = 0; bit < 8; ++bit) {
+        // A set bit at position `bit` ends up at position 7 - bit
+        if ((value >> bit) & 1)
+            mirrored |= 0x80 >> bit;
+    }
+    return mirrored;
+}
+// Precomputes reverse8() for every possible byte value.
+static consteval Array<u8, UINT8_MAX + 1> generate_reverse8_lookup_table()
+{
+    Array<u8, UINT8_MAX + 1> table;
+    for (size_t byte = 0; byte < UINT8_MAX + 1; ++byte)
+        table[byte] = reverse8(byte);
+    return table;
+}
+static constexpr auto reverse8_lookup_table = generate_reverse8_lookup_table();
+
+// Lookup-table based bit swap: returns the lowest `bits` bits of `value` in reversed order.
+// `bits` must not exceed 16.
+ALWAYS_INLINE static u16 fast_reverse16(u16 value, size_t bits)
+{
+    VERIFY(bits <= 16);
+
+    // Mirror both bytes through the lookup table and swap them, which mirrors all 16 bits...
+    u16 mirrored_low_byte = reverse8_lookup_table[value & 0xff];
+    u16 mirrored_high_byte = reverse8_lookup_table[value >> 8];
+    u16 mirrored = (u16)((mirrored_low_byte << 8) | mirrored_high_byte);
+
+    // ...then shift the result down so that only the requested number of bits remains
+    return mirrored >> (16 - bits);
+}
+
+}