2 gadi atpakaļ · 85a54cc796
--- a/Userland/Libraries/LibCompress/Lzma.cpp
+++ b/Userland/Libraries/LibCompress/Lzma.cpp
@@ -4,6 +4,8 @@
 
				  * SPDX-License-Identifier: BSD-2-Clause
			
 
				  */
			
 
				 
			
 
				+#include <AK/Debug.h>
			
 
				+#include <AK/IntegralMath.h>
			
 
				 #include <LibCompress/Lzma.h>
			
 
				 
			
 
				 namespace Compress {
			
@@ -29,7 +31,7 @@ Optional<u64> LzmaHeader::uncompressed_size() const
 
				     // "If "Uncompressed size" field contains ones in all 64 bits, it means that
			
 
				     //  uncompressed size is unknown and there is the "end marker" in stream,
			
 
				     //  that indicates the end of decoding point."
			
 
				-    if (uncompressed_size == UINT64_MAX)
			
 
				+    if (uncompressed_size == placeholder_for_unknown_uncompressed_size)
			
 
				         return {};
			
 
				 
			
 
				     // "In opposite case, if the value from "Uncompressed size" field is not
			
@@ -71,6 +73,20 @@ ErrorOr<LzmaModelProperties> LzmaHeader::decode_model_properties(u8 input_bits)
 
				     };
			
 
				 }
			
 
				 
			
 
				+ErrorOr<u8> LzmaHeader::encode_model_properties(LzmaModelProperties const& model_properties)
			
 
				+{
			
 
				+    if (model_properties.literal_context_bits > 8)
			
 
				+        return Error::from_string_literal("LZMA literal context bits are too large to encode");
			
 
				+
			
 
				+    if (model_properties.literal_position_bits > 4)
			
 
				+        return Error::from_string_literal("LZMA literal position bits are too large to encode");
			
 
				+
			
 
				+    if (model_properties.position_bits > 4)
			
 
				+        return Error::from_string_literal("LZMA position bits are too large to encode");
			
 
				+
			
 
				+    return (model_properties.position_bits * 5 + model_properties.literal_position_bits) * 9 + model_properties.literal_context_bits;
			
 
				+}
			
 
				+
			
 
				 ErrorOr<LzmaDecompressorOptions> LzmaHeader::as_decompressor_options() const
			
 
				 {
			
 
				     auto model_properties = TRY(decode_model_properties(encoded_model_properties));
			
@@ -85,6 +101,21 @@ ErrorOr<LzmaDecompressorOptions> LzmaHeader::as_decompressor_options() const
 
				     };
			
 
				 }
			
 
				 
			
 
				+ErrorOr<LzmaHeader> LzmaHeader::from_compressor_options(LzmaCompressorOptions const& options)
			
 
				+{
			
 
				+    auto encoded_model_properties = TRY(encode_model_properties({
			
 
				+        .literal_context_bits = options.literal_context_bits,
			
 
				+        .literal_position_bits = options.literal_position_bits,
			
 
				+        .position_bits = options.position_bits,
			
 
				+    }));
			
 
				+
			
 
				+    return LzmaHeader {
			
 
				+        .encoded_model_properties = encoded_model_properties,
			
 
				+        .unchecked_dictionary_size = options.dictionary_size,
			
 
				+        .encoded_uncompressed_size = options.uncompressed_size.value_or(placeholder_for_unknown_uncompressed_size),
			
 
				+    };
			
 
				+}
			
 
				+
			
 
				 void LzmaState::initialize_to_default_probability(Span<Probability> span)
			
 
				 {
			
 
				     for (auto& entry : span)
			
@@ -218,6 +249,39 @@ ErrorOr<void> LzmaDecompressor::normalize_range_decoder()
 
				     return {};
			
 
				 }
			
 
				 
			
 
				+ErrorOr<void> LzmaCompressor::normalize_range_encoder()
			
 
				+{
			
 
				+    u64 const maximum_range_value = m_range_encoder_code + m_range_encoder_range;
			
 
				+
			
 
				+    // If we hit this, we have the potential to overflow into a byte that we already flushed.
			
 
				+    VERIFY((maximum_range_value & ((1ull << m_range_encoder_code_used_bits) - 1)) == maximum_range_value);
			
 
				+
			
 
				+    constexpr u32 minimum_range_value = 1 << 24;
			
 
				+
			
 
				+    if (m_range_encoder_range >= minimum_range_value)
			
 
				+        return {};
			
 
				+
			
 
				+    u64 const flipped_bits = maximum_range_value ^ m_range_encoder_code;
			
 
				+    u64 const size_of_flipped_bits = count_required_bits(flipped_bits);
			
 
				+
			
 
				+    // If we can flush a full byte without impacting future bits, do so.
			
 
				+    while (m_range_encoder_code_used_bits - 8 >= size_of_flipped_bits) {
			
 
				+        u8 const next_byte = (m_range_encoder_code >> (m_range_encoder_code_used_bits - 8));
			
 
				+        m_range_encoder_code -= static_cast<u64>(next_byte) << (m_range_encoder_code_used_bits - 8);
			
 
				+        m_range_encoder_code_used_bits -= 8;
			
 
				+        TRY(m_stream->write_value(next_byte));
			
 
				+    }
			
 
				+
			
 
				+    // Now, shift in a fresh null byte from the bottom.
			
 
				+    m_range_encoder_range <<= 8;
			
 
				+    m_range_encoder_code <<= 8;
			
 
				+    m_range_encoder_code_used_bits += 8;
			
 
				+
			
 
				+    VERIFY(m_range_encoder_range >= minimum_range_value);
			
 
				+
			
 
				+    return {};
			
 
				+}
			
 
				+
			
 
				 ErrorOr<u8> LzmaDecompressor::decode_direct_bit()
			
 
				 {
			
 
				     m_range_decoder_range >>= 1;
			
@@ -235,6 +299,18 @@ ErrorOr<u8> LzmaDecompressor::decode_direct_bit()
 
				     return temp + 1;
			
 
				 }
			
 
				 
			
 
				+ErrorOr<void> LzmaCompressor::encode_direct_bit(u8 value)
			
 
				+{
			
 
				+    m_range_encoder_range >>= 1;
			
 
				+
			
 
				+    if (value != 0)
			
 
				+        m_range_encoder_code += m_range_encoder_range;
			
 
				+
			
 
				+    TRY(normalize_range_encoder());
			
 
				+
			
 
				+    return {};
			
 
				+}
			
 
				+
			
 
				 ErrorOr<u8> LzmaDecompressor::decode_bit_with_probability(Probability& probability)
			
 
				 {
			
 
				     // "The LZMA decoder provides the pointer to CProb variable that contains
			
@@ -260,6 +336,25 @@ ErrorOr<u8> LzmaDecompressor::decode_bit_with_probability(Probability& probabili
 
				     }
			
 
				 }
			
 
				 
			
 
				+ErrorOr<void> LzmaCompressor::encode_bit_with_probability(Probability& probability, u8 value)
			
 
				+{
			
 
				+    constexpr size_t probability_shift_width = 5;
			
 
				+
			
 
				+    u32 bound = (m_range_encoder_range >> probability_bit_count) * probability;
			
 
				+
			
 
				+    if (value == 0) {
			
 
				+        probability += ((1 << probability_bit_count) - probability) >> probability_shift_width;
			
 
				+        m_range_encoder_range = bound;
			
 
				+    } else {
			
 
				+        probability -= probability >> probability_shift_width;
			
 
				+        m_range_encoder_code += bound;
			
 
				+        m_range_encoder_range -= bound;
			
 
				+    }
			
 
				+
			
 
				+    TRY(normalize_range_encoder());
			
 
				+    return {};
			
 
				+}
			
 
				+
			
 
				 ErrorOr<u16> LzmaDecompressor::decode_symbol_using_bit_tree(size_t bit_count, Span<Probability> probability_tree)
			
 
				 {
			
 
				     VERIFY(bit_count <= sizeof(u16) * 8);
			
@@ -280,6 +375,27 @@ ErrorOr<u16> LzmaDecompressor::decode_symbol_using_bit_tree(size_t bit_count, Sp
 
				     return result;
			
 
				 }
			
 
				 
			
 
				+ErrorOr<void> LzmaCompressor::encode_symbol_using_bit_tree(size_t bit_count, Span<Probability> probability_tree, u16 value)
			
 
				+{
			
 
				+    VERIFY(bit_count <= sizeof(u16) * 8);
			
 
				+    VERIFY(probability_tree.size() >= 1ul << bit_count);
			
 
				+    VERIFY(value <= (1 << bit_count) - 1);
			
 
				+
			
 
				+    // Shift value to make the first sent byte the most significant bit. This makes the shifting logic a lot easier to read.
			
 
				+    value <<= sizeof(u16) * 8 - bit_count;
			
 
				+
			
 
				+    size_t tree_index = 1;
			
 
				+
			
 
				+    for (size_t i = 0; i < bit_count; i++) {
			
 
				+        u8 const next_bit = (value & 0x8000) >> (sizeof(u16) * 8 - 1);
			
 
				+        value <<= 1;
			
 
				+        TRY(encode_bit_with_probability(probability_tree[tree_index], next_bit));
			
 
				+        tree_index = (tree_index << 1) | next_bit;
			
 
				+    }
			
 
				+
			
 
				+    return {};
			
 
				+}
			
 
				+
			
 
				 ErrorOr<u16> LzmaDecompressor::decode_symbol_using_reverse_bit_tree(size_t bit_count, Span<Probability> probability_tree)
			
 
				 {
			
 
				     VERIFY(bit_count <= sizeof(u16) * 8);
			
@@ -297,6 +413,24 @@ ErrorOr<u16> LzmaDecompressor::decode_symbol_using_reverse_bit_tree(size_t bit_c
 
				     return result;
			
 
				 }
			
 
				 
			
 
				+ErrorOr<void> LzmaCompressor::encode_symbol_using_reverse_bit_tree(size_t bit_count, Span<Probability> probability_tree, u16 value)
			
 
				+{
			
 
				+    VERIFY(bit_count <= sizeof(u16) * 8);
			
 
				+    VERIFY(probability_tree.size() >= 1ul << bit_count);
			
 
				+    VERIFY(value <= (1 << bit_count) - 1);
			
 
				+
			
 
				+    size_t tree_index = 1;
			
 
				+
			
 
				+    for (size_t i = 0; i < bit_count; i++) {
			
 
				+        u8 const next_bit = value & 1;
			
 
				+        value >>= 1;
			
 
				+        TRY(encode_bit_with_probability(probability_tree[tree_index], next_bit));
			
 
				+        tree_index = (tree_index << 1) | next_bit;
			
 
				+    }
			
 
				+
			
 
				+    return {};
			
 
				+}
			
 
				+
			
 
				 ErrorOr<void> LzmaDecompressor::decode_literal_to_output_buffer()
			
 
				 {
			
 
				     u8 previous_byte = 0;
			
@@ -353,6 +487,139 @@ ErrorOr<void> LzmaDecompressor::decode_literal_to_output_buffer()
 
				     return {};
			
 
				 }
			
 
				 
			
 
				+ErrorOr<void> LzmaCompressor::encode_literal(u8 literal)
			
 
				+{
			
 
				+    // This function largely mirrors `decode_literal_to_output_buffer`, so specification comments have been omitted.
			
 
				+
			
 
				+    TRY(encode_match_type(MatchType::Literal));
			
 
				+
			
 
				+    // Note: We have already read the next byte from the input buffer, so it's now in the seekback buffer, shifting all seekback offsets by one.
			
 
				+    u8 previous_byte = 0;
			
 
				+    if (m_dictionary->seekback_limit() - m_dictionary->used_space() > 1) {
			
 
				+        auto read_bytes = MUST(m_dictionary->read_with_seekback({ &previous_byte, sizeof(previous_byte) }, 2 + m_dictionary->used_space()));
			
 
				+        VERIFY(read_bytes.size() == sizeof(previous_byte));
			
 
				+    }
			
 
				+    u16 const literal_state_bits_from_position = m_total_processed_bytes & ((1 << m_options.literal_position_bits) - 1);
			
 
				+    u16 const literal_state_bits_from_output = previous_byte >> (8 - m_options.literal_context_bits);
			
 
				+    u16 const literal_state = literal_state_bits_from_position << m_options.literal_context_bits | literal_state_bits_from_output;
			
 
				+
			
 
				+    Span<Probability> selected_probability_table = m_literal_probabilities.span().slice(literal_probability_table_size * literal_state, literal_probability_table_size);
			
 
				+
			
 
				+    u16 result = 1;
			
 
				+
			
 
				+    if (m_state >= 7) {
			
 
				+        u8 matched_byte = 0;
			
 
				+        auto read_bytes = TRY(m_dictionary->read_with_seekback({ &matched_byte, sizeof(matched_byte) }, current_repetition_offset() + m_dictionary->used_space() + 1));
			
 
				+        VERIFY(read_bytes.size() == sizeof(matched_byte));
			
 
				+
			
 
				+        do {
			
 
				+            u8 const match_bit = (matched_byte >> 7) & 1;
			
 
				+            matched_byte <<= 1;
			
 
				+
			
 
				+            u8 const encoded_bit = (literal & 0x80) >> 7;
			
 
				+            literal <<= 1;
			
 
				+
			
 
				+            TRY(encode_bit_with_probability(selected_probability_table[((1 + match_bit) << 8) + result], encoded_bit));
			
 
				+            result = result << 1 | encoded_bit;
			
 
				+
			
 
				+            if (match_bit != encoded_bit)
			
 
				+                break;
			
 
				+        } while (result < 0x100);
			
 
				+    }
			
 
				+
			
 
				+    while (result < 0x100) {
			
 
				+        u8 const encoded_bit = (literal & 0x80) >> 7;
			
 
				+        literal <<= 1;
			
 
				+
			
 
				+        TRY(encode_bit_with_probability(selected_probability_table[result], encoded_bit));
			
 
				+
			
 
				+        result = (result << 1) | encoded_bit;
			
 
				+    }
			
 
				+
			
 
				+    m_total_processed_bytes += sizeof(literal);
			
 
				+
			
 
				+    update_state_after_literal();
			
 
				+
			
 
				+    return {};
			
 
				+}
			
 
				+
			
 
				+ErrorOr<void> LzmaCompressor::encode_existing_match(size_t real_distance, size_t real_length)
			
 
				+{
			
 
				+    VERIFY(real_distance >= normalized_to_real_match_distance_offset);
			
 
				+    u32 const normalized_distance = real_distance - normalized_to_real_match_distance_offset;
			
 
				+
			
 
				+    VERIFY(real_length >= normalized_to_real_match_length_offset);
			
 
				+    u16 const normalized_length = real_length - normalized_to_real_match_length_offset;
			
 
				+
			
 
				+    if (normalized_distance == m_rep0) {
			
 
				+        TRY(encode_match_type(MatchType::RepMatch0));
			
 
				+    } else if (normalized_distance == m_rep1) {
			
 
				+        TRY(encode_match_type(MatchType::RepMatch1));
			
 
				+
			
 
				+        u32 const distance = m_rep1;
			
 
				+        m_rep1 = m_rep0;
			
 
				+        m_rep0 = distance;
			
 
				+    } else if (normalized_distance == m_rep2) {
			
 
				+        TRY(encode_match_type(MatchType::RepMatch2));
			
 
				+
			
 
				+        u32 const distance = m_rep2;
			
 
				+        m_rep2 = m_rep1;
			
 
				+        m_rep1 = m_rep0;
			
 
				+        m_rep0 = distance;
			
 
				+    } else if (normalized_distance == m_rep3) {
			
 
				+        TRY(encode_match_type(MatchType::RepMatch3));
			
 
				+
			
 
				+        u32 const distance = m_rep3;
			
 
				+        m_rep3 = m_rep2;
			
 
				+        m_rep2 = m_rep1;
			
 
				+        m_rep1 = m_rep0;
			
 
				+        m_rep0 = distance;
			
 
				+    } else {
			
 
				+        VERIFY_NOT_REACHED();
			
 
				+    }
			
 
				+
			
 
				+    TRY(encode_normalized_match_length(m_rep_length_coder, normalized_length));
			
 
				+    update_state_after_rep();
			
 
				+    MUST(m_dictionary->discard(real_length));
			
 
				+    m_total_processed_bytes += real_length;
			
 
				+
			
 
				+    return {};
			
 
				+}
			
 
				+
			
 
				+ErrorOr<void> LzmaCompressor::encode_new_match(size_t real_distance, size_t real_length)
			
 
				+{
			
 
				+    VERIFY(real_distance >= normalized_to_real_match_distance_offset);
			
 
				+    u32 const normalized_distance = real_distance - normalized_to_real_match_distance_offset;
			
 
				+
			
 
				+    VERIFY(real_length >= normalized_to_real_match_length_offset);
			
 
				+    u16 const normalized_length = real_length - normalized_to_real_match_length_offset;
			
 
				+
			
 
				+    TRY(encode_normalized_simple_match(normalized_distance, normalized_length));
			
 
				+
			
 
				+    MUST(m_dictionary->discard(real_length));
			
 
				+    m_total_processed_bytes += real_length;
			
 
				+
			
 
				+    return {};
			
 
				+}
			
 
				+
			
 
				+ErrorOr<void> LzmaCompressor::encode_normalized_simple_match(u32 normalized_distance, u16 normalized_length)
			
 
				+{
			
 
				+    TRY(encode_match_type(MatchType::SimpleMatch));
			
 
				+
			
 
				+    m_rep3 = m_rep2;
			
 
				+    m_rep2 = m_rep1;
			
 
				+    m_rep1 = m_rep0;
			
 
				+
			
 
				+    TRY(encode_normalized_match_length(m_length_coder, normalized_length));
			
 
				+
			
 
				+    update_state_after_match();
			
 
				+
			
 
				+    TRY(encode_normalized_match_distance(normalized_length, normalized_distance));
			
 
				+    m_rep0 = normalized_distance;
			
 
				+
			
 
				+    return {};
			
 
				+}
			
 
				+
			
 
				 LzmaState::LzmaLengthCoderState::LzmaLengthCoderState()
			
 
				 {
			
 
				     for (auto& array : m_low_length_probabilities)
			
@@ -387,6 +654,29 @@ ErrorOr<u16> LzmaDecompressor::decode_normalized_match_length(LzmaLengthCoderSta
 
				     return TRY(decode_symbol_using_bit_tree(8, length_decoder_state.m_high_length_probabilities.span())) + 16;
			
 
				 }
			
 
				 
			
 
				+ErrorOr<void> LzmaCompressor::encode_normalized_match_length(LzmaLengthCoderState& length_coder_state, u16 normalized_length)
			
 
				+{
			
 
				+    u16 const position_state = m_total_processed_bytes & ((1 << m_options.position_bits) - 1);
			
 
				+
			
 
				+    if (normalized_length < 8) {
			
 
				+        TRY(encode_bit_with_probability(length_coder_state.m_first_choice_probability, 0));
			
 
				+        TRY(encode_symbol_using_bit_tree(3, length_coder_state.m_low_length_probabilities[position_state].span(), normalized_length));
			
 
				+        return {};
			
 
				+    }
			
 
				+
			
 
				+    TRY(encode_bit_with_probability(length_coder_state.m_first_choice_probability, 1));
			
 
				+
			
 
				+    if (normalized_length < 16) {
			
 
				+        TRY(encode_bit_with_probability(length_coder_state.m_second_choice_probability, 0));
			
 
				+        TRY(encode_symbol_using_bit_tree(3, length_coder_state.m_medium_length_probabilities[position_state].span(), normalized_length - 8));
			
 
				+        return {};
			
 
				+    }
			
 
				+
			
 
				+    TRY(encode_bit_with_probability(length_coder_state.m_second_choice_probability, 1));
			
 
				+    TRY(encode_symbol_using_bit_tree(8, length_coder_state.m_high_length_probabilities.span(), normalized_length - 16));
			
 
				+    return {};
			
 
				+}
			
 
				+
			
 
				 ErrorOr<u32> LzmaDecompressor::decode_normalized_match_distance(u16 normalized_match_length)
			
 
				 {
			
 
				     // "LZMA uses normalized match length (zero-based length)
			
@@ -460,6 +750,51 @@ ErrorOr<u32> LzmaDecompressor::decode_normalized_match_distance(u16 normalized_m
 
				     return (distance_prefix << number_of_alignment_bits) | TRY(decode_symbol_using_reverse_bit_tree(number_of_alignment_bits, m_alignment_bit_probabilities));
			
 
				 }
			
 
				 
			
 
				+ErrorOr<void> LzmaCompressor::encode_normalized_match_distance(u16 normalized_match_length, u32 normalized_match_distance)
			
 
				+{
			
 
				+    u16 const length_state = min(normalized_match_length, number_of_length_to_position_states - 1);
			
 
				+
			
 
				+    if (normalized_match_distance < first_position_slot_with_binary_tree_bits) {
			
 
				+        // The normalized distance gets encoded as the position slot.
			
 
				+        TRY(encode_symbol_using_bit_tree(6, m_length_to_position_states[length_state].span(), normalized_match_distance));
			
 
				+        return {};
			
 
				+    }
			
 
				+
			
 
				+    // Note: This has been deduced, there is no immediate relation to the decoding function.
			
 
				+    u16 const distance_log2 = AK::log2(normalized_match_distance);
			
 
				+    u16 number_of_distance_bits = count_required_bits(normalized_match_distance);
			
 
				+    u16 const position_slot = (distance_log2 << 1) + ((normalized_match_distance >> (distance_log2 - 1)) & 1);
			
 
				+
			
 
				+    TRY(encode_symbol_using_bit_tree(6, m_length_to_position_states[length_state].span(), position_slot));
			
 
				+
			
 
				+    // Mask off the top two bits of the value, those are already encoded by the position slot.
			
 
				+    normalized_match_distance &= (1 << (number_of_distance_bits - 2)) - 1;
			
 
				+    number_of_distance_bits -= 2;
			
 
				+
			
 
				+    if (position_slot < first_position_slot_with_direct_encoded_bits) {
			
 
				+        // The value gets encoded using only a reverse bit tree coder.
			
 
				+        auto& selected_probability_tree = m_binary_tree_distance_probabilities[position_slot - first_position_slot_with_binary_tree_bits];
			
 
				+        TRY(encode_symbol_using_reverse_bit_tree(number_of_distance_bits, selected_probability_tree, normalized_match_distance));
			
 
				+        return {};
			
 
				+    }
			
 
				+
			
 
				+    // The value is split into direct bits (everything except the last four bits) and alignment bits (last four bits).
			
 
				+    auto direct_bits = normalized_match_distance & ~((1 << number_of_alignment_bits) - 1);
			
 
				+    auto const alignment_bits = normalized_match_distance & ((1 << number_of_alignment_bits) - 1);
			
 
				+
			
 
				+    // Shift to-be-written direct bits to the most significant position for easier access.
			
 
				+    direct_bits <<= sizeof(direct_bits) * 8 - number_of_distance_bits;
			
 
				+
			
 
				+    for (auto i = 0u; i < number_of_distance_bits - number_of_alignment_bits; i++) {
			
 
				+        TRY(encode_direct_bit((direct_bits & 0x80000000) ? 1 : 0));
			
 
				+        direct_bits <<= 1;
			
 
				+    }
			
 
				+
			
 
				+    TRY(encode_symbol_using_reverse_bit_tree(number_of_alignment_bits, m_alignment_bit_probabilities, alignment_bits));
			
 
				+
			
 
				+    return {};
			
 
				+}
			
 
				+
			
 
				 u32 LzmaState::current_repetition_offset() const
			
 
				 {
			
 
				     // LZMA never needs to read at offset 0 (i.e. the actual read head of the buffer).
			
@@ -554,6 +889,77 @@ ErrorOr<LzmaDecompressor::MatchType> LzmaDecompressor::decode_match_type()
 
				     return MatchType::RepMatch3;
			
 
				 }
			
 
				 
			
 
				+ErrorOr<void> LzmaCompressor::encode_match_type(MatchType match_type)
			
 
				+{
			
 
				+    u16 position_state = m_total_processed_bytes & ((1 << m_options.position_bits) - 1);
			
 
				+    u16 state2 = (m_state << maximum_number_of_position_bits) + position_state;
			
 
				+
			
 
				+    if (match_type == MatchType::Literal) {
			
 
				+        TRY(encode_bit_with_probability(m_is_match_probabilities[state2], 0));
			
 
				+        return {};
			
 
				+    }
			
 
				+    TRY(encode_bit_with_probability(m_is_match_probabilities[state2], 1));
			
 
				+
			
 
				+    if (match_type == MatchType::SimpleMatch) {
			
 
				+        TRY(encode_bit_with_probability(m_is_rep_probabilities[m_state], 0));
			
 
				+        return {};
			
 
				+    }
			
 
				+    TRY(encode_bit_with_probability(m_is_rep_probabilities[m_state], 1));
			
 
				+
			
 
				+    if (match_type == MatchType::ShortRepMatch || match_type == MatchType::RepMatch0) {
			
 
				+        TRY(encode_bit_with_probability(m_is_rep_g0_probabilities[m_state], 0));
			
 
				+        TRY(encode_bit_with_probability(m_is_rep0_long_probabilities[state2], match_type == MatchType::RepMatch0));
			
 
				+        return {};
			
 
				+    }
			
 
				+    TRY(encode_bit_with_probability(m_is_rep_g0_probabilities[m_state], 1));
			
 
				+
			
 
				+    if (match_type == MatchType::RepMatch1) {
			
 
				+        TRY(encode_bit_with_probability(m_is_rep_g1_probabilities[m_state], 0));
			
 
				+        return {};
			
 
				+    }
			
 
				+    TRY(encode_bit_with_probability(m_is_rep_g1_probabilities[m_state], 1));
			
 
				+
			
 
				+    if (match_type == MatchType::RepMatch2) {
			
 
				+        TRY(encode_bit_with_probability(m_is_rep_g2_probabilities[m_state], 0));
			
 
				+        return {};
			
 
				+    }
			
 
				+    TRY(encode_bit_with_probability(m_is_rep_g2_probabilities[m_state], 1));
			
 
				+    return {};
			
 
				+}
			
 
				+
			
 
				+ErrorOr<void> LzmaCompressor::encode_once()
			
 
				+{
			
 
				+    // Check if any of our existing match distances are currently usable.
			
 
				+    Vector<size_t> const existing_distance_hints {
			
 
				+        m_rep0 + normalized_to_real_match_distance_offset,
			
 
				+        m_rep1 + normalized_to_real_match_distance_offset,
			
 
				+        m_rep2 + normalized_to_real_match_distance_offset,
			
 
				+        m_rep3 + normalized_to_real_match_distance_offset,
			
 
				+    };
			
 
				+    auto existing_distance_results = TRY(m_dictionary->find_copy_in_seekback(m_dictionary->used_space(), normalized_to_real_match_length_offset, existing_distance_hints));
			
 
				+
			
 
				+    if (existing_distance_results.size() > 0) {
			
 
				+        auto selected_match = existing_distance_results[0];
			
 
				+        TRY(encode_existing_match(selected_match.distance, selected_match.length));
			
 
				+        return {};
			
 
				+    }
			
 
				+
			
 
				+    // If we weren't able to find any viable existing offsets, we now have to search the rest of the dictionary for possible new offsets.
			
 
				+    auto new_distance_results = TRY(m_dictionary->find_copy_in_seekback(m_dictionary->used_space(), normalized_to_real_match_length_offset));
			
 
				+
			
 
				+    if (new_distance_results.size() > 0) {
			
 
				+        auto selected_match = new_distance_results[0];
			
 
				+        TRY(encode_new_match(selected_match.distance, selected_match.length));
			
 
				+        return {};
			
 
				+    }
			
 
				+
			
 
				+    // If we weren't able to find any matches, we don't have any other choice than to encode the next byte as a literal.
			
 
				+    u8 next_byte { 0 };
			
 
				+    m_dictionary->read({ &next_byte, sizeof(next_byte) });
			
 
				+    TRY(encode_literal(next_byte));
			
 
				+    return {};
			
 
				+}
			
 
				+
			
 
				 ErrorOr<Bytes> LzmaDecompressor::read_some(Bytes bytes)
			
 
				 {
			
 
				     while (m_dictionary->used_space() < bytes.size() && m_dictionary->empty_space() != 0) {
			
@@ -628,7 +1034,7 @@ ErrorOr<Bytes> LzmaDecompressor::read_some(Bytes bytes)
 
				             // "If the value of "rep0" is equal to 0xFFFFFFFF, it means that we have
			
 
				             //  "End of stream" marker, so we can stop decoding and check finishing
			
 
				             //  condition in Range Decoder"
			
 
				-            if (m_rep0 == 0xFFFFFFFF) {
			
 
				+            if (m_rep0 == end_of_stream_marker) {
			
 
				                 // If we should reject end-of-stream markers, do so now.
			
 
				                 // Note that this is not part of LZMA, as LZMA allows end-of-stream markers in all contexts, so pure LZMA should never set this option.
			
 
				                 if (m_options.reject_end_of_stream_marker)
			
@@ -744,4 +1150,116 @@ void LzmaDecompressor::close()
 
				 {
			
 
				 }
			
 
				 
			
 
				+ErrorOr<NonnullOwnPtr<LzmaCompressor>> LzmaCompressor::create_container(MaybeOwned<Stream> stream, LzmaCompressorOptions const& options)
			
 
				+{
			
 
				+    auto dictionary = TRY(try_make<CircularBuffer>(TRY(CircularBuffer::create_empty(options.dictionary_size + largest_real_match_length))));
			
 
				+
			
 
				+    // "The LZMA Decoder uses (1 << (lc + lp)) tables with CProb values, where each table contains 0x300 CProb values."
			
 
				+    auto literal_probabilities = TRY(FixedArray<Probability>::create(literal_probability_table_size * (1 << (options.literal_context_bits + options.literal_position_bits))));
			
 
				+
			
 
				+    auto header = TRY(LzmaHeader::from_compressor_options(options));
			
 
				+    TRY(stream->write_value(header));
			
 
				+
			
 
				+    // Note: The reference LZMA implementation has a starting null byte due to how their overflow reservoir is implemented and subsequently wrote it into the specification.
			
 
				+    //       Therefore, we just have to add it manually.
			
 
				+    TRY(stream->write_value<u8>(0x00));
			
 
				+
			
 
				+    auto compressor = TRY(adopt_nonnull_own_or_enomem(new (nothrow) LzmaCompressor(move(stream), options, move(dictionary), move(literal_probabilities))));
			
 
				+
			
 
				+    return compressor;
			
 
				+}
			
 
				+
			
 
				+LzmaCompressor::LzmaCompressor(MaybeOwned<AK::Stream> stream, Compress::LzmaCompressorOptions options, MaybeOwned<CircularBuffer> dictionary, FixedArray<Compress::LzmaState::Probability> literal_probabilities)
			
 
				+    : LzmaState(move(literal_probabilities))
			
 
				+    , m_stream(move(stream))
			
 
				+    , m_options(move(options))
			
 
				+    , m_dictionary(move(dictionary))
			
 
				+{
			
 
				+}
			
 
				+
			
 
				+ErrorOr<Bytes> LzmaCompressor::read_some(Bytes)
			
 
				+{
			
 
				+    return Error::from_errno(EBADF);
			
 
				+}
			
 
				+
			
 
				+ErrorOr<size_t> LzmaCompressor::write_some(ReadonlyBytes bytes)
			
 
				+{
			
 
				+    // Fill the input buffer until it's full or until we can't read any more data.
			
 
				+    size_t processed_bytes = min(bytes.size(), largest_real_match_length - m_dictionary->used_space());
			
 
				+    bytes = bytes.trim(processed_bytes);
			
 
				+
			
 
				+    while (bytes.size() > 0) {
			
 
				+        auto const written_bytes = m_dictionary->write(bytes);
			
 
				+        bytes = bytes.slice(written_bytes);
			
 
				+    }
			
 
				+
			
 
				+    VERIFY(m_dictionary->used_space() <= largest_real_match_length);
			
 
				+
			
 
				+    if (m_options.uncompressed_size.has_value() && m_total_processed_bytes + m_dictionary->used_space() > m_options.uncompressed_size.value())
			
 
				+        return Error::from_string_literal("Tried to compress more LZMA data than announced");
			
 
				+
			
 
				+    TRY(encode_once());
			
 
				+
			
 
				+    // If we read enough data to reach the final uncompressed size, flush automatically.
			
 
				+    // Flushing will handle encoding the remaining data for us and finalize the stream.
			
 
				+    if (m_options.uncompressed_size.has_value() && m_total_processed_bytes + m_dictionary->used_space() >= m_options.uncompressed_size.value())
			
 
				+        TRY(flush());
			
 
				+
			
 
				+    return processed_bytes;
			
 
				+}
			
 
				+
			
 
				+ErrorOr<void> LzmaCompressor::flush()
			
 
				+{
			
 
				+    if (m_has_flushed_data)
			
 
				+        return Error::from_string_literal("Flushed an LZMA stream twice");
			
 
				+
			
 
				+    while (m_dictionary->used_space() > 0)
			
 
				+        TRY(encode_once());
			
 
				+
			
 
				+    if (m_options.uncompressed_size.has_value() && m_total_processed_bytes < m_options.uncompressed_size.value())
			
 
				+        return Error::from_string_literal("Flushing LZMA data with known but unreached uncompressed size");
			
 
				+
			
 
				+    // The LZMA specification technically also allows both a known size and an end-of-stream marker simultaneously,
			
 
				+    // but LZMA2 rejects them, so skip emitting the end-of-stream marker if we know the uncompressed size.
			
 
				+    if (!m_options.uncompressed_size.has_value())
			
 
				+        TRY(encode_normalized_simple_match(end_of_stream_marker, 0));
			
 
				+
			
 
				+    while (m_range_encoder_code_used_bits > 0) {
			
 
				+        VERIFY(m_range_encoder_code_used_bits >= 8);
			
 
				+        u8 const next_byte = (m_range_encoder_code >> (m_range_encoder_code_used_bits - 8));
			
 
				+        m_range_encoder_code -= static_cast<u64>(next_byte) << (m_range_encoder_code_used_bits - 8);
			
 
				+        m_range_encoder_code_used_bits -= 8;
			
 
				+        TRY(m_stream->write_value(next_byte));
			
 
				+    }
			
 
				+
			
 
				+    m_has_flushed_data = true;
			
 
				+    return {};
			
 
				+}
			
 
				+
			
 
				+bool LzmaCompressor::is_eof() const
			
 
				+{
			
 
				+    return true;
			
 
				+}
			
 
				+
			
 
				+bool LzmaCompressor::is_open() const
			
 
				+{
			
 
				+    return !m_has_flushed_data;
			
 
				+}
			
 
				+
			
 
				+void LzmaCompressor::close()
			
 
				+{
			
 
				+    if (!m_has_flushed_data) {
			
 
				+        // Note: We need a better API for specifying things like this.
			
 
				+        flush().release_value_but_fixme_should_propagate_errors();
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+LzmaCompressor::~LzmaCompressor()
			
 
				+{
			
 
				+    if (!m_has_flushed_data) {
			
 
				+        // Note: We need a better API for specifying things like this.
			
 
				+        flush().release_value_but_fixme_should_propagate_errors();
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				 }
			
--- a/Userland/Libraries/LibCompress/Lzma.h
+++ b/Userland/Libraries/LibCompress/Lzma.h
@@ -32,18 +32,31 @@ struct LzmaDecompressorOptions {
 
				     bool reject_end_of_stream_marker { false };
			
 
				 };
			
 
				 
			
 
				+struct LzmaCompressorOptions {
			
 
				+    // Note: The default settings have been chosen based on the default settings of other LZMA compressors.
			
 
				+    u8 literal_context_bits { 3 }; // NOTE(review): LzmaHeader::encode_model_properties() rejects values above 8 for the model property of the same name.
			
 
				+    u8 literal_position_bits { 0 }; // NOTE(review): LzmaHeader::encode_model_properties() rejects values above 4 for the model property of the same name.
			
 
				+    u8 position_bits { 2 }; // NOTE(review): LzmaHeader::encode_model_properties() rejects values above 4 for the model property of the same name.
			
 
				+    u32 dictionary_size { 8 * MiB };
			
 
				+    Optional<u64> uncompressed_size {}; // If unset, LzmaCompressor::flush() ends the stream with an end-of-stream marker; if set, no marker is written and flush() reports an error about a known but unreached uncompressed size.
			
 
				+};
			
 
				+
			
 
				 // Described in section "lzma file format".
			
 
				 struct [[gnu::packed]] LzmaHeader { // Packed: the three data members add up to 1 + 4 + 8 = 13 bytes, which the static_assert following the struct checks.
			
 
				     u32 dictionary_size() const;
			
 
				     Optional<u64> uncompressed_size() const; // Returns an empty Optional if the stored size equals placeholder_for_unknown_uncompressed_size.
			
 
				 
			
 
				     ErrorOr<LzmaDecompressorOptions> as_decompressor_options() const;
			
 
				+    static ErrorOr<LzmaHeader> from_compressor_options(LzmaCompressorOptions const&);
			
 
				 
			
 
				     static ErrorOr<LzmaModelProperties> decode_model_properties(u8 input_bits);
			
 
				+    static ErrorOr<u8> encode_model_properties(LzmaModelProperties const&); // Packs the properties as (position_bits * 5 + literal_position_bits) * 9 + literal_context_bits; fails if they exceed 4, 4 and 8 respectively.
			
 
				 
			
 
				     u8 encoded_model_properties;
			
 
				     u32 unchecked_dictionary_size;
			
 
				     u64 encoded_uncompressed_size;
			
 
				+
			
 
				+    static constexpr u64 placeholder_for_unknown_uncompressed_size = UINT64_MAX; // All 64 bits set: the uncompressed size is unknown and the stream contains an end marker instead.
			
 
				 };
			
 
				 static_assert(sizeof(LzmaHeader) == 13);
			
 
				 
			
@@ -83,6 +96,13 @@ protected:
 
				     static constexpr u16 normalized_to_real_match_length_offset = 2;
			
 
				     static constexpr u32 normalized_to_real_match_distance_offset = 1;
			
 
				 
			
 
				+    // According to the specification, the largest possible normalized match length is produced by the high coder,
			
 
				+    // which processes 8 bits (0 to 255) and adds a displacement of 16 on top.
			
 
				+    // Adding the normalized-to-real offset yields the largest real match length, which is the minimum size that our input buffer has to have to not miss any possible repetitions while encoding.
			
 
				+    static constexpr u16 largest_real_match_length = 255 + 16 + normalized_to_real_match_length_offset;
			
 
				+
			
 
				+    static constexpr u32 end_of_stream_marker = 0xFFFFFFFF; // Passed as the normalized match distance (with a normalized length of 0) when LzmaCompressor::flush() terminates a stream of unknown size.
			
 
				+
			
 
				     static constexpr size_t number_of_length_to_position_states = 4;
			
 
				     Array<Array<Probability, (1 << 6)>, number_of_length_to_position_states> m_length_to_position_states;
			
 
				 
			
@@ -185,6 +205,57 @@ private:
 
				     ErrorOr<u32> decode_normalized_match_distance(u16 normalized_match_length);
			
 
				 };
			
 
				 
			
 
				+class LzmaCompressor : public Stream
			
 
				+    , LzmaState {
			
 
				+public:
			
 
				+    /// Creates a compressor for a standalone LZMA container (.lzma file extension, occasionally known as an LZMA 'archive').
			
 
				+    static ErrorOr<NonnullOwnPtr<LzmaCompressor>> create_container(MaybeOwned<Stream>, LzmaCompressorOptions const&);
			
 
				+
			
 
				+    /// Finishes the archive by writing out the remaining data from the range coder. If no uncompressed size was given in the options, an end-of-stream marker is encoded first. Afterwards the compressor reports itself as closed.
			
 
				+    ErrorOr<void> flush();
			
 
				+
			
 
				+    virtual ErrorOr<Bytes> read_some(Bytes) override;
			
 
				+    virtual ErrorOr<size_t> write_some(ReadonlyBytes) override;
			
 
				+    virtual bool is_eof() const override; // Always returns true.
			
 
				+    virtual bool is_open() const override; // Returns true until flush() has completed.
			
 
				+    virtual void close() override; // Calls flush() if that has not happened yet; errors from flush() are not propagated to the caller.
			
 
				+
			
 
				+    virtual ~LzmaCompressor(); // Performs the same pending flush as close().
			
 
				+
			
 
				+private:
			
 
				+    LzmaCompressor(MaybeOwned<Stream>, LzmaCompressorOptions, MaybeOwned<CircularBuffer>, FixedArray<Probability> literal_probabilities);
			
 
				+
			
 
				+    ErrorOr<void> normalize_range_encoder();
			
 
				+    ErrorOr<void> encode_direct_bit(u8 value);
			
 
				+    ErrorOr<void> encode_bit_with_probability(Probability&, u8 value);
			
 
				+    ErrorOr<void> encode_symbol_using_bit_tree(size_t bit_count, Span<Probability> probability_tree, u16 value);
			
 
				+    ErrorOr<void> encode_symbol_using_reverse_bit_tree(size_t bit_count, Span<Probability> probability_tree, u16 value);
			
 
				+    ErrorOr<void> encode_normalized_match_length(LzmaLengthCoderState&, u16 normalized_length);
			
 
				+    ErrorOr<void> encode_normalized_match_distance(u16 normalized_match_length, u32 normalized_match_distance);
			
 
				+
			
 
				+    ErrorOr<void> encode_match_type(MatchType);
			
 
				+    ErrorOr<void> encode_literal(u8 literal);
			
 
				+    ErrorOr<void> encode_existing_match(size_t real_distance, size_t real_length);
			
 
				+    ErrorOr<void> encode_new_match(size_t real_distance, size_t real_length);
			
 
				+    ErrorOr<void> encode_normalized_simple_match(u32 normalized_distance, u16 normalized_length); // Also used by flush() to emit the end-of-stream marker.
			
 
				+
			
 
				+    ErrorOr<void> encode_once();
			
 
				+
			
 
				+    bool m_has_flushed_data { false }; // Set at the end of a successful flush(); consulted by is_open(), close() and the destructor.
			
 
				+
			
 
				+    MaybeOwned<Stream> m_stream; // Output stream that flush() writes the remaining range coder bytes to.
			
 
				+    LzmaCompressorOptions m_options;
			
 
				+
			
 
				+    // This doubles as an input buffer, which is appended at the very front of the buffer.
			
 
				+    // Therefore, the size of this should at least be the dictionary size + the largest possible repetition length.
			
 
				+    MaybeOwned<CircularBuffer> m_dictionary;
			
 
				+
			
 
				+    // Range encoder state.
			
 
				+    u32 m_range_encoder_range { 0xFFFFFFFF };
			
 
				+    u64 m_range_encoder_code { 0 };
			
 
				+    size_t m_range_encoder_code_used_bits { 32 }; // flush() drains m_range_encoder_code in steps of 8 bits, most significant byte first, until this reaches zero.
			
 
				+};
			
 
				+
			
 
				 }
			
 
				 
			
 
				 template<>