|
@@ -25,404 +25,332 @@
|
|
*/
|
|
*/
|
|
|
|
|
|
#include <AK/Assertions.h>
|
|
#include <AK/Assertions.h>
|
|
|
|
+#include <AK/BinarySearch.h>
|
|
|
|
+#include <AK/FixedArray.h>
|
|
#include <AK/LogStream.h>
|
|
#include <AK/LogStream.h>
|
|
-#include <AK/Span.h>
|
|
|
|
-#include <AK/Types.h>
|
|
|
|
-#include <AK/Vector.h>
|
|
|
|
|
|
+
|
|
#include <LibCompress/Deflate.h>
|
|
#include <LibCompress/Deflate.h>
|
|
|
|
|
|
namespace Compress {
|
|
namespace Compress {
|
|
|
|
|
|
-bool DeflateStream::read_next_block() const
|
|
|
|
|
|
+// FIXME: This logic needs to go into the deflate decoder somehow, we don't want
|
|
|
|
+// to assert that the input is valid. Instead we need to set m_error on the
|
|
|
|
+// stream.
|
|
|
|
+DeflateDecompressor::CanonicalCode::CanonicalCode(ReadonlyBytes codes)
|
|
{
|
|
{
|
|
- if (m_read_last_block)
|
|
|
|
- return false;
|
|
|
|
|
|
+ // FIXME: I can't quite follow the algorithm here, but it seems to work.
|
|
|
|
|
|
- m_read_last_block = m_reader.read_bits(1);
|
|
|
|
- auto block_type = m_reader.read_bits(2);
|
|
|
|
-
|
|
|
|
- switch (block_type) {
|
|
|
|
- case 0:
|
|
|
|
- decompress_uncompressed_block();
|
|
|
|
- break;
|
|
|
|
- case 1:
|
|
|
|
- decompress_static_block();
|
|
|
|
- break;
|
|
|
|
- case 2:
|
|
|
|
- decompress_dynamic_block();
|
|
|
|
- break;
|
|
|
|
- case 3:
|
|
|
|
- dbg() << "Block contains reserved block type...";
|
|
|
|
- ASSERT_NOT_REACHED();
|
|
|
|
- break;
|
|
|
|
- default:
|
|
|
|
- dbg() << "Invalid block type was read...";
|
|
|
|
|
|
+ m_symbol_codes.resize(codes.size());
|
|
|
|
+ m_symbol_values.resize(codes.size());
|
|
|
|
+
|
|
|
|
+ auto allocated_symbols_count = 0;
|
|
|
|
+ auto next_code = 0;
|
|
|
|
+
|
|
|
|
+ for (size_t code_length = 1; code_length <= 15; ++code_length) {
|
|
|
|
+ next_code <<= 1;
|
|
|
|
+ auto start_bit = 1 << code_length;
|
|
|
|
+
|
|
|
|
+ for (size_t symbol = 0; symbol < codes.size(); ++symbol) {
|
|
|
|
+ if (codes[symbol] != code_length)
|
|
|
|
+ continue;
|
|
|
|
+
|
|
|
|
+ if (next_code > start_bit) {
|
|
|
|
+ dbg() << "Canonical code overflows the huffman tree";
|
|
|
|
+ ASSERT_NOT_REACHED();
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ m_symbol_codes[allocated_symbols_count] = start_bit | next_code;
|
|
|
|
+ m_symbol_values[allocated_symbols_count] = symbol;
|
|
|
|
+
|
|
|
|
+ allocated_symbols_count++;
|
|
|
|
+ next_code++;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if (next_code != (1 << 15)) {
|
|
|
|
+ dbg() << "Canonical code underflows the huffman tree " << next_code;
|
|
ASSERT_NOT_REACHED();
|
|
ASSERT_NOT_REACHED();
|
|
- break;
|
|
|
|
}
|
|
}
|
|
|
|
+}
|
|
|
|
|
|
- return true;
|
|
|
|
|
|
+const DeflateDecompressor::CanonicalCode& DeflateDecompressor::CanonicalCode::fixed_literal_codes()
|
|
|
|
+{
|
|
|
|
+ static CanonicalCode* code = nullptr;
|
|
|
|
+
|
|
|
|
+ if (code)
|
|
|
|
+ return *code;
|
|
|
|
+
|
|
|
|
+ FixedArray<u8> data { 288 };
|
|
|
|
+ data.bytes().slice(0, 144 - 0).fill(8);
|
|
|
|
+ data.bytes().slice(144, 256 - 144).fill(9);
|
|
|
|
+ data.bytes().slice(256, 280 - 256).fill(7);
|
|
|
|
+ data.bytes().slice(280, 288 - 280).fill(8);
|
|
|
|
+
|
|
|
|
+ code = new CanonicalCode(data);
|
|
|
|
+ return *code;
|
|
}
|
|
}
|
|
|
|
|
|
-void DeflateStream::decompress_uncompressed_block() const
|
|
|
|
|
|
+const DeflateDecompressor::CanonicalCode& DeflateDecompressor::CanonicalCode::fixed_distance_codes()
|
|
{
|
|
{
|
|
- // Align to the next byte boundary.
|
|
|
|
- while (m_reader.get_bit_byte_offset() != 0) {
|
|
|
|
- m_reader.read();
|
|
|
|
- }
|
|
|
|
|
|
+ static CanonicalCode* code = nullptr;
|
|
|
|
|
|
- auto length = m_reader.read_bits(16) & 0xFFFF;
|
|
|
|
- auto negated_length = m_reader.read_bits(16) & 0xFFFF;
|
|
|
|
|
|
+ if (code)
|
|
|
|
+ return *code;
|
|
|
|
|
|
- if ((length ^ 0xFFFF) != negated_length) {
|
|
|
|
- dbg() << "Block length is invalid...";
|
|
|
|
- ASSERT_NOT_REACHED();
|
|
|
|
- }
|
|
|
|
|
|
+ FixedArray<u8> data { 32 };
|
|
|
|
+ data.bytes().fill(5);
|
|
|
|
|
|
- for (size_t i = 0; i < length; i++) {
|
|
|
|
- auto byte = m_reader.read_byte();
|
|
|
|
- if (byte < 0) {
|
|
|
|
- dbg() << "Ran out of bytes while reading uncompressed block...";
|
|
|
|
- ASSERT_NOT_REACHED();
|
|
|
|
- }
|
|
|
|
|
|
+ code = new CanonicalCode(data);
|
|
|
|
+ return *code;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+u32 DeflateDecompressor::CanonicalCode::read_symbol(InputBitStream& stream) const
|
|
|
|
+{
|
|
|
|
+ u32 code_bits = 1;
|
|
|
|
+
|
|
|
|
+ for (;;) {
|
|
|
|
+ code_bits = code_bits << 1 | stream.read_bits(1);
|
|
|
|
|
|
- m_intermediate_stream << byte;
|
|
|
|
|
|
+ size_t index;
|
|
|
|
+ if (AK::binary_search(m_symbol_codes.span(), code_bits, AK::integral_compare<u32>, &index))
|
|
|
|
+ return m_symbol_values[index];
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
-void DeflateStream::decompress_static_block() const
|
|
|
|
|
|
+DeflateDecompressor::CompressedBlock::CompressedBlock(DeflateDecompressor& decompressor, CanonicalCode literal_codes, Optional<CanonicalCode> distance_codes)
|
|
|
|
+ : m_decompressor(decompressor)
|
|
|
|
+ , m_literal_codes(literal_codes)
|
|
|
|
+ , m_distance_codes(distance_codes)
|
|
{
|
|
{
|
|
- decompress_huffman_block(m_literal_length_codes, &m_fixed_distance_codes);
|
|
|
|
}
|
|
}
|
|
|
|
|
|
-void DeflateStream::decompress_dynamic_block() const
|
|
|
|
|
|
+bool DeflateDecompressor::CompressedBlock::try_read_more()
|
|
{
|
|
{
|
|
- auto codes = decode_huffman_codes();
|
|
|
|
- if (codes.size() == 2) {
|
|
|
|
- decompress_huffman_block(codes[0], &codes[1]);
|
|
|
|
|
|
+ if (m_eof == true)
|
|
|
|
+ return false;
|
|
|
|
+
|
|
|
|
+ const auto symbol = m_literal_codes.read_symbol(m_decompressor.m_input_stream);
|
|
|
|
+
|
|
|
|
+ if (symbol < 256) {
|
|
|
|
+ m_decompressor.m_output_stream << static_cast<u8>(symbol);
|
|
|
|
+ return true;
|
|
|
|
+ } else if (symbol == 256) {
|
|
|
|
+ m_eof = true;
|
|
|
|
+ return false;
|
|
} else {
|
|
} else {
|
|
- decompress_huffman_block(codes[0], nullptr);
|
|
|
|
|
|
+ ASSERT(m_distance_codes.has_value());
|
|
|
|
+
|
|
|
|
+ const auto run_length = m_decompressor.decode_run_length(symbol);
|
|
|
|
+ const auto distance = m_decompressor.decode_distance(m_distance_codes.value().read_symbol(m_decompressor.m_input_stream));
|
|
|
|
+
|
|
|
|
+ auto bytes = m_decompressor.m_output_stream.reserve_contigous_space(run_length);
|
|
|
|
+ m_decompressor.m_output_stream.read(bytes, distance + bytes.size());
|
|
|
|
+
|
|
|
|
+ return true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
-void DeflateStream::decompress_huffman_block(CanonicalCode& length_codes, CanonicalCode* distance_codes) const
|
|
|
|
|
|
+DeflateDecompressor::UncompressedBlock::UncompressedBlock(DeflateDecompressor& decompressor, size_t length)
|
|
|
|
+ : m_decompressor(decompressor)
|
|
|
|
+ , m_bytes_remaining(length)
|
|
{
|
|
{
|
|
- for (;;) {
|
|
|
|
- u32 symbol = length_codes.next_symbol(m_reader);
|
|
|
|
-
|
|
|
|
- // End of block.
|
|
|
|
- if (symbol == 256) {
|
|
|
|
- break;
|
|
|
|
- }
|
|
|
|
|
|
+}
|
|
|
|
|
|
- // literal byte.
|
|
|
|
- if (symbol < 256) {
|
|
|
|
- m_intermediate_stream << static_cast<u8>(symbol);
|
|
|
|
- continue;
|
|
|
|
- }
|
|
|
|
|
|
+bool DeflateDecompressor::UncompressedBlock::try_read_more()
|
|
|
|
+{
|
|
|
|
+ if (m_bytes_remaining == 0)
|
|
|
|
+ return false;
|
|
|
|
|
|
- // Length and distance for copying.
|
|
|
|
- ASSERT(distance_codes);
|
|
|
|
|
|
+ const auto nread = min(m_bytes_remaining, m_decompressor.m_output_stream.remaining_contigous_space());
|
|
|
|
+ m_bytes_remaining -= nread;
|
|
|
|
|
|
- auto run = decode_run_length(symbol);
|
|
|
|
- if (run < 3 || run > 258) {
|
|
|
|
- dbg() << "Invalid run length";
|
|
|
|
- ASSERT_NOT_REACHED();
|
|
|
|
- }
|
|
|
|
|
|
+ m_decompressor.m_input_stream >> m_decompressor.m_output_stream.reserve_contigous_space(nread);
|
|
|
|
|
|
- auto distance_symbol = distance_codes->next_symbol(m_reader);
|
|
|
|
- auto distance = decode_distance(distance_symbol);
|
|
|
|
- if (distance < 1 || distance > 32768) {
|
|
|
|
- dbg() << "Invalid distance";
|
|
|
|
- ASSERT_NOT_REACHED();
|
|
|
|
- }
|
|
|
|
|
|
+ return true;
|
|
|
|
+}
|
|
|
|
|
|
- copy_from_history(distance, run);
|
|
|
|
- }
|
|
|
|
|
|
+DeflateDecompressor::DeflateDecompressor(InputStream& stream)
|
|
|
|
+ : m_input_stream(stream)
|
|
|
|
+{
|
|
}
|
|
}
|
|
|
|
|
|
-Vector<CanonicalCode> DeflateStream::decode_huffman_codes() const
|
|
|
|
|
|
+DeflateDecompressor::~DeflateDecompressor()
|
|
{
|
|
{
|
|
- // FIXME: This path is not tested.
|
|
|
|
- Vector<CanonicalCode> result;
|
|
|
|
-
|
|
|
|
- auto length_code_count = m_reader.read_bits(5) + 257;
|
|
|
|
- auto distance_code_count = m_reader.read_bits(5) + 1;
|
|
|
|
-
|
|
|
|
- size_t length_code_code_length = m_reader.read_bits(4) + 4;
|
|
|
|
-
|
|
|
|
- Vector<u8> code_length_code_length;
|
|
|
|
- code_length_code_length.resize(19);
|
|
|
|
- code_length_code_length[16] = m_reader.read_bits(3);
|
|
|
|
- code_length_code_length[17] = m_reader.read_bits(3);
|
|
|
|
- code_length_code_length[18] = m_reader.read_bits(3);
|
|
|
|
- code_length_code_length[0] = m_reader.read_bits(3);
|
|
|
|
- for (size_t i = 0; i < length_code_code_length; i++) {
|
|
|
|
- auto index = (i % 2 == 0) ? (8 + (i / 2)) : (7 - (i / 2));
|
|
|
|
- code_length_code_length[index] = m_reader.read_bits(3);
|
|
|
|
- }
|
|
|
|
|
|
+ if (m_state == State::ReadingCompressedBlock)
|
|
|
|
+ m_compressed_block.~CompressedBlock();
|
|
|
|
+ if (m_state == State::ReadingUncompressedBlock)
|
|
|
|
+ m_uncompressed_block.~UncompressedBlock();
|
|
|
|
+}
|
|
|
|
|
|
- auto code_length_code = CanonicalCode(code_length_code_length);
|
|
|
|
|
|
+size_t DeflateDecompressor::read(Bytes bytes)
|
|
|
|
+{
|
|
|
|
+ // FIXME: There are surely a ton of bugs because we don't check for read errors
|
|
|
|
+ // very often.
|
|
|
|
|
|
- Vector<u32> code_lens;
|
|
|
|
- code_lens.resize(length_code_count + distance_code_count);
|
|
|
|
|
|
+ if (m_state == State::Idle) {
|
|
|
|
+ if (m_read_final_bock)
|
|
|
|
+ return 0;
|
|
|
|
|
|
- for (size_t index = 0; index < code_lens.capacity();) {
|
|
|
|
- auto symbol = code_length_code.next_symbol(m_reader);
|
|
|
|
|
|
+ m_read_final_bock = m_input_stream.read_bit();
|
|
|
|
+ const auto block_type = m_input_stream.read_bits(2);
|
|
|
|
|
|
- if (symbol <= 15) {
|
|
|
|
- code_lens[index] = symbol;
|
|
|
|
- index++;
|
|
|
|
- continue;
|
|
|
|
- }
|
|
|
|
|
|
+ if (block_type == 0b00) {
|
|
|
|
+ m_input_stream.align_to_byte_boundary();
|
|
|
|
|
|
- u32 run_length;
|
|
|
|
- u32 run_value = 0;
|
|
|
|
|
|
+ LittleEndian<u16> length, negated_length;
|
|
|
|
+ m_input_stream >> length >> negated_length;
|
|
|
|
|
|
- if (symbol == 16) {
|
|
|
|
- if (index == 0) {
|
|
|
|
- dbg() << "No code length value avaliable";
|
|
|
|
- ASSERT_NOT_REACHED();
|
|
|
|
|
|
+ if ((length ^ 0xffff) != negated_length) {
|
|
|
|
+ m_error = true;
|
|
|
|
+ return 0;
|
|
}
|
|
}
|
|
|
|
|
|
- run_length = m_reader.read_bits(2) + 3;
|
|
|
|
- run_value = code_lens[index - 1];
|
|
|
|
- } else if (symbol == 17) {
|
|
|
|
- run_length = m_reader.read_bits(3) + 3;
|
|
|
|
- } else if (symbol == 18) {
|
|
|
|
- run_length = m_reader.read_bits(7) + 11;
|
|
|
|
- } else {
|
|
|
|
- dbg() << "Code symbol is out of range!";
|
|
|
|
- ASSERT_NOT_REACHED();
|
|
|
|
- }
|
|
|
|
|
|
+ m_state = State::ReadingUncompressedBlock;
|
|
|
|
+ new (&m_uncompressed_block) UncompressedBlock(*this, length);
|
|
|
|
|
|
- u32 end = index + run_length;
|
|
|
|
- if (end > code_lens.capacity()) {
|
|
|
|
- dbg() << "Code run is out of range!";
|
|
|
|
- ASSERT_NOT_REACHED();
|
|
|
|
|
|
+ return read(bytes);
|
|
}
|
|
}
|
|
|
|
|
|
- memset(code_lens.data() + index, run_value, run_length);
|
|
|
|
- index = end;
|
|
|
|
- }
|
|
|
|
|
|
+ if (block_type == 0b01) {
|
|
|
|
+ m_state = State::ReadingCompressedBlock;
|
|
|
|
+ new (&m_compressed_block) CompressedBlock(*this, CanonicalCode::fixed_literal_codes(), CanonicalCode::fixed_distance_codes());
|
|
|
|
|
|
- Vector<u8> literal_codes;
|
|
|
|
- literal_codes.resize(length_code_count);
|
|
|
|
- memcpy(literal_codes.data(), code_lens.data(), literal_codes.capacity());
|
|
|
|
- result.append(CanonicalCode(literal_codes));
|
|
|
|
|
|
+ return read(bytes);
|
|
|
|
+ }
|
|
|
|
|
|
- Vector<u8> distance_codes;
|
|
|
|
- distance_codes.resize(distance_code_count);
|
|
|
|
- memcpy(distance_codes.data(), code_lens.data() + length_code_count, distance_codes.capacity());
|
|
|
|
|
|
+ if (block_type == 0b10) {
|
|
|
|
+ CanonicalCode literal_codes, distance_codes;
|
|
|
|
+ decode_codes(literal_codes, distance_codes);
|
|
|
|
+ new (&m_compressed_block) CompressedBlock(*this, literal_codes, distance_codes);
|
|
|
|
|
|
- if (distance_code_count == 1 && distance_codes[0] == 0) {
|
|
|
|
- return result;
|
|
|
|
- }
|
|
|
|
|
|
+ return read(bytes);
|
|
|
|
+ }
|
|
|
|
|
|
- u8 one_count = 0;
|
|
|
|
- u8 other_count = 0;
|
|
|
|
|
|
+ ASSERT_NOT_REACHED();
|
|
|
|
+ }
|
|
|
|
|
|
- for (size_t i = 0; i < distance_codes.capacity(); i++) {
|
|
|
|
- u8 value = distance_codes.at(i);
|
|
|
|
|
|
+ if (m_state == State::ReadingCompressedBlock) {
|
|
|
|
+ auto nread = m_output_stream.read(bytes);
|
|
|
|
|
|
- if (value == 1) {
|
|
|
|
- one_count++;
|
|
|
|
- } else if (value > 1) {
|
|
|
|
- other_count++;
|
|
|
|
|
|
+ while (nread < bytes.size() && m_compressed_block.try_read_more()) {
|
|
|
|
+ nread += m_output_stream.read(bytes.slice(nread));
|
|
}
|
|
}
|
|
- }
|
|
|
|
|
|
|
|
- if (one_count == 1 && other_count == 0) {
|
|
|
|
- distance_codes.resize(32);
|
|
|
|
- distance_codes[31] = 1;
|
|
|
|
- }
|
|
|
|
|
|
+ if (nread == bytes.size())
|
|
|
|
+ return nread;
|
|
|
|
|
|
- result.append(CanonicalCode(distance_codes));
|
|
|
|
- return result;
|
|
|
|
-}
|
|
|
|
|
|
+ m_compressed_block.~CompressedBlock();
|
|
|
|
+ m_state = State::Idle;
|
|
|
|
|
|
-u32 DeflateStream::decode_run_length(u32 symbol) const
|
|
|
|
-{
|
|
|
|
- if (symbol <= 264) {
|
|
|
|
- return symbol - 254;
|
|
|
|
|
|
+ return nread + read(bytes.slice(nread));
|
|
}
|
|
}
|
|
|
|
|
|
- if (symbol <= 284) {
|
|
|
|
- auto extra_bits = (symbol - 261) / 4;
|
|
|
|
- return ((((symbol - 265) % 4) + 4) << extra_bits) + 3 + m_reader.read_bits(extra_bits);
|
|
|
|
- }
|
|
|
|
|
|
+ if (m_state == State::ReadingUncompressedBlock) {
|
|
|
|
+ auto nread = m_output_stream.read(bytes);
|
|
|
|
|
|
- if (symbol == 285) {
|
|
|
|
- return 258;
|
|
|
|
- }
|
|
|
|
|
|
+ while (nread < bytes.size() && m_uncompressed_block.try_read_more()) {
|
|
|
|
+ nread += m_output_stream.read(bytes.slice(nread));
|
|
|
|
+ }
|
|
|
|
|
|
- dbg() << "Found invalid symbol in run length " << symbol;
|
|
|
|
- ASSERT_NOT_REACHED();
|
|
|
|
-}
|
|
|
|
|
|
+ if (nread == bytes.size())
|
|
|
|
+ return nread;
|
|
|
|
|
|
-u32 DeflateStream::decode_distance(u32 symbol) const
|
|
|
|
-{
|
|
|
|
- if (symbol <= 3) {
|
|
|
|
- return symbol + 1;
|
|
|
|
- }
|
|
|
|
|
|
+ m_uncompressed_block.~UncompressedBlock();
|
|
|
|
+ m_state = State::Idle;
|
|
|
|
|
|
- if (symbol <= 29) {
|
|
|
|
- auto extra_bits = (symbol / 2) - 1;
|
|
|
|
- return (((symbol % 2) + 2) << extra_bits) + 1 + m_reader.read_bits(extra_bits);
|
|
|
|
|
|
+ return nread + read(bytes.slice(nread));
|
|
}
|
|
}
|
|
|
|
|
|
- dbg() << "Found invalid symbol in distance" << symbol;
|
|
|
|
ASSERT_NOT_REACHED();
|
|
ASSERT_NOT_REACHED();
|
|
}
|
|
}
|
|
|
|
|
|
-void DeflateStream::copy_from_history(u32 distance, u32 run) const
|
|
|
|
|
|
+bool DeflateDecompressor::read_or_error(Bytes bytes)
|
|
{
|
|
{
|
|
- for (size_t i = 0; i < run; i++) {
|
|
|
|
- u8 byte;
|
|
|
|
-
|
|
|
|
- // FIXME: In many cases we can read more than one byte at a time, this should
|
|
|
|
- // be refactored into a while loop. Beware, edge case:
|
|
|
|
- //
|
|
|
|
- // // The first four bytes are on the stream already, the other four
|
|
|
|
- // // are written by copy_from_history() itself.
|
|
|
|
- // copy_from_history(4, 8);
|
|
|
|
- m_intermediate_stream.read({ &byte, sizeof(byte) }, m_intermediate_stream.woffset() - distance);
|
|
|
|
- m_intermediate_stream << byte;
|
|
|
|
|
|
+ if (read(bytes) < bytes.size()) {
|
|
|
|
+ m_error = true;
|
|
|
|
+ return false;
|
|
}
|
|
}
|
|
|
|
+
|
|
|
|
+ return true;
|
|
}
|
|
}
|
|
|
|
|
|
-i8 BitStreamReader::read()
|
|
|
|
|
|
+bool DeflateDecompressor::discard_or_error(size_t count)
|
|
{
|
|
{
|
|
- if (m_current_byte == -1) {
|
|
|
|
- return -1;
|
|
|
|
- }
|
|
|
|
|
|
+ u8 buffer[4096];
|
|
|
|
|
|
- if (m_remaining_bits == 0) {
|
|
|
|
- if (m_data_index + 1 > m_data.size())
|
|
|
|
- return -1;
|
|
|
|
|
|
+ size_t ndiscarded = 0;
|
|
|
|
+ while (ndiscarded < count) {
|
|
|
|
+ if (eof()) {
|
|
|
|
+ m_error = true;
|
|
|
|
+ return false;
|
|
|
|
+ }
|
|
|
|
|
|
- m_current_byte = m_data.at(m_data_index++);
|
|
|
|
- m_remaining_bits = 8;
|
|
|
|
|
|
+ ndiscarded += read({ buffer, min<size_t>(count - ndiscarded, 4096) });
|
|
}
|
|
}
|
|
|
|
|
|
- m_remaining_bits--;
|
|
|
|
- return (m_current_byte >> (7 - m_remaining_bits)) & 1;
|
|
|
|
|
|
+ return true;
|
|
}
|
|
}
|
|
|
|
|
|
-i8 BitStreamReader::read_byte()
|
|
|
|
-{
|
|
|
|
- m_current_byte = 0;
|
|
|
|
- m_remaining_bits = 0;
|
|
|
|
-
|
|
|
|
- if (m_data_index + 1 > m_data.size())
|
|
|
|
- return -1;
|
|
|
|
-
|
|
|
|
- return m_data.at(m_data_index++);
|
|
|
|
-}
|
|
|
|
|
|
+bool DeflateDecompressor::eof() const { return m_state == State::Idle && m_read_final_bock; }
|
|
|
|
|
|
-u8 BitStreamReader::get_bit_byte_offset()
|
|
|
|
|
|
+ByteBuffer DeflateDecompressor::decompress_all(ReadonlyBytes bytes)
|
|
{
|
|
{
|
|
- return (8 - m_remaining_bits) % 8;
|
|
|
|
-}
|
|
|
|
|
|
+ InputMemoryStream memory_stream { bytes };
|
|
|
|
+ InputBitStream bit_stream { memory_stream };
|
|
|
|
+ DeflateDecompressor deflate_stream { bit_stream };
|
|
|
|
|
|
-u32 BitStreamReader::read_bits(u8 count)
|
|
|
|
-{
|
|
|
|
- ASSERT(count > 0 && count < 32);
|
|
|
|
|
|
+ auto buffer = ByteBuffer::create_uninitialized(4096);
|
|
|
|
|
|
- u32 result = 0;
|
|
|
|
- for (size_t i = 0; i < count; i++) {
|
|
|
|
- result |= read() << i;
|
|
|
|
|
|
+ size_t nread = 0;
|
|
|
|
+ while (!deflate_stream.eof()) {
|
|
|
|
+ nread += deflate_stream.read(buffer.bytes().slice(nread));
|
|
|
|
+ if (buffer.size() - nread < 4096)
|
|
|
|
+ buffer.grow(buffer.size() + 4096);
|
|
}
|
|
}
|
|
- return result;
|
|
|
|
-}
|
|
|
|
|
|
|
|
-Vector<u8> DeflateStream::generate_literal_length_codes() const
|
|
|
|
-{
|
|
|
|
- Vector<u8> ll_codes;
|
|
|
|
- ll_codes.resize(288);
|
|
|
|
- memset(ll_codes.data() + 0, 8, 144 - 0);
|
|
|
|
- memset(ll_codes.data() + 144, 9, 256 - 144);
|
|
|
|
- memset(ll_codes.data() + 256, 7, 280 - 256);
|
|
|
|
- memset(ll_codes.data() + 280, 8, 288 - 280);
|
|
|
|
- return ll_codes;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-Vector<u8> DeflateStream::generate_fixed_distance_codes() const
|
|
|
|
-{
|
|
|
|
- Vector<u8> fd_codes;
|
|
|
|
- fd_codes.resize(32);
|
|
|
|
- memset(fd_codes.data(), 5, 32);
|
|
|
|
- return fd_codes;
|
|
|
|
|
|
+ buffer.trim(nread);
|
|
|
|
+ return buffer;
|
|
}
|
|
}
|
|
|
|
|
|
-CanonicalCode::CanonicalCode(Vector<u8> codes)
|
|
|
|
|
|
+u32 DeflateDecompressor::decode_run_length(u32 symbol)
|
|
{
|
|
{
|
|
- m_symbol_codes.resize(codes.size());
|
|
|
|
- m_symbol_values.resize(codes.size());
|
|
|
|
-
|
|
|
|
- auto allocated_symbols_count = 0;
|
|
|
|
- auto next_code = 0;
|
|
|
|
-
|
|
|
|
- for (size_t code_length = 1; code_length <= 15; code_length++) {
|
|
|
|
- next_code <<= 1;
|
|
|
|
- auto start_bit = 1 << code_length;
|
|
|
|
-
|
|
|
|
- for (size_t symbol = 0; symbol < codes.size(); symbol++) {
|
|
|
|
- if (codes.at(symbol) != code_length) {
|
|
|
|
- continue;
|
|
|
|
- }
|
|
|
|
|
|
+ // FIXME: I can't quite follow the algorithm here, but it seems to work.
|
|
|
|
|
|
- if (next_code > start_bit) {
|
|
|
|
- dbg() << "Canonical code overflows the huffman tree";
|
|
|
|
- ASSERT_NOT_REACHED();
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- m_symbol_codes[allocated_symbols_count] = start_bit | next_code;
|
|
|
|
- m_symbol_values[allocated_symbols_count] = symbol;
|
|
|
|
|
|
+ if (symbol <= 264)
|
|
|
|
+ return symbol - 254;
|
|
|
|
|
|
- allocated_symbols_count++;
|
|
|
|
- next_code++;
|
|
|
|
- }
|
|
|
|
|
|
+ if (symbol <= 284) {
|
|
|
|
+ auto extra_bits = (symbol - 261) / 4;
|
|
|
|
+ return (((symbol - 265) % 4 + 4) << extra_bits) + 3 + m_input_stream.read_bits(extra_bits);
|
|
}
|
|
}
|
|
|
|
|
|
- if (next_code != (1 << 15)) {
|
|
|
|
- dbg() << "Canonical code underflows the huffman tree " << next_code;
|
|
|
|
- ASSERT_NOT_REACHED();
|
|
|
|
- }
|
|
|
|
|
|
+ if (symbol == 285)
|
|
|
|
+ return 258;
|
|
|
|
+
|
|
|
|
+ ASSERT_NOT_REACHED();
|
|
}
|
|
}
|
|
|
|
|
|
-static i32 binary_search(Vector<u32>& heystack, u32 needle)
|
|
|
|
|
|
+u32 DeflateDecompressor::decode_distance(u32 symbol)
|
|
{
|
|
{
|
|
- i32 low = 0;
|
|
|
|
- i32 high = heystack.size();
|
|
|
|
-
|
|
|
|
- while (low <= high) {
|
|
|
|
- u32 mid = (low + high) >> 1;
|
|
|
|
- u32 value = heystack.at(mid);
|
|
|
|
-
|
|
|
|
- if (value < needle) {
|
|
|
|
- low = mid + 1;
|
|
|
|
- } else if (value > needle) {
|
|
|
|
- high = mid - 1;
|
|
|
|
- } else {
|
|
|
|
- return mid;
|
|
|
|
- }
|
|
|
|
|
|
+ // FIXME: I can't quite follow the algorithm here, but it seems to work.
|
|
|
|
+
|
|
|
|
+ if (symbol <= 3)
|
|
|
|
+ return symbol + 1;
|
|
|
|
+
|
|
|
|
+ if (symbol <= 29) {
|
|
|
|
+ auto extra_bits = (symbol / 2) - 1;
|
|
|
|
+ return ((symbol % 2 + 2) << extra_bits) + 1 + m_input_stream.read_bits(extra_bits);
|
|
}
|
|
}
|
|
|
|
|
|
- return -1;
|
|
|
|
|
|
+ ASSERT_NOT_REACHED();
|
|
}
|
|
}
|
|
|
|
|
|
-u32 CanonicalCode::next_symbol(BitStreamReader& reader)
|
|
|
|
|
|
+void DeflateDecompressor::decode_codes(CanonicalCode&, CanonicalCode&)
|
|
{
|
|
{
|
|
- auto code_bits = 1;
|
|
|
|
-
|
|
|
|
- for (;;) {
|
|
|
|
- code_bits = code_bits << 1 | reader.read();
|
|
|
|
- i32 index = binary_search(m_symbol_codes, code_bits);
|
|
|
|
- if (index >= 0) {
|
|
|
|
- return m_symbol_values.at(index);
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
|
|
+ // FIXME: This was already implemented but I removed it because it was quite chaotic and untested.
|
|
|
|
+ // I am planning to come back to this. @asynts
|
|
|
|
+ // https://github.com/SerenityOS/serenity/blob/208cb995babb13e0af07bb9d3219f0a9fe7bca7d/Libraries/LibCompress/Deflate.cpp#L144-L242
|
|
|
|
+ TODO();
|
|
}
|
|
}
|
|
|
|
|
|
}
|
|
}
|