LibCompress: Add an LZW compressor

This commit is contained in:
Lucas CHOLLET 2024-04-26 12:33:10 -04:00 committed by Andrew Kaster
parent ff33fa7e8b
commit 54f33b43c6
Notes: sideshowbarker 2024-07-16 21:34:08 +09:00
3 changed files with 105 additions and 0 deletions

View file

@ -3,6 +3,7 @@ set(TEST_SOURCES
TestDeflate.cpp TestDeflate.cpp
TestGzip.cpp TestGzip.cpp
TestLzma.cpp TestLzma.cpp
TestLzw.cpp
TestPackBits.cpp TestPackBits.cpp
TestXz.cpp TestXz.cpp
TestZlib.cpp TestZlib.cpp

View file

@ -0,0 +1,33 @@
/*
* Copyright (c) 2024, Lucas Chollet <lucas.chollet@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibTest/TestCase.h>
#include <AK/Array.h>
#include <LibCompress/Lzw.h>
namespace {
// Compresses `input` with LzwCompressor, decompresses the result with the
// little-endian LzwDecompressor (both using the same initial code size), and
// reports whether the decoded bytes equal the original ones.
// Any error from either stage is propagated to the caller.
ErrorOr<bool> test_roundtrip_string(StringView input)
{
    constexpr u8 initial_code_size = 8;
    auto const original_bytes = input.bytes();

    auto const encoded = TRY(Compress::LzwCompressor::compress_all(original_bytes, initial_code_size));
    auto const decoded = TRY(Compress::LzwDecompressor<LittleEndianInputBitStream>::decompress_all(encoded, initial_code_size));

    return decoded == original_bytes;
}
}
// Round-trips a short string through test_roundtrip_string() (compress, then
// decompress) and expects the decoded bytes to equal the input.
TEST_CASE(roundtrip_lzw_little_endian_short)
{
// NOTE(review): TRY_OR_FAIL presumably fails the test if the helper returns an error - confirm against LibTest.
EXPECT(TRY_OR_FAIL(test_roundtrip_string("WeWellll"sv)));
}
// Round-trips a long string through test_roundtrip_string() (compress, then
// decompress) and expects the decoded bytes to equal the input.
TEST_CASE(roundtrip_lzw_little_endian_long)
{
// LZW changes the code size after ~512 new symbols, this test case is long enough to trigger that.
// The input is the same block of text repeated four times.
constexpr auto input = "WellWellWellWellaqwertyuiop[]sdfghjkl;'zxcvbnm,./uipnaspchu9epqrjepncdp9ruew-r8thvnufsipdonvjcx zvlrz[iu0q-348urfjsd;fjmvxc.nnnmvcxzvmc c,m;l'/,l4532[5i904tmorew;lgkrmopds['kg,l;'s,gWellWellWellWellaqwertyuiop[]sdfghjkl;'zxcvbnm,./uipnaspchu9epqrjepncdp9ruew-r8thvnufsipdonvjcx zvlrz[iu0q-348urfjsd;fjmvxc.nnnmvcxzvmc c,m;l'/,l4532[5i904tmorew;lgkrmopds['kg,l;'s,gWellWellWellWellaqwertyuiop[]sdfghjkl;'zxcvbnm,./uipnaspchu9epqrjepncdp9ruew-r8thvnufsipdonvjcx zvlrz[iu0q-348urfjsd;fjmvxc.nnnmvcxzvmc c,m;l'/,l4532[5i904tmorew;lgkrmopds['kg,l;'s,gWellWellWellWellaqwertyuiop[]sdfghjkl;'zxcvbnm,./uipnaspchu9epqrjepncdp9ruew-r8thvnufsipdonvjcx zvlrz[iu0q-348urfjsd;fjmvxc.nnnmvcxzvmc c,m;l'/,l4532[5i904tmorew;lgkrmopds['kg,l;'s,g"sv;
EXPECT(TRY_OR_FAIL(test_roundtrip_string(input)));
}

View file

@ -173,4 +173,75 @@ private:
Vector<u8> m_output {}; Vector<u8> m_output {};
}; };
// One-shot LZW encoder built on top of the shared Details::LzwState code table.
// The output is written through a LittleEndianOutputBitStream.
class LzwCompressor : private Details::LzwState {
public:
    // Compresses `bytes` and returns the encoded stream.
    //
    // The stream starts with a clear code, contains one code per longest
    // table match found in the input, and ends with an end-of-data code
    // followed by padding up to the next byte boundary.
    //
    // `initial_code_size` is forwarded to Details::LzwState to set up the
    // initial code table.
    //
    // Returns an error if writing to the output stream fails, or if an input
    // byte has no entry in the code table (no code could represent it).
    static ErrorOr<ByteBuffer> compress_all(ReadonlyBytes bytes, u8 initial_code_size)
    {
        LzwCompressor compressor { initial_code_size };
        AllocatingMemoryStream buffer;
        LittleEndianOutputBitStream output_stream { MaybeOwned<Stream>(buffer) };

        u16 const clear_code = compressor.add_control_code();
        u16 const end_of_data_code = compressor.add_control_code();

        TRY(output_stream.write_bits(clear_code, compressor.m_code_size));

        u32 last_offset = 0;
        while (last_offset < bytes.size()) {
            // Once the table holds `max_table_size - 2` entries, tell the
            // decoder to start over and reset our own state accordingly.
            if (compressor.m_code_table.size() == max_table_size - 2) {
                TRY(output_stream.write_bits(clear_code, compressor.m_code_size));
                compressor.reset();
            }

            // Grow the candidate one byte at a time until it is no longer in
            // the table (or the input ends), remembering the last hit.
            u16 current_code {};
            u32 matched_size = 0;
            ReadonlyBytes unknown_symbol {};
            bool hit_unknown_symbol = false;
            for (u32 symbol_size = 1; last_offset + symbol_size <= bytes.size(); ++symbol_size) {
                auto const candidate = bytes.slice(last_offset, symbol_size);
                auto const candidate_code = compressor.code_for_symbol(candidate);
                if (!candidate_code.has_value()) {
                    unknown_symbol = candidate;
                    hit_unknown_symbol = true;
                    break;
                }
                current_code = *candidate_code;
                matched_size = symbol_size;
            }

            // Not even the single byte at `last_offset` is in the table. Without
            // this check we would emit a bogus code 0 and never advance.
            if (matched_size == 0)
                return Error::from_string_literal("LZW compressor: input byte has no code in the code table");

            // The code must be written before the table is extended, as it is
            // emitted with the code size in effect before the extension.
            TRY(output_stream.write_bits(current_code, compressor.m_code_size));

            // Register "longest match + next byte" as a new table entry.
            if (hit_unknown_symbol)
                compressor.extend_code_table(Vector(unknown_symbol));

            // Only the matched prefix is consumed; the byte that broke the
            // match starts the next symbol.
            last_offset += matched_size;
        }

        TRY(output_stream.write_bits(end_of_data_code, compressor.m_code_size));
        TRY(output_stream.align_to_byte_boundary());
        TRY(output_stream.flush_buffer_to_stream());

        return TRY(buffer.read_until_eof());
    }

private:
    LzwCompressor(u8 initial_code_size)
        : Details::LzwState(initial_code_size, 1)
    {
    }

    // Returns the index of the table entry whose bytes equal `bytes`, or an
    // empty Optional if there is none. This is a linear scan of the table.
    Optional<u16> code_for_symbol(ReadonlyBytes bytes)
    {
        for (u16 i = 0; i < m_code_table.size(); ++i) {
            if (m_code_table[i].span() == bytes)
                return i;
        }
        return OptionalNone {};
    }
};
} }