mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-21 23:20:20 +00:00
LibCompress: Add an LZW compressor
This commit is contained in:
parent
ff33fa7e8b
commit
54f33b43c6
Notes:
sideshowbarker
2024-07-16 21:34:08 +09:00
Author: https://github.com/LucasChollet Commit: https://github.com/SerenityOS/serenity/commit/54f33b43c6 Pull-request: https://github.com/SerenityOS/serenity/pull/24191 Reviewed-by: https://github.com/nico ✅ Reviewed-by: https://github.com/timschumi
3 changed files with 105 additions and 0 deletions
|
@ -3,6 +3,7 @@ set(TEST_SOURCES
|
|||
TestDeflate.cpp
|
||||
TestGzip.cpp
|
||||
TestLzma.cpp
|
||||
TestLzw.cpp
|
||||
TestPackBits.cpp
|
||||
TestXz.cpp
|
||||
TestZlib.cpp
|
||||
|
|
33
Tests/LibCompress/TestLzw.cpp
Normal file
33
Tests/LibCompress/TestLzw.cpp
Normal file
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
* Copyright (c) 2024, Lucas Chollet <lucas.chollet@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <LibTest/TestCase.h>
|
||||
|
||||
#include <AK/Array.h>
|
||||
#include <LibCompress/Lzw.h>
|
||||
|
||||
namespace {
|
||||
|
||||
// Compresses `input` with an initial code size of 8, decompresses the result through
// LzwDecompressor<LittleEndianInputBitStream> with the same code size, and reports
// whether the decoded bytes equal the original ones. Errors from either step propagate.
ErrorOr<bool> test_roundtrip_string(StringView input)
{
    auto const original_bytes = input.bytes();

    auto const encoded = TRY(Compress::LzwCompressor::compress_all(original_bytes, 8));
    auto const decoded = TRY(Compress::LzwDecompressor<LittleEndianInputBitStream>::decompress_all(encoded, 8));

    bool const is_identical = decoded == original_bytes;
    return is_identical;
}
|
||||
|
||||
}
|
||||
|
||||
// Round-trips a short string containing repeated characters through
// test_roundtrip_string() and expects the decoded bytes to match the input.
TEST_CASE(roundtrip_lzw_little_endian_short)
|
||||
{
|
||||
EXPECT(TRY_OR_FAIL(test_roundtrip_string("WeWellll"sv)));
|
||||
}
|
||||
|
||||
// Round-trips a long input (the same block of text repeated four times) through
// test_roundtrip_string() and expects the decoded bytes to match the input.
TEST_CASE(roundtrip_lzw_little_endian_long)
|
||||
{
|
||||
// LZW changes the code size after ~512 new symbols, this test case is long enough to trigger that.
|
||||
constexpr auto input = "WellWellWellWellaqwertyuiop[]sdfghjkl;'zxcvbnm,./uipnaspchu9epqrjepncdp9ruew-r8thvnufsipdonvjcx zvlrz[iu0q-348urfjsd;fjmvxc.nnnmvcxzvmc c,m;l'/,l4532[5i904tmorew;lgkrmopds['kg,l;'s,gWellWellWellWellaqwertyuiop[]sdfghjkl;'zxcvbnm,./uipnaspchu9epqrjepncdp9ruew-r8thvnufsipdonvjcx zvlrz[iu0q-348urfjsd;fjmvxc.nnnmvcxzvmc c,m;l'/,l4532[5i904tmorew;lgkrmopds['kg,l;'s,gWellWellWellWellaqwertyuiop[]sdfghjkl;'zxcvbnm,./uipnaspchu9epqrjepncdp9ruew-r8thvnufsipdonvjcx zvlrz[iu0q-348urfjsd;fjmvxc.nnnmvcxzvmc c,m;l'/,l4532[5i904tmorew;lgkrmopds['kg,l;'s,gWellWellWellWellaqwertyuiop[]sdfghjkl;'zxcvbnm,./uipnaspchu9epqrjepncdp9ruew-r8thvnufsipdonvjcx zvlrz[iu0q-348urfjsd;fjmvxc.nnnmvcxzvmc c,m;l'/,l4532[5i904tmorew;lgkrmopds['kg,l;'s,g"sv;
|
||||
EXPECT(TRY_OR_FAIL(test_roundtrip_string(input)));
|
||||
}
|
|
@ -173,4 +173,75 @@ private:
|
|||
Vector<u8> m_output {};
|
||||
};
|
||||
|
||||
class LzwCompressor : private Details::LzwState {
|
||||
public:
|
||||
static ErrorOr<ByteBuffer> compress_all(ReadonlyBytes bytes, u8 initial_code_size)
|
||||
{
|
||||
LzwCompressor compressor { initial_code_size };
|
||||
AllocatingMemoryStream buffer;
|
||||
LittleEndianOutputBitStream output_stream { MaybeOwned<Stream>(buffer) };
|
||||
|
||||
u16 const clear_code = compressor.add_control_code();
|
||||
u16 const end_of_data_code = compressor.add_control_code();
|
||||
|
||||
TRY(output_stream.write_bits(clear_code, compressor.m_code_size));
|
||||
|
||||
u32 last_offset = 0;
|
||||
|
||||
while (last_offset < bytes.size()) {
|
||||
ReadonlyBytes current_symbol {};
|
||||
u16 current_code {};
|
||||
|
||||
if (compressor.m_code_table.size() == max_table_size - 2) {
|
||||
TRY(output_stream.write_bits(clear_code, compressor.m_code_size));
|
||||
compressor.reset();
|
||||
}
|
||||
|
||||
bool found_symbol = false;
|
||||
|
||||
for (u32 symbol_size = 1; last_offset + symbol_size <= bytes.size(); ++symbol_size) {
|
||||
current_symbol = bytes.slice(last_offset, symbol_size);
|
||||
auto const new_code = compressor.code_for_symbol(current_symbol);
|
||||
|
||||
if (new_code.has_value()) {
|
||||
current_code = *new_code;
|
||||
} else {
|
||||
found_symbol = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
TRY(output_stream.write_bits(current_code, compressor.m_code_size));
|
||||
|
||||
if (found_symbol) {
|
||||
compressor.extend_code_table(Vector(current_symbol));
|
||||
current_symbol = current_symbol.trim(current_symbol.size() - 1);
|
||||
}
|
||||
last_offset += current_symbol.size();
|
||||
}
|
||||
|
||||
TRY(output_stream.write_bits(end_of_data_code, compressor.m_code_size));
|
||||
TRY(output_stream.align_to_byte_boundary());
|
||||
TRY(output_stream.flush_buffer_to_stream());
|
||||
|
||||
return TRY(buffer.read_until_eof());
|
||||
}
|
||||
|
||||
private:
|
||||
LzwCompressor(u8 initial_code_size)
|
||||
: Details::LzwState(initial_code_size, 1)
|
||||
{
|
||||
}
|
||||
|
||||
Optional<u16> code_for_symbol(ReadonlyBytes bytes)
|
||||
{
|
||||
for (u16 i = 0; i < m_code_table.size(); ++i) {
|
||||
if (m_code_table[i].span() == bytes)
|
||||
return i;
|
||||
}
|
||||
|
||||
return OptionalNone {};
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue