mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-22 07:30:19 +00:00
LibCompress: Add an LZW compressor
This commit is contained in:
parent
ff33fa7e8b
commit
54f33b43c6
Notes:
sideshowbarker
2024-07-16 21:34:08 +09:00
Author: https://github.com/LucasChollet Commit: https://github.com/SerenityOS/serenity/commit/54f33b43c6 Pull-request: https://github.com/SerenityOS/serenity/pull/24191 Reviewed-by: https://github.com/nico ✅ Reviewed-by: https://github.com/timschumi
3 changed files with 105 additions and 0 deletions
|
@ -3,6 +3,7 @@ set(TEST_SOURCES
|
||||||
TestDeflate.cpp
|
TestDeflate.cpp
|
||||||
TestGzip.cpp
|
TestGzip.cpp
|
||||||
TestLzma.cpp
|
TestLzma.cpp
|
||||||
|
TestLzw.cpp
|
||||||
TestPackBits.cpp
|
TestPackBits.cpp
|
||||||
TestXz.cpp
|
TestXz.cpp
|
||||||
TestZlib.cpp
|
TestZlib.cpp
|
||||||
|
|
33
Tests/LibCompress/TestLzw.cpp
Normal file
33
Tests/LibCompress/TestLzw.cpp
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2024, Lucas Chollet <lucas.chollet@serenityos.org>
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <LibTest/TestCase.h>
|
||||||
|
|
||||||
|
#include <AK/Array.h>
|
||||||
|
#include <LibCompress/Lzw.h>
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// Compresses `input` with LzwCompressor, decompresses the result with the
// little-endian LzwDecompressor, and reports whether the decoded bytes equal
// the original ones. Any error from either step is propagated to the caller.
ErrorOr<bool> test_roundtrip_string(StringView input)
{
    // Both directions must be given the same initial code size.
    constexpr u8 initial_code_size = 8;

    auto const original = input.bytes();
    auto const encoded = TRY(Compress::LzwCompressor::compress_all(original, initial_code_size));
    auto const decoded = TRY(Compress::LzwDecompressor<LittleEndianInputBitStream>::decompress_all(encoded, initial_code_size));

    return decoded == original;
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Round-trips a short string containing repeated substrings through the
// compressor and the little-endian decompressor.
TEST_CASE(roundtrip_lzw_little_endian_short)
{
    constexpr auto input = "WeWellll"sv;
    EXPECT(TRY_OR_FAIL(test_roundtrip_string(input)));
}
|
||||||
|
|
||||||
|
// Round-trips a long input through the compressor and the little-endian
// decompressor.
TEST_CASE(roundtrip_lzw_little_endian_long)
{
    // The input has to be long: LZW switches to a larger code size after ~512 new symbols,
    // and this string is long enough to make that happen during the round trip.
    constexpr StringView input { "WellWellWellWellaqwertyuiop[]sdfghjkl;'zxcvbnm,./uipnaspchu9epqrjepncdp9ruew-r8thvnufsipdonvjcx zvlrz[iu0q-348urfjsd;fjmvxc.nnnmvcxzvmc c,m;l'/,l4532[5i904tmorew;lgkrmopds['kg,l;'s,gWellWellWellWellaqwertyuiop[]sdfghjkl;'zxcvbnm,./uipnaspchu9epqrjepncdp9ruew-r8thvnufsipdonvjcx zvlrz[iu0q-348urfjsd;fjmvxc.nnnmvcxzvmc c,m;l'/,l4532[5i904tmorew;lgkrmopds['kg,l;'s,gWellWellWellWellaqwertyuiop[]sdfghjkl;'zxcvbnm,./uipnaspchu9epqrjepncdp9ruew-r8thvnufsipdonvjcx zvlrz[iu0q-348urfjsd;fjmvxc.nnnmvcxzvmc c,m;l'/,l4532[5i904tmorew;lgkrmopds['kg,l;'s,gWellWellWellWellaqwertyuiop[]sdfghjkl;'zxcvbnm,./uipnaspchu9epqrjepncdp9ruew-r8thvnufsipdonvjcx zvlrz[iu0q-348urfjsd;fjmvxc.nnnmvcxzvmc c,m;l'/,l4532[5i904tmorew;lgkrmopds['kg,l;'s,g"sv };

    EXPECT(TRY_OR_FAIL(test_roundtrip_string(input)));
}
|
|
@ -173,4 +173,75 @@ private:
|
||||||
Vector<u8> m_output {};
|
Vector<u8> m_output {};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class LzwCompressor : private Details::LzwState {
|
||||||
|
public:
|
||||||
|
static ErrorOr<ByteBuffer> compress_all(ReadonlyBytes bytes, u8 initial_code_size)
|
||||||
|
{
|
||||||
|
LzwCompressor compressor { initial_code_size };
|
||||||
|
AllocatingMemoryStream buffer;
|
||||||
|
LittleEndianOutputBitStream output_stream { MaybeOwned<Stream>(buffer) };
|
||||||
|
|
||||||
|
u16 const clear_code = compressor.add_control_code();
|
||||||
|
u16 const end_of_data_code = compressor.add_control_code();
|
||||||
|
|
||||||
|
TRY(output_stream.write_bits(clear_code, compressor.m_code_size));
|
||||||
|
|
||||||
|
u32 last_offset = 0;
|
||||||
|
|
||||||
|
while (last_offset < bytes.size()) {
|
||||||
|
ReadonlyBytes current_symbol {};
|
||||||
|
u16 current_code {};
|
||||||
|
|
||||||
|
if (compressor.m_code_table.size() == max_table_size - 2) {
|
||||||
|
TRY(output_stream.write_bits(clear_code, compressor.m_code_size));
|
||||||
|
compressor.reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool found_symbol = false;
|
||||||
|
|
||||||
|
for (u32 symbol_size = 1; last_offset + symbol_size <= bytes.size(); ++symbol_size) {
|
||||||
|
current_symbol = bytes.slice(last_offset, symbol_size);
|
||||||
|
auto const new_code = compressor.code_for_symbol(current_symbol);
|
||||||
|
|
||||||
|
if (new_code.has_value()) {
|
||||||
|
current_code = *new_code;
|
||||||
|
} else {
|
||||||
|
found_symbol = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TRY(output_stream.write_bits(current_code, compressor.m_code_size));
|
||||||
|
|
||||||
|
if (found_symbol) {
|
||||||
|
compressor.extend_code_table(Vector(current_symbol));
|
||||||
|
current_symbol = current_symbol.trim(current_symbol.size() - 1);
|
||||||
|
}
|
||||||
|
last_offset += current_symbol.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
TRY(output_stream.write_bits(end_of_data_code, compressor.m_code_size));
|
||||||
|
TRY(output_stream.align_to_byte_boundary());
|
||||||
|
TRY(output_stream.flush_buffer_to_stream());
|
||||||
|
|
||||||
|
return TRY(buffer.read_until_eof());
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
LzwCompressor(u8 initial_code_size)
|
||||||
|
: Details::LzwState(initial_code_size, 1)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
Optional<u16> code_for_symbol(ReadonlyBytes bytes)
|
||||||
|
{
|
||||||
|
for (u16 i = 0; i < m_code_table.size(); ++i) {
|
||||||
|
if (m_code_table[i].span() == bytes)
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
return OptionalNone {};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue