Browse Source

LibCompress: Add an LZW compressor

Lucas CHOLLET 1 năm trước cách đây
mục cha
commit
54f33b43c6

+ 1 - 0
Tests/LibCompress/CMakeLists.txt

@@ -3,6 +3,7 @@ set(TEST_SOURCES
     TestDeflate.cpp
     TestGzip.cpp
     TestLzma.cpp
+    TestLzw.cpp
     TestPackBits.cpp
     TestXz.cpp
     TestZlib.cpp

+ 33 - 0
Tests/LibCompress/TestLzw.cpp

@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2024, Lucas Chollet <lucas.chollet@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <LibTest/TestCase.h>
+
+#include <AK/Array.h>
+#include <LibCompress/Lzw.h>
+
+namespace {
+
+ErrorOr<bool> test_roundtrip_string(StringView input)
+{
+    // Compress, then decompress with the same initial code size, and report whether the bytes survived unchanged.
+    constexpr u8 initial_code_size = 8;
+    auto const original_bytes = input.bytes();
+    auto const encoded = TRY(Compress::LzwCompressor::compress_all(original_bytes, initial_code_size));
+    auto const decoded = TRY(Compress::LzwDecompressor<LittleEndianInputBitStream>::decompress_all(encoded, initial_code_size));
+    return decoded == original_bytes;
+}
+
+}
+
+TEST_CASE(roundtrip_lzw_little_endian_short)
+{
+    // Short input containing repeated substrings.
+    constexpr auto input = "WeWellll"sv;
+    EXPECT(TRY_OR_FAIL(test_roundtrip_string(input)));
+}
+
+TEST_CASE(roundtrip_lzw_little_endian_long)
+{
+    // LZW changes the code size after ~512 new symbols; this input is long enough to trigger that, so the round trip also covers a code-size change.
+    constexpr auto input = "WellWellWellWellaqwertyuiop[]sdfghjkl;'zxcvbnm,./uipnaspchu9epqrjepncdp9ruew-r8thvnufsipdonvjcx zvlrz[iu0q-348urfjsd;fjmvxc.nnnmvcxzvmc c,m;l'/,l4532[5i904tmorew;lgkrmopds['kg,l;'s,gWellWellWellWellaqwertyuiop[]sdfghjkl;'zxcvbnm,./uipnaspchu9epqrjepncdp9ruew-r8thvnufsipdonvjcx zvlrz[iu0q-348urfjsd;fjmvxc.nnnmvcxzvmc c,m;l'/,l4532[5i904tmorew;lgkrmopds['kg,l;'s,gWellWellWellWellaqwertyuiop[]sdfghjkl;'zxcvbnm,./uipnaspchu9epqrjepncdp9ruew-r8thvnufsipdonvjcx zvlrz[iu0q-348urfjsd;fjmvxc.nnnmvcxzvmc c,m;l'/,l4532[5i904tmorew;lgkrmopds['kg,l;'s,gWellWellWellWellaqwertyuiop[]sdfghjkl;'zxcvbnm,./uipnaspchu9epqrjepncdp9ruew-r8thvnufsipdonvjcx zvlrz[iu0q-348urfjsd;fjmvxc.nnnmvcxzvmc c,m;l'/,l4532[5i904tmorew;lgkrmopds['kg,l;'s,g"sv;
+    EXPECT(TRY_OR_FAIL(test_roundtrip_string(input)));
+}

+ 71 - 0
Userland/Libraries/LibCompress/Lzw.h

@@ -173,4 +173,75 @@ private:
     Vector<u8> m_output {};
 };
 
+// One-shot LZW encoder. Codes are written through a LittleEndianOutputBitStream, and the
+// code table and current code size come from the Details::LzwState base class.
+class LzwCompressor : private Details::LzwState {
+public:
+    // Compresses `bytes` in a single call and returns the encoded stream.
+    //
+    // The output begins with a clear code, ends with an end-of-data code and is then
+    // aligned to a byte boundary. `initial_code_size` is forwarded to Details::LzwState.
+    //
+    // Returns an error if writing to the bit stream fails, or if some input byte has no
+    // entry in the code table (so no valid code could be emitted for it).
+    static ErrorOr<ByteBuffer> compress_all(ReadonlyBytes bytes, u8 initial_code_size)
+    {
+        LzwCompressor compressor { initial_code_size };
+        AllocatingMemoryStream buffer;
+        LittleEndianOutputBitStream output_stream { MaybeOwned<Stream>(buffer) };
+
+        // Both control codes are registered before any data is encoded: clear first, then end-of-data.
+        u16 const clear_code = compressor.add_control_code();
+        u16 const end_of_data_code = compressor.add_control_code();
+
+        TRY(output_stream.write_bits(clear_code, compressor.m_code_size));
+
+        // size_t (not u32) so the offset arithmetic cannot wrap on inputs larger than 4 GiB.
+        size_t offset = 0;
+
+        while (offset < bytes.size()) {
+            // When the table reaches this size, emit a clear code and reset our own state to match.
+            if (compressor.m_code_table.size() == max_table_size - 2) {
+                TRY(output_stream.write_bits(clear_code, compressor.m_code_size));
+                compressor.reset();
+            }
+
+            // Grow the candidate one byte at a time until it is no longer in the table or the input ends.
+            ReadonlyBytes candidate {};
+            Optional<u16> longest_match_code {};
+            bool hit_unknown_symbol = false;
+
+            for (size_t length = 1; offset + length <= bytes.size(); ++length) {
+                candidate = bytes.slice(offset, length);
+                auto const code = compressor.code_for_symbol(candidate);
+                if (!code.has_value()) {
+                    hit_unknown_symbol = true;
+                    break;
+                }
+                longest_match_code = code;
+            }
+
+            // If not even the one-byte prefix has a code, there is nothing valid to emit and the
+            // offset below would not advance. Fail instead of writing a bogus code.
+            if (!longest_match_code.has_value())
+                return Error::from_string_literal("LZW: No code available for input byte");
+
+            // The code is written with the code size in effect *before* the table is extended.
+            u16 const code_to_emit = longest_match_code.value();
+            TRY(output_stream.write_bits(code_to_emit, compressor.m_code_size));
+
+            if (hit_unknown_symbol) {
+                // Register "matched prefix + next byte" as a new entry. Only the matched prefix is
+                // consumed; the trailing byte becomes the start of the next symbol.
+                compressor.extend_code_table(Vector(candidate));
+                candidate = candidate.trim(candidate.size() - 1);
+            }
+            offset += candidate.size();
+        }
+
+        TRY(output_stream.write_bits(end_of_data_code, compressor.m_code_size));
+        TRY(output_stream.align_to_byte_boundary());
+        TRY(output_stream.flush_buffer_to_stream());
+
+        return TRY(buffer.read_until_eof());
+    }
+
+private:
+    // Private: instances are only created by compress_all().
+    LzwCompressor(u8 initial_code_size)
+        : Details::LzwState(initial_code_size, 1)
+    {
+    }
+
+    // Linear search of the code table; returns the index of the entry equal to `bytes`, if any.
+    Optional<u16> code_for_symbol(ReadonlyBytes bytes)
+    {
+        for (u16 i = 0; i < m_code_table.size(); ++i) {
+            if (m_code_table[i].span() == bytes)
+                return i;
+        }
+
+        return OptionalNone {};
+    }
+};
+
 }