LibCompress: Add a PackBits decoder

This compression scheme was quite popular in the 1980s, and it is
still in use inside file formats such as TIFF and PDF.
This commit is contained in:
Lucas CHOLLET 2023-12-21 00:16:04 -05:00 committed by Tim Schumacher
parent 5d0fb4bac3
commit d748edd994
Notes: sideshowbarker 2024-07-17 05:19:06 +09:00
5 changed files with 99 additions and 0 deletions

View file

@ -3,6 +3,7 @@ set(TEST_SOURCES
TestDeflate.cpp
TestGzip.cpp
TestLzma.cpp
TestPackBits.cpp
TestXz.cpp
TestZlib.cpp
)

View file

@ -0,0 +1,25 @@
/*
* Copyright (c) 2023, Lucas Chollet <lucas.chollet@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibTest/TestCase.h>
#include <AK/Array.h>
#include <LibCompress/PackBitsDecoder.h>
// Decodes a short hand-checked PackBits stream and compares the result with its known expansion.
TEST_CASE(pack_bits)
{
    // One run per row; the trailing comment gives the number of bytes each run expands to.
    Array<u8, 15> const encoded {
        0xFE, 0xAA,                   // repeat 0xAA  -> 3 bytes
        0x02, 0x80, 0x00, 0x2A,       // literal run  -> 3 bytes
        0xFD, 0xAA,                   // repeat 0xAA  -> 4 bytes
        0x03, 0x80, 0x00, 0x2A, 0x22, // literal run  -> 4 bytes
        0xF7, 0xAA                    // repeat 0xAA  -> 10 bytes
    };

    Array<u8, 24> const expected {
        0xAA, 0xAA, 0xAA,
        0x80, 0x00, 0x2A,
        0xAA, 0xAA, 0xAA, 0xAA,
        0x80, 0x00, 0x2A, 0x22,
        0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA
    };

    auto decoded = TRY_OR_FAIL(Compress::PackBits::decode_all(encoded));

    EXPECT_EQ(decoded.bytes(), expected);
}

View file

@ -4,6 +4,7 @@ set(SOURCES
Deflate.cpp
Lzma.cpp
Lzma2.cpp
PackBitsDecoder.cpp
Xz.cpp
Zlib.cpp
Gzip.cpp

View file

@ -0,0 +1,45 @@
/*
* Copyright (c) 2023, Lucas Chollet <lucas.chollet@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include "PackBitsDecoder.h"
#include <AK/MemoryStream.h>
namespace Compress::PackBits {
// Decodes a complete PackBits (run-length) stream held in `bytes`.
//
// Every run starts with a selector byte:
//   - 0..127:   the following (selector + 1) bytes are copied verbatim.
//   - 129..255: the following single byte is repeated (257 - selector) times.
//   - 128:      stops decoding in CompatibilityMode::PDF, and is skipped otherwise.
//
// When `expected_output_size` is set, that much capacity is reserved up front and decoding
// stops once at least that many bytes have been produced. The limit is only tested between
// runs, so the returned buffer can be longer than requested (a run adds at most 128 bytes).
//
// Errors from reading the input or from growing the output buffer are propagated to the caller.
ErrorOr<ByteBuffer> decode_all(ReadonlyBytes bytes, Optional<u64> expected_output_size, CompatibilityMode mode)
{
    // This implementation uses unsigned values for the selector, as described in the PDF spec.
    // Note that this remains compatible with other implementations based on signed numbers.

    // The stream is only used inside this function, so it lives on the stack
    // rather than behind an (infallible) heap allocation.
    FixedMemoryStream input { bytes };

    ByteBuffer decoded_bytes;
    if (expected_output_size.has_value())
        TRY(decoded_bytes.try_ensure_capacity(*expected_output_size));

    // Without an expected size, the limit is effectively "never".
    u64 const output_limit = expected_output_size.value_or(NumericLimits<u64>::max());

    while (input.remaining() > 0 && decoded_bytes.size() < output_limit) {
        auto const selector = TRY(input.read_value<u8>());

        if (selector == 128) {
            if (mode == CompatibilityMode::PDF)
                break;
            // In the original scheme 128 carries no data; move on to the next selector.
            continue;
        }

        if (selector < 128) {
            // Literal run: copy the next (selector + 1) bytes as they are.
            size_t const literal_count = static_cast<size_t>(selector) + 1;
            for (size_t i = 0; i < literal_count; ++i)
                TRY(decoded_bytes.try_append(TRY(input.read_value<u8>())));
            continue;
        }

        // Repeat run: selector is in 129..255, giving between 2 and 128 copies.
        auto const repeated_byte = TRY(input.read_value<u8>());
        size_t const repeat_count = 257u - selector;
        for (size_t i = 0; i < repeat_count; ++i)
            TRY(decoded_bytes.try_append(repeated_byte));
    }

    return decoded_bytes;
}
}

View file

@ -0,0 +1,27 @@
/*
* Copyright (c) 2023, Lucas Chollet <lucas.chollet@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/ByteBuffer.h>
#include <AK/Optional.h>
namespace Compress::PackBits {
// This implements the PackBits compression scheme, a form of run-length encoding.
// It is fairly simple and described here: https://web.archive.org/web/20080705155158/http://developer.apple.com/technotes/tn/tn1023.html
// It is also described in:
// - Section 7.4.5 "RunLengthDecode Filter" of the PDF specification
// - Section 9 "PackBits Compression" of the TIFF specification
// Selects how the decoder treats the selector byte 128, the only value
// that the supported variants of the format interpret differently.
enum class CompatibilityMode {
Original, // 128 is a no-op: the decoder skips it and continues with the next selector
PDF, // 128 is the end-of-stream marker: the decoder stops there
};
// Decodes the whole PackBits stream in `bytes` and returns the decompressed data.
// - expected_output_size: when set, capacity for that many bytes is reserved up front and
//   decoding stops once at least that many bytes have been produced. The limit is only
//   checked between runs, so the result may be slightly longer than requested.
// - mode: how the selector byte 128 is interpreted (see CompatibilityMode).
// Errors raised while reading the input or growing the output are returned to the caller.
ErrorOr<ByteBuffer> decode_all(ReadonlyBytes bytes, Optional<u64> expected_output_size = {}, CompatibilityMode mode = CompatibilityMode::Original);
}