LibCompress: Implement the XZ BCJ filter for ARM64

This commit is contained in:
Tim Schumacher 2023-10-25 19:17:10 +02:00 committed by Andrew Kaster
parent e2d4952f0f
commit a1cf2708ee
Notes: sideshowbarker 2024-07-18 03:23:00 +09:00
3 changed files with 336 additions and 4 deletions

View file

@ -1077,6 +1077,139 @@ non proident, sunt in culpa qui officia
deserunt mollit anim id est laborum.
)"sv;
// The reference data is split into parts, since the binary contains a lot of filler data.
// First 112 bytes of the reference data that the arm64 test cases compare the decompressed output against.
// Offsets 0x00-0x5f hold 24 BL instructions, offsets 0x60-0x6f hold 4 ADRP instructions.
// Each row is one 4-byte instruction in little-endian byte order; the trailing comment is its disassembly.
Array<u8, 112> const xz_utils_arm64_binary_part1 {
0x00, 0x00, 0x00, 0x94, // bl #0x0
0xFF, 0xFF, 0xFF, 0x97, // bl #0x0
0xFE, 0xFF, 0xFF, 0x97, // bl #0x0
0xFD, 0xFF, 0xFF, 0x97, // bl #0x0
0x03, 0x00, 0x00, 0x94, // bl #0x1c
0x02, 0x00, 0x00, 0x94, // bl #0x1c
0x01, 0x00, 0x00, 0x94, // bl #0x1c
0x00, 0x00, 0x00, 0x94, // bl #0x1c
0x01, 0x00, 0x00, 0x96, // bl #0xfffffffff8000024
0x00, 0x00, 0x00, 0x96, // bl #0xfffffffff8000024
0xFF, 0xFF, 0xFF, 0x95, // bl #0x8000024
0xFE, 0xFF, 0xFF, 0x95, // bl #0x8000024
0x17, 0x11, 0x11, 0x95, // bl #0x444448c
0x16, 0x11, 0x11, 0x95, // bl #0x444448c
0x15, 0x11, 0x11, 0x95, // bl #0x444448c
0x14, 0x11, 0x11, 0x95, // bl #0x444448c
0x27, 0x22, 0x22, 0x96, // bl #0xfffffffff88888dc
0x26, 0x22, 0x22, 0x96, // bl #0xfffffffff88888dc
0x25, 0x22, 0x22, 0x96, // bl #0xfffffffff88888dc
0x24, 0x22, 0x22, 0x96, // bl #0xfffffffff88888dc
0xEC, 0xFF, 0xFF, 0x97, // bl #0x0
0xEB, 0xFF, 0xFF, 0x97, // bl #0x0
0xEA, 0xFF, 0xFF, 0x97, // bl #0x0
0xE9, 0xFF, 0xFF, 0x97, // bl #0x0
0x03, 0x00, 0x00, 0x90, // adrp x3, #0x0
0x28, 0x00, 0x00, 0xB0, // adrp x8, #0x5000
0xAD, 0x00, 0x00, 0xB0, // adrp x13, #0x15000
0x32, 0x01, 0x00, 0xB0 // adrp x18, #0x25000
};
// Second part of the reference data: 3984 filler bytes, all 0x55.
Array<u8, 3984> const xz_utils_arm64_binary_part2 = Array<u8, 3984>::from_repeated_value(0x55);
// Third part of the reference data: 96 ADRP instructions (384 bytes).
// Each row is one 4-byte instruction in little-endian byte order; the trailing comment is its disassembly.
// The page offsets grow from row to row; the last eight rows have the top immediate bit set and therefore disassemble to negative offsets.
Array<u8, 384> const xz_utils_arm64_binary_part3 {
0x0B, 0x10, 0x00, 0x90, // adrp x11, #0x200000
0x30, 0x10, 0x00, 0xB0, // adrp x16, #0x205000
0xF5, 0x17, 0x00, 0xD0, // adrp x21, #0x2fe000
0xFA, 0x17, 0x00, 0xF0, // adrp x26, #0x2ff000
0x1F, 0x18, 0x00, 0x90, // adrp xzr, #0x300000
0x24, 0x18, 0x00, 0xB0, // adrp x4, #0x305000
0xE9, 0x1F, 0x00, 0xD0, // adrp x9, #0x3fe000
0xEE, 0x1F, 0x00, 0xF0, // adrp x14, #0x3ff000
0x13, 0x20, 0x00, 0x90, // adrp x19, #0x400000
0x38, 0x20, 0x00, 0xB0, // adrp x24, #0x405000
0xFD, 0x27, 0x00, 0xD0, // adrp x29, #0x4fe000
0xE2, 0x27, 0x00, 0xF0, // adrp x2, #0x4ff000
0x07, 0x38, 0x00, 0x90, // adrp x7, #0x700000
0x2C, 0x38, 0x00, 0xB0, // adrp x12, #0x705000
0xF1, 0x3F, 0x00, 0xD0, // adrp x17, #0x7fe000
0xF6, 0x3F, 0x00, 0xF0, // adrp x22, #0x7ff000
0x1B, 0x40, 0x00, 0x90, // adrp x27, #0x800000
0x20, 0x40, 0x00, 0xB0, // adrp x0, #0x805000
0xE5, 0x47, 0x00, 0xD0, // adrp x5, #0x8fe000
0xEA, 0x47, 0x00, 0xF0, // adrp x10, #0x8ff000
0x0F, 0x78, 0x00, 0x90, // adrp x15, #0xf00000
0x34, 0x78, 0x00, 0xB0, // adrp x20, #0xf05000
0xF9, 0x7F, 0x00, 0xD0, // adrp x25, #0xffe000
0xFE, 0x7F, 0x00, 0xF0, // adrp x30, #0xfff000
0x03, 0x80, 0x00, 0x90, // adrp x3, #0x1000000
0x28, 0x80, 0x00, 0xB0, // adrp x8, #0x1005000
0xED, 0x87, 0x00, 0xD0, // adrp x13, #0x10fe000
0xF2, 0x87, 0x00, 0xF0, // adrp x18, #0x10ff000
0x17, 0xF8, 0x00, 0x90, // adrp x23, #0x1f00000
0x3C, 0xF8, 0x00, 0xB0, // adrp x28, #0x1f05000
0xE1, 0xFF, 0x00, 0xD0, // adrp x1, #0x1ffe000
0xE6, 0xFF, 0x00, 0xF0, // adrp x6, #0x1fff000
0x0B, 0x00, 0x01, 0x90, // adrp x11, #0x2000000
0x30, 0x00, 0x01, 0xB0, // adrp x16, #0x2005000
0xF5, 0x07, 0x01, 0xD0, // adrp x21, #0x20fe000
0xFA, 0x07, 0x01, 0xF0, // adrp x26, #0x20ff000
0x1F, 0xF8, 0x01, 0x90, // adrp xzr, #0x3f00000
0x24, 0xF8, 0x01, 0xB0, // adrp x4, #0x3f05000
0xE9, 0xFF, 0x01, 0xD0, // adrp x9, #0x3ffe000
0xEE, 0xFF, 0x01, 0xF0, // adrp x14, #0x3fff000
0x13, 0x00, 0x02, 0x90, // adrp x19, #0x4000000
0x38, 0x00, 0x02, 0xB0, // adrp x24, #0x4005000
0xFD, 0x07, 0x02, 0xD0, // adrp x29, #0x40fe000
0xE2, 0x07, 0x02, 0xF0, // adrp x2, #0x40ff000
0x07, 0xF8, 0x03, 0x90, // adrp x7, #0x7f00000
0x2C, 0xF8, 0x03, 0xB0, // adrp x12, #0x7f05000
0xF1, 0xFF, 0x03, 0xD0, // adrp x17, #0x7ffe000
0xF6, 0xFF, 0x03, 0xF0, // adrp x22, #0x7fff000
0x1B, 0x00, 0x04, 0x90, // adrp x27, #0x8000000
0x20, 0x00, 0x04, 0xB0, // adrp x0, #0x8005000
0xE5, 0x07, 0x04, 0xD0, // adrp x5, #0x80fe000
0xEA, 0x07, 0x04, 0xF0, // adrp x10, #0x80ff000
0x0F, 0xF8, 0x07, 0x90, // adrp x15, #0xff00000
0x34, 0xF8, 0x07, 0xB0, // adrp x20, #0xff05000
0xF9, 0xFF, 0x07, 0xD0, // adrp x25, #0xfffe000
0xFE, 0xFF, 0x07, 0xF0, // adrp x30, #0xffff000
0x03, 0x00, 0x08, 0x90, // adrp x3, #0x10000000
0x28, 0x00, 0x08, 0xB0, // adrp x8, #0x10005000
0xED, 0x07, 0x08, 0xD0, // adrp x13, #0x100fe000
0xF2, 0x07, 0x08, 0xF0, // adrp x18, #0x100ff000
0x17, 0xF8, 0x0F, 0x90, // adrp x23, #0x1ff00000
0x3C, 0xF8, 0x0F, 0xB0, // adrp x28, #0x1ff05000
0xE1, 0xFF, 0x0F, 0xD0, // adrp x1, #0x1fffe000
0xE6, 0xFF, 0x0F, 0xF0, // adrp x6, #0x1ffff000
0x0B, 0x00, 0x10, 0x90, // adrp x11, #0x20000000
0x30, 0x00, 0x10, 0xB0, // adrp x16, #0x20005000
0xF5, 0x07, 0x10, 0xD0, // adrp x21, #0x200fe000
0xFA, 0x07, 0x10, 0xF0, // adrp x26, #0x200ff000
0x1F, 0xF8, 0x1F, 0x90, // adrp xzr, #0x3ff00000
0x24, 0xF8, 0x1F, 0xB0, // adrp x4, #0x3ff05000
0xE9, 0xFF, 0x1F, 0xD0, // adrp x9, #0x3fffe000
0xEE, 0xFF, 0x1F, 0xF0, // adrp x14, #0x3ffff000
0x13, 0x00, 0x20, 0x90, // adrp x19, #0x40000000
0x38, 0x00, 0x20, 0xB0, // adrp x24, #0x40005000
0xFD, 0x07, 0x20, 0xD0, // adrp x29, #0x400fe000
0xE2, 0x07, 0x20, 0xF0, // adrp x2, #0x400ff000
0x07, 0xF8, 0x3F, 0x90, // adrp x7, #0x7ff00000
0x2C, 0xF8, 0x3F, 0xB0, // adrp x12, #0x7ff05000
0xF1, 0xFF, 0x3F, 0xD0, // adrp x17, #0x7fffe000
0xF6, 0xFF, 0x3F, 0xF0, // adrp x22, #0x7ffff000
0x1B, 0x00, 0x40, 0x90, // adrp x27, #0x80000000
0x20, 0x00, 0x40, 0xB0, // adrp x0, #0x80005000
0xE5, 0x07, 0x40, 0xD0, // adrp x5, #0x800fe000
0xEA, 0x07, 0x40, 0xF0, // adrp x10, #0x800ff000
0x0F, 0xF8, 0x7F, 0x90, // adrp x15, #0xfff00000
0x34, 0xF8, 0x7F, 0xB0, // adrp x20, #0xfff05000
0xF9, 0xFF, 0x7F, 0xD0, // adrp x25, #0xffffe000
0xFE, 0xFF, 0x7F, 0xF0, // adrp x30, #0xfffff000
0x03, 0x00, 0x80, 0x90, // adrp x3, #0xffffffff00000000
0x28, 0x00, 0x80, 0xB0, // adrp x8, #0xffffffff00005000
0xED, 0x07, 0x80, 0xD0, // adrp x13, #0xffffffff000fe000
0xF2, 0x07, 0x80, 0xF0, // adrp x18, #0xffffffff000ff000
0x17, 0xF8, 0xFF, 0x90, // adrp x23, #0xfffffffffff00000
0x3C, 0xF8, 0xFF, 0xB0, // adrp x28, #0xfffffffffff05000
0xE1, 0xFF, 0xFF, 0xD0, // adrp x1, #0xffffffffffffe000
0xE6, 0xFF, 0xFF, 0xF0 // adrp x6, #0xfffffffffffff000
};
// Fourth part of the reference data: 3712 filler bytes, all 0x55.
Array<u8, 3712> const xz_utils_arm64_binary_part4 = Array<u8, 3712>::from_repeated_value(0x55);
// Fifth part of the reference data: a byte-for-byte copy of part 3.
Array<u8, 384> const xz_utils_arm64_binary_part5 = xz_utils_arm64_binary_part3;
TEST_CASE(xz_utils_good_0cat_empty)
{
// "good-0cat-empty.xz has two zero-Block Streams concatenated without
@ -1229,8 +1362,25 @@ TEST_CASE(xz_utils_good_1_arm64_lzma2_1)
auto stream = MUST(try_make<FixedMemoryStream>(compressed));
auto decompressor = MUST(Compress::XzDecompressor::create(move(stream)));
// TODO: This uses the currently unimplemented arm64 filter.
(void)decompressor->read_until_eof(PAGE_SIZE);
auto buffer = TRY_OR_FAIL(decompressor->read_until_eof(PAGE_SIZE));
auto span = buffer.span();
EXPECT_EQ(span.trim(xz_utils_arm64_binary_part1.size()), xz_utils_arm64_binary_part1.span());
span = span.slice(xz_utils_arm64_binary_part1.size());
EXPECT_EQ(span.trim(xz_utils_arm64_binary_part2.size()), xz_utils_arm64_binary_part2.span());
span = span.slice(xz_utils_arm64_binary_part2.size());
EXPECT_EQ(span.trim(xz_utils_arm64_binary_part3.size()), xz_utils_arm64_binary_part3.span());
span = span.slice(xz_utils_arm64_binary_part3.size());
EXPECT_EQ(span.trim(xz_utils_arm64_binary_part4.size()), xz_utils_arm64_binary_part4.span());
span = span.slice(xz_utils_arm64_binary_part4.size());
EXPECT_EQ(span.trim(xz_utils_arm64_binary_part5.size()), xz_utils_arm64_binary_part5.span());
span = span.slice(xz_utils_arm64_binary_part5.size());
EXPECT_EQ(span.size(), 0ul);
}
TEST_CASE(xz_utils_good_1_arm64_lzma2_2)
@ -1273,8 +1423,25 @@ TEST_CASE(xz_utils_good_1_arm64_lzma2_2)
auto stream = MUST(try_make<FixedMemoryStream>(compressed));
auto decompressor = MUST(Compress::XzDecompressor::create(move(stream)));
// TODO: This uses the currently unimplemented arm64 filter.
(void)decompressor->read_until_eof(PAGE_SIZE);
auto buffer = TRY_OR_FAIL(decompressor->read_until_eof(PAGE_SIZE));
auto span = buffer.span();
EXPECT_EQ(span.trim(xz_utils_arm64_binary_part1.size()), xz_utils_arm64_binary_part1.span());
span = span.slice(xz_utils_arm64_binary_part1.size());
EXPECT_EQ(span.trim(xz_utils_arm64_binary_part2.size()), xz_utils_arm64_binary_part2.span());
span = span.slice(xz_utils_arm64_binary_part2.size());
EXPECT_EQ(span.trim(xz_utils_arm64_binary_part3.size()), xz_utils_arm64_binary_part3.span());
span = span.slice(xz_utils_arm64_binary_part3.size());
EXPECT_EQ(span.trim(xz_utils_arm64_binary_part4.size()), xz_utils_arm64_binary_part4.span());
span = span.slice(xz_utils_arm64_binary_part4.size());
EXPECT_EQ(span.trim(xz_utils_arm64_binary_part5.size()), xz_utils_arm64_binary_part5.span());
span = span.slice(xz_utils_arm64_binary_part5.size());
EXPECT_EQ(span.size(), 0ul);
}
TEST_CASE(xz_utils_good_1_block_header_1)

View file

@ -221,6 +221,124 @@ void XzFilterDelta::close()
{
}
// Builds an ARM64 BCJ filter that pulls its input from `stream`.
// `start_offset` is added to the stream position when computing the offset of each instruction;
// it is rejected unless it is a multiple of INSTRUCTION_ALIGNMENT.
// Any error from creating the two instruction-sized buffers or the filter object is propagated.
ErrorOr<NonnullOwnPtr<XzFilterBCJArm64>> XzFilterBCJArm64::create(MaybeOwned<Stream> stream, u32 start_offset)
{
    bool const offset_is_aligned = (start_offset % INSTRUCTION_ALIGNMENT) == 0;
    if (!offset_is_aligned)
        return Error::from_string_literal("XZ BCJ filter offset is not a multiple of the alignment");

    // The wrapped stream counts the bytes read through it, which read_some() uses to derive the current offset.
    CountingStream counted_input { move(stream) };

    // Both buffers hold at most one instruction.
    auto pending_input = TRY(CircularBuffer::create_empty(INSTRUCTION_SIZE));
    auto pending_output = TRY(CircularBuffer::create_empty(INSTRUCTION_SIZE));

    return adopt_nonnull_own_or_enomem(new (nothrow) XzFilterBCJArm64(move(counted_input), start_offset, move(pending_input), move(pending_output)));
}
// Takes ownership of the already-constructed parts and stores them in the corresponding members.
// Nothing here can fail; the fallible setup (offset check, buffer creation) is done by create().
XzFilterBCJArm64::XzFilterBCJArm64(CountingStream stream, u32 start_offset, CircularBuffer input_buffer, CircularBuffer output_buffer)
: m_stream(move(stream))
, m_start_offset(start_offset)
, m_input_buffer(move(input_buffer))
, m_output_buffer(move(output_buffer))
{
}
// Produces filtered data: assembles one 4-byte instruction from the wrapped stream, subtracts the
// instruction's stream offset from the immediate of BL and (in-range) ADRP instructions, and copies
// the result into `bytes`. All other instructions pass through unchanged.
// At most INSTRUCTION_SIZE bytes are produced per call. Whatever does not fit into `bytes` is kept
// in m_output_buffer and handed out by the following call(s).
// Returns the part of `bytes` that was written to; errors from the wrapped stream are propagated.
ErrorOr<Bytes> XzFilterBCJArm64::read_some(Bytes bytes)
{
if (m_output_buffer.used_space() > 0) {
// If we still have buffered outgoing data, return that first.
return m_output_buffer.read(bytes);
}
while (m_input_buffer.used_space() < INSTRUCTION_SIZE) {
if (m_stream.is_eof()) {
// If we can't get any more input data, dump the buffered contents unchanged.
// We won't be able to assemble another instruction.
return m_input_buffer.read(bytes);
}
TRY(m_input_buffer.fill_from_stream(m_stream));
}
// The algorithm considers the offset of the current bytes to be the current program counter.
// The bytes still sitting in the input buffer have already been counted by the stream, so they are subtracted again.
u32 stream_offset = m_start_offset + m_stream.read_bytes() - m_input_buffer.used_space();
Array<u8, INSTRUCTION_SIZE> buffer;
auto buffer_span = m_input_buffer.read(buffer);
VERIFY(buffer_span.size() == INSTRUCTION_SIZE);
if ((buffer[3] & 0b11111100) == 0b10010100) {
// BL is encoded as follows (written most significant bit first; the instruction is stored in
// little-endian byte order, so buffer[3] holds the opcode bits):
// 100101XX XXXXXXXX XXXXXXXX XXXXXXXX
// X is an immediate 26 bit value designating the program counter offset divided by 4.
stream_offset >>= 2;
u32 program_counter = ((buffer[3] & 0b11) << 24) | (buffer[2] << 16) | (buffer[1] << 8) | buffer[0];
u32 program_counter_offset = program_counter - stream_offset;
// Reassemble the instruction.
buffer[3] = ((program_counter_offset >> 24) & 0b11) | 0b10010100;
buffer[2] = program_counter_offset >> 16;
buffer[1] = program_counter_offset >> 8;
buffer[0] = program_counter_offset;
} else if ((buffer[3] & 0b10011111) == 0b10010000) {
// ADRP instructions are encoded in the following format (written most significant bit first):
// 1XX10000 YYYYYYYY YYYYYYYY YYYZZZZZ
// Y:X is an immediate 21 bit value designating the program counter offset divided by 4096 (i.e. a right shift by 12).
// Z is the register number.
stream_offset >>= 12;
auto register_number = buffer[0] & 0b11111;
u32 program_counter = (buffer[2] << 13) | (buffer[1] << 5) | ((buffer[0] >> 3) & 0b11100) | ((buffer[3] >> 5) & 0b11);
// Only offsets between -512MiB and +512MiB are processed, which is supposed to reduce false positives.
// Note: The XZ reference implementation presents a human readable range, an unoptimized condition, and an optimized condition for this.
// Since none of the three entirely match each other, our only option is to copy the exact formula that is used in practice.
if (!((program_counter + 0x00020000) & 0x001C0000)) {
u32 program_counter_offset = program_counter - stream_offset;
// Clip the immediate to 18 bits, then sign-extend to 21 bits.
program_counter_offset &= (1 << 18) - 1;
program_counter_offset |= (0 - (program_counter_offset & (1 << 17))) & (0b111 << 18);
// Reassemble the instruction.
buffer[3] = ((program_counter_offset & 0b11) << 5) | 0b10010000;
buffer[2] = program_counter_offset >> 13;
buffer[1] = program_counter_offset >> 5;
buffer[0] = ((program_counter_offset & 0b11100) << 3) | register_number;
}
}
// Write what we can into the Span, put the rest into the output buffer.
auto size_in_span = min(INSTRUCTION_SIZE, bytes.size());
bytes = bytes.trim(size_in_span);
buffer.span().trim(size_in_span).copy_to(bytes);
if (size_in_span < INSTRUCTION_SIZE) {
auto bytes_written_to_buffer = m_output_buffer.write(buffer.span().slice(size_in_span));
VERIFY(bytes_written_to_buffer == INSTRUCTION_SIZE - size_in_span);
}
return bytes;
}
// Writing is not supported by this filter: every call is rejected with EBADF and the data is ignored.
ErrorOr<size_t> XzFilterBCJArm64::write_some(ReadonlyBytes)
{
    return Error::from_errno(EBADF);
}
bool XzFilterBCJArm64::is_eof() const
{
return m_stream.is_eof();
}
// The filter has no open/closed state of its own; it reports the state of the wrapped stream.
bool XzFilterBCJArm64::is_open() const
{
return m_stream.is_open();
}
// Does nothing; in particular, the wrapped stream is not closed here.
void XzFilterBCJArm64::close()
{
}
ErrorOr<NonnullOwnPtr<XzDecompressor>> XzDecompressor::create(MaybeOwned<Stream> stream)
{
auto counting_stream = TRY(try_make<CountingStream>(move(stream)));
@ -419,6 +537,25 @@ ErrorOr<void> XzDecompressor::load_next_block(u8 encoded_block_header_size)
continue;
}
// 5.3.2. Branch/Call/Jump Filters for Executables
if (filter.id == 0x0a) {
if (filter.last)
return Error::from_string_literal("XZ BCJ filter can only be a non-last filter");
u32 start_offset = 0;
if (filter.properties.size() == 0) {
// No start offset given.
} else if (filter.properties.size() == sizeof(XzFilterBCJProperties)) {
auto const* properties = reinterpret_cast<XzFilterBCJProperties*>(filter.properties.data());
start_offset = properties->start_offset;
} else {
return Error::from_string_literal("XZ BCJ filter has an unknown properties size");
}
new_block_stream = TRY(XzFilterBCJArm64::create(move(new_block_stream), start_offset));
continue;
}
// 5.3.3. Delta
if (filter.id == 0x03) {
if (filter.last)

View file

@ -99,6 +99,34 @@ struct [[gnu::packed]] XzFilterLzma2Properties {
};
static_assert(sizeof(XzFilterLzma2Properties) == 1);
// 5.3.2. Branch/Call/Jump Filters for Executables
// Optional properties of a BCJ filter: a single 32-bit start offset (4 bytes, no padding).
// NOTE(review): this is a plain u32, so overlaying it on raw property bytes yields the intended value
// only on a little-endian host — presumably the on-disk field is little-endian; confirm against the XZ
// format specification and consider an explicitly little-endian field type.
struct [[gnu::packed]] XzFilterBCJProperties {
// Offset that the filter adds to the stream position of every instruction.
u32 start_offset;
};
static_assert(sizeof(XzFilterBCJProperties) == 4);
// Read-only stream that wraps another stream and rewrites the immediates of ARM64 BL and ADRP
// instructions in the data read through it (the XZ "BCJ" filter for ARM64).
class XzFilterBCJArm64 : public Stream {
public:
// Creates the filter on top of the given stream. `start_offset` must be a multiple of INSTRUCTION_ALIGNMENT.
static ErrorOr<NonnullOwnPtr<XzFilterBCJArm64>> create(MaybeOwned<Stream>, u32 start_offset);
// Returns filtered data, at most one instruction per call.
virtual ErrorOr<Bytes> read_some(Bytes) override;
// Always fails; the filter cannot be written to.
virtual ErrorOr<size_t> write_some(ReadonlyBytes) override;
virtual bool is_eof() const override;
virtual bool is_open() const override;
virtual void close() override;
private:
// Required alignment (in bytes) of the start offset.
static constexpr size_t INSTRUCTION_ALIGNMENT = 4;
// Size (in bytes) of one instruction; also the capacity of both buffers below.
static constexpr size_t INSTRUCTION_SIZE = 4;
XzFilterBCJArm64(CountingStream, u32 start_offset, CircularBuffer input_buffer, CircularBuffer output_buffer);
// Wrapped input stream; its byte counter is used to compute the offset of each instruction.
CountingStream m_stream;
// Offset added to the stream position when computing instruction offsets.
u32 m_start_offset;
// Bytes read from m_stream that have not yet been assembled into a full instruction.
CircularBuffer m_input_buffer;
// Already-filtered bytes that did not fit into the caller's buffer on a previous read.
CircularBuffer m_output_buffer;
};
// 5.3.3. Delta
struct [[gnu::packed]] XzFilterDeltaProperties {
u8 encoded_distance;