Browse Source

LibCompress: Implement the XZ BCJ filter for ARM64

Tim Schumacher 1 năm trước cách đây
mục cha
commit
a1cf2708ee

+ 171 - 4
Tests/LibCompress/TestXz.cpp

@@ -1077,6 +1077,139 @@ non proident, sunt in culpa qui officia
 deserunt mollit anim id est laborum. 
 )"sv;
 
+// The reference data is split into parts, since the binary contains a lot of filler data.
+Array<u8, 112> const xz_utils_arm64_binary_part1 {
+    0x00, 0x00, 0x00, 0x94, // bl #0x0
+    0xFF, 0xFF, 0xFF, 0x97, // bl #0x0
+    0xFE, 0xFF, 0xFF, 0x97, // bl #0x0
+    0xFD, 0xFF, 0xFF, 0x97, // bl #0x0
+    0x03, 0x00, 0x00, 0x94, // bl #0x1c
+    0x02, 0x00, 0x00, 0x94, // bl #0x1c
+    0x01, 0x00, 0x00, 0x94, // bl #0x1c
+    0x00, 0x00, 0x00, 0x94, // bl #0x1c
+    0x01, 0x00, 0x00, 0x96, // bl #0xfffffffff8000024
+    0x00, 0x00, 0x00, 0x96, // bl #0xfffffffff8000024
+    0xFF, 0xFF, 0xFF, 0x95, // bl #0x8000024
+    0xFE, 0xFF, 0xFF, 0x95, // bl #0x8000024
+    0x17, 0x11, 0x11, 0x95, // bl #0x444448c
+    0x16, 0x11, 0x11, 0x95, // bl #0x444448c
+    0x15, 0x11, 0x11, 0x95, // bl #0x444448c
+    0x14, 0x11, 0x11, 0x95, // bl #0x444448c
+    0x27, 0x22, 0x22, 0x96, // bl #0xfffffffff88888dc
+    0x26, 0x22, 0x22, 0x96, // bl #0xfffffffff88888dc
+    0x25, 0x22, 0x22, 0x96, // bl #0xfffffffff88888dc
+    0x24, 0x22, 0x22, 0x96, // bl #0xfffffffff88888dc
+    0xEC, 0xFF, 0xFF, 0x97, // bl #0x0
+    0xEB, 0xFF, 0xFF, 0x97, // bl #0x0
+    0xEA, 0xFF, 0xFF, 0x97, // bl #0x0
+    0xE9, 0xFF, 0xFF, 0x97, // bl #0x0
+    0x03, 0x00, 0x00, 0x90, // adrp x3, #0x0
+    0x28, 0x00, 0x00, 0xB0, // adrp x8, #0x5000
+    0xAD, 0x00, 0x00, 0xB0, // adrp x13, #0x15000
+    0x32, 0x01, 0x00, 0xB0  // adrp x18, #0x25000
+};
+Array<u8, 3984> const xz_utils_arm64_binary_part2 = Array<u8, 3984>::from_repeated_value(0x55);
+Array<u8, 384> const xz_utils_arm64_binary_part3 {
+    0x0B, 0x10, 0x00, 0x90, // adrp x11, #0x200000
+    0x30, 0x10, 0x00, 0xB0, // adrp x16, #0x205000
+    0xF5, 0x17, 0x00, 0xD0, // adrp x21, #0x2fe000
+    0xFA, 0x17, 0x00, 0xF0, // adrp x26, #0x2ff000
+    0x1F, 0x18, 0x00, 0x90, // adrp xzr, #0x300000
+    0x24, 0x18, 0x00, 0xB0, // adrp x4, #0x305000
+    0xE9, 0x1F, 0x00, 0xD0, // adrp x9, #0x3fe000
+    0xEE, 0x1F, 0x00, 0xF0, // adrp x14, #0x3ff000
+    0x13, 0x20, 0x00, 0x90, // adrp x19, #0x400000
+    0x38, 0x20, 0x00, 0xB0, // adrp x24, #0x405000
+    0xFD, 0x27, 0x00, 0xD0, // adrp x29, #0x4fe000
+    0xE2, 0x27, 0x00, 0xF0, // adrp x2, #0x4ff000
+    0x07, 0x38, 0x00, 0x90, // adrp x7, #0x700000
+    0x2C, 0x38, 0x00, 0xB0, // adrp x12, #0x705000
+    0xF1, 0x3F, 0x00, 0xD0, // adrp x17, #0x7fe000
+    0xF6, 0x3F, 0x00, 0xF0, // adrp x22, #0x7ff000
+    0x1B, 0x40, 0x00, 0x90, // adrp x27, #0x800000
+    0x20, 0x40, 0x00, 0xB0, // adrp x0, #0x805000
+    0xE5, 0x47, 0x00, 0xD0, // adrp x5, #0x8fe000
+    0xEA, 0x47, 0x00, 0xF0, // adrp x10, #0x8ff000
+    0x0F, 0x78, 0x00, 0x90, // adrp x15, #0xf00000
+    0x34, 0x78, 0x00, 0xB0, // adrp x20, #0xf05000
+    0xF9, 0x7F, 0x00, 0xD0, // adrp x25, #0xffe000
+    0xFE, 0x7F, 0x00, 0xF0, // adrp x30, #0xfff000
+    0x03, 0x80, 0x00, 0x90, // adrp x3, #0x1000000
+    0x28, 0x80, 0x00, 0xB0, // adrp x8, #0x1005000
+    0xED, 0x87, 0x00, 0xD0, // adrp x13, #0x10fe000
+    0xF2, 0x87, 0x00, 0xF0, // adrp x18, #0x10ff000
+    0x17, 0xF8, 0x00, 0x90, // adrp x23, #0x1f00000
+    0x3C, 0xF8, 0x00, 0xB0, // adrp x28, #0x1f05000
+    0xE1, 0xFF, 0x00, 0xD0, // adrp x1, #0x1ffe000
+    0xE6, 0xFF, 0x00, 0xF0, // adrp x6, #0x1fff000
+    0x0B, 0x00, 0x01, 0x90, // adrp x11, #0x2000000
+    0x30, 0x00, 0x01, 0xB0, // adrp x16, #0x2005000
+    0xF5, 0x07, 0x01, 0xD0, // adrp x21, #0x20fe000
+    0xFA, 0x07, 0x01, 0xF0, // adrp x26, #0x20ff000
+    0x1F, 0xF8, 0x01, 0x90, // adrp xzr, #0x3f00000
+    0x24, 0xF8, 0x01, 0xB0, // adrp x4, #0x3f05000
+    0xE9, 0xFF, 0x01, 0xD0, // adrp x9, #0x3ffe000
+    0xEE, 0xFF, 0x01, 0xF0, // adrp x14, #0x3fff000
+    0x13, 0x00, 0x02, 0x90, // adrp x19, #0x4000000
+    0x38, 0x00, 0x02, 0xB0, // adrp x24, #0x4005000
+    0xFD, 0x07, 0x02, 0xD0, // adrp x29, #0x40fe000
+    0xE2, 0x07, 0x02, 0xF0, // adrp x2, #0x40ff000
+    0x07, 0xF8, 0x03, 0x90, // adrp x7, #0x7f00000
+    0x2C, 0xF8, 0x03, 0xB0, // adrp x12, #0x7f05000
+    0xF1, 0xFF, 0x03, 0xD0, // adrp x17, #0x7ffe000
+    0xF6, 0xFF, 0x03, 0xF0, // adrp x22, #0x7fff000
+    0x1B, 0x00, 0x04, 0x90, // adrp x27, #0x8000000
+    0x20, 0x00, 0x04, 0xB0, // adrp x0, #0x8005000
+    0xE5, 0x07, 0x04, 0xD0, // adrp x5, #0x80fe000
+    0xEA, 0x07, 0x04, 0xF0, // adrp x10, #0x80ff000
+    0x0F, 0xF8, 0x07, 0x90, // adrp x15, #0xff00000
+    0x34, 0xF8, 0x07, 0xB0, // adrp x20, #0xff05000
+    0xF9, 0xFF, 0x07, 0xD0, // adrp x25, #0xfffe000
+    0xFE, 0xFF, 0x07, 0xF0, // adrp x30, #0xffff000
+    0x03, 0x00, 0x08, 0x90, // adrp x3, #0x10000000
+    0x28, 0x00, 0x08, 0xB0, // adrp x8, #0x10005000
+    0xED, 0x07, 0x08, 0xD0, // adrp x13, #0x100fe000
+    0xF2, 0x07, 0x08, 0xF0, // adrp x18, #0x100ff000
+    0x17, 0xF8, 0x0F, 0x90, // adrp x23, #0x1ff00000
+    0x3C, 0xF8, 0x0F, 0xB0, // adrp x28, #0x1ff05000
+    0xE1, 0xFF, 0x0F, 0xD0, // adrp x1, #0x1fffe000
+    0xE6, 0xFF, 0x0F, 0xF0, // adrp x6, #0x1ffff000
+    0x0B, 0x00, 0x10, 0x90, // adrp x11, #0x20000000
+    0x30, 0x00, 0x10, 0xB0, // adrp x16, #0x20005000
+    0xF5, 0x07, 0x10, 0xD0, // adrp x21, #0x200fe000
+    0xFA, 0x07, 0x10, 0xF0, // adrp x26, #0x200ff000
+    0x1F, 0xF8, 0x1F, 0x90, // adrp xzr, #0x3ff00000
+    0x24, 0xF8, 0x1F, 0xB0, // adrp x4, #0x3ff05000
+    0xE9, 0xFF, 0x1F, 0xD0, // adrp x9, #0x3fffe000
+    0xEE, 0xFF, 0x1F, 0xF0, // adrp x14, #0x3ffff000
+    0x13, 0x00, 0x20, 0x90, // adrp x19, #0x40000000
+    0x38, 0x00, 0x20, 0xB0, // adrp x24, #0x40005000
+    0xFD, 0x07, 0x20, 0xD0, // adrp x29, #0x400fe000
+    0xE2, 0x07, 0x20, 0xF0, // adrp x2, #0x400ff000
+    0x07, 0xF8, 0x3F, 0x90, // adrp x7, #0x7ff00000
+    0x2C, 0xF8, 0x3F, 0xB0, // adrp x12, #0x7ff05000
+    0xF1, 0xFF, 0x3F, 0xD0, // adrp x17, #0x7fffe000
+    0xF6, 0xFF, 0x3F, 0xF0, // adrp x22, #0x7ffff000
+    0x1B, 0x00, 0x40, 0x90, // adrp x27, #0x80000000
+    0x20, 0x00, 0x40, 0xB0, // adrp x0, #0x80005000
+    0xE5, 0x07, 0x40, 0xD0, // adrp x5, #0x800fe000
+    0xEA, 0x07, 0x40, 0xF0, // adrp x10, #0x800ff000
+    0x0F, 0xF8, 0x7F, 0x90, // adrp x15, #0xfff00000
+    0x34, 0xF8, 0x7F, 0xB0, // adrp x20, #0xfff05000
+    0xF9, 0xFF, 0x7F, 0xD0, // adrp x25, #0xffffe000
+    0xFE, 0xFF, 0x7F, 0xF0, // adrp x30, #0xfffff000
+    0x03, 0x00, 0x80, 0x90, // adrp x3, #0xffffffff00000000
+    0x28, 0x00, 0x80, 0xB0, // adrp x8, #0xffffffff00005000
+    0xED, 0x07, 0x80, 0xD0, // adrp x13, #0xffffffff000fe000
+    0xF2, 0x07, 0x80, 0xF0, // adrp x18, #0xffffffff000ff000
+    0x17, 0xF8, 0xFF, 0x90, // adrp x23, #0xfffffffffff00000
+    0x3C, 0xF8, 0xFF, 0xB0, // adrp x28, #0xfffffffffff05000
+    0xE1, 0xFF, 0xFF, 0xD0, // adrp x1, #0xffffffffffffe000
+    0xE6, 0xFF, 0xFF, 0xF0  // adrp x6, #0xfffffffffffff000
+};
+Array<u8, 3712> const xz_utils_arm64_binary_part4 = Array<u8, 3712>::from_repeated_value(0x55);
+Array<u8, 384> const xz_utils_arm64_binary_part5 = xz_utils_arm64_binary_part3;
+
 TEST_CASE(xz_utils_good_0cat_empty)
 {
     // "good-0cat-empty.xz has two zero-Block Streams concatenated without
@@ -1229,8 +1362,25 @@ TEST_CASE(xz_utils_good_1_arm64_lzma2_1)
 
     auto stream = MUST(try_make<FixedMemoryStream>(compressed));
     auto decompressor = MUST(Compress::XzDecompressor::create(move(stream)));
-    // TODO: This uses the currently unimplemented arm64 filter.
-    (void)decompressor->read_until_eof(PAGE_SIZE);
+    auto buffer = TRY_OR_FAIL(decompressor->read_until_eof(PAGE_SIZE));
+    auto span = buffer.span();
+
+    EXPECT_EQ(span.trim(xz_utils_arm64_binary_part1.size()), xz_utils_arm64_binary_part1.span());
+    span = span.slice(xz_utils_arm64_binary_part1.size());
+
+    EXPECT_EQ(span.trim(xz_utils_arm64_binary_part2.size()), xz_utils_arm64_binary_part2.span());
+    span = span.slice(xz_utils_arm64_binary_part2.size());
+
+    EXPECT_EQ(span.trim(xz_utils_arm64_binary_part3.size()), xz_utils_arm64_binary_part3.span());
+    span = span.slice(xz_utils_arm64_binary_part3.size());
+
+    EXPECT_EQ(span.trim(xz_utils_arm64_binary_part4.size()), xz_utils_arm64_binary_part4.span());
+    span = span.slice(xz_utils_arm64_binary_part4.size());
+
+    EXPECT_EQ(span.trim(xz_utils_arm64_binary_part5.size()), xz_utils_arm64_binary_part5.span());
+    span = span.slice(xz_utils_arm64_binary_part5.size());
+
+    EXPECT_EQ(span.size(), 0ul);
 }
 
 TEST_CASE(xz_utils_good_1_arm64_lzma2_2)
@@ -1273,8 +1423,25 @@ TEST_CASE(xz_utils_good_1_arm64_lzma2_2)
 
     auto stream = MUST(try_make<FixedMemoryStream>(compressed));
     auto decompressor = MUST(Compress::XzDecompressor::create(move(stream)));
-    // TODO: This uses the currently unimplemented arm64 filter.
-    (void)decompressor->read_until_eof(PAGE_SIZE);
+    auto buffer = TRY_OR_FAIL(decompressor->read_until_eof(PAGE_SIZE));
+    auto span = buffer.span();
+
+    EXPECT_EQ(span.trim(xz_utils_arm64_binary_part1.size()), xz_utils_arm64_binary_part1.span());
+    span = span.slice(xz_utils_arm64_binary_part1.size());
+
+    EXPECT_EQ(span.trim(xz_utils_arm64_binary_part2.size()), xz_utils_arm64_binary_part2.span());
+    span = span.slice(xz_utils_arm64_binary_part2.size());
+
+    EXPECT_EQ(span.trim(xz_utils_arm64_binary_part3.size()), xz_utils_arm64_binary_part3.span());
+    span = span.slice(xz_utils_arm64_binary_part3.size());
+
+    EXPECT_EQ(span.trim(xz_utils_arm64_binary_part4.size()), xz_utils_arm64_binary_part4.span());
+    span = span.slice(xz_utils_arm64_binary_part4.size());
+
+    EXPECT_EQ(span.trim(xz_utils_arm64_binary_part5.size()), xz_utils_arm64_binary_part5.span());
+    span = span.slice(xz_utils_arm64_binary_part5.size());
+
+    EXPECT_EQ(span.size(), 0ul);
 }
 
 TEST_CASE(xz_utils_good_1_block_header_1)

+ 137 - 0
Userland/Libraries/LibCompress/Xz.cpp

@@ -221,6 +221,124 @@ void XzFilterDelta::close()
 {
 }
 
+// Builds an ARM64 BCJ filter that reads its input from `stream`.
+// `start_offset` has to be a multiple of INSTRUCTION_ALIGNMENT, otherwise an error is returned.
+// Errors from creating the staging buffers or the filter object itself are propagated to the caller.
+ErrorOr<NonnullOwnPtr<XzFilterBCJArm64>> XzFilterBCJArm64::create(MaybeOwned<Stream> stream, u32 start_offset)
+{
+    bool const offset_is_aligned = (start_offset % INSTRUCTION_ALIGNMENT) == 0;
+    if (!offset_is_aligned)
+        return Error::from_string_literal("XZ BCJ filter offset is not a multiple of the alignment");
+
+    // Wrap the source so that the number of consumed bytes can be queried later on.
+    CountingStream wrapped_stream { move(stream) };
+
+    // Both staging buffers hold exactly one instruction.
+    auto pending_input = TRY(CircularBuffer::create_empty(INSTRUCTION_SIZE));
+    auto pending_output = TRY(CircularBuffer::create_empty(INSTRUCTION_SIZE));
+
+    return adopt_nonnull_own_or_enomem(new (nothrow) XzFilterBCJArm64(move(wrapped_stream), start_offset, move(pending_input), move(pending_output)));
+}
+
+XzFilterBCJArm64::XzFilterBCJArm64(CountingStream stream, u32 start_offset, CircularBuffer input_buffer, CircularBuffer output_buffer) // Takes ownership of all passed-in objects; performs no validation itself.
+    : m_stream(move(stream)) // Source of the still-filtered bytes.
+    , m_start_offset(start_offset) // Added to the stream position when computing the program counter.
+    , m_input_buffer(move(input_buffer)) // Collects raw bytes until a whole instruction is available.
+    , m_output_buffer(move(output_buffer)) // Keeps converted bytes that did not fit into the caller's buffer.
+{
+}
+
+ErrorOr<Bytes> XzFilterBCJArm64::read_some(Bytes bytes) // Yields leftover buffered bytes or at most one converted instruction per call.
+{
+    if (m_output_buffer.used_space() > 0) {
+        // If we still have buffered outgoing data, return that first.
+        return m_output_buffer.read(bytes);
+    }
+
+    while (m_input_buffer.used_space() < INSTRUCTION_SIZE) {
+        if (m_stream.is_eof()) {
+            // If we can't get any more input data, dump the buffered contents unchanged.
+            // We won't be able to assemble another instruction.
+            return m_input_buffer.read(bytes);
+        }
+
+        TRY(m_input_buffer.fill_from_stream(m_stream));
+    }
+
+    // The algorithm considers the offset of the current bytes to be the current program counter.
+    u32 stream_offset = m_start_offset + m_stream.read_bytes() - m_input_buffer.used_space();
+
+    Array<u8, INSTRUCTION_SIZE> buffer;
+    auto buffer_span = m_input_buffer.read(buffer);
+    VERIFY(buffer_span.size() == INSTRUCTION_SIZE);
+
+    if ((buffer[3] & 0b11111100) == 0b10010100) {
+        // The ARM64 instruction manual notes that BL is encoded as follows (shown most significant byte first, i.e. starting with buffer[3]):
+        //   100101XX XXXXXXXX XXXXXXXX XXXXXXXX
+        // X is an immediate 26 bit value designating the program counter offset divided by 4.
+
+        stream_offset >>= 2;
+
+        u32 program_counter = ((buffer[3] & 0b11) << 24) | (buffer[2] << 16) | (buffer[1] << 8) | buffer[0];
+        u32 program_counter_offset = program_counter - stream_offset;
+
+        // Reassemble the instruction.
+        buffer[3] = ((program_counter_offset >> 24) & 0b11) | 0b10010100;
+        buffer[2] = program_counter_offset >> 16;
+        buffer[1] = program_counter_offset >> 8;
+        buffer[0] = program_counter_offset;
+    } else if ((buffer[3] & 0b10011111) == 0b10010000) {
+        // ADRP instructions are encoded in the following format (shown most significant byte first, i.e. starting with buffer[3]):
+        //  1XX10000 YYYYYYYY YYYYYYYY YYYZZZZZ
+        // Y:X is an immediate 21 bit value designating the program counter offset divided by 4096 (i.e. a right shift by 12).
+        // Z is the register number.
+
+        stream_offset >>= 12;
+
+        auto register_number = buffer[0] & 0b11111;
+        u32 program_counter = (buffer[2] << 13) | (buffer[1] << 5) | ((buffer[0] >> 3) & 0b11100) | ((buffer[3] >> 5) & 0b11);
+
+        // Only offsets between -512MiB and +512MiB are processed, which is supposed to reduce false-positives.
+        // Note: The XZ reference implementation presents a human readable range, an unoptimized condition, and an optimized condition for this.
+        //       Since none of the three entirely match each other, our only option is to copy the exact formula that is used in practice.
+        if (!((program_counter + 0x00020000) & 0x001C0000)) {
+            u32 program_counter_offset = program_counter - stream_offset;
+
+            // Clip the immediate to 18 bits, then sign-extend to 21 bits.
+            program_counter_offset &= (1 << 18) - 1;
+            program_counter_offset |= (0 - (program_counter_offset & (1 << 17))) & (0b111 << 18);
+
+            // Reassemble the instruction.
+            buffer[3] = ((program_counter_offset & 0b11) << 5) | 0b10010000;
+            buffer[2] = program_counter_offset >> 13;
+            buffer[1] = program_counter_offset >> 5;
+            buffer[0] = ((program_counter_offset & 0b11100) << 3) | register_number;
+        }
+    }
+
+    // Write what we can into the Span, put the rest into the output buffer.
+    auto size_in_span = min(INSTRUCTION_SIZE, bytes.size());
+    bytes = bytes.trim(size_in_span);
+    buffer.span().trim(size_in_span).copy_to(bytes);
+    if (size_in_span < INSTRUCTION_SIZE) {
+        auto bytes_written_to_buffer = m_output_buffer.write(buffer.span().slice(size_in_span));
+        VERIFY(bytes_written_to_buffer == INSTRUCTION_SIZE - size_in_span);
+    }
+    return bytes;
+}
+
+// This filter only supports reading, so every write attempt is rejected with EBADF.
+// The Error is constructed explicitly instead of returning the bare errno constant: the payload type
+// of the result is an integer as well, so an implicit conversion must not be relied upon to pick the error state.
+ErrorOr<size_t> XzFilterBCJArm64::write_some(ReadonlyBytes)
+{
+    return Error::from_errno(EBADF);
+}
+
+// Reports the end of the filtered data.
+// The wrapped stream reaching EOF is not sufficient on its own: read_some() may still hold converted bytes
+// in the output buffer (when the caller's buffer was smaller than one instruction) or raw bytes in the
+// input buffer, and those have to be handed out before callers may stop reading.
+bool XzFilterBCJArm64::is_eof() const
+{
+    if (m_output_buffer.used_space() > 0 || m_input_buffer.used_space() > 0)
+        return false;
+
+    return m_stream.is_eof();
+}
+
+bool XzFilterBCJArm64::is_open() const // The filter has no open/closed state of its own.
+{
+    return m_stream.is_open(); // Forward the state of the wrapped stream.
+}
+
+void XzFilterBCJArm64::close() // Does nothing; in particular, the wrapped stream is not closed here.
+{
+}
+
 ErrorOr<NonnullOwnPtr<XzDecompressor>> XzDecompressor::create(MaybeOwned<Stream> stream)
 {
     auto counting_stream = TRY(try_make<CountingStream>(move(stream)));
@@ -419,6 +537,25 @@ ErrorOr<void> XzDecompressor::load_next_block(u8 encoded_block_header_size)
             continue;
         }
 
+        // 5.3.2. Branch/Call/Jump Filters for Executables
+        if (filter.id == 0x0a) {
+            if (filter.last)
+                return Error::from_string_literal("XZ BCJ filter can only be a non-last filter");
+
+            u32 start_offset = 0;
+            if (filter.properties.size() == 0) {
+                // No start offset given.
+            } else if (filter.properties.size() == sizeof(XzFilterBCJProperties)) {
+                auto const* properties = reinterpret_cast<XzFilterBCJProperties*>(filter.properties.data());
+                start_offset = properties->start_offset;
+            } else {
+                return Error::from_string_literal("XZ BCJ filter has an unknown properties size");
+            }
+
+            new_block_stream = TRY(XzFilterBCJArm64::create(move(new_block_stream), start_offset));
+            continue;
+        }
+
         // 5.3.3. Delta
         if (filter.id == 0x03) {
             if (filter.last)

+ 28 - 0
Userland/Libraries/LibCompress/Xz.h

@@ -99,6 +99,34 @@ struct [[gnu::packed]] XzFilterLzma2Properties {
 };
 static_assert(sizeof(XzFilterLzma2Properties) == 1);
 
+// 5.3.2. Branch/Call/Jump Filters for Executables
+struct [[gnu::packed]] XzFilterBCJProperties { // Optional properties blob of a BCJ filter; exactly four bytes large.
+    u32 start_offset; // NOTE(review): read in host byte order; the XZ format presumably stores this as little-endian - confirm for big-endian hosts.
+};
+static_assert(sizeof(XzFilterBCJProperties) == 4);
+
+class XzFilterBCJArm64 : public Stream { // Stream wrapper that converts the BL and ADRP instructions of ARM64 code while it is being read.
+public:
+    static ErrorOr<NonnullOwnPtr<XzFilterBCJArm64>> create(MaybeOwned<Stream>, u32 start_offset); // Fails if start_offset is not a multiple of INSTRUCTION_ALIGNMENT.
+
+    virtual ErrorOr<Bytes> read_some(Bytes) override; // Returns converted data, at most one instruction per call.
+    virtual ErrorOr<size_t> write_some(ReadonlyBytes) override; // Writing is not supported.
+    virtual bool is_eof() const override;
+    virtual bool is_open() const override;
+    virtual void close() override;
+
+private:
+    static constexpr size_t INSTRUCTION_ALIGNMENT = 4; // Required alignment of the start offset, in bytes.
+    static constexpr size_t INSTRUCTION_SIZE = 4; // Number of bytes that are converted as one unit.
+
+    XzFilterBCJArm64(CountingStream, u32 start_offset, CircularBuffer input_buffer, CircularBuffer output_buffer);
+
+    CountingStream m_stream; // Wrapped input; its byte count is used to derive the current program counter.
+    u32 m_start_offset; // Offset that is added to the stream position.
+    CircularBuffer m_input_buffer; // Raw bytes waiting to form a complete instruction.
+    CircularBuffer m_output_buffer; // Converted bytes that did not fit into the caller's buffer yet.
+};
+
 // 5.3.3. Delta
 struct [[gnu::packed]] XzFilterDeltaProperties {
     u8 encoded_distance;