Xz.cpp 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815
  1. /*
  2. * Copyright (c) 2023, Tim Schumacher <timschumi@gmx.de>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/ByteBuffer.h>
  7. #include <AK/MemoryStream.h>
  8. #include <LibCompress/Lzma2.h>
  9. #include <LibCompress/Xz.h>
  10. #include <LibCrypto/Checksum/CRC32.h>
  11. namespace Compress {
  12. ErrorOr<XzMultibyteInteger> XzMultibyteInteger::read_from_stream(Stream& stream)
  13. {
  14. // 1.2. Multibyte Integers:
  15. // "When smaller values are more likely than bigger values (for
  16. // example file sizes), multibyte integers are encoded in a
  17. // variable-length representation:
  18. // - Numbers in the range [0, 127] are copied as is, and take
  19. // one byte of space.
  20. // - Bigger numbers will occupy two or more bytes. All but the
  21. // last byte of the multibyte representation have the highest
  22. // (eighth) bit set."
  23. // 9 * 7 bits is 63 bits, which is the largest that will fit into an u64.
  24. constexpr size_t maximum_number_of_bytes = 9;
  25. u64 result = 0;
  26. for (size_t i = 0; i < maximum_number_of_bytes; i++) {
  27. u64 const next_byte = TRY(stream.read_value<u8>());
  28. result |= (next_byte & 0x7F) << (i * 7);
  29. // We should reject numbers that are encoded in too many bytes.
  30. if (next_byte == 0x00 && i != 0)
  31. return Error::from_string_literal("XZ multibyte integer has a larger encoding than necessary");
  32. if ((next_byte & 0x80) == 0)
  33. break;
  34. }
  35. return XzMultibyteInteger { result };
  36. }
  37. ErrorOr<void> XzStreamHeader::validate() const
  38. {
  39. // 2.1.1.1. Header Magic Bytes:
  40. // "The first six (6) bytes of the Stream are so called Header
  41. // Magic Bytes. They can be used to identify the file type.
  42. //
  43. // Using a C array and ASCII:
  44. // const uint8_t HEADER_MAGIC[6]
  45. // = { 0xFD, '7', 'z', 'X', 'Z', 0x00 };
  46. //
  47. // In plain hexadecimal:
  48. // FD 37 7A 58 5A 00
  49. //
  50. // If the Header Magic Bytes don't match, the decoder MUST
  51. // indicate an error."
  52. if (magic[0] != 0xFD || magic[1] != '7' || magic[2] != 'z' || magic[3] != 'X' || magic[4] != 'Z' || magic[5] != 0x00)
  53. return Error::from_string_literal("XZ stream header has an invalid magic");
  54. // 2.1.1.2. Stream Flags:
  55. // "If any reserved bit is set, the decoder MUST indicate an error.
  56. // It is possible that there is a new field present which the
  57. // decoder is not aware of, and can thus parse the Stream Header
  58. // incorrectly."
  59. if (flags.reserved != 0 || flags.reserved_bits != 0)
  60. return Error::from_string_literal("XZ stream header has reserved non-null stream flag bits");
  61. // 2.1.1.3. CRC32:
  62. // "The CRC32 is calculated from the Stream Flags field. It is
  63. // stored as an unsigned 32-bit little endian integer. If the
  64. // calculated value does not match the stored one, the decoder
  65. // MUST indicate an error."
  66. if (Crypto::Checksum::CRC32({ &flags, sizeof(flags) }).digest() != flags_crc32)
  67. return Error::from_string_literal("XZ stream header has an invalid CRC32 checksum");
  68. return {};
  69. }
  70. ErrorOr<void> XzStreamFooter::validate() const
  71. {
  72. // 2.1.2.1. CRC32:
  73. // "The CRC32 is calculated from the Backward Size and Stream Flags
  74. // fields. It is stored as an unsigned 32-bit little endian
  75. // integer. If the calculated value does not match the stored one,
  76. // the decoder MUST indicate an error."
  77. Crypto::Checksum::CRC32 calculated_crc32;
  78. calculated_crc32.update({ &encoded_backward_size, sizeof(encoded_backward_size) });
  79. calculated_crc32.update({ &flags, sizeof(flags) });
  80. if (calculated_crc32.digest() != size_and_flags_crc32)
  81. return Error::from_string_literal("XZ stream footer has an invalid CRC32 checksum");
  82. // 2.1.2.4. Footer Magic Bytes:
  83. // "As the last step of the decoding process, the decoder MUST
  84. // verify the existence of Footer Magic Bytes. If they don't
  85. // match, an error MUST be indicated.
  86. //
  87. // Using a C array and ASCII:
  88. // const uint8_t FOOTER_MAGIC[2] = { 'Y', 'Z' };
  89. //
  90. // In hexadecimal:
  91. // 59 5A"
  92. if (magic[0] != 'Y' || magic[1] != 'Z')
  93. return Error::from_string_literal("XZ stream footer has an invalid magic");
  94. return {};
  95. }
  96. u32 XzStreamFooter::backward_size() const
  97. {
  98. // 2.1.2.2. Backward Size:
  99. // "Backward Size is stored as a 32-bit little endian integer,
  100. // which indicates the size of the Index field as multiple of
  101. // four bytes, minimum value being four bytes:
  102. //
  103. // real_backward_size = (stored_backward_size + 1) * 4;"
  104. return (encoded_backward_size + 1) * 4;
  105. }
  106. size_t XzBlockFlags::number_of_filters() const
  107. {
  108. // 3.1.2. Block Flags:
  109. // "Bit(s) Mask Description
  110. // 0-1 0x03 Number of filters (1-4)"
  111. return encoded_number_of_filters + 1;
  112. }
  113. ErrorOr<void> XzFilterLzma2Properties::validate() const
  114. {
  115. // 5.3.1. LZMA2:
  116. // "Bits Mask Description
  117. // 6-7 0xC0 Reserved for future use; MUST be zero for now."
  118. if (reserved != 0)
  119. return Error::from_string_literal("XZ LZMA2 filter properties contains non-null reserved bits");
  120. // " const uint8_t bits = get_dictionary_flags() & 0x3F;
  121. // if (bits > 40)
  122. // return DICTIONARY_TOO_BIG; // Bigger than 4 GiB"
  123. if (encoded_dictionary_size > 40)
  124. return Error::from_string_literal("XZ LZMA2 filter properties contains larger-than-allowed dictionary size");
  125. return {};
  126. }
  127. u32 XzFilterLzma2Properties::dictionary_size() const
  128. {
  129. // "Dictionary Size is encoded with one-bit mantissa and five-bit
  130. // exponent. The smallest dictionary size is 4 KiB and the biggest
  131. // is 4 GiB.
  132. // Instead of having a table in the decoder, the dictionary size
  133. // can be decoded using the following C code:"
  134. if (encoded_dictionary_size == 40)
  135. return NumericLimits<u32>::max();
  136. u32 dictionary_size = 2 | (encoded_dictionary_size & 1);
  137. dictionary_size <<= encoded_dictionary_size / 2 + 11;
  138. return dictionary_size;
  139. }
  140. u32 XzFilterDeltaProperties::distance() const
  141. {
  142. // "The Properties byte indicates the delta distance, which can be
  143. // 1-256 bytes backwards from the current byte: 0x00 indicates
  144. // distance of 1 byte and 0xFF distance of 256 bytes."
  145. return encoded_distance + 1;
  146. }
  147. ErrorOr<NonnullOwnPtr<XzFilterDelta>> XzFilterDelta::create(MaybeOwned<Stream> stream, u32 distance)
  148. {
  149. auto buffer = TRY(CircularBuffer::create_empty(distance));
  150. auto filter = TRY(adopt_nonnull_own_or_enomem(new (nothrow) XzFilterDelta(move(stream), move(buffer))));
  151. return filter;
  152. }
  153. XzFilterDelta::XzFilterDelta(MaybeOwned<Stream> stream, CircularBuffer buffer)
  154. : m_stream(move(stream))
  155. , m_buffer(move(buffer))
  156. {
  157. }
  158. ErrorOr<Bytes> XzFilterDelta::read_some(Bytes bytes)
  159. {
  160. bytes = TRY(m_stream->read_some(bytes));
  161. auto distance = m_buffer.capacity();
  162. for (auto& byte : bytes) {
  163. if (m_buffer.seekback_limit() >= distance) {
  164. u8 byte_at_distance { 0 };
  165. MUST(m_buffer.read_with_seekback({ &byte_at_distance, 1 }, distance));
  166. byte = byte_at_distance + byte;
  167. }
  168. m_buffer.write({ &byte, 1 });
  169. MUST(m_buffer.discard(1));
  170. }
  171. return bytes;
  172. }
  173. ErrorOr<size_t> XzFilterDelta::write_some(ReadonlyBytes)
  174. {
  175. return EBADF;
  176. }
  177. bool XzFilterDelta::is_eof() const
  178. {
  179. return m_stream->is_eof();
  180. }
  181. bool XzFilterDelta::is_open() const
  182. {
  183. return m_stream->is_open();
  184. }
  185. void XzFilterDelta::close()
  186. {
  187. }
  188. ErrorOr<NonnullOwnPtr<XzFilterBCJArm64>> XzFilterBCJArm64::create(MaybeOwned<Stream> stream, u32 start_offset)
  189. {
  190. if (start_offset % INSTRUCTION_ALIGNMENT != 0)
  191. return Error::from_string_literal("XZ BCJ filter offset is not a multiple of the alignment");
  192. auto counting_stream = CountingStream { move(stream) };
  193. auto input_buffer = TRY(CircularBuffer::create_empty(INSTRUCTION_SIZE));
  194. auto output_buffer = TRY(CircularBuffer::create_empty(INSTRUCTION_SIZE));
  195. auto filter = TRY(adopt_nonnull_own_or_enomem(new (nothrow) XzFilterBCJArm64(move(counting_stream), start_offset, move(input_buffer), move(output_buffer))));
  196. return filter;
  197. }
  198. XzFilterBCJArm64::XzFilterBCJArm64(CountingStream stream, u32 start_offset, CircularBuffer input_buffer, CircularBuffer output_buffer)
  199. : m_stream(move(stream))
  200. , m_start_offset(start_offset)
  201. , m_input_buffer(move(input_buffer))
  202. , m_output_buffer(move(output_buffer))
  203. {
  204. }
  205. ErrorOr<Bytes> XzFilterBCJArm64::read_some(Bytes bytes)
  206. {
  207. if (m_output_buffer.used_space() > 0) {
  208. // If we still have buffered outgoing data, return that first.
  209. return m_output_buffer.read(bytes);
  210. }
  211. while (m_input_buffer.used_space() < INSTRUCTION_SIZE) {
  212. if (m_stream.is_eof()) {
  213. // If we can't get any more input data, dump the buffered contents unchanged.
  214. // We won't be able to assemble another instruction.
  215. return m_input_buffer.read(bytes);
  216. }
  217. TRY(m_input_buffer.fill_from_stream(m_stream));
  218. }
  219. // The algorithm considers the offset of the current bytes to be the current program counter.
  220. u32 stream_offset = m_start_offset + m_stream.read_bytes() - m_input_buffer.used_space();
  221. Array<u8, INSTRUCTION_SIZE> buffer;
  222. auto buffer_span = m_input_buffer.read(buffer);
  223. VERIFY(buffer_span.size() == INSTRUCTION_SIZE);
  224. if ((buffer[3] & 0b11111100) == 0b10010100) {
  225. // The ARM64 instruction manual notes that BL is encoded as the following in a little-endian byte order:
  226. // 100101XX XXXXXXX XXXXXXXX XXXXXXXX
  227. // X is an immediate 26 bit value designating the program counter offset divided by 4.
  228. stream_offset >>= 2;
  229. u32 program_counter = ((buffer[3] & 0b11) << 24) | (buffer[2] << 16) | (buffer[1] << 8) | buffer[0];
  230. u32 program_counter_offset = program_counter - stream_offset;
  231. // Reassemble the instruction.
  232. buffer[3] = ((program_counter_offset >> 24) & 0b11) | 0b10010100;
  233. buffer[2] = program_counter_offset >> 16;
  234. buffer[1] = program_counter_offset >> 8;
  235. buffer[0] = program_counter_offset;
  236. } else if ((buffer[3] & 0b10011111) == 0b10010000) {
  237. // ADRP instructions are encoded in the following format:
  238. // 1XX10000 YYYYYYYY YYYYYYYY YYYZZZZZ
  239. // Y:X is an immediate 21 bit value designating the program counter offset divided by 4096 (i.e. a right shift by 12).
  240. // Z is the register number.
  241. stream_offset >>= 12;
  242. auto register_number = buffer[0] & 0b11111;
  243. u32 program_counter = (buffer[2] << 13) | (buffer[1] << 5) | ((buffer[0] >> 3) & 0b11100) | ((buffer[3] >> 5) & 0b11);
  244. // Only offsets between -512MiB and +512MiB are processed, which is suppsoed to reduce false-positives.
  245. // Note: The XZ reference implementation presents a human readable range, an unoptimized condition, and an optimized condition for this.
  246. // Since none of the three entirely match each other, our only option is to copy the exact formula that is used in practice.
  247. if (!((program_counter + 0x00020000) & 0x001C0000)) {
  248. u32 program_counter_offset = program_counter - stream_offset;
  249. // Clip the immediate to 18 bits, then sign-extend to 21 bits.
  250. program_counter_offset &= (1 << 18) - 1;
  251. program_counter_offset |= (0 - (program_counter_offset & (1 << 17))) & (0b111 << 18);
  252. // Reassemble the instruction.
  253. buffer[3] = ((program_counter_offset & 0b11) << 5) | 0b10010000;
  254. buffer[2] = program_counter_offset >> 13;
  255. buffer[1] = program_counter_offset >> 5;
  256. buffer[0] = ((program_counter_offset & 0b11100) << 3) | register_number;
  257. }
  258. }
  259. // Write what we can into the Span, put the rest into the output buffer.
  260. auto size_in_span = min(INSTRUCTION_SIZE, bytes.size());
  261. bytes = bytes.trim(size_in_span);
  262. buffer.span().trim(size_in_span).copy_to(bytes);
  263. if (size_in_span < INSTRUCTION_SIZE) {
  264. auto bytes_written_to_buffer = m_output_buffer.write(buffer.span().slice(size_in_span));
  265. VERIFY(bytes_written_to_buffer == INSTRUCTION_SIZE - size_in_span);
  266. }
  267. return bytes;
  268. }
  269. ErrorOr<size_t> XzFilterBCJArm64::write_some(ReadonlyBytes)
  270. {
  271. return EBADF;
  272. }
  273. bool XzFilterBCJArm64::is_eof() const
  274. {
  275. return m_stream.is_eof();
  276. }
  277. bool XzFilterBCJArm64::is_open() const
  278. {
  279. return m_stream.is_open();
  280. }
  281. void XzFilterBCJArm64::close()
  282. {
  283. }
  284. ErrorOr<NonnullOwnPtr<XzDecompressor>> XzDecompressor::create(MaybeOwned<Stream> stream)
  285. {
  286. auto counting_stream = TRY(try_make<CountingStream>(move(stream)));
  287. auto decompressor = TRY(adopt_nonnull_own_or_enomem(new (nothrow) XzDecompressor(move(counting_stream))));
  288. return decompressor;
  289. }
  290. XzDecompressor::XzDecompressor(NonnullOwnPtr<CountingStream> stream)
  291. : m_stream(move(stream))
  292. {
  293. }
  294. static Optional<size_t> size_for_check_type(XzStreamCheckType check_type)
  295. {
  296. switch (check_type) {
  297. case XzStreamCheckType::None:
  298. return 0;
  299. case XzStreamCheckType::CRC32:
  300. return 4;
  301. case XzStreamCheckType::CRC64:
  302. return 8;
  303. case XzStreamCheckType::SHA256:
  304. return 32;
  305. default:
  306. return {};
  307. }
  308. }
  309. ErrorOr<bool> XzDecompressor::load_next_stream()
  310. {
  311. // If we already determined to have found the last stream footer, there is nothing more to do.
  312. if (m_found_last_stream_footer)
  313. return false;
  314. // This assumes that we can just read the Stream Header into memory as-is. Check that this still holds up for good measure.
  315. static_assert(AK::Traits<XzStreamHeader>::is_trivially_serializable());
  316. XzStreamHeader stream_header {};
  317. Bytes stream_header_bytes { &stream_header, sizeof(stream_header) };
  318. if (m_found_first_stream_header) {
  319. // 2.2. Stream Padding:
  320. // "Stream Padding MUST contain only null bytes. To preserve the
  321. // four-byte alignment of consecutive Streams, the size of Stream
  322. // Padding MUST be a multiple of four bytes. Empty Stream Padding
  323. // is allowed. If these requirements are not met, the decoder MUST
  324. // indicate an error."
  325. VERIFY(m_stream->read_bytes() % 4 == 0);
  326. while (true) {
  327. // Read the first byte until we either get a non-null byte or reach EOF.
  328. auto byte_or_error = m_stream->read_value<u8>();
  329. if (byte_or_error.is_error() && m_stream->is_eof())
  330. break;
  331. auto byte = TRY(byte_or_error);
  332. if (byte != 0) {
  333. stream_header_bytes[0] = byte;
  334. stream_header_bytes = stream_header_bytes.slice(1);
  335. break;
  336. }
  337. }
  338. // If we aren't at EOF we already read the potential first byte of the header, so we need to subtract that.
  339. auto end_of_padding_offset = m_stream->read_bytes();
  340. if (!m_stream->is_eof())
  341. end_of_padding_offset -= 1;
  342. if (end_of_padding_offset % 4 != 0)
  343. return Error::from_string_literal("XZ Stream Padding is not aligned to 4 bytes");
  344. if (m_stream->is_eof()) {
  345. m_found_last_stream_footer = true;
  346. return false;
  347. }
  348. }
  349. TRY(m_stream->read_until_filled(stream_header_bytes));
  350. TRY(stream_header.validate());
  351. m_stream_flags = stream_header.flags;
  352. m_found_first_stream_header = true;
  353. return true;
  354. }
  355. ErrorOr<void> XzDecompressor::load_next_block(u8 encoded_block_header_size)
  356. {
  357. // We already read the encoded Block Header size (one byte) to determine that this is not an Index.
  358. m_current_block_start_offset = m_stream->read_bytes() - 1;
  359. // Ensure that the start of the block is aligned to a multiple of four (in theory, everything in XZ is).
  360. VERIFY(m_current_block_start_offset % 4 == 0);
  361. // 3.1.1. Block Header Size:
  362. // "This field contains the size of the Block Header field,
  363. // including the Block Header Size field itself. Valid values are
  364. // in the range [0x01, 0xFF], which indicate the size of the Block
  365. // Header as multiples of four bytes, minimum size being eight
  366. // bytes:
  367. //
  368. // real_header_size = (encoded_header_size + 1) * 4;"
  369. u64 const block_header_size = (encoded_block_header_size + 1) * 4;
  370. // Read the whole header into a buffer to allow calculating the CRC32 later (3.1.7. CRC32).
  371. auto header = TRY(ByteBuffer::create_uninitialized(block_header_size));
  372. header[0] = encoded_block_header_size;
  373. TRY(m_stream->read_until_filled(header.span().slice(1)));
  374. FixedMemoryStream header_stream { header.span().slice(1) };
  375. // 3.1.2. Block Flags:
  376. // "If any reserved bit is set, the decoder MUST indicate an error.
  377. // It is possible that there is a new field present which the
  378. // decoder is not aware of, and can thus parse the Block Header
  379. // incorrectly."
  380. auto const flags = TRY(header_stream.read_value<XzBlockFlags>());
  381. if (flags.reserved != 0)
  382. return Error::from_string_literal("XZ block header has reserved non-null block flag bits");
  383. MaybeOwned<Stream> new_block_stream { *m_stream };
  384. // 3.1.3. Compressed Size:
  385. // "This field is present only if the appropriate bit is set in
  386. // the Block Flags field (see Section 3.1.2)."
  387. if (flags.compressed_size_present) {
  388. // "Compressed Size is stored using the encoding described in Section 1.2."
  389. u64 const compressed_size = TRY(header_stream.read_value<XzMultibyteInteger>());
  390. // "The Compressed Size field contains the size of the Compressed
  391. // Data field, which MUST be non-zero."
  392. if (compressed_size == 0)
  393. return Error::from_string_literal("XZ block header contains a compressed size of zero");
  394. new_block_stream = TRY(try_make<ConstrainedStream>(move(new_block_stream), compressed_size));
  395. }
  396. // 3.1.4. Uncompressed Size:
  397. // "This field is present only if the appropriate bit is set in
  398. // the Block Flags field (see Section 3.1.2)."
  399. if (flags.uncompressed_size_present) {
  400. // "Uncompressed Size is stored using the encoding described in Section 1.2."
  401. u64 const uncompressed_size = TRY(header_stream.read_value<XzMultibyteInteger>());
  402. m_current_block_expected_uncompressed_size = uncompressed_size;
  403. } else {
  404. m_current_block_expected_uncompressed_size.clear();
  405. }
  406. // We need to process the filters in reverse order, since they are listed in the order that they have been applied in.
  407. struct FilterEntry {
  408. u64 id;
  409. ByteBuffer properties;
  410. bool last;
  411. };
  412. Vector<FilterEntry, 4> filters;
  413. // 3.1.5. List of Filter Flags:
  414. // "The number of Filter Flags fields is stored in the Block Flags
  415. // field (see Section 3.1.2)."
  416. for (size_t i = 0; i < flags.number_of_filters(); i++) {
  417. auto last = (i == flags.number_of_filters() - 1);
  418. // "The format of each Filter Flags field is as follows:
  419. // Both Filter ID and Size of Properties are stored using the
  420. // encoding described in Section 1.2."
  421. u64 const filter_id = TRY(header_stream.read_value<XzMultibyteInteger>());
  422. u64 const size_of_properties = TRY(header_stream.read_value<XzMultibyteInteger>());
  423. // "Size of Properties indicates the size of the Filter Properties field as bytes."
  424. auto filter_properties = TRY(ByteBuffer::create_uninitialized(size_of_properties));
  425. TRY(header_stream.read_until_filled(filter_properties));
  426. filters.empend(filter_id, move(filter_properties), last);
  427. }
  428. for (auto& filter : filters.in_reverse()) {
  429. // 5.3.1. LZMA2
  430. if (filter.id == 0x21) {
  431. if (!filter.last)
  432. return Error::from_string_literal("XZ LZMA2 filter can only be the last filter");
  433. if (filter.properties.size() < sizeof(XzFilterLzma2Properties))
  434. return Error::from_string_literal("XZ LZMA2 filter has a smaller-than-needed properties size");
  435. auto const* properties = reinterpret_cast<XzFilterLzma2Properties*>(filter.properties.data());
  436. TRY(properties->validate());
  437. new_block_stream = TRY(Lzma2Decompressor::create_from_raw_stream(move(new_block_stream), properties->dictionary_size()));
  438. continue;
  439. }
  440. // 5.3.2. Branch/Call/Jump Filters for Executables
  441. if (filter.id == 0x0a) {
  442. if (filter.last)
  443. return Error::from_string_literal("XZ BCJ filter can only be a non-last filter");
  444. u32 start_offset = 0;
  445. if (filter.properties.size() == 0) {
  446. // No start offset given.
  447. } else if (filter.properties.size() == sizeof(XzFilterBCJProperties)) {
  448. auto const* properties = reinterpret_cast<XzFilterBCJProperties*>(filter.properties.data());
  449. start_offset = properties->start_offset;
  450. } else {
  451. return Error::from_string_literal("XZ BCJ filter has an unknown properties size");
  452. }
  453. new_block_stream = TRY(XzFilterBCJArm64::create(move(new_block_stream), start_offset));
  454. continue;
  455. }
  456. // 5.3.3. Delta
  457. if (filter.id == 0x03) {
  458. if (filter.last)
  459. return Error::from_string_literal("XZ Delta filter can only be a non-last filter");
  460. if (filter.properties.size() < sizeof(XzFilterDeltaProperties))
  461. return Error::from_string_literal("XZ Delta filter has a smaller-than-needed properties size");
  462. auto const* properties = reinterpret_cast<XzFilterDeltaProperties*>(filter.properties.data());
  463. new_block_stream = TRY(XzFilterDelta::create(move(new_block_stream), properties->distance()));
  464. continue;
  465. }
  466. return Error::from_string_literal("XZ block header contains unknown filter ID");
  467. }
  468. // 3.1.6. Header Padding:
  469. // "This field contains as many null byte as it is needed to make
  470. // the Block Header have the size specified in Block Header Size."
  471. constexpr size_t size_of_block_header_size = 1;
  472. constexpr size_t size_of_crc32 = 4;
  473. while (MUST(header_stream.tell()) < block_header_size - size_of_block_header_size - size_of_crc32) {
  474. auto const padding_byte = TRY(header_stream.read_value<u8>());
  475. // "If any of the bytes are not null bytes, the decoder MUST
  476. // indicate an error."
  477. if (padding_byte != 0)
  478. return Error::from_string_literal("XZ block header padding contains non-null bytes");
  479. }
  480. // 3.1.7. CRC32:
  481. // "The CRC32 is calculated over everything in the Block Header
  482. // field except the CRC32 field itself.
  483. Crypto::Checksum::CRC32 calculated_header_crc32 { header.span().trim(block_header_size - size_of_crc32) };
  484. // It is stored as an unsigned 32-bit little endian integer.
  485. u32 const stored_header_crc32 = TRY(header_stream.read_value<LittleEndian<u32>>());
  486. // If the calculated value does not match the stored one, the decoder MUST indicate
  487. // an error."
  488. if (calculated_header_crc32.digest() != stored_header_crc32)
  489. return Error::from_string_literal("Stored XZ block header CRC32 does not match the stored CRC32");
  490. m_current_block_stream = move(new_block_stream);
  491. m_current_block_uncompressed_size = 0;
  492. return {};
  493. }
  494. ErrorOr<void> XzDecompressor::finish_current_block()
  495. {
  496. auto unpadded_size = m_stream->read_bytes() - m_current_block_start_offset;
  497. // 3.3. Block Padding:
  498. // "Block Padding MUST contain 0-3 null bytes to make the size of
  499. // the Block a multiple of four bytes. This can be needed when
  500. // the size of Compressed Data is not a multiple of four."
  501. for (size_t i = 0; (unpadded_size + i) % 4 != 0; i++) {
  502. auto const padding_byte = TRY(m_stream->read_value<u8>());
  503. // "If any of the bytes in Block Padding are not null bytes, the decoder
  504. // MUST indicate an error."
  505. if (padding_byte != 0)
  506. return Error::from_string_literal("XZ block contains a non-null padding byte");
  507. }
  508. // 3.4. Check:
  509. // "The type and size of the Check field depends on which bits
  510. // are set in the Stream Flags field (see Section 2.1.1.2).
  511. //
  512. // The Check, when used, is calculated from the original
  513. // uncompressed data. If the calculated Check does not match the
  514. // stored one, the decoder MUST indicate an error. If the selected
  515. // type of Check is not supported by the decoder, it SHOULD
  516. // indicate a warning or error."
  517. auto const maybe_check_size = size_for_check_type(m_stream_flags->check_type);
  518. if (!maybe_check_size.has_value())
  519. return Error::from_string_literal("XZ stream has an unknown check type");
  520. // TODO: Block content checks are currently unimplemented as a whole, independent of the check type.
  521. // For now, we only make sure to remove the correct amount of bytes from the stream.
  522. TRY(m_stream->discard(*maybe_check_size));
  523. unpadded_size += *maybe_check_size;
  524. if (m_current_block_expected_uncompressed_size.has_value()) {
  525. if (*m_current_block_expected_uncompressed_size != m_current_block_uncompressed_size)
  526. return Error::from_string_literal("Uncompressed size of XZ block does not match the expected value");
  527. }
  528. TRY(m_processed_blocks.try_append({
  529. .uncompressed_size = m_current_block_uncompressed_size,
  530. .unpadded_size = unpadded_size,
  531. }));
  532. return {};
  533. }
  534. ErrorOr<void> XzDecompressor::finish_current_stream()
  535. {
  536. // We already read the Index Indicator (one byte) to determine that this is an Index.
  537. auto const start_of_current_block = m_stream->read_bytes() - 1;
  538. // 4.2. Number of Records:
  539. // "This field indicates how many Records there are in the List
  540. // of Records field, and thus how many Blocks there are in the
  541. // Stream. The value is stored using the encoding described in
  542. // Section 1.2."
  543. u64 const number_of_records = TRY(m_stream->read_value<XzMultibyteInteger>());
  544. if (m_processed_blocks.size() != number_of_records)
  545. return Error::from_string_literal("Number of Records in XZ Index does not match the number of processed Blocks");
  546. // 4.3. List of Records:
  547. // "List of Records consists of as many Records as indicated by the
  548. // Number of Records field:"
  549. for (u64 i = 0; i < number_of_records; i++) {
  550. // "Each Record contains information about one Block:
  551. //
  552. // +===============+===================+
  553. // | Unpadded Size | Uncompressed Size |
  554. // +===============+===================+"
  555. // 4.3.1. Unpadded Size:
  556. // "This field indicates the size of the Block excluding the Block
  557. // Padding field. That is, Unpadded Size is the size of the Block
  558. // Header, Compressed Data, and Check fields. Unpadded Size is
  559. // stored using the encoding described in Section 1.2."
  560. u64 const unpadded_size = TRY(m_stream->read_value<XzMultibyteInteger>());
  561. // "The value MUST never be zero; with the current structure of Blocks, the
  562. // actual minimum value for Unpadded Size is five."
  563. if (unpadded_size < 5)
  564. return Error::from_string_literal("XZ index contains a record with an unpadded size of less than five");
  565. // 4.3.2. Uncompressed Size:
  566. // "This field indicates the Uncompressed Size of the respective
  567. // Block as bytes. The value is stored using the encoding
  568. // described in Section 1.2."
  569. u64 const uncompressed_size = TRY(m_stream->read_value<XzMultibyteInteger>());
  570. // 4.3. List of Records:
  571. // "If the decoder has decoded all the Blocks of the Stream, it
  572. // MUST verify that the contents of the Records match the real
  573. // Unpadded Size and Uncompressed Size of the respective Blocks."
  574. if (m_processed_blocks[i].uncompressed_size != uncompressed_size)
  575. return Error::from_string_literal("Uncompressed size of XZ Block does not match the Index");
  576. if (m_processed_blocks[i].unpadded_size != unpadded_size)
  577. return Error::from_string_literal("Unpadded size of XZ Block does not match the Index");
  578. }
  579. // 4.4. Index Padding:
  580. // "This field MUST contain 0-3 null bytes to pad the Index to
  581. // a multiple of four bytes. If any of the bytes are not null
  582. // bytes, the decoder MUST indicate an error."
  583. while ((m_stream->read_bytes() - start_of_current_block) % 4 != 0) {
  584. auto padding_byte = TRY(m_stream->read_value<u8>());
  585. if (padding_byte != 0)
  586. return Error::from_string_literal("XZ index contains a non-null padding byte");
  587. }
  588. // 4.5. CRC32:
  589. // "The CRC32 is calculated over everything in the Index field
  590. // except the CRC32 field itself. The CRC32 is stored as an
  591. // unsigned 32-bit little endian integer."
  592. u32 const index_crc32 = TRY(m_stream->read_value<LittleEndian<u32>>());
  593. // "If the calculated value does not match the stored one, the decoder MUST indicate
  594. // an error."
  595. // TODO: Validation of the index CRC32 is currently unimplemented.
  596. (void)index_crc32;
  597. auto const size_of_index = m_stream->read_bytes() - start_of_current_block;
  598. // According to the specification of a stream (2.1. Stream), the index is the last element in a stream,
  599. // followed by the stream footer (2.1.2. Stream Footer).
  600. auto const stream_footer = TRY(m_stream->read_value<XzStreamFooter>());
  601. // This handles verifying the CRC32 (2.1.2.1. CRC32) and the magic bytes (2.1.2.4. Footer Magic Bytes).
  602. TRY(stream_footer.validate());
  603. // 2.1.2.2. Backward Size:
  604. // "If the stored value does not match the real size of the Index
  605. // field, the decoder MUST indicate an error."
  606. if (stream_footer.backward_size() != size_of_index)
  607. return Error::from_string_literal("XZ index size does not match the stored size in the stream footer");
  608. // 2.1.2.3. Stream Flags:
  609. // "This is a copy of the Stream Flags field from the Stream
  610. // Header. The information stored to Stream Flags is needed
  611. // when parsing the Stream backwards. The decoder MUST compare
  612. // the Stream Flags fields in both Stream Header and Stream
  613. // Footer, and indicate an error if they are not identical."
  614. if (ReadonlyBytes { &*m_stream_flags, sizeof(XzStreamFlags) } != ReadonlyBytes { &stream_footer.flags, sizeof(stream_footer.flags) })
  615. return Error::from_string_literal("XZ stream header flags don't match the stream footer");
  616. return {};
  617. }
  618. ErrorOr<Bytes> XzDecompressor::read_some(Bytes bytes)
  619. {
  620. if (!m_stream_flags.has_value()) {
  621. if (!TRY(load_next_stream()))
  622. return bytes.trim(0);
  623. }
  624. if (!m_current_block_stream.has_value() || (*m_current_block_stream)->is_eof()) {
  625. if (m_current_block_stream.has_value()) {
  626. // We have already processed a block, so we weed to clean up trailing data before the next block starts.
  627. TRY(finish_current_block());
  628. }
  629. // The first byte between Block Header (3.1.1. Block Header Size) and Index (4.1. Index Indicator) overlap.
  630. // Block header sizes have valid values in the range of [0x01, 0xFF], the only valid value for an Index Indicator is therefore 0x00.
  631. auto const encoded_block_header_size_or_index_indicator = TRY(m_stream->read_value<u8>());
  632. if (encoded_block_header_size_or_index_indicator == 0x00) {
  633. // This is an Index, which is the last element before the stream footer.
  634. TRY(finish_current_stream());
  635. // Another XZ Stream might follow, so we just unset the current information and continue on the next read.
  636. m_stream_flags.clear();
  637. m_processed_blocks.clear();
  638. return bytes.trim(0);
  639. }
  640. TRY(load_next_block(encoded_block_header_size_or_index_indicator));
  641. }
  642. auto result = TRY((*m_current_block_stream)->read_some(bytes));
  643. m_current_block_uncompressed_size += result.size();
  644. return result;
  645. }
  646. ErrorOr<size_t> XzDecompressor::write_some(ReadonlyBytes)
  647. {
  648. return Error::from_errno(EBADF);
  649. }
  650. bool XzDecompressor::is_eof() const
  651. {
  652. return m_found_last_stream_footer;
  653. }
  654. bool XzDecompressor::is_open() const
  655. {
  656. return true;
  657. }
  658. void XzDecompressor::close()
  659. {
  660. }
  661. }