Gzip.cpp 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. /*
  2. * Copyright (c) 2020-2022, the SerenityOS developers.
  3. * Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <LibCompress/Gzip.h>
  8. #include <AK/BitStream.h>
  9. #include <AK/MemoryStream.h>
  10. #include <AK/String.h>
  11. #include <LibCore/DateTime.h>
  12. #include <LibCore/File.h>
  13. #include <LibCore/MappedFile.h>
  14. #include <LibCore/System.h>
  15. namespace Compress {
  16. bool GzipDecompressor::is_likely_compressed(ReadonlyBytes bytes)
  17. {
  18. return bytes.size() >= 2 && bytes[0] == gzip_magic_1 && bytes[1] == gzip_magic_2;
  19. }
  20. bool BlockHeader::valid_magic_number() const
  21. {
  22. return identification_1 == gzip_magic_1 && identification_2 == gzip_magic_2;
  23. }
  24. bool BlockHeader::supported_by_implementation() const
  25. {
  26. if (compression_method != 0x08) {
  27. // RFC 1952 does not define any compression methods other than deflate.
  28. return false;
  29. }
  30. if (flags > Flags::MAX) {
  31. // RFC 1952 does not define any more flags.
  32. return false;
  33. }
  34. return true;
  35. }
  36. ErrorOr<NonnullOwnPtr<GzipDecompressor::Member>> GzipDecompressor::Member::construct(BlockHeader header, LittleEndianInputBitStream& stream)
  37. {
  38. auto deflate_stream = TRY(DeflateDecompressor::construct(MaybeOwned<LittleEndianInputBitStream>(stream)));
  39. return TRY(adopt_nonnull_own_or_enomem(new (nothrow) Member(header, move(deflate_stream))));
  40. }
  41. GzipDecompressor::Member::Member(BlockHeader header, NonnullOwnPtr<DeflateDecompressor> stream)
  42. : m_header(header)
  43. , m_stream(move(stream))
  44. {
  45. }
  46. GzipDecompressor::GzipDecompressor(MaybeOwned<Stream> stream)
  47. : m_input_stream(make<LittleEndianInputBitStream>(move(stream)))
  48. {
  49. }
  50. GzipDecompressor::~GzipDecompressor()
  51. {
  52. m_current_member.clear();
  53. }
  54. ErrorOr<Bytes> GzipDecompressor::read_some(Bytes bytes)
  55. {
  56. size_t total_read = 0;
  57. while (total_read < bytes.size()) {
  58. if (is_eof())
  59. break;
  60. auto slice = bytes.slice(total_read);
  61. if (m_current_member) {
  62. auto current_slice = TRY(current_member().m_stream->read_some(slice));
  63. current_member().m_checksum.update(current_slice);
  64. current_member().m_nread += current_slice.size();
  65. if (current_slice.size() < slice.size()) {
  66. u32 crc32 = TRY(m_input_stream->read_value<LittleEndian<u32>>());
  67. u32 input_size = TRY(m_input_stream->read_value<LittleEndian<u32>>());
  68. if (crc32 != current_member().m_checksum.digest())
  69. return Error::from_string_literal("Stored CRC32 does not match the calculated CRC32 of the current member");
  70. if (input_size != current_member().m_nread)
  71. return Error::from_string_literal("Input size does not match the number of read bytes");
  72. m_current_member.clear();
  73. total_read += current_slice.size();
  74. continue;
  75. }
  76. total_read += current_slice.size();
  77. continue;
  78. } else {
  79. auto current_partial_header_slice = Bytes { m_partial_header, sizeof(BlockHeader) }.slice(m_partial_header_offset);
  80. auto current_partial_header_data = TRY(m_input_stream->read_some(current_partial_header_slice));
  81. m_partial_header_offset += current_partial_header_data.size();
  82. if (is_eof())
  83. break;
  84. if (m_partial_header_offset < sizeof(BlockHeader)) {
  85. break; // partial header read
  86. }
  87. m_partial_header_offset = 0;
  88. BlockHeader header = *(reinterpret_cast<BlockHeader*>(m_partial_header));
  89. if (!header.valid_magic_number())
  90. return Error::from_string_literal("Header does not have a valid magic number");
  91. if (!header.supported_by_implementation())
  92. return Error::from_string_literal("Header is not supported by implementation");
  93. if (header.flags & Flags::FEXTRA) {
  94. u16 subfield_id = TRY(m_input_stream->read_value<LittleEndian<u16>>());
  95. u16 length = TRY(m_input_stream->read_value<LittleEndian<u16>>());
  96. TRY(m_input_stream->discard(length));
  97. (void)subfield_id;
  98. }
  99. auto discard_string = [&]() -> ErrorOr<void> {
  100. char next_char;
  101. do {
  102. next_char = TRY(m_input_stream->read_value<char>());
  103. } while (next_char);
  104. return {};
  105. };
  106. if (header.flags & Flags::FNAME)
  107. TRY(discard_string());
  108. if (header.flags & Flags::FCOMMENT)
  109. TRY(discard_string());
  110. if (header.flags & Flags::FHCRC) {
  111. u16 crc = TRY(m_input_stream->read_value<LittleEndian<u16>>());
  112. // FIXME: we should probably verify this instead of just assuming it matches
  113. (void)crc;
  114. }
  115. m_current_member = TRY(Member::construct(header, *m_input_stream));
  116. continue;
  117. }
  118. }
  119. return bytes.slice(0, total_read);
  120. }
  121. ErrorOr<Optional<String>> GzipDecompressor::describe_header(ReadonlyBytes bytes)
  122. {
  123. if (bytes.size() < sizeof(BlockHeader))
  124. return OptionalNone {};
  125. auto& header = *(reinterpret_cast<BlockHeader const*>(bytes.data()));
  126. if (!header.valid_magic_number() || !header.supported_by_implementation())
  127. return OptionalNone {};
  128. LittleEndian<u32> original_size = *reinterpret_cast<u32 const*>(bytes.offset(bytes.size() - sizeof(u32)));
  129. return TRY(String::formatted("last modified: {}, original size {}", Core::DateTime::from_timestamp(header.modification_time), (u32)original_size));
  130. }
  131. ErrorOr<ByteBuffer> GzipDecompressor::decompress_all(ReadonlyBytes bytes)
  132. {
  133. auto memory_stream = TRY(try_make<FixedMemoryStream>(bytes));
  134. auto gzip_stream = make<GzipDecompressor>(move(memory_stream));
  135. AllocatingMemoryStream output_stream;
  136. auto buffer = TRY(ByteBuffer::create_uninitialized(4096));
  137. while (!gzip_stream->is_eof()) {
  138. auto const data = TRY(gzip_stream->read_some(buffer));
  139. TRY(output_stream.write_until_depleted(data));
  140. }
  141. auto output_buffer = TRY(ByteBuffer::create_uninitialized(output_stream.used_buffer_size()));
  142. TRY(output_stream.read_until_filled(output_buffer));
  143. return output_buffer;
  144. }
  145. ErrorOr<void> GzipDecompressor::decompress_file(StringView input_filename, NonnullOwnPtr<Stream> output_stream)
  146. {
  147. auto input_file = TRY(Core::File::open(input_filename, Core::File::OpenMode::Read));
  148. auto input_stream = TRY(Core::InputBufferedFile::create(move(input_file), 256 * KiB));
  149. auto gzip_stream = GzipDecompressor { move(input_stream) };
  150. auto buffer = TRY(ByteBuffer::create_uninitialized(256 * KiB));
  151. while (!gzip_stream.is_eof()) {
  152. auto span = TRY(gzip_stream.read_some(buffer));
  153. TRY(output_stream->write_until_depleted(span));
  154. }
  155. return {};
  156. }
  157. bool GzipDecompressor::is_eof() const { return m_input_stream->is_eof(); }
  158. ErrorOr<size_t> GzipDecompressor::write_some(ReadonlyBytes)
  159. {
  160. return Error::from_errno(EBADF);
  161. }
  162. GzipCompressor::GzipCompressor(MaybeOwned<Stream> stream)
  163. : m_output_stream(move(stream))
  164. {
  165. }
  166. ErrorOr<Bytes> GzipCompressor::read_some(Bytes)
  167. {
  168. return Error::from_errno(EBADF);
  169. }
  170. ErrorOr<size_t> GzipCompressor::write_some(ReadonlyBytes bytes)
  171. {
  172. BlockHeader header;
  173. header.identification_1 = 0x1f;
  174. header.identification_2 = 0x8b;
  175. header.compression_method = 0x08;
  176. header.flags = 0;
  177. header.modification_time = 0;
  178. header.extra_flags = 3; // DEFLATE sets 2 for maximum compression and 4 for minimum compression
  179. header.operating_system = 3; // unix
  180. TRY(m_output_stream->write_until_depleted({ &header, sizeof(header) }));
  181. auto compressed_stream = TRY(DeflateCompressor::construct(MaybeOwned(*m_output_stream)));
  182. TRY(compressed_stream->write_until_depleted(bytes));
  183. TRY(compressed_stream->final_flush());
  184. Crypto::Checksum::CRC32 crc32;
  185. crc32.update(bytes);
  186. TRY(m_output_stream->write_value<LittleEndian<u32>>(crc32.digest()));
  187. TRY(m_output_stream->write_value<LittleEndian<u32>>(bytes.size()));
  188. return bytes.size();
  189. }
  190. bool GzipCompressor::is_eof() const
  191. {
  192. return true;
  193. }
  194. bool GzipCompressor::is_open() const
  195. {
  196. return m_output_stream->is_open();
  197. }
  198. void GzipCompressor::close()
  199. {
  200. }
  201. ErrorOr<ByteBuffer> GzipCompressor::compress_all(ReadonlyBytes bytes)
  202. {
  203. auto output_stream = TRY(try_make<AllocatingMemoryStream>());
  204. GzipCompressor gzip_stream { MaybeOwned<Stream>(*output_stream) };
  205. TRY(gzip_stream.write_until_depleted(bytes));
  206. auto buffer = TRY(ByteBuffer::create_uninitialized(output_stream->used_buffer_size()));
  207. TRY(output_stream->read_until_filled(buffer.bytes()));
  208. return buffer;
  209. }
  210. ErrorOr<void> GzipCompressor::compress_file(StringView input_filename, NonnullOwnPtr<Stream> output_stream)
  211. {
  212. // We map the whole file instead of streaming to reduce size overhead (gzip header) and increase the deflate block size (better compression)
  213. // TODO: automatically fallback to buffered streaming for very large files
  214. OwnPtr<Core::MappedFile> file;
  215. ReadonlyBytes input_bytes;
  216. if (TRY(Core::System::stat(input_filename)).st_size > 0) {
  217. file = TRY(Core::MappedFile::map(input_filename));
  218. input_bytes = file->bytes();
  219. }
  220. auto output_bytes = TRY(Compress::GzipCompressor::compress_all(input_bytes));
  221. TRY(output_stream->write_until_depleted(output_bytes));
  222. return {};
  223. }
  224. }