Zip.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265
  1. /*
  2. * Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org>
  3. * Copyright (c) 2022, the SerenityOS developers.
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <LibArchive/Zip.h>
  8. #include <LibCompress/Deflate.h>
  9. #include <LibCrypto/Checksum/CRC32.h>
  10. namespace Archive {
  11. bool Zip::find_end_of_central_directory_offset(ReadonlyBytes buffer, size_t& offset)
  12. {
  13. for (size_t backwards_offset = 0; backwards_offset <= UINT16_MAX; backwards_offset++) // the file may have a trailing comment of an arbitrary 16 bit length
  14. {
  15. if (buffer.size() < (sizeof(EndOfCentralDirectory) - sizeof(u8*)) + backwards_offset)
  16. return false;
  17. auto const signature_offset = (buffer.size() - (sizeof(EndOfCentralDirectory) - sizeof(u8*)) - backwards_offset);
  18. if (auto signature = ReadonlyBytes { buffer.data() + signature_offset, EndOfCentralDirectory::signature.size() };
  19. signature == EndOfCentralDirectory::signature) {
  20. offset = signature_offset;
  21. return true;
  22. }
  23. }
  24. return false;
  25. }
  26. Optional<Zip> Zip::try_create(ReadonlyBytes buffer)
  27. {
  28. size_t end_of_central_directory_offset;
  29. if (!find_end_of_central_directory_offset(buffer, end_of_central_directory_offset))
  30. return {};
  31. EndOfCentralDirectory end_of_central_directory {};
  32. if (!end_of_central_directory.read(buffer.slice(end_of_central_directory_offset)))
  33. return {};
  34. if (end_of_central_directory.disk_number != 0 || end_of_central_directory.central_directory_start_disk != 0 || end_of_central_directory.disk_records_count != end_of_central_directory.total_records_count)
  35. return {}; // TODO: support multi-volume zip archives
  36. size_t member_offset = end_of_central_directory.central_directory_offset;
  37. for (size_t i = 0; i < end_of_central_directory.total_records_count; i++) {
  38. CentralDirectoryRecord central_directory_record {};
  39. if (member_offset > buffer.size())
  40. return {};
  41. if (!central_directory_record.read(buffer.slice(member_offset)))
  42. return {};
  43. if (central_directory_record.general_purpose_flags.encrypted)
  44. return {}; // TODO: support encrypted zip members
  45. if (central_directory_record.general_purpose_flags.data_descriptor)
  46. return {}; // TODO: support zip data descriptors
  47. if (central_directory_record.compression_method != ZipCompressionMethod::Store && central_directory_record.compression_method != ZipCompressionMethod::Deflate)
  48. return {}; // TODO: support obsolete zip compression methods
  49. if (central_directory_record.compression_method == ZipCompressionMethod::Store && central_directory_record.uncompressed_size != central_directory_record.compressed_size)
  50. return {};
  51. if (central_directory_record.start_disk != 0)
  52. return {}; // TODO: support multi-volume zip archives
  53. if (memchr(central_directory_record.name, 0, central_directory_record.name_length) != nullptr)
  54. return {};
  55. LocalFileHeader local_file_header {};
  56. if (central_directory_record.local_file_header_offset > buffer.size())
  57. return {};
  58. if (!local_file_header.read(buffer.slice(central_directory_record.local_file_header_offset)))
  59. return {};
  60. if (buffer.size() - (local_file_header.compressed_data - buffer.data()) < central_directory_record.compressed_size)
  61. return {};
  62. member_offset += central_directory_record.size();
  63. }
  64. return Zip {
  65. end_of_central_directory.total_records_count,
  66. end_of_central_directory.central_directory_offset,
  67. buffer,
  68. };
  69. }
  70. ErrorOr<bool> Zip::for_each_member(Function<ErrorOr<IterationDecision>(ZipMember const&)> callback) const
  71. {
  72. size_t member_offset = m_members_start_offset;
  73. for (size_t i = 0; i < m_member_count; i++) {
  74. CentralDirectoryRecord central_directory_record {};
  75. VERIFY(central_directory_record.read(m_input_data.slice(member_offset)));
  76. LocalFileHeader local_file_header {};
  77. VERIFY(local_file_header.read(m_input_data.slice(central_directory_record.local_file_header_offset)));
  78. ZipMember member;
  79. member.name = TRY(String::from_utf8({ central_directory_record.name, central_directory_record.name_length }));
  80. member.compressed_data = { local_file_header.compressed_data, central_directory_record.compressed_size };
  81. member.compression_method = central_directory_record.compression_method;
  82. member.uncompressed_size = central_directory_record.uncompressed_size;
  83. member.crc32 = central_directory_record.crc32;
  84. member.modification_time = central_directory_record.modification_time;
  85. member.modification_date = central_directory_record.modification_date;
  86. member.is_directory = central_directory_record.external_attributes & zip_directory_external_attribute || member.name.bytes_as_string_view().ends_with('/'); // FIXME: better directory detection
  87. if (TRY(callback(member)) == IterationDecision::Break)
  88. return false;
  89. member_offset += central_directory_record.size();
  90. }
  91. return true;
  92. }
  93. ErrorOr<Statistics> Zip::calculate_statistics() const
  94. {
  95. size_t file_count = 0;
  96. size_t directory_count = 0;
  97. size_t uncompressed_bytes = 0;
  98. TRY(for_each_member([&](auto zip_member) -> ErrorOr<IterationDecision> {
  99. if (zip_member.is_directory)
  100. directory_count++;
  101. else
  102. file_count++;
  103. uncompressed_bytes += zip_member.uncompressed_size;
  104. return IterationDecision::Continue;
  105. }));
  106. return Statistics(file_count, directory_count, uncompressed_bytes);
  107. }
  108. ZipOutputStream::ZipOutputStream(NonnullOwnPtr<Stream> stream)
  109. : m_stream(move(stream))
  110. {
  111. }
  112. static u16 minimum_version_needed(ZipCompressionMethod method)
  113. {
  114. // Deflate was added in PKZip 2.0
  115. return method == ZipCompressionMethod::Deflate ? 20 : 10;
  116. }
  117. ErrorOr<void> ZipOutputStream::add_member(ZipMember const& member)
  118. {
  119. VERIFY(!m_finished);
  120. VERIFY(member.name.bytes_as_string_view().length() <= UINT16_MAX);
  121. VERIFY(member.compressed_data.size() <= UINT32_MAX);
  122. TRY(m_members.try_append(member));
  123. LocalFileHeader local_file_header {
  124. .minimum_version = minimum_version_needed(member.compression_method),
  125. .general_purpose_flags = { .flags = 0 },
  126. .compression_method = static_cast<u16>(member.compression_method),
  127. .modification_time = member.modification_time,
  128. .modification_date = member.modification_date,
  129. .crc32 = member.crc32,
  130. .compressed_size = static_cast<u32>(member.compressed_data.size()),
  131. .uncompressed_size = member.uncompressed_size,
  132. .name_length = static_cast<u16>(member.name.bytes_as_string_view().length()),
  133. .extra_data_length = 0,
  134. .name = reinterpret_cast<u8 const*>(member.name.bytes_as_string_view().characters_without_null_termination()),
  135. .extra_data = nullptr,
  136. .compressed_data = member.compressed_data.data(),
  137. };
  138. return local_file_header.write(*m_stream);
  139. }
  140. ErrorOr<ZipOutputStream::MemberInformation> ZipOutputStream::add_member_from_stream(StringView path, Stream& stream, Optional<Core::DateTime> const& modification_time)
  141. {
  142. auto buffer = TRY(stream.read_until_eof());
  143. Archive::ZipMember member {};
  144. member.name = TRY(String::from_utf8(path));
  145. if (modification_time.has_value()) {
  146. member.modification_date = to_packed_dos_date(modification_time->year(), modification_time->month(), modification_time->day());
  147. member.modification_time = to_packed_dos_time(modification_time->hour(), modification_time->minute(), modification_time->second());
  148. }
  149. auto deflate_buffer = Compress::DeflateCompressor::compress_all(buffer);
  150. auto compression_ratio = 1.f;
  151. auto compressed_size = buffer.size();
  152. if (!deflate_buffer.is_error() && deflate_buffer.value().size() < buffer.size()) {
  153. member.compressed_data = deflate_buffer.value().bytes();
  154. member.compression_method = Archive::ZipCompressionMethod::Deflate;
  155. compression_ratio = static_cast<float>(deflate_buffer.value().size()) / static_cast<float>(buffer.size());
  156. compressed_size = member.compressed_data.size();
  157. } else {
  158. member.compressed_data = buffer.bytes();
  159. member.compression_method = Archive::ZipCompressionMethod::Store;
  160. }
  161. member.uncompressed_size = buffer.size();
  162. Crypto::Checksum::CRC32 checksum { buffer.bytes() };
  163. member.crc32 = checksum.digest();
  164. member.is_directory = false;
  165. TRY(add_member(member));
  166. return MemberInformation { compression_ratio, compressed_size };
  167. }
  168. ErrorOr<void> ZipOutputStream::add_directory(StringView name, Optional<Core::DateTime> const& modification_time)
  169. {
  170. Archive::ZipMember member {};
  171. member.name = TRY(String::from_utf8(name));
  172. member.compressed_data = {};
  173. member.compression_method = Archive::ZipCompressionMethod::Store;
  174. member.uncompressed_size = 0;
  175. member.crc32 = 0;
  176. member.is_directory = true;
  177. if (modification_time.has_value()) {
  178. member.modification_date = to_packed_dos_date(modification_time->year(), modification_time->month(), modification_time->day());
  179. member.modification_time = to_packed_dos_time(modification_time->hour(), modification_time->minute(), modification_time->second());
  180. }
  181. return add_member(member);
  182. }
  183. ErrorOr<void> ZipOutputStream::finish()
  184. {
  185. VERIFY(!m_finished);
  186. m_finished = true;
  187. auto file_header_offset = 0u;
  188. auto central_directory_size = 0u;
  189. for (ZipMember const& member : m_members) {
  190. auto zip_version = minimum_version_needed(member.compression_method);
  191. CentralDirectoryRecord central_directory_record {
  192. .made_by_version = zip_version,
  193. .minimum_version = zip_version,
  194. .general_purpose_flags = { .flags = 0 },
  195. .compression_method = member.compression_method,
  196. .modification_time = member.modification_time,
  197. .modification_date = member.modification_date,
  198. .crc32 = member.crc32,
  199. .compressed_size = static_cast<u32>(member.compressed_data.size()),
  200. .uncompressed_size = member.uncompressed_size,
  201. .name_length = static_cast<u16>(member.name.bytes_as_string_view().length()),
  202. .extra_data_length = 0,
  203. .comment_length = 0,
  204. .start_disk = 0,
  205. .internal_attributes = 0,
  206. .external_attributes = member.is_directory ? zip_directory_external_attribute : 0,
  207. .local_file_header_offset = file_header_offset, // FIXME: we assume the wrapped output stream was never written to before us
  208. .name = reinterpret_cast<u8 const*>(member.name.bytes_as_string_view().characters_without_null_termination()),
  209. .extra_data = nullptr,
  210. .comment = nullptr,
  211. };
  212. file_header_offset += sizeof(LocalFileHeader::signature) + (sizeof(LocalFileHeader) - (sizeof(u8*) * 3)) + member.name.bytes_as_string_view().length() + member.compressed_data.size();
  213. TRY(central_directory_record.write(*m_stream));
  214. central_directory_size += central_directory_record.size();
  215. }
  216. EndOfCentralDirectory end_of_central_directory {
  217. .disk_number = 0,
  218. .central_directory_start_disk = 0,
  219. .disk_records_count = static_cast<u16>(m_members.size()),
  220. .total_records_count = static_cast<u16>(m_members.size()),
  221. .central_directory_size = central_directory_size,
  222. .central_directory_offset = file_header_offset,
  223. .comment_length = 0,
  224. .comment = nullptr,
  225. };
  226. return end_of_central_directory.write(*m_stream);
  227. }
  228. }