unzip.cpp 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. /*
  2. * Copyright (c) 2020, Andrés Vieira <anvieiravazquez@gmail.com>
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. *
  8. * 1. Redistributions of source code must retain the above copyright notice, this
  9. * list of conditions and the following disclaimer.
  10. *
  11. * 2. Redistributions in binary form must reproduce the above copyright notice,
  12. * this list of conditions and the following disclaimer in the documentation
  13. * and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  18. * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  19. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  21. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  22. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  23. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #include <AK/MappedFile.h>
  27. #include <AK/NumberFormat.h>
  28. #include <LibCore/ArgsParser.h>
  29. #include <LibCore/File.h>
  30. #include <string.h>
  31. #include <sys/stat.h>
  32. static const u8 central_directory_file_header_sig[] = "\x50\x4b\x01\x02";
  33. bool seek_and_read(u8* buffer, const MappedFile& file, off_t seek_to, size_t bytes_to_read)
  34. {
  35. if (!buffer)
  36. return false;
  37. if ((size_t)seek_to >= file.size())
  38. return false;
  39. memcpy(buffer, (const char*)file.data() + seek_to, bytes_to_read);
  40. return true;
  41. }
  42. bool find_next_central_directory(off_t file_size, const MappedFile& file, off_t current_index, off_t& return_index)
  43. {
  44. off_t start_index = current_index == 0 ? current_index : current_index + 1;
  45. for (off_t index = start_index; index < file_size - 4; index++) {
  46. u8 buffer[4];
  47. if (!seek_and_read(buffer, file, index, 4))
  48. return false;
  49. if (!memcmp(buffer, central_directory_file_header_sig, 4)) {
  50. return_index = index;
  51. return true;
  52. }
  53. }
  54. return false;
  55. }
  56. bool unpack_file_for_central_directory_index(off_t central_directory_index, const MappedFile& file)
  57. {
  58. enum CentralFileDirectoryHeaderOffsets {
  59. CFDHCompressionMethodOffset = 10,
  60. CFDHLocalFileHeaderIndexOffset = 42,
  61. };
  62. enum LocalFileHeaderOffsets {
  63. LFHCompressionMethodOffset = 8,
  64. LFHCompressedSizeOffset = 18,
  65. LFHFileNameLengthOffset = 26,
  66. LFHExtraFieldLengthOffset = 28,
  67. LFHFileNameBaseOffset = 30,
  68. };
  69. enum CompressionMethod {
  70. None = 0,
  71. Shrunk = 1,
  72. Factor1 = 2,
  73. Factor2 = 3,
  74. Factor3 = 4,
  75. Factor4 = 5,
  76. Implode = 6,
  77. Deflate = 8,
  78. EnhancedDeflate = 9,
  79. PKWareDCLImplode = 10,
  80. BZIP2 = 12,
  81. LZMA = 14,
  82. TERSE = 18,
  83. LZ77 = 19,
  84. };
  85. u8 buffer[4];
  86. if (!seek_and_read(buffer, file, central_directory_index + CFDHLocalFileHeaderIndexOffset, 4))
  87. return false;
  88. off_t local_file_header_index = buffer[3] << 24 | buffer[2] << 16 | buffer[1] << 8 | buffer[0];
  89. if (!seek_and_read(buffer, file, local_file_header_index + LFHCompressionMethodOffset, 2))
  90. return false;
  91. auto compression_method = buffer[1] << 8 | buffer[0];
  92. // FIXME: Remove once any decompression is supported.
  93. ASSERT(compression_method == None);
  94. if (!seek_and_read(buffer, file, local_file_header_index + LFHCompressedSizeOffset, 4))
  95. return false;
  96. off_t compressed_file_size = buffer[3] << 24 | buffer[2] << 16 | buffer[1] << 8 | buffer[0];
  97. if (!seek_and_read(buffer, file, local_file_header_index + LFHFileNameLengthOffset, 2))
  98. return false;
  99. off_t file_name_length = buffer[1] << 8 | buffer[0];
  100. if (!seek_and_read(buffer, file, local_file_header_index + LFHExtraFieldLengthOffset, 2))
  101. return false;
  102. off_t extra_field_length = buffer[1] << 8 | buffer[0];
  103. if (!seek_and_read(buffer, file, local_file_header_index + LFHFileNameBaseOffset, file_name_length))
  104. return false;
  105. char file_name[file_name_length + 1];
  106. memcpy(file_name, buffer, file_name_length);
  107. file_name[file_name_length] = '\0';
  108. if (file_name[file_name_length - 1] == '/') {
  109. if (mkdir(file_name, 0755) < 0) {
  110. perror("mkdir");
  111. return false;
  112. }
  113. } else {
  114. auto new_file = Core::File::construct(String { file_name });
  115. if (!new_file->open(Core::IODevice::WriteOnly)) {
  116. fprintf(stderr, "Can't write file %s: %s\n", file_name, new_file->error_string());
  117. return false;
  118. }
  119. printf(" extracting: %s\n", file_name);
  120. u8 raw_file_contents[compressed_file_size];
  121. if (!seek_and_read(raw_file_contents, file, local_file_header_index + LFHFileNameBaseOffset + file_name_length + extra_field_length, compressed_file_size))
  122. return false;
  123. // FIXME: Try to uncompress data here. We're just ignoring it as no decompression methods are implemented yet.
  124. if (!new_file->write(raw_file_contents, compressed_file_size)) {
  125. fprintf(stderr, "Can't write file contents in %s: %s\n", file_name, new_file->error_string());
  126. return false;
  127. }
  128. if (!new_file->close()) {
  129. fprintf(stderr, "Can't close file %s: %s\n", file_name, new_file->error_string());
  130. return false;
  131. }
  132. }
  133. return true;
  134. }
  135. int main(int argc, char** argv)
  136. {
  137. const char* path;
  138. int map_size_limit = 32 * MB;
  139. Core::ArgsParser args_parser;
  140. args_parser.add_option(map_size_limit, "Maximum chunk size to map", "map-size-limit", 0, "size");
  141. args_parser.add_positional_argument(path, "File to unzip", "path", Core::ArgsParser::Required::Yes);
  142. args_parser.parse(argc, argv);
  143. String zip_file_path { path };
  144. struct stat st;
  145. int rc = stat(zip_file_path.characters(), &st);
  146. if (rc < 0) {
  147. perror("stat");
  148. return 1;
  149. }
  150. // FIXME: Map file chunk-by-chunk once we have mmap() with offset.
  151. // This will require mapping some parts then unmapping them repeatedly,
  152. // but it would be significantly faster and less syscall heavy than seek()/read() at every read.
  153. if (st.st_size >= map_size_limit) {
  154. fprintf(stderr, "unzip warning: Refusing to map file since it is larger than %s, pass '--map-size-limit %d' to get around this\n",
  155. human_readable_size(map_size_limit).characters(),
  156. round_up_to_power_of_two(st.st_size, 16));
  157. return 1;
  158. }
  159. MappedFile mapped_file { zip_file_path };
  160. if (!mapped_file.is_valid())
  161. return 1;
  162. printf("Archive: %s\n", zip_file_path.characters());
  163. off_t index = 0;
  164. while (find_next_central_directory(st.st_size, mapped_file, index, index)) {
  165. bool success = unpack_file_for_central_directory_index(index, mapped_file);
  166. if (!success) {
  167. printf("Could not find local file header for a file.\n");
  168. return 4;
  169. }
  170. }
  171. return 0;
  172. }