Deflate.cpp 38 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074
  1. /*
  2. * Copyright (c) 2020, the SerenityOS developers.
  3. * Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/Array.h>
  8. #include <AK/Assertions.h>
  9. #include <AK/BinaryHeap.h>
  10. #include <AK/BinarySearch.h>
  11. #include <AK/MemoryStream.h>
  12. #include <string.h>
  13. #include <LibCompress/Deflate.h>
  14. namespace Compress {
  15. static constexpr u8 deflate_special_code_length_copy = 16;
  16. static constexpr u8 deflate_special_code_length_zeros = 17;
  17. static constexpr u8 deflate_special_code_length_long_zeros = 18;
  18. CanonicalCode const& CanonicalCode::fixed_literal_codes()
  19. {
  20. static CanonicalCode code;
  21. static bool initialized = false;
  22. if (initialized)
  23. return code;
  24. code = CanonicalCode::from_bytes(fixed_literal_bit_lengths).value();
  25. initialized = true;
  26. return code;
  27. }
  28. CanonicalCode const& CanonicalCode::fixed_distance_codes()
  29. {
  30. static CanonicalCode code;
  31. static bool initialized = false;
  32. if (initialized)
  33. return code;
  34. code = CanonicalCode::from_bytes(fixed_distance_bit_lengths).value();
  35. initialized = true;
  36. return code;
  37. }
  38. Optional<CanonicalCode> CanonicalCode::from_bytes(ReadonlyBytes bytes)
  39. {
  40. // FIXME: I can't quite follow the algorithm here, but it seems to work.
  41. CanonicalCode code;
  42. auto non_zero_symbols = 0;
  43. auto last_non_zero = -1;
  44. for (size_t i = 0; i < bytes.size(); i++) {
  45. if (bytes[i] != 0) {
  46. non_zero_symbols++;
  47. last_non_zero = i;
  48. }
  49. }
  50. if (non_zero_symbols == 1) { // special case - only 1 symbol
  51. code.m_symbol_codes.append(0b10);
  52. code.m_symbol_values.append(last_non_zero);
  53. code.m_bit_codes[last_non_zero] = 0;
  54. code.m_bit_code_lengths[last_non_zero] = 1;
  55. return code;
  56. }
  57. auto next_code = 0;
  58. for (size_t code_length = 1; code_length <= 15; ++code_length) {
  59. next_code <<= 1;
  60. auto start_bit = 1 << code_length;
  61. for (size_t symbol = 0; symbol < bytes.size(); ++symbol) {
  62. if (bytes[symbol] != code_length)
  63. continue;
  64. if (next_code > start_bit)
  65. return {};
  66. code.m_symbol_codes.append(start_bit | next_code);
  67. code.m_symbol_values.append(symbol);
  68. code.m_bit_codes[symbol] = fast_reverse16(start_bit | next_code, code_length); // DEFLATE writes huffman encoded symbols as lsb-first
  69. code.m_bit_code_lengths[symbol] = code_length;
  70. next_code++;
  71. }
  72. }
  73. if (next_code != (1 << 15)) {
  74. return {};
  75. }
  76. return code;
  77. }
  78. u32 CanonicalCode::read_symbol(InputBitStream& stream) const
  79. {
  80. u32 code_bits = 1;
  81. for (;;) {
  82. code_bits = code_bits << 1 | stream.read_bits(1);
  83. if (code_bits >= (1 << 16))
  84. return UINT32_MAX; // the maximum symbol in deflate is 288, so we use UINT32_MAX (an impossible value) to indicate an error
  85. // FIXME: This is very inefficient and could greatly be improved by implementing this
  86. // algorithm: https://www.hanshq.net/zip.html#huffdec
  87. size_t index;
  88. if (binary_search(m_symbol_codes.span(), code_bits, &index))
  89. return m_symbol_values[index];
  90. }
  91. }
  92. void CanonicalCode::write_symbol(OutputBitStream& stream, u32 symbol) const
  93. {
  94. stream.write_bits(m_bit_codes[symbol], m_bit_code_lengths[symbol]);
  95. }
  96. DeflateDecompressor::CompressedBlock::CompressedBlock(DeflateDecompressor& decompressor, CanonicalCode literal_codes, Optional<CanonicalCode> distance_codes)
  97. : m_decompressor(decompressor)
  98. , m_literal_codes(literal_codes)
  99. , m_distance_codes(distance_codes)
  100. {
  101. }
  102. bool DeflateDecompressor::CompressedBlock::try_read_more()
  103. {
  104. if (m_eof == true)
  105. return false;
  106. auto const symbol = m_literal_codes.read_symbol(m_decompressor.m_input_stream);
  107. if (symbol >= 286) { // invalid deflate literal/length symbol
  108. m_decompressor.set_fatal_error();
  109. return false;
  110. }
  111. if (symbol < 256) {
  112. m_decompressor.m_output_stream << static_cast<u8>(symbol);
  113. return true;
  114. } else if (symbol == 256) {
  115. m_eof = true;
  116. return false;
  117. } else {
  118. if (!m_distance_codes.has_value()) {
  119. m_decompressor.set_fatal_error();
  120. return false;
  121. }
  122. auto const length = m_decompressor.decode_length(symbol);
  123. auto const distance_symbol = m_distance_codes.value().read_symbol(m_decompressor.m_input_stream);
  124. if (distance_symbol >= 30) { // invalid deflate distance symbol
  125. m_decompressor.set_fatal_error();
  126. return false;
  127. }
  128. auto const distance = m_decompressor.decode_distance(distance_symbol);
  129. for (size_t idx = 0; idx < length; ++idx) {
  130. u8 byte = 0;
  131. m_decompressor.m_output_stream.read({ &byte, sizeof(byte) }, distance);
  132. if (m_decompressor.m_output_stream.handle_any_error()) {
  133. m_decompressor.set_fatal_error();
  134. return false; // a back reference was requested that was too far back (outside our current sliding window)
  135. }
  136. m_decompressor.m_output_stream << byte;
  137. }
  138. return true;
  139. }
  140. }
  141. DeflateDecompressor::UncompressedBlock::UncompressedBlock(DeflateDecompressor& decompressor, size_t length)
  142. : m_decompressor(decompressor)
  143. , m_bytes_remaining(length)
  144. {
  145. }
  146. bool DeflateDecompressor::UncompressedBlock::try_read_more()
  147. {
  148. if (m_bytes_remaining == 0)
  149. return false;
  150. auto const nread = min(m_bytes_remaining, m_decompressor.m_output_stream.remaining_contiguous_space());
  151. m_bytes_remaining -= nread;
  152. m_decompressor.m_input_stream >> m_decompressor.m_output_stream.reserve_contiguous_space(nread);
  153. return true;
  154. }
  155. DeflateDecompressor::DeflateDecompressor(InputStream& stream)
  156. : m_input_stream(stream)
  157. {
  158. }
  159. DeflateDecompressor::~DeflateDecompressor()
  160. {
  161. if (m_state == State::ReadingCompressedBlock)
  162. m_compressed_block.~CompressedBlock();
  163. if (m_state == State::ReadingUncompressedBlock)
  164. m_uncompressed_block.~UncompressedBlock();
  165. }
  166. size_t DeflateDecompressor::read(Bytes bytes)
  167. {
  168. size_t total_read = 0;
  169. while (total_read < bytes.size()) {
  170. if (has_any_error())
  171. break;
  172. auto slice = bytes.slice(total_read);
  173. if (m_state == State::Idle) {
  174. if (m_read_final_bock)
  175. break;
  176. m_read_final_bock = m_input_stream.read_bit();
  177. auto const block_type = m_input_stream.read_bits(2);
  178. if (m_input_stream.has_any_error()) {
  179. set_fatal_error();
  180. break;
  181. }
  182. if (block_type == 0b00) {
  183. m_input_stream.align_to_byte_boundary();
  184. LittleEndian<u16> length, negated_length;
  185. m_input_stream >> length >> negated_length;
  186. if (m_input_stream.has_any_error()) {
  187. set_fatal_error();
  188. break;
  189. }
  190. if ((length ^ 0xffff) != negated_length) {
  191. set_fatal_error();
  192. break;
  193. }
  194. m_state = State::ReadingUncompressedBlock;
  195. new (&m_uncompressed_block) UncompressedBlock(*this, length);
  196. continue;
  197. }
  198. if (block_type == 0b01) {
  199. m_state = State::ReadingCompressedBlock;
  200. new (&m_compressed_block) CompressedBlock(*this, CanonicalCode::fixed_literal_codes(), CanonicalCode::fixed_distance_codes());
  201. continue;
  202. }
  203. if (block_type == 0b10) {
  204. CanonicalCode literal_codes;
  205. Optional<CanonicalCode> distance_codes;
  206. decode_codes(literal_codes, distance_codes);
  207. if (m_input_stream.has_any_error()) {
  208. set_fatal_error();
  209. break;
  210. }
  211. m_state = State::ReadingCompressedBlock;
  212. new (&m_compressed_block) CompressedBlock(*this, literal_codes, distance_codes);
  213. continue;
  214. }
  215. set_fatal_error();
  216. break;
  217. }
  218. if (m_state == State::ReadingCompressedBlock) {
  219. auto nread = m_output_stream.read(slice);
  220. while (nread < slice.size() && m_compressed_block.try_read_more()) {
  221. nread += m_output_stream.read(slice.slice(nread));
  222. }
  223. if (m_input_stream.has_any_error()) {
  224. set_fatal_error();
  225. break;
  226. }
  227. total_read += nread;
  228. if (nread == slice.size())
  229. break;
  230. m_compressed_block.~CompressedBlock();
  231. m_state = State::Idle;
  232. continue;
  233. }
  234. if (m_state == State::ReadingUncompressedBlock) {
  235. auto nread = m_output_stream.read(slice);
  236. while (nread < slice.size() && m_uncompressed_block.try_read_more()) {
  237. nread += m_output_stream.read(slice.slice(nread));
  238. }
  239. if (m_input_stream.has_any_error()) {
  240. set_fatal_error();
  241. break;
  242. }
  243. total_read += nread;
  244. if (nread == slice.size())
  245. break;
  246. m_uncompressed_block.~UncompressedBlock();
  247. m_state = State::Idle;
  248. continue;
  249. }
  250. VERIFY_NOT_REACHED();
  251. }
  252. return total_read;
  253. }
  254. bool DeflateDecompressor::read_or_error(Bytes bytes)
  255. {
  256. if (read(bytes) < bytes.size()) {
  257. set_fatal_error();
  258. return false;
  259. }
  260. return true;
  261. }
  262. bool DeflateDecompressor::discard_or_error(size_t count)
  263. {
  264. u8 buffer[4096];
  265. size_t ndiscarded = 0;
  266. while (ndiscarded < count) {
  267. if (unreliable_eof()) {
  268. set_fatal_error();
  269. return false;
  270. }
  271. ndiscarded += read({ buffer, min<size_t>(count - ndiscarded, 4096) });
  272. }
  273. return true;
  274. }
  275. bool DeflateDecompressor::unreliable_eof() const { return m_state == State::Idle && m_read_final_bock; }
  276. bool DeflateDecompressor::handle_any_error()
  277. {
  278. bool handled_errors = m_input_stream.handle_any_error();
  279. return Stream::handle_any_error() || handled_errors;
  280. }
  281. Optional<ByteBuffer> DeflateDecompressor::decompress_all(ReadonlyBytes bytes)
  282. {
  283. InputMemoryStream memory_stream { bytes };
  284. DeflateDecompressor deflate_stream { memory_stream };
  285. DuplexMemoryStream output_stream;
  286. u8 buffer[4096];
  287. while (!deflate_stream.has_any_error() && !deflate_stream.unreliable_eof()) {
  288. auto const nread = deflate_stream.read({ buffer, sizeof(buffer) });
  289. output_stream.write_or_error({ buffer, nread });
  290. }
  291. if (deflate_stream.handle_any_error())
  292. return {};
  293. return output_stream.copy_into_contiguous_buffer();
  294. }
  295. u32 DeflateDecompressor::decode_length(u32 symbol)
  296. {
  297. // FIXME: I can't quite follow the algorithm here, but it seems to work.
  298. if (symbol <= 264)
  299. return symbol - 254;
  300. if (symbol <= 284) {
  301. auto extra_bits = (symbol - 261) / 4;
  302. return (((symbol - 265) % 4 + 4) << extra_bits) + 3 + m_input_stream.read_bits(extra_bits);
  303. }
  304. if (symbol == 285)
  305. return 258;
  306. VERIFY_NOT_REACHED();
  307. }
  308. u32 DeflateDecompressor::decode_distance(u32 symbol)
  309. {
  310. // FIXME: I can't quite follow the algorithm here, but it seems to work.
  311. if (symbol <= 3)
  312. return symbol + 1;
  313. if (symbol <= 29) {
  314. auto extra_bits = (symbol / 2) - 1;
  315. return ((symbol % 2 + 2) << extra_bits) + 1 + m_input_stream.read_bits(extra_bits);
  316. }
  317. VERIFY_NOT_REACHED();
  318. }
  319. void DeflateDecompressor::decode_codes(CanonicalCode& literal_code, Optional<CanonicalCode>& distance_code)
  320. {
  321. auto literal_code_count = m_input_stream.read_bits(5) + 257;
  322. auto distance_code_count = m_input_stream.read_bits(5) + 1;
  323. auto code_length_count = m_input_stream.read_bits(4) + 4;
  324. // First we have to extract the code lengths of the code that was used to encode the code lengths of
  325. // the code that was used to encode the block.
  326. u8 code_lengths_code_lengths[19] = { 0 };
  327. for (size_t i = 0; i < code_length_count; ++i) {
  328. code_lengths_code_lengths[code_lengths_code_lengths_order[i]] = m_input_stream.read_bits(3);
  329. }
  330. // Now we can extract the code that was used to encode the code lengths of the code that was used to
  331. // encode the block.
  332. auto code_length_code_result = CanonicalCode::from_bytes({ code_lengths_code_lengths, sizeof(code_lengths_code_lengths) });
  333. if (!code_length_code_result.has_value()) {
  334. set_fatal_error();
  335. return;
  336. }
  337. auto const code_length_code = code_length_code_result.value();
  338. // Next we extract the code lengths of the code that was used to encode the block.
  339. Vector<u8> code_lengths;
  340. while (code_lengths.size() < literal_code_count + distance_code_count) {
  341. auto symbol = code_length_code.read_symbol(m_input_stream);
  342. if (symbol == UINT32_MAX) {
  343. set_fatal_error();
  344. return;
  345. }
  346. if (symbol < deflate_special_code_length_copy) {
  347. code_lengths.append(static_cast<u8>(symbol));
  348. continue;
  349. } else if (symbol == deflate_special_code_length_zeros) {
  350. auto nrepeat = 3 + m_input_stream.read_bits(3);
  351. for (size_t j = 0; j < nrepeat; ++j)
  352. code_lengths.append(0);
  353. continue;
  354. } else if (symbol == deflate_special_code_length_long_zeros) {
  355. auto nrepeat = 11 + m_input_stream.read_bits(7);
  356. for (size_t j = 0; j < nrepeat; ++j)
  357. code_lengths.append(0);
  358. continue;
  359. } else {
  360. VERIFY(symbol == deflate_special_code_length_copy);
  361. if (code_lengths.is_empty()) {
  362. set_fatal_error();
  363. return;
  364. }
  365. auto nrepeat = 3 + m_input_stream.read_bits(2);
  366. for (size_t j = 0; j < nrepeat; ++j)
  367. code_lengths.append(code_lengths.last());
  368. }
  369. }
  370. if (code_lengths.size() != literal_code_count + distance_code_count) {
  371. set_fatal_error();
  372. return;
  373. }
  374. // Now we extract the code that was used to encode literals and lengths in the block.
  375. auto literal_code_result = CanonicalCode::from_bytes(code_lengths.span().trim(literal_code_count));
  376. if (!literal_code_result.has_value()) {
  377. set_fatal_error();
  378. return;
  379. }
  380. literal_code = literal_code_result.value();
  381. // Now we extract the code that was used to encode distances in the block.
  382. if (distance_code_count == 1) {
  383. auto length = code_lengths[literal_code_count];
  384. if (length == 0) {
  385. return;
  386. } else if (length != 1) {
  387. set_fatal_error();
  388. return;
  389. }
  390. }
  391. auto distance_code_result = CanonicalCode::from_bytes(code_lengths.span().slice(literal_code_count));
  392. if (!distance_code_result.has_value()) {
  393. set_fatal_error();
  394. return;
  395. }
  396. distance_code = distance_code_result.value();
  397. }
  398. DeflateCompressor::DeflateCompressor(OutputStream& stream, CompressionLevel compression_level)
  399. : m_compression_level(compression_level)
  400. , m_compression_constants(compression_constants[static_cast<int>(m_compression_level)])
  401. , m_output_stream(stream)
  402. {
  403. m_symbol_frequencies.fill(0);
  404. m_distance_frequencies.fill(0);
  405. }
  406. DeflateCompressor::~DeflateCompressor()
  407. {
  408. VERIFY(m_finished);
  409. }
  410. size_t DeflateCompressor::write(ReadonlyBytes bytes)
  411. {
  412. VERIFY(!m_finished);
  413. if (bytes.size() == 0)
  414. return 0; // recursion base case
  415. auto n_written = bytes.copy_trimmed_to(pending_block().slice(m_pending_block_size));
  416. m_pending_block_size += n_written;
  417. if (m_pending_block_size == block_size)
  418. flush();
  419. return n_written + write(bytes.slice(n_written));
  420. }
  421. bool DeflateCompressor::write_or_error(ReadonlyBytes bytes)
  422. {
  423. if (write(bytes) < bytes.size()) {
  424. set_fatal_error();
  425. return false;
  426. }
  427. return true;
  428. }
  429. // Knuth's multiplicative hash on 4 bytes
  430. u16 DeflateCompressor::hash_sequence(u8 const* bytes)
  431. {
  432. constexpr const u32 knuth_constant = 2654435761; // shares no common factors with 2^32
  433. return ((bytes[0] | bytes[1] << 8 | bytes[2] << 16 | bytes[3] << 24) * knuth_constant) >> (32 - hash_bits);
  434. }
  435. size_t DeflateCompressor::compare_match_candidate(size_t start, size_t candidate, size_t previous_match_length, size_t maximum_match_length)
  436. {
  437. VERIFY(previous_match_length < maximum_match_length);
  438. // We firstly check that the match is at least (prev_match_length + 1) long, we check backwards as there's a higher chance the end mismatches
  439. for (ssize_t i = previous_match_length; i >= 0; i--) {
  440. if (m_rolling_window[start + i] != m_rolling_window[candidate + i])
  441. return 0;
  442. }
  443. // Find the actual length
  444. auto match_length = previous_match_length + 1;
  445. while (match_length < maximum_match_length && m_rolling_window[start + match_length] == m_rolling_window[candidate + match_length]) {
  446. match_length++;
  447. }
  448. VERIFY(match_length > previous_match_length);
  449. VERIFY(match_length <= maximum_match_length);
  450. return match_length;
  451. }
  452. size_t DeflateCompressor::find_back_match(size_t start, u16 hash, size_t previous_match_length, size_t maximum_match_length, size_t& match_position)
  453. {
  454. auto max_chain_length = m_compression_constants.max_chain;
  455. if (previous_match_length == 0)
  456. previous_match_length = min_match_length - 1; // we only care about matches that are at least min_match_length long
  457. if (previous_match_length >= maximum_match_length)
  458. return 0; // we can't improve a maximum length match
  459. if (previous_match_length >= m_compression_constants.max_lazy_length)
  460. return 0; // the previous match is already pretty, we shouldn't waste another full search
  461. if (previous_match_length >= m_compression_constants.good_match_length)
  462. max_chain_length /= 4; // we already have a pretty good much, so do a shorter search
  463. auto candidate = m_hash_head[hash];
  464. auto match_found = false;
  465. while (max_chain_length--) {
  466. if (candidate == empty_slot)
  467. break; // no remaining candidates
  468. VERIFY(candidate < start);
  469. if (start - candidate > window_size)
  470. break; // outside the window
  471. auto match_length = compare_match_candidate(start, candidate, previous_match_length, maximum_match_length);
  472. if (match_length != 0) {
  473. match_found = true;
  474. match_position = candidate;
  475. previous_match_length = match_length;
  476. if (match_length == maximum_match_length)
  477. return match_length; // bail if we got the maximum possible length
  478. }
  479. candidate = m_hash_prev[candidate % window_size];
  480. }
  481. if (!match_found)
  482. return 0; // we didn't find any matches
  483. return previous_match_length; // we found matches, but they were at most previous_match_length long
  484. }
  485. ALWAYS_INLINE u8 DeflateCompressor::distance_to_base(u16 distance)
  486. {
  487. return (distance <= 256) ? distance_to_base_lo[distance - 1] : distance_to_base_hi[(distance - 1) >> 7];
  488. }
  489. template<size_t Size>
  490. void DeflateCompressor::generate_huffman_lengths(Array<u8, Size>& lengths, Array<u16, Size> const& frequencies, size_t max_bit_length, u16 frequency_cap)
  491. {
  492. VERIFY((1u << max_bit_length) >= Size);
  493. u16 heap_keys[Size]; // Used for O(n) heap construction
  494. u16 heap_values[Size];
  495. u16 huffman_links[Size * 2 + 1] = { 0 };
  496. size_t non_zero_freqs = 0;
  497. for (size_t i = 0; i < Size; i++) {
  498. auto frequency = frequencies[i];
  499. if (frequency == 0)
  500. continue;
  501. if (frequency > frequency_cap) {
  502. frequency = frequency_cap;
  503. }
  504. heap_keys[non_zero_freqs] = frequency; // sort symbols by frequency
  505. heap_values[non_zero_freqs] = Size + non_zero_freqs; // huffman_links "links"
  506. non_zero_freqs++;
  507. }
  508. // special case for only 1 used symbol
  509. if (non_zero_freqs < 2) {
  510. for (size_t i = 0; i < Size; i++)
  511. lengths[i] = (frequencies[i] == 0) ? 0 : 1;
  512. return;
  513. }
  514. BinaryHeap<u16, u16, Size> heap { heap_keys, heap_values, non_zero_freqs };
  515. // build the huffman tree - binary heap is used for efficient frequency comparisons
  516. while (heap.size() > 1) {
  517. u16 lowest_frequency = heap.peek_min_key();
  518. u16 lowest_link = heap.pop_min();
  519. u16 second_lowest_frequency = heap.peek_min_key();
  520. u16 second_lowest_link = heap.pop_min();
  521. u16 new_link = heap.size() + 2;
  522. heap.insert(lowest_frequency + second_lowest_frequency, new_link);
  523. huffman_links[lowest_link] = new_link;
  524. huffman_links[second_lowest_link] = new_link;
  525. }
  526. non_zero_freqs = 0;
  527. for (size_t i = 0; i < Size; i++) {
  528. if (frequencies[i] == 0) {
  529. lengths[i] = 0;
  530. continue;
  531. }
  532. u16 link = huffman_links[Size + non_zero_freqs];
  533. non_zero_freqs++;
  534. size_t bit_length = 1;
  535. while (link != 2) {
  536. bit_length++;
  537. link = huffman_links[link];
  538. }
  539. if (bit_length > max_bit_length) {
  540. VERIFY(frequency_cap != 1);
  541. return generate_huffman_lengths(lengths, frequencies, max_bit_length, frequency_cap / 2);
  542. }
  543. lengths[i] = bit_length;
  544. }
  545. }
  546. void DeflateCompressor::lz77_compress_block()
  547. {
  548. for (auto& slot : m_hash_head) { // initialize chained hash table
  549. slot = empty_slot;
  550. }
  551. auto insert_hash = [&](auto pos, auto hash) {
  552. auto window_pos = pos % window_size;
  553. m_hash_prev[window_pos] = m_hash_head[hash];
  554. m_hash_head[hash] = window_pos;
  555. };
  556. auto emit_literal = [&](auto literal) {
  557. VERIFY(m_pending_symbol_size <= block_size + 1);
  558. auto index = m_pending_symbol_size++;
  559. m_symbol_buffer[index].distance = 0;
  560. m_symbol_buffer[index].literal = literal;
  561. m_symbol_frequencies[literal]++;
  562. };
  563. auto emit_back_reference = [&](auto distance, auto length) {
  564. VERIFY(m_pending_symbol_size <= block_size + 1);
  565. auto index = m_pending_symbol_size++;
  566. m_symbol_buffer[index].distance = distance;
  567. m_symbol_buffer[index].length = length;
  568. m_symbol_frequencies[length_to_symbol[length]]++;
  569. m_distance_frequencies[distance_to_base(distance)]++;
  570. };
  571. size_t previous_match_length = 0;
  572. size_t previous_match_position = 0;
  573. VERIFY(m_compression_constants.great_match_length <= max_match_length);
  574. // our block starts at block_size and is m_pending_block_size in length
  575. auto block_end = block_size + m_pending_block_size;
  576. size_t current_position;
  577. for (current_position = block_size; current_position < block_end - min_match_length + 1; current_position++) {
  578. auto hash = hash_sequence(&m_rolling_window[current_position]);
  579. size_t match_position;
  580. auto match_length = find_back_match(current_position, hash, previous_match_length,
  581. min(m_compression_constants.great_match_length, block_end - current_position), match_position);
  582. insert_hash(current_position, hash);
  583. // if the previous match is as good as the new match, just use it
  584. if (previous_match_length != 0 && previous_match_length >= match_length) {
  585. emit_back_reference((current_position - 1) - previous_match_position, previous_match_length);
  586. // skip all the bytes that are included in this match
  587. for (size_t j = current_position + 1; j < min(current_position - 1 + previous_match_length, block_end - min_match_length + 1); j++) {
  588. insert_hash(j, hash_sequence(&m_rolling_window[j]));
  589. }
  590. current_position = (current_position - 1) + previous_match_length - 1;
  591. previous_match_length = 0;
  592. continue;
  593. }
  594. if (match_length == 0) {
  595. VERIFY(previous_match_length == 0);
  596. emit_literal(m_rolling_window[current_position]);
  597. continue;
  598. }
  599. // if this is a lazy match, and the new match is better than the old one, output previous as literal
  600. if (previous_match_length != 0) {
  601. emit_literal(m_rolling_window[current_position - 1]);
  602. }
  603. previous_match_length = match_length;
  604. previous_match_position = match_position;
  605. }
  606. // clean up leftover lazy match
  607. if (previous_match_length != 0) {
  608. emit_back_reference((current_position - 1) - previous_match_position, previous_match_length);
  609. current_position = (current_position - 1) + previous_match_length;
  610. }
  611. // output remaining literals
  612. while (current_position < block_end) {
  613. emit_literal(m_rolling_window[current_position++]);
  614. }
  615. }
  616. size_t DeflateCompressor::huffman_block_length(Array<u8, max_huffman_literals> const& literal_bit_lengths, Array<u8, max_huffman_distances> const& distance_bit_lengths)
  617. {
  618. size_t length = 0;
  619. for (size_t i = 0; i < 286; i++) {
  620. auto frequency = m_symbol_frequencies[i];
  621. length += literal_bit_lengths[i] * frequency;
  622. if (i >= 257) // back reference length symbols
  623. length += packed_length_symbols[i - 257].extra_bits * frequency;
  624. }
  625. for (size_t i = 0; i < 30; i++) {
  626. auto frequency = m_distance_frequencies[i];
  627. length += distance_bit_lengths[i] * frequency;
  628. length += packed_distances[i].extra_bits * frequency;
  629. }
  630. return length;
  631. }
  632. size_t DeflateCompressor::uncompressed_block_length()
  633. {
  634. auto padding = 8 - ((m_output_stream.bit_offset() + 3) % 8);
  635. // 3 bit block header + align to byte + 2 * 16 bit length fields + block contents
  636. return 3 + padding + (2 * 16) + m_pending_block_size * 8;
  637. }
  638. size_t DeflateCompressor::fixed_block_length()
  639. {
  640. // block header + fixed huffman encoded block contents
  641. return 3 + huffman_block_length(fixed_literal_bit_lengths, fixed_distance_bit_lengths);
  642. }
  643. size_t DeflateCompressor::dynamic_block_length(Array<u8, max_huffman_literals> const& literal_bit_lengths, Array<u8, max_huffman_distances> const& distance_bit_lengths, Array<u8, 19> const& code_lengths_bit_lengths, Array<u16, 19> const& code_lengths_frequencies, size_t code_lengths_count)
  644. {
  645. // block header + literal code count + distance code count + code length count
  646. auto length = 3 + 5 + 5 + 4;
  647. // 3 bits per code_length
  648. length += 3 * code_lengths_count;
  649. for (size_t i = 0; i < code_lengths_frequencies.size(); i++) {
  650. auto frequency = code_lengths_frequencies[i];
  651. length += code_lengths_bit_lengths[i] * frequency;
  652. if (i == deflate_special_code_length_copy) {
  653. length += 2 * frequency;
  654. } else if (i == deflate_special_code_length_zeros) {
  655. length += 3 * frequency;
  656. } else if (i == deflate_special_code_length_long_zeros) {
  657. length += 7 * frequency;
  658. }
  659. }
  660. return length + huffman_block_length(literal_bit_lengths, distance_bit_lengths);
  661. }
  662. void DeflateCompressor::write_huffman(CanonicalCode const& literal_code, Optional<CanonicalCode> const& distance_code)
  663. {
  664. auto has_distances = distance_code.has_value();
  665. for (size_t i = 0; i < m_pending_symbol_size; i++) {
  666. if (m_symbol_buffer[i].distance == 0) {
  667. literal_code.write_symbol(m_output_stream, m_symbol_buffer[i].literal);
  668. continue;
  669. }
  670. VERIFY(has_distances);
  671. auto symbol = length_to_symbol[m_symbol_buffer[i].length];
  672. literal_code.write_symbol(m_output_stream, symbol);
  673. // Emit extra bits if needed
  674. m_output_stream.write_bits(m_symbol_buffer[i].length - packed_length_symbols[symbol - 257].base_length, packed_length_symbols[symbol - 257].extra_bits);
  675. auto base_distance = distance_to_base(m_symbol_buffer[i].distance);
  676. distance_code.value().write_symbol(m_output_stream, base_distance);
  677. // Emit extra bits if needed
  678. m_output_stream.write_bits(m_symbol_buffer[i].distance - packed_distances[base_distance].base_distance, packed_distances[base_distance].extra_bits);
  679. }
  680. }
  681. size_t DeflateCompressor::encode_huffman_lengths(Array<u8, max_huffman_literals + max_huffman_distances> const& lengths, size_t lengths_count, Array<code_length_symbol, max_huffman_literals + max_huffman_distances>& encoded_lengths)
  682. {
  683. size_t encoded_count = 0;
  684. size_t i = 0;
  685. while (i < lengths_count) {
  686. if (lengths[i] == 0) {
  687. auto zero_count = 0;
  688. for (size_t j = i; j < min(lengths_count, i + 138) && lengths[j] == 0; j++)
  689. zero_count++;
  690. if (zero_count < 3) { // below minimum repeated zero count
  691. encoded_lengths[encoded_count++].symbol = 0;
  692. i++;
  693. continue;
  694. }
  695. if (zero_count <= 10) {
  696. encoded_lengths[encoded_count].symbol = deflate_special_code_length_zeros;
  697. encoded_lengths[encoded_count++].count = zero_count;
  698. } else {
  699. encoded_lengths[encoded_count].symbol = deflate_special_code_length_long_zeros;
  700. encoded_lengths[encoded_count++].count = zero_count;
  701. }
  702. i += zero_count;
  703. continue;
  704. }
  705. encoded_lengths[encoded_count++].symbol = lengths[i++];
  706. auto copy_count = 0;
  707. for (size_t j = i; j < min(lengths_count, i + 6) && lengths[j] == lengths[i - 1]; j++)
  708. copy_count++;
  709. if (copy_count >= 3) {
  710. encoded_lengths[encoded_count].symbol = deflate_special_code_length_copy;
  711. encoded_lengths[encoded_count++].count = copy_count;
  712. i += copy_count;
  713. continue;
  714. }
  715. }
  716. return encoded_count;
  717. }
  718. size_t DeflateCompressor::encode_block_lengths(Array<u8, max_huffman_literals> const& literal_bit_lengths, Array<u8, max_huffman_distances> const& distance_bit_lengths, Array<code_length_symbol, max_huffman_literals + max_huffman_distances>& encoded_lengths, size_t& literal_code_count, size_t& distance_code_count)
  719. {
  720. literal_code_count = max_huffman_literals;
  721. distance_code_count = max_huffman_distances;
  722. VERIFY(literal_bit_lengths[256] != 0); // Make sure at least the EndOfBlock marker is present
  723. while (literal_bit_lengths[literal_code_count - 1] == 0)
  724. literal_code_count--;
  725. // Drop trailing zero lengths, keeping at least one
  726. while (distance_bit_lengths[distance_code_count - 1] == 0 && distance_code_count > 1)
  727. distance_code_count--;
  728. Array<u8, max_huffman_literals + max_huffman_distances> all_lengths {};
  729. size_t lengths_count = 0;
  730. for (size_t i = 0; i < literal_code_count; i++) {
  731. all_lengths[lengths_count++] = literal_bit_lengths[i];
  732. }
  733. for (size_t i = 0; i < distance_code_count; i++) {
  734. all_lengths[lengths_count++] = distance_bit_lengths[i];
  735. }
  736. return encode_huffman_lengths(all_lengths, lengths_count, encoded_lengths);
  737. }
  738. void DeflateCompressor::write_dynamic_huffman(CanonicalCode const& literal_code, size_t literal_code_count, Optional<CanonicalCode> const& distance_code, size_t distance_code_count, Array<u8, 19> const& code_lengths_bit_lengths, size_t code_length_count, Array<code_length_symbol, max_huffman_literals + max_huffman_distances> const& encoded_lengths, size_t encoded_lengths_count)
  739. {
  740. m_output_stream.write_bits(literal_code_count - 257, 5);
  741. m_output_stream.write_bits(distance_code_count - 1, 5);
  742. m_output_stream.write_bits(code_length_count - 4, 4);
  743. for (size_t i = 0; i < code_length_count; i++) {
  744. m_output_stream.write_bits(code_lengths_bit_lengths[code_lengths_code_lengths_order[i]], 3);
  745. }
  746. auto code_lengths_code = CanonicalCode::from_bytes(code_lengths_bit_lengths);
  747. VERIFY(code_lengths_code.has_value());
  748. for (size_t i = 0; i < encoded_lengths_count; i++) {
  749. auto encoded_length = encoded_lengths[i];
  750. code_lengths_code->write_symbol(m_output_stream, encoded_length.symbol);
  751. if (encoded_length.symbol == deflate_special_code_length_copy) {
  752. m_output_stream.write_bits(encoded_length.count - 3, 2);
  753. } else if (encoded_length.symbol == deflate_special_code_length_zeros) {
  754. m_output_stream.write_bits(encoded_length.count - 3, 3);
  755. } else if (encoded_length.symbol == deflate_special_code_length_long_zeros) {
  756. m_output_stream.write_bits(encoded_length.count - 11, 7);
  757. }
  758. }
  759. write_huffman(literal_code, distance_code);
  760. }
  761. void DeflateCompressor::flush()
  762. {
  763. if (m_output_stream.handle_any_error()) {
  764. set_fatal_error();
  765. return;
  766. }
  767. m_output_stream.write_bit(m_finished);
  768. // if this is just an empty block to signify the end of the deflate stream use the smallest block possible (10 bits total)
  769. if (m_pending_block_size == 0) {
  770. VERIFY(m_finished); // we shouldn't be writing empty blocks unless this is the final one
  771. m_output_stream.write_bits(0b01, 2); // fixed huffman codes
  772. m_output_stream.write_bits(0b0000000, 7); // end of block symbol
  773. m_output_stream.align_to_byte_boundary();
  774. return;
  775. }
  776. auto write_uncompressed = [&]() {
  777. m_output_stream.write_bits(0b00, 2); // no compression
  778. m_output_stream.align_to_byte_boundary();
  779. LittleEndian<u16> len = m_pending_block_size;
  780. m_output_stream << len;
  781. LittleEndian<u16> nlen = ~m_pending_block_size;
  782. m_output_stream << nlen;
  783. m_output_stream.write_or_error(pending_block().slice(0, m_pending_block_size));
  784. };
  785. if (m_compression_level == CompressionLevel::STORE) { // disabled compression fast path
  786. write_uncompressed();
  787. m_pending_block_size = 0;
  788. return;
  789. }
  790. // The following implementation of lz77 compression and huffman encoding is based on the reference implementation by Hans Wennborg https://www.hanshq.net/zip.html
  791. // this reads from the pending block and writes to m_symbol_buffer
  792. lz77_compress_block();
  793. // insert EndOfBlock marker to the symbol buffer
  794. m_symbol_buffer[m_pending_symbol_size].distance = 0;
  795. m_symbol_buffer[m_pending_symbol_size++].literal = 256;
  796. m_symbol_frequencies[256]++;
  797. // generate optimal dynamic huffman code lengths
  798. Array<u8, max_huffman_literals> dynamic_literal_bit_lengths {};
  799. Array<u8, max_huffman_distances> dynamic_distance_bit_lengths {};
  800. generate_huffman_lengths(dynamic_literal_bit_lengths, m_symbol_frequencies, 15); // deflate data huffman can use up to 15 bits per symbol
  801. generate_huffman_lengths(dynamic_distance_bit_lengths, m_distance_frequencies, 15);
  802. // encode literal and distance lengths together in deflate format
  803. Array<code_length_symbol, max_huffman_literals + max_huffman_distances> encoded_lengths {};
  804. size_t literal_code_count;
  805. size_t distance_code_count;
  806. auto encoded_lengths_count = encode_block_lengths(dynamic_literal_bit_lengths, dynamic_distance_bit_lengths, encoded_lengths, literal_code_count, distance_code_count);
  807. // count code length frequencies
  808. Array<u16, 19> code_lengths_frequencies { 0 };
  809. for (size_t i = 0; i < encoded_lengths_count; i++) {
  810. code_lengths_frequencies[encoded_lengths[i].symbol]++;
  811. }
  812. // generate optimal huffman code lengths code lengths
  813. Array<u8, 19> code_lengths_bit_lengths {};
  814. generate_huffman_lengths(code_lengths_bit_lengths, code_lengths_frequencies, 7); // deflate code length huffman can use up to 7 bits per symbol
  815. // calculate actual code length code lengths count (without trailing zeros)
  816. auto code_lengths_count = code_lengths_bit_lengths.size();
  817. while (code_lengths_bit_lengths[code_lengths_code_lengths_order[code_lengths_count - 1]] == 0)
  818. code_lengths_count--;
  819. auto uncompressed_size = uncompressed_block_length();
  820. auto fixed_huffman_size = fixed_block_length();
  821. auto dynamic_huffman_size = dynamic_block_length(dynamic_literal_bit_lengths, dynamic_distance_bit_lengths, code_lengths_bit_lengths, code_lengths_frequencies, code_lengths_count);
  822. // If the compression somehow didn't reduce the size enough, just write out the block uncompressed as it allows for much faster decompression
  823. if (uncompressed_size <= min(fixed_huffman_size, dynamic_huffman_size)) {
  824. write_uncompressed();
  825. } else if (fixed_huffman_size <= dynamic_huffman_size) { // If the fixed and dynamic huffman codes come out the same size, prefer the fixed version, as it takes less time to decode
  826. m_output_stream.write_bits(0b01, 2); // fixed huffman codes
  827. write_huffman(CanonicalCode::fixed_literal_codes(), CanonicalCode::fixed_distance_codes());
  828. } else {
  829. m_output_stream.write_bits(0b10, 2); // dynamic huffman codes
  830. auto literal_code = CanonicalCode::from_bytes(dynamic_literal_bit_lengths);
  831. VERIFY(literal_code.has_value());
  832. auto distance_code = CanonicalCode::from_bytes(dynamic_distance_bit_lengths);
  833. write_dynamic_huffman(literal_code.value(), literal_code_count, distance_code, distance_code_count, code_lengths_bit_lengths, code_lengths_count, encoded_lengths, encoded_lengths_count);
  834. }
  835. if (m_finished)
  836. m_output_stream.align_to_byte_boundary();
  837. // reset all block specific members
  838. m_pending_block_size = 0;
  839. m_pending_symbol_size = 0;
  840. m_symbol_frequencies.fill(0);
  841. m_distance_frequencies.fill(0);
  842. // On the final block this copy will potentially produce an invalid search window, but since its the final block we dont care
  843. pending_block().copy_trimmed_to({ m_rolling_window, block_size });
  844. }
  845. void DeflateCompressor::final_flush()
  846. {
  847. VERIFY(!m_finished);
  848. m_finished = true;
  849. flush();
  850. }
  851. Optional<ByteBuffer> DeflateCompressor::compress_all(ReadonlyBytes bytes, CompressionLevel compression_level)
  852. {
  853. DuplexMemoryStream output_stream;
  854. DeflateCompressor deflate_stream { output_stream, compression_level };
  855. deflate_stream.write_or_error(bytes);
  856. deflate_stream.final_flush();
  857. if (deflate_stream.handle_any_error())
  858. return {};
  859. return output_stream.copy_into_contiguous_buffer();
  860. }
  861. }