Deflate.cpp 37 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036
  1. /*
  2. * Copyright (c) 2020, the SerenityOS developers
  3. * Copyright (c) 2021, Idan Horowitz <idan.horowitz@gmail.com>
  4. * All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions are met:
  8. *
  9. * 1. Redistributions of source code must retain the above copyright notice, this
  10. * list of conditions and the following disclaimer.
  11. *
  12. * 2. Redistributions in binary form must reproduce the above copyright notice,
  13. * this list of conditions and the following disclaimer in the documentation
  14. * and/or other materials provided with the distribution.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  17. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  20. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  22. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  23. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  24. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  25. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. #include <AK/Array.h>
  28. #include <AK/Assertions.h>
  29. #include <AK/BinaryHeap.h>
  30. #include <AK/BinarySearch.h>
  31. #include <AK/MemoryStream.h>
  32. #include <string.h>
  33. #include <LibCompress/Deflate.h>
  34. namespace Compress {
  35. const CanonicalCode& CanonicalCode::fixed_literal_codes()
  36. {
  37. static CanonicalCode code;
  38. static bool initialized = false;
  39. if (initialized)
  40. return code;
  41. code = CanonicalCode::from_bytes(fixed_literal_bit_lengths).value();
  42. initialized = true;
  43. return code;
  44. }
  45. const CanonicalCode& CanonicalCode::fixed_distance_codes()
  46. {
  47. static CanonicalCode code;
  48. static bool initialized = false;
  49. if (initialized)
  50. return code;
  51. code = CanonicalCode::from_bytes(fixed_distance_bit_lengths).value();
  52. initialized = true;
  53. return code;
  54. }
  55. Optional<CanonicalCode> CanonicalCode::from_bytes(ReadonlyBytes bytes)
  56. {
  57. // FIXME: I can't quite follow the algorithm here, but it seems to work.
  58. CanonicalCode code;
  59. auto non_zero_symbols = 0;
  60. auto last_non_zero = -1;
  61. for (size_t i = 0; i < bytes.size(); i++) {
  62. if (bytes[i] != 0) {
  63. non_zero_symbols++;
  64. last_non_zero = i;
  65. }
  66. }
  67. if (non_zero_symbols == 1) { // special case - only 1 symbol
  68. code.m_symbol_codes.append(0b10);
  69. code.m_symbol_values.append(last_non_zero);
  70. code.m_bit_codes[last_non_zero] = 0;
  71. code.m_bit_code_lengths[last_non_zero] = 1;
  72. return code;
  73. }
  74. auto next_code = 0;
  75. for (size_t code_length = 1; code_length <= 15; ++code_length) {
  76. next_code <<= 1;
  77. auto start_bit = 1 << code_length;
  78. for (size_t symbol = 0; symbol < bytes.size(); ++symbol) {
  79. if (bytes[symbol] != code_length)
  80. continue;
  81. if (next_code > start_bit)
  82. return {};
  83. code.m_symbol_codes.append(start_bit | next_code);
  84. code.m_symbol_values.append(symbol);
  85. code.m_bit_codes[symbol] = fast_reverse16(start_bit | next_code, code_length); // DEFLATE writes huffman encoded symbols as lsb-first
  86. code.m_bit_code_lengths[symbol] = code_length;
  87. next_code++;
  88. }
  89. }
  90. if (next_code != (1 << 15)) {
  91. return {};
  92. }
  93. return code;
  94. }
  95. u32 CanonicalCode::read_symbol(InputBitStream& stream) const
  96. {
  97. u32 code_bits = 1;
  98. for (;;) {
  99. code_bits = code_bits << 1 | stream.read_bits(1);
  100. VERIFY(code_bits < (1 << 16));
  101. // FIXME: This is very inefficient and could greatly be improved by implementing this
  102. // algorithm: https://www.hanshq.net/zip.html#huffdec
  103. size_t index;
  104. if (binary_search(m_symbol_codes.span(), code_bits, &index))
  105. return m_symbol_values[index];
  106. }
  107. }
  108. void CanonicalCode::write_symbol(OutputBitStream& stream, u32 symbol) const
  109. {
  110. stream.write_bits(m_bit_codes[symbol], m_bit_code_lengths[symbol]);
  111. }
  112. DeflateDecompressor::CompressedBlock::CompressedBlock(DeflateDecompressor& decompressor, CanonicalCode literal_codes, Optional<CanonicalCode> distance_codes)
  113. : m_decompressor(decompressor)
  114. , m_literal_codes(literal_codes)
  115. , m_distance_codes(distance_codes)
  116. {
  117. }
  118. bool DeflateDecompressor::CompressedBlock::try_read_more()
  119. {
  120. if (m_eof == true)
  121. return false;
  122. const auto symbol = m_literal_codes.read_symbol(m_decompressor.m_input_stream);
  123. if (symbol < 256) {
  124. m_decompressor.m_output_stream << static_cast<u8>(symbol);
  125. return true;
  126. } else if (symbol == 256) {
  127. m_eof = true;
  128. return false;
  129. } else {
  130. if (!m_distance_codes.has_value()) {
  131. m_decompressor.set_fatal_error();
  132. return false;
  133. }
  134. const auto length = m_decompressor.decode_length(symbol);
  135. const auto distance = m_decompressor.decode_distance(m_distance_codes.value().read_symbol(m_decompressor.m_input_stream));
  136. for (size_t idx = 0; idx < length; ++idx) {
  137. u8 byte = 0;
  138. m_decompressor.m_output_stream.read({ &byte, sizeof(byte) }, distance);
  139. m_decompressor.m_output_stream << byte;
  140. }
  141. return true;
  142. }
  143. }
  144. DeflateDecompressor::UncompressedBlock::UncompressedBlock(DeflateDecompressor& decompressor, size_t length)
  145. : m_decompressor(decompressor)
  146. , m_bytes_remaining(length)
  147. {
  148. }
  149. bool DeflateDecompressor::UncompressedBlock::try_read_more()
  150. {
  151. if (m_bytes_remaining == 0)
  152. return false;
  153. const auto nread = min(m_bytes_remaining, m_decompressor.m_output_stream.remaining_contigous_space());
  154. m_bytes_remaining -= nread;
  155. m_decompressor.m_input_stream >> m_decompressor.m_output_stream.reserve_contigous_space(nread);
  156. return true;
  157. }
  158. DeflateDecompressor::DeflateDecompressor(InputStream& stream)
  159. : m_input_stream(stream)
  160. {
  161. }
  162. DeflateDecompressor::~DeflateDecompressor()
  163. {
  164. if (m_state == State::ReadingCompressedBlock)
  165. m_compressed_block.~CompressedBlock();
  166. if (m_state == State::ReadingUncompressedBlock)
  167. m_uncompressed_block.~UncompressedBlock();
  168. }
  169. size_t DeflateDecompressor::read(Bytes bytes)
  170. {
  171. if (has_any_error())
  172. return 0;
  173. if (m_state == State::Idle) {
  174. if (m_read_final_bock)
  175. return 0;
  176. m_read_final_bock = m_input_stream.read_bit();
  177. const auto block_type = m_input_stream.read_bits(2);
  178. if (block_type == 0b00) {
  179. m_input_stream.align_to_byte_boundary();
  180. LittleEndian<u16> length, negated_length;
  181. m_input_stream >> length >> negated_length;
  182. if ((length ^ 0xffff) != negated_length) {
  183. set_fatal_error();
  184. return 0;
  185. }
  186. m_state = State::ReadingUncompressedBlock;
  187. new (&m_uncompressed_block) UncompressedBlock(*this, length);
  188. return read(bytes);
  189. }
  190. if (block_type == 0b01) {
  191. m_state = State::ReadingCompressedBlock;
  192. new (&m_compressed_block) CompressedBlock(*this, CanonicalCode::fixed_literal_codes(), CanonicalCode::fixed_distance_codes());
  193. return read(bytes);
  194. }
  195. if (block_type == 0b10) {
  196. CanonicalCode literal_codes;
  197. Optional<CanonicalCode> distance_codes;
  198. decode_codes(literal_codes, distance_codes);
  199. m_state = State::ReadingCompressedBlock;
  200. new (&m_compressed_block) CompressedBlock(*this, literal_codes, distance_codes);
  201. return read(bytes);
  202. }
  203. set_fatal_error();
  204. return 0;
  205. }
  206. if (m_state == State::ReadingCompressedBlock) {
  207. auto nread = m_output_stream.read(bytes);
  208. while (nread < bytes.size() && m_compressed_block.try_read_more()) {
  209. nread += m_output_stream.read(bytes.slice(nread));
  210. }
  211. if (nread == bytes.size())
  212. return nread;
  213. m_compressed_block.~CompressedBlock();
  214. m_state = State::Idle;
  215. return nread + read(bytes.slice(nread));
  216. }
  217. if (m_state == State::ReadingUncompressedBlock) {
  218. auto nread = m_output_stream.read(bytes);
  219. while (nread < bytes.size() && m_uncompressed_block.try_read_more()) {
  220. nread += m_output_stream.read(bytes.slice(nread));
  221. }
  222. if (nread == bytes.size())
  223. return nread;
  224. m_uncompressed_block.~UncompressedBlock();
  225. m_state = State::Idle;
  226. return nread + read(bytes.slice(nread));
  227. }
  228. VERIFY_NOT_REACHED();
  229. }
  230. bool DeflateDecompressor::read_or_error(Bytes bytes)
  231. {
  232. if (read(bytes) < bytes.size()) {
  233. set_fatal_error();
  234. return false;
  235. }
  236. return true;
  237. }
  238. bool DeflateDecompressor::discard_or_error(size_t count)
  239. {
  240. u8 buffer[4096];
  241. size_t ndiscarded = 0;
  242. while (ndiscarded < count) {
  243. if (unreliable_eof()) {
  244. set_fatal_error();
  245. return false;
  246. }
  247. ndiscarded += read({ buffer, min<size_t>(count - ndiscarded, 4096) });
  248. }
  249. return true;
  250. }
  251. bool DeflateDecompressor::unreliable_eof() const { return m_state == State::Idle && m_read_final_bock; }
  252. bool DeflateDecompressor::handle_any_error()
  253. {
  254. return m_input_stream.handle_any_error() || Stream::handle_any_error();
  255. }
  256. Optional<ByteBuffer> DeflateDecompressor::decompress_all(ReadonlyBytes bytes)
  257. {
  258. InputMemoryStream memory_stream { bytes };
  259. DeflateDecompressor deflate_stream { memory_stream };
  260. DuplexMemoryStream output_stream;
  261. u8 buffer[4096];
  262. while (!deflate_stream.has_any_error() && !deflate_stream.unreliable_eof()) {
  263. const auto nread = deflate_stream.read({ buffer, sizeof(buffer) });
  264. output_stream.write_or_error({ buffer, nread });
  265. }
  266. if (deflate_stream.handle_any_error())
  267. return {};
  268. return output_stream.copy_into_contiguous_buffer();
  269. }
  270. u32 DeflateDecompressor::decode_length(u32 symbol)
  271. {
  272. // FIXME: I can't quite follow the algorithm here, but it seems to work.
  273. if (symbol <= 264)
  274. return symbol - 254;
  275. if (symbol <= 284) {
  276. auto extra_bits = (symbol - 261) / 4;
  277. return (((symbol - 265) % 4 + 4) << extra_bits) + 3 + m_input_stream.read_bits(extra_bits);
  278. }
  279. if (symbol == 285)
  280. return 258;
  281. VERIFY_NOT_REACHED();
  282. }
  283. u32 DeflateDecompressor::decode_distance(u32 symbol)
  284. {
  285. // FIXME: I can't quite follow the algorithm here, but it seems to work.
  286. if (symbol <= 3)
  287. return symbol + 1;
  288. if (symbol <= 29) {
  289. auto extra_bits = (symbol / 2) - 1;
  290. return ((symbol % 2 + 2) << extra_bits) + 1 + m_input_stream.read_bits(extra_bits);
  291. }
  292. VERIFY_NOT_REACHED();
  293. }
  294. void DeflateDecompressor::decode_codes(CanonicalCode& literal_code, Optional<CanonicalCode>& distance_code)
  295. {
  296. auto literal_code_count = m_input_stream.read_bits(5) + 257;
  297. auto distance_code_count = m_input_stream.read_bits(5) + 1;
  298. auto code_length_count = m_input_stream.read_bits(4) + 4;
  299. // First we have to extract the code lengths of the code that was used to encode the code lengths of
  300. // the code that was used to encode the block.
  301. u8 code_lengths_code_lengths[19] = { 0 };
  302. for (size_t i = 0; i < code_length_count; ++i) {
  303. code_lengths_code_lengths[code_lengths_code_lengths_order[i]] = m_input_stream.read_bits(3);
  304. }
  305. // Now we can extract the code that was used to encode the code lengths of the code that was used to
  306. // encode the block.
  307. auto code_length_code_result = CanonicalCode::from_bytes({ code_lengths_code_lengths, sizeof(code_lengths_code_lengths) });
  308. if (!code_length_code_result.has_value()) {
  309. set_fatal_error();
  310. return;
  311. }
  312. const auto code_length_code = code_length_code_result.value();
  313. // Next we extract the code lengths of the code that was used to encode the block.
  314. Vector<u8> code_lengths;
  315. while (code_lengths.size() < literal_code_count + distance_code_count) {
  316. auto symbol = code_length_code.read_symbol(m_input_stream);
  317. if (symbol < DeflateSpecialCodeLengths::COPY) {
  318. code_lengths.append(static_cast<u8>(symbol));
  319. continue;
  320. } else if (symbol == DeflateSpecialCodeLengths::ZEROS) {
  321. auto nrepeat = 3 + m_input_stream.read_bits(3);
  322. for (size_t j = 0; j < nrepeat; ++j)
  323. code_lengths.append(0);
  324. continue;
  325. } else if (symbol == DeflateSpecialCodeLengths::LONG_ZEROS) {
  326. auto nrepeat = 11 + m_input_stream.read_bits(7);
  327. for (size_t j = 0; j < nrepeat; ++j)
  328. code_lengths.append(0);
  329. continue;
  330. } else {
  331. VERIFY(symbol == DeflateSpecialCodeLengths::COPY);
  332. if (code_lengths.is_empty()) {
  333. set_fatal_error();
  334. return;
  335. }
  336. auto nrepeat = 3 + m_input_stream.read_bits(2);
  337. for (size_t j = 0; j < nrepeat; ++j)
  338. code_lengths.append(code_lengths.last());
  339. }
  340. }
  341. if (code_lengths.size() != literal_code_count + distance_code_count) {
  342. set_fatal_error();
  343. return;
  344. }
  345. // Now we extract the code that was used to encode literals and lengths in the block.
  346. auto literal_code_result = CanonicalCode::from_bytes(code_lengths.span().trim(literal_code_count));
  347. if (!literal_code_result.has_value()) {
  348. set_fatal_error();
  349. return;
  350. }
  351. literal_code = literal_code_result.value();
  352. // Now we extract the code that was used to encode distances in the block.
  353. if (distance_code_count == 1) {
  354. auto length = code_lengths[literal_code_count];
  355. if (length == 0) {
  356. return;
  357. } else if (length != 1) {
  358. set_fatal_error();
  359. return;
  360. }
  361. }
  362. auto distance_code_result = CanonicalCode::from_bytes(code_lengths.span().slice(literal_code_count));
  363. if (!distance_code_result.has_value()) {
  364. set_fatal_error();
  365. return;
  366. }
  367. distance_code = distance_code_result.value();
  368. }
  369. DeflateCompressor::DeflateCompressor(OutputStream& stream, CompressionLevel compression_level)
  370. : m_compression_level(compression_level)
  371. , m_compression_constants(compression_constants[static_cast<int>(m_compression_level)])
  372. , m_output_stream(stream)
  373. {
  374. m_symbol_frequencies.fill(0);
  375. m_distance_frequencies.fill(0);
  376. }
  377. DeflateCompressor::~DeflateCompressor()
  378. {
  379. VERIFY(m_finished);
  380. }
  381. size_t DeflateCompressor::write(ReadonlyBytes bytes)
  382. {
  383. VERIFY(!m_finished);
  384. if (bytes.size() == 0)
  385. return 0; // recursion base case
  386. auto n_written = bytes.copy_trimmed_to(pending_block().slice(m_pending_block_size));
  387. m_pending_block_size += n_written;
  388. if (m_pending_block_size == block_size)
  389. flush();
  390. return n_written + write(bytes.slice(n_written));
  391. }
  392. bool DeflateCompressor::write_or_error(ReadonlyBytes bytes)
  393. {
  394. if (write(bytes) < bytes.size()) {
  395. set_fatal_error();
  396. return false;
  397. }
  398. return true;
  399. }
  400. // Knuth's multiplicative hash on 4 bytes
  401. u16 DeflateCompressor::hash_sequence(const u8* bytes)
  402. {
  403. constexpr const u32 knuth_constant = 2654435761; // shares no common factors with 2^32
  404. return ((bytes[0] | bytes[1] << 8 | bytes[2] << 16 | bytes[3] << 24) * knuth_constant) >> (32 - hash_bits);
  405. }
  406. size_t DeflateCompressor::compare_match_candidate(size_t start, size_t candidate, size_t previous_match_length, size_t maximum_match_length)
  407. {
  408. VERIFY(previous_match_length < maximum_match_length);
  409. // We firstly check that the match is at least (prev_match_length + 1) long, we check backwards as theres a higher chance the end mismatches
  410. for (ssize_t i = previous_match_length; i >= 0; i--) {
  411. if (m_rolling_window[start + i] != m_rolling_window[candidate + i])
  412. return 0;
  413. }
  414. // Find the actual length
  415. auto match_length = previous_match_length + 1;
  416. while (match_length < maximum_match_length && m_rolling_window[start + match_length] == m_rolling_window[candidate + match_length]) {
  417. match_length++;
  418. }
  419. VERIFY(match_length > previous_match_length);
  420. VERIFY(match_length <= maximum_match_length);
  421. return match_length;
  422. }
  423. size_t DeflateCompressor::find_back_match(size_t start, u16 hash, size_t previous_match_length, size_t maximum_match_length, size_t& match_position)
  424. {
  425. auto max_chain_length = m_compression_constants.max_chain;
  426. if (previous_match_length == 0)
  427. previous_match_length = min_match_length - 1; // we only care about matches that are at least min_match_length long
  428. if (previous_match_length >= maximum_match_length)
  429. return 0; // we cant improve a maximum length match
  430. if (previous_match_length >= m_compression_constants.max_lazy_length)
  431. return 0; // the previous match is already pretty, we shouldn't waste another full search
  432. if (previous_match_length >= m_compression_constants.good_match_length)
  433. max_chain_length /= 4; // we already have a pretty good much, so do a shorter search
  434. auto candidate = m_hash_head[hash];
  435. auto match_found = false;
  436. while (max_chain_length--) {
  437. if (candidate == empty_slot)
  438. break; // no remaining candidates
  439. VERIFY(candidate < start);
  440. if (start - candidate > window_size)
  441. break; // outside the window
  442. auto match_length = compare_match_candidate(start, candidate, previous_match_length, maximum_match_length);
  443. if (match_length != 0) {
  444. match_found = true;
  445. match_position = candidate;
  446. previous_match_length = match_length;
  447. if (match_length == maximum_match_length)
  448. return match_length; // bail if we got the maximum possible length
  449. }
  450. candidate = m_hash_prev[candidate % window_size];
  451. }
  452. if (!match_found)
  453. return 0; // we didnt find any matches
  454. return previous_match_length; // we found matches, but they were at most previous_match_length long
  455. }
  456. ALWAYS_INLINE u8 DeflateCompressor::distance_to_base(u16 distance)
  457. {
  458. return (distance <= 256) ? distance_to_base_lo[distance - 1] : distance_to_base_hi[(distance - 1) >> 7];
  459. }
  460. template<size_t Size>
  461. void DeflateCompressor::generate_huffman_lengths(Array<u8, Size>& lengths, const Array<u16, Size>& frequencies, size_t max_bit_length, u16 frequency_cap)
  462. {
  463. VERIFY((1u << max_bit_length) >= Size);
  464. u16 heap_keys[Size]; // Used for O(n) heap construction
  465. u16 heap_values[Size];
  466. u16 huffman_links[Size * 2 + 1] = { 0 };
  467. size_t non_zero_freqs = 0;
  468. for (size_t i = 0; i < Size; i++) {
  469. auto frequency = frequencies[i];
  470. if (frequency == 0)
  471. continue;
  472. if (frequency > frequency_cap) {
  473. frequency = frequency_cap;
  474. }
  475. heap_keys[non_zero_freqs] = frequency; // sort symbols by frequency
  476. heap_values[non_zero_freqs] = Size + non_zero_freqs; // huffman_links "links"
  477. non_zero_freqs++;
  478. }
  479. // special case for only 1 used symbol
  480. if (non_zero_freqs < 2) {
  481. for (size_t i = 0; i < Size; i++)
  482. lengths[i] = (frequencies[i] == 0) ? 0 : 1;
  483. return;
  484. }
  485. BinaryHeap<u16, u16, Size> heap { heap_keys, heap_values, non_zero_freqs };
  486. // build the huffman tree - binary heap is used for efficient frequency comparisons
  487. while (heap.size() > 1) {
  488. u16 lowest_frequency = heap.peek_min_key();
  489. u16 lowest_link = heap.pop_min();
  490. u16 second_lowest_frequency = heap.peek_min_key();
  491. u16 second_lowest_link = heap.pop_min();
  492. u16 new_link = heap.size() + 2;
  493. heap.insert(lowest_frequency + second_lowest_frequency, new_link);
  494. huffman_links[lowest_link] = new_link;
  495. huffman_links[second_lowest_link] = new_link;
  496. }
  497. non_zero_freqs = 0;
  498. for (size_t i = 0; i < Size; i++) {
  499. if (frequencies[i] == 0) {
  500. lengths[i] = 0;
  501. continue;
  502. }
  503. u16 link = huffman_links[Size + non_zero_freqs];
  504. non_zero_freqs++;
  505. size_t bit_length = 1;
  506. while (link != 2) {
  507. bit_length++;
  508. link = huffman_links[link];
  509. }
  510. if (bit_length > max_bit_length) {
  511. VERIFY(frequency_cap != 1);
  512. return generate_huffman_lengths(lengths, frequencies, max_bit_length, frequency_cap / 2);
  513. }
  514. lengths[i] = bit_length;
  515. }
  516. }
  517. void DeflateCompressor::lz77_compress_block()
  518. {
  519. for (auto& slot : m_hash_head) { // initialize chained hash table
  520. slot = empty_slot;
  521. }
  522. auto insert_hash = [&](auto pos, auto hash) {
  523. auto window_pos = pos % window_size;
  524. m_hash_prev[window_pos] = m_hash_head[hash];
  525. m_hash_head[hash] = window_pos;
  526. };
  527. auto emit_literal = [&](auto literal) {
  528. VERIFY(m_pending_symbol_size <= block_size + 1);
  529. auto index = m_pending_symbol_size++;
  530. m_symbol_buffer[index].distance = 0;
  531. m_symbol_buffer[index].literal = literal;
  532. m_symbol_frequencies[literal]++;
  533. };
  534. auto emit_back_reference = [&](auto distance, auto length) {
  535. VERIFY(m_pending_symbol_size <= block_size + 1);
  536. auto index = m_pending_symbol_size++;
  537. m_symbol_buffer[index].distance = distance;
  538. m_symbol_buffer[index].length = length;
  539. m_symbol_frequencies[length_to_symbol[length]]++;
  540. m_distance_frequencies[distance_to_base(distance)]++;
  541. };
  542. size_t previous_match_length = 0;
  543. size_t previous_match_position = 0;
  544. VERIFY(m_compression_constants.great_match_length <= max_match_length);
  545. // our block starts at block_size and is m_pending_block_size in length
  546. auto block_end = block_size + m_pending_block_size;
  547. size_t current_position;
  548. for (current_position = block_size; current_position < block_end - min_match_length + 1; current_position++) {
  549. auto hash = hash_sequence(&m_rolling_window[current_position]);
  550. size_t match_position;
  551. auto match_length = find_back_match(current_position, hash, previous_match_length,
  552. min(m_compression_constants.great_match_length, block_end - current_position), match_position);
  553. insert_hash(current_position, hash);
  554. // if the previous match is as good as the new match, just use it
  555. if (previous_match_length != 0 && previous_match_length >= match_length) {
  556. emit_back_reference((current_position - 1) - previous_match_position, previous_match_length);
  557. // skip all the bytes that are included in this match
  558. for (size_t j = current_position + 1; j < min(current_position - 1 + previous_match_length, block_end - min_match_length + 1); j++) {
  559. insert_hash(j, hash_sequence(&m_rolling_window[j]));
  560. }
  561. current_position = (current_position - 1) + previous_match_length - 1;
  562. previous_match_length = 0;
  563. continue;
  564. }
  565. if (match_length == 0) {
  566. VERIFY(previous_match_length == 0);
  567. emit_literal(m_rolling_window[current_position]);
  568. continue;
  569. }
  570. // if this is a lazy match, and the new match is better than the old one, output previous as literal
  571. if (previous_match_length != 0) {
  572. emit_literal(m_rolling_window[current_position - 1]);
  573. }
  574. previous_match_length = match_length;
  575. previous_match_position = match_position;
  576. }
  577. // clean up leftover lazy match
  578. if (previous_match_length != 0) {
  579. emit_back_reference((current_position - 1) - previous_match_position, previous_match_length);
  580. current_position = (current_position - 1) + previous_match_length;
  581. }
  582. // output remaining literals
  583. while (current_position < block_end) {
  584. emit_literal(m_rolling_window[current_position++]);
  585. }
  586. }
  587. size_t DeflateCompressor::huffman_block_length(const Array<u8, max_huffman_literals>& literal_bit_lengths, const Array<u8, max_huffman_distances>& distance_bit_lengths)
  588. {
  589. size_t length = 0;
  590. for (size_t i = 0; i < 286; i++) {
  591. auto frequency = m_symbol_frequencies[i];
  592. length += literal_bit_lengths[i] * frequency;
  593. if (i >= 257) // back reference length symbols
  594. length += packed_length_symbols[i - 257].extra_bits * frequency;
  595. }
  596. for (size_t i = 0; i < 30; i++) {
  597. auto frequency = m_distance_frequencies[i];
  598. length += distance_bit_lengths[i] * frequency;
  599. length += packed_distances[i].extra_bits * frequency;
  600. }
  601. return length;
  602. }
  603. size_t DeflateCompressor::uncompressed_block_length()
  604. {
  605. auto padding = 8 - ((m_output_stream.bit_offset() + 3) % 8);
  606. // 3 bit block header + align to byte + 2 * 16 bit length fields + block contents
  607. return 3 + padding + (2 * 16) + m_pending_block_size * 8;
  608. }
  609. size_t DeflateCompressor::fixed_block_length()
  610. {
  611. // block header + fixed huffman encoded block contents
  612. return 3 + huffman_block_length(fixed_literal_bit_lengths, fixed_distance_bit_lengths);
  613. }
  614. size_t DeflateCompressor::dynamic_block_length(const Array<u8, max_huffman_literals>& literal_bit_lengths, const Array<u8, max_huffman_distances>& distance_bit_lengths, const Array<u8, 19>& code_lengths_bit_lengths, const Array<u16, 19>& code_lengths_frequencies, size_t code_lengths_count)
  615. {
  616. // block header + literal code count + distance code count + code length count
  617. auto length = 3 + 5 + 5 + 4;
  618. // 3 bits per code_length
  619. length += 3 * code_lengths_count;
  620. for (size_t i = 0; i < code_lengths_frequencies.size(); i++) {
  621. auto frequency = code_lengths_frequencies[i];
  622. length += code_lengths_bit_lengths[i] * frequency;
  623. if (i == DeflateSpecialCodeLengths::COPY) {
  624. length += 2 * frequency;
  625. } else if (i == DeflateSpecialCodeLengths::ZEROS) {
  626. length += 3 * frequency;
  627. } else if (i == DeflateSpecialCodeLengths::LONG_ZEROS) {
  628. length += 7 * frequency;
  629. }
  630. }
  631. return length + huffman_block_length(literal_bit_lengths, distance_bit_lengths);
  632. }
  633. void DeflateCompressor::write_huffman(const CanonicalCode& literal_code, const Optional<CanonicalCode>& distance_code)
  634. {
  635. auto has_distances = distance_code.has_value();
  636. for (size_t i = 0; i < m_pending_symbol_size; i++) {
  637. if (m_symbol_buffer[i].distance == 0) {
  638. literal_code.write_symbol(m_output_stream, m_symbol_buffer[i].literal);
  639. continue;
  640. }
  641. VERIFY(has_distances);
  642. auto symbol = length_to_symbol[m_symbol_buffer[i].length];
  643. literal_code.write_symbol(m_output_stream, symbol);
  644. // Emit extra bits if needed
  645. m_output_stream.write_bits(m_symbol_buffer[i].length - packed_length_symbols[symbol - 257].base_length, packed_length_symbols[symbol - 257].extra_bits);
  646. auto base_distance = distance_to_base(m_symbol_buffer[i].distance);
  647. distance_code.value().write_symbol(m_output_stream, base_distance);
  648. // Emit extra bits if needed
  649. m_output_stream.write_bits(m_symbol_buffer[i].distance - packed_distances[base_distance].base_distance, packed_distances[base_distance].extra_bits);
  650. }
  651. }
  652. size_t DeflateCompressor::encode_huffman_lengths(const Array<u8, max_huffman_literals + max_huffman_distances>& lengths, size_t lengths_count, Array<code_length_symbol, max_huffman_literals + max_huffman_distances>& encoded_lengths)
  653. {
  654. size_t encoded_count = 0;
  655. size_t i = 0;
  656. while (i < lengths_count) {
  657. if (lengths[i] == 0) {
  658. auto zero_count = 0;
  659. for (size_t j = i; j < min(lengths_count, i + 138) && lengths[j] == 0; j++)
  660. zero_count++;
  661. if (zero_count < 3) { // below minimum repeated zero count
  662. encoded_lengths[encoded_count++].symbol = 0;
  663. i++;
  664. continue;
  665. }
  666. if (zero_count <= 10) {
  667. encoded_lengths[encoded_count].symbol = DeflateSpecialCodeLengths::ZEROS;
  668. encoded_lengths[encoded_count++].count = zero_count;
  669. } else {
  670. encoded_lengths[encoded_count].symbol = DeflateSpecialCodeLengths::LONG_ZEROS;
  671. encoded_lengths[encoded_count++].count = zero_count;
  672. }
  673. i += zero_count;
  674. continue;
  675. }
  676. encoded_lengths[encoded_count++].symbol = lengths[i++];
  677. auto copy_count = 0;
  678. for (size_t j = i; j < min(lengths_count, i + 6) && lengths[j] == lengths[i - 1]; j++)
  679. copy_count++;
  680. if (copy_count >= 3) {
  681. encoded_lengths[encoded_count].symbol = DeflateSpecialCodeLengths::COPY;
  682. encoded_lengths[encoded_count++].count = copy_count;
  683. i += copy_count;
  684. continue;
  685. }
  686. }
  687. return encoded_count;
  688. }
  689. size_t DeflateCompressor::encode_block_lengths(const Array<u8, max_huffman_literals>& literal_bit_lengths, const Array<u8, max_huffman_distances>& distance_bit_lengths, Array<code_length_symbol, max_huffman_literals + max_huffman_distances>& encoded_lengths, size_t& literal_code_count, size_t& distance_code_count)
  690. {
  691. literal_code_count = max_huffman_literals;
  692. distance_code_count = max_huffman_distances;
  693. VERIFY(literal_bit_lengths[256] != 0); // Make sure at least the EndOfBlock marker is present
  694. while (literal_bit_lengths[literal_code_count - 1] == 0)
  695. literal_code_count--;
  696. // Drop trailing zero lengths, keeping at least one
  697. while (distance_bit_lengths[distance_code_count - 1] == 0 && distance_code_count > 1)
  698. distance_code_count--;
  699. Array<u8, max_huffman_literals + max_huffman_distances> all_lengths {};
  700. size_t lengths_count = 0;
  701. for (size_t i = 0; i < literal_code_count; i++) {
  702. all_lengths[lengths_count++] = literal_bit_lengths[i];
  703. }
  704. for (size_t i = 0; i < distance_code_count; i++) {
  705. all_lengths[lengths_count++] = distance_bit_lengths[i];
  706. }
  707. return encode_huffman_lengths(all_lengths, lengths_count, encoded_lengths);
  708. }
  709. void DeflateCompressor::write_dynamic_huffman(const CanonicalCode& literal_code, size_t literal_code_count, const Optional<CanonicalCode>& distance_code, size_t distance_code_count, const Array<u8, 19>& code_lengths_bit_lengths, size_t code_length_count, const Array<code_length_symbol, max_huffman_literals + max_huffman_distances>& encoded_lengths, size_t encoded_lengths_count)
  710. {
  711. m_output_stream.write_bits(literal_code_count - 257, 5);
  712. m_output_stream.write_bits(distance_code_count - 1, 5);
  713. m_output_stream.write_bits(code_length_count - 4, 4);
  714. for (size_t i = 0; i < code_length_count; i++) {
  715. m_output_stream.write_bits(code_lengths_bit_lengths[code_lengths_code_lengths_order[i]], 3);
  716. }
  717. auto code_lengths_code = CanonicalCode::from_bytes(code_lengths_bit_lengths);
  718. VERIFY(code_lengths_code.has_value());
  719. for (size_t i = 0; i < encoded_lengths_count; i++) {
  720. auto encoded_length = encoded_lengths[i];
  721. code_lengths_code->write_symbol(m_output_stream, encoded_length.symbol);
  722. if (encoded_length.symbol == DeflateSpecialCodeLengths::COPY) {
  723. m_output_stream.write_bits(encoded_length.count - 3, 2);
  724. } else if (encoded_length.symbol == DeflateSpecialCodeLengths::ZEROS) {
  725. m_output_stream.write_bits(encoded_length.count - 3, 3);
  726. } else if (encoded_length.symbol == DeflateSpecialCodeLengths::LONG_ZEROS) {
  727. m_output_stream.write_bits(encoded_length.count - 11, 7);
  728. }
  729. }
  730. write_huffman(literal_code, distance_code);
  731. }
  732. void DeflateCompressor::flush()
  733. {
  734. if (m_output_stream.handle_any_error()) {
  735. set_fatal_error();
  736. return;
  737. }
  738. m_output_stream.write_bit(m_finished);
  739. // if this is just an empty block to signify the end of the deflate stream use the smallest block possible (10 bits total)
  740. if (m_pending_block_size == 0) {
  741. VERIFY(m_finished); // we shouldn't be writing empty blocks unless this is the final one
  742. m_output_stream.write_bits(0b01, 2); // fixed huffman codes
  743. m_output_stream.write_bits(0b0000000, 7); // end of block symbol
  744. m_output_stream.align_to_byte_boundary();
  745. return;
  746. }
  747. auto write_uncompressed = [&]() {
  748. m_output_stream.write_bits(0b00, 2); // no compression
  749. m_output_stream.align_to_byte_boundary();
  750. LittleEndian<u16> len = m_pending_block_size;
  751. m_output_stream << len;
  752. LittleEndian<u16> nlen = ~m_pending_block_size;
  753. m_output_stream << nlen;
  754. m_output_stream.write_or_error(pending_block().slice(0, m_pending_block_size));
  755. };
  756. if (m_compression_level == CompressionLevel::STORE) { // disabled compression fast path
  757. write_uncompressed();
  758. m_pending_block_size = 0;
  759. return;
  760. }
  761. // The following implementation of lz77 compression and huffman encoding is based on the reference implementation by Hans Wennborg https://www.hanshq.net/zip.html
  762. // this reads from the pending block and writes to m_symbol_buffer
  763. lz77_compress_block();
  764. // insert EndOfBlock marker to the symbol buffer
  765. m_symbol_buffer[m_pending_symbol_size].distance = 0;
  766. m_symbol_buffer[m_pending_symbol_size++].literal = 256;
  767. m_symbol_frequencies[256]++;
  768. // generate optimal dynamic huffman code lengths
  769. Array<u8, max_huffman_literals> dynamic_literal_bit_lengths {};
  770. Array<u8, max_huffman_distances> dynamic_distance_bit_lengths {};
  771. generate_huffman_lengths(dynamic_literal_bit_lengths, m_symbol_frequencies, 15); // deflate data huffman can use up to 15 bits per symbol
  772. generate_huffman_lengths(dynamic_distance_bit_lengths, m_distance_frequencies, 15);
  773. // encode literal and distance lengths together in deflate format
  774. Array<code_length_symbol, max_huffman_literals + max_huffman_distances> encoded_lengths {};
  775. size_t literal_code_count;
  776. size_t distance_code_count;
  777. auto encoded_lengths_count = encode_block_lengths(dynamic_literal_bit_lengths, dynamic_distance_bit_lengths, encoded_lengths, literal_code_count, distance_code_count);
  778. // count code length frequencies
  779. Array<u16, 19> code_lengths_frequencies { 0 };
  780. for (size_t i = 0; i < encoded_lengths_count; i++) {
  781. code_lengths_frequencies[encoded_lengths[i].symbol]++;
  782. }
  783. // generate optimal huffman code lengths code lengths
  784. Array<u8, 19> code_lengths_bit_lengths {};
  785. generate_huffman_lengths(code_lengths_bit_lengths, code_lengths_frequencies, 7); // deflate code length huffman can use up to 7 bits per symbol
  786. // calculate actual code length code lengths count (without trailing zeros)
  787. auto code_lengths_count = code_lengths_bit_lengths.size();
  788. while (code_lengths_bit_lengths[code_lengths_code_lengths_order[code_lengths_count - 1]] == 0)
  789. code_lengths_count--;
  790. auto uncompressed_size = uncompressed_block_length();
  791. auto fixed_huffman_size = fixed_block_length();
  792. auto dynamic_huffman_size = dynamic_block_length(dynamic_literal_bit_lengths, dynamic_distance_bit_lengths, code_lengths_bit_lengths, code_lengths_frequencies, code_lengths_count);
  793. // If the compression somehow didnt reduce the size enough, just write out the block uncompressed as it allows for much faster decompression
  794. if (uncompressed_size <= min(fixed_huffman_size, dynamic_huffman_size)) {
  795. write_uncompressed();
  796. } else if (fixed_huffman_size <= dynamic_huffman_size) { // If the fixed and dynamic huffman codes come out the same size, prefer the fixed version, as it takes less time to decode
  797. m_output_stream.write_bits(0b01, 2); // fixed huffman codes
  798. write_huffman(CanonicalCode::fixed_literal_codes(), CanonicalCode::fixed_distance_codes());
  799. } else {
  800. m_output_stream.write_bits(0b10, 2); // dynamic huffman codes
  801. auto literal_code = CanonicalCode::from_bytes(dynamic_literal_bit_lengths);
  802. VERIFY(literal_code.has_value());
  803. auto distance_code = CanonicalCode::from_bytes(dynamic_distance_bit_lengths);
  804. write_dynamic_huffman(literal_code.value(), literal_code_count, distance_code, distance_code_count, code_lengths_bit_lengths, code_lengths_count, encoded_lengths, encoded_lengths_count);
  805. }
  806. if (m_finished)
  807. m_output_stream.align_to_byte_boundary();
  808. // reset all block specific members
  809. m_pending_block_size = 0;
  810. m_pending_symbol_size = 0;
  811. m_symbol_frequencies.fill(0);
  812. m_distance_frequencies.fill(0);
  813. // On the final block this copy will potentially produce an invalid search window, but since its the final block we dont care
  814. pending_block().copy_trimmed_to({ m_rolling_window, block_size });
  815. }
  816. void DeflateCompressor::final_flush()
  817. {
  818. VERIFY(!m_finished);
  819. m_finished = true;
  820. flush();
  821. }
  822. Optional<ByteBuffer> DeflateCompressor::compress_all(const ReadonlyBytes& bytes, CompressionLevel compression_level)
  823. {
  824. DuplexMemoryStream output_stream;
  825. DeflateCompressor deflate_stream { output_stream, compression_level };
  826. deflate_stream.write_or_error(bytes);
  827. deflate_stream.final_flush();
  828. if (deflate_stream.handle_any_error())
  829. return {};
  830. return output_stream.copy_into_contiguous_buffer();
  831. }
  832. }