WebPWriterLossless.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322
  1. /*
  2. * Copyright (c) 2024, Nico Weber <thakis@chromium.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. // Lossless format: https://developers.google.com/speed/webp/docs/webp_lossless_bitstream_specification
  7. #include <AK/BitStream.h>
  8. #include <AK/Debug.h>
  9. #include <AK/Endian.h>
  10. #include <AK/MemoryStream.h>
  11. #include <LibCompress/DeflateTables.h>
  12. #include <LibCompress/Huffman.h>
  13. #include <LibGfx/Bitmap.h>
  14. #include <LibGfx/ImageFormats/WebPSharedLossless.h>
  15. #include <LibGfx/ImageFormats/WebPWriterLossless.h>
  16. namespace Gfx {
  17. NEVER_INLINE static ErrorOr<void> write_image_data(LittleEndianOutputBitStream& bit_stream, Bitmap const& bitmap, PrefixCodeGroup const& prefix_code_group)
  18. {
  19. // This is currently the hot loop. Keep performance in mind when you change it.
  20. for (ARGB32 pixel : bitmap) {
  21. u8 a = pixel >> 24;
  22. u8 r = pixel >> 16;
  23. u8 g = pixel >> 8;
  24. u8 b = pixel;
  25. TRY(prefix_code_group[0].write_symbol(bit_stream, g));
  26. TRY(prefix_code_group[1].write_symbol(bit_stream, r));
  27. TRY(prefix_code_group[2].write_symbol(bit_stream, b));
  28. TRY(prefix_code_group[3].write_symbol(bit_stream, a));
  29. }
  30. return {};
  31. }
  32. struct CodeLengthSymbol {
  33. u8 symbol { 0 };
  34. u8 count { 0 }; // used for special symbols 16-18
  35. };
  36. // This is very similar to DeflateCompressor::encode_huffman_lengths().
  37. // But:
  38. // * size can be larger than 288 for green, is always 256 for r, b, a, and is always 40 for distance codes
  39. // * code 16 has different semantics, requires last_non_zero_symbol
  40. static size_t encode_huffman_lengths(ReadonlyBytes lengths, Array<CodeLengthSymbol, 256>& encoded_lengths)
  41. {
  42. size_t encoded_count = 0;
  43. size_t i = 0;
  44. u8 last_non_zero_symbol = 8; // "If code 16 is used before a non-zero value has been emitted, a value of 8 is repeated."
  45. while (i < lengths.size()) {
  46. if (lengths[i] == 0) {
  47. auto zero_count = 0;
  48. for (size_t j = i; j < min(lengths.size(), i + 138) && lengths[j] == 0; j++)
  49. zero_count++;
  50. if (zero_count < 3) { // below minimum repeated zero count
  51. encoded_lengths[encoded_count++].symbol = 0;
  52. i++;
  53. continue;
  54. }
  55. if (zero_count <= 10) {
  56. // "Code 17 emits a streak of zeros [3..10], i.e., 3 + ReadBits(3) times."
  57. encoded_lengths[encoded_count].symbol = 17;
  58. encoded_lengths[encoded_count++].count = zero_count;
  59. } else {
  60. // "Code 18 emits a streak of zeros of length [11..138], i.e., 11 + ReadBits(7) times."
  61. encoded_lengths[encoded_count].symbol = 18;
  62. encoded_lengths[encoded_count++].count = zero_count;
  63. }
  64. i += zero_count;
  65. continue;
  66. }
  67. VERIFY(lengths[i] != 0);
  68. last_non_zero_symbol = lengths[i];
  69. encoded_lengths[encoded_count++].symbol = lengths[i++];
  70. // "Code 16 repeats the previous non-zero value [3..6] times, i.e., 3 + ReadBits(2) times."
  71. // This is different from deflate.
  72. auto copy_count = 0;
  73. for (size_t j = i; j < min(lengths.size(), i + 6) && lengths[j] == last_non_zero_symbol; j++)
  74. copy_count++;
  75. if (copy_count >= 3) {
  76. encoded_lengths[encoded_count].symbol = 16;
  77. encoded_lengths[encoded_count++].count = copy_count;
  78. i += copy_count;
  79. continue;
  80. }
  81. }
  82. return encoded_count;
  83. }
  84. static ErrorOr<CanonicalCode> write_simple_code_lengths(LittleEndianOutputBitStream& bit_stream, ReadonlyBytes symbols)
  85. {
  86. VERIFY(symbols.size() <= 2);
  87. static constexpr Array<u8, 1> empty { 0 };
  88. if (symbols.size() == 0) {
  89. // "Another special case is when all prefix code lengths are zeros (an empty prefix code). [...]
  90. // empty prefix codes can be coded as those containing a single symbol 0."
  91. symbols = empty;
  92. }
  93. unsigned non_zero_symbol_count = symbols.size();
  94. TRY(bit_stream.write_bits(1u, 1u)); // Simple code length code.
  95. TRY(bit_stream.write_bits(non_zero_symbol_count - 1, 1u)); // num_symbols - 1
  96. if (symbols[0] <= 1) {
  97. TRY(bit_stream.write_bits(0u, 1u)); // is_first_8bits: no
  98. TRY(bit_stream.write_bits(symbols[0], 1u)); // symbol0
  99. } else {
  100. TRY(bit_stream.write_bits(1u, 1u)); // is_first_8bits: yes
  101. TRY(bit_stream.write_bits(symbols[0], 8u)); // symbol0
  102. }
  103. if (non_zero_symbol_count > 1)
  104. TRY(bit_stream.write_bits(symbols[1], 8u)); // symbol1
  105. Array<u8, 256> bits_per_symbol {};
  106. // "When coding a single leaf node [...], all but one code length are zeros, and the single leaf node value
  107. // is marked with the length of 1 -- even when no bits are consumed when that single leaf node tree is used."
  108. // CanonicalCode follows that convention too, even when describing simple code lengths.
  109. bits_per_symbol[symbols[0]] = 1;
  110. if (non_zero_symbol_count > 1)
  111. bits_per_symbol[symbols[1]] = 1;
  112. return MUST(CanonicalCode::from_bytes(bits_per_symbol));
  113. }
  114. static ErrorOr<CanonicalCode> write_normal_code_lengths(LittleEndianOutputBitStream& bit_stream, Array<u8, 256> const& bit_lengths, size_t alphabet_size)
  115. {
  116. // bit_lengths stores how many bits each symbol is encoded with.
  117. // Drop trailing zero lengths.
  118. // This will keep at least three symbols; else we would've called write_simple_code_lengths() instead.
  119. // This is similar to the loops in Deflate::encode_block_lengths().
  120. size_t code_count = bit_lengths.size();
  121. while (bit_lengths[code_count - 1] == 0) {
  122. code_count--;
  123. VERIFY(code_count > 2);
  124. }
  125. Array<CodeLengthSymbol, 256> encoded_lengths {};
  126. auto encoded_lengths_count = encode_huffman_lengths(bit_lengths.span().trim(code_count), encoded_lengths);
  127. // The code to compute code length code lengths is very similar to some of the code in DeflateCompressor::flush().
  128. // count code length frequencies
  129. Array<u16, 19> code_lengths_frequencies { 0 };
  130. for (size_t i = 0; i < encoded_lengths_count; i++) {
  131. VERIFY(code_lengths_frequencies[encoded_lengths[i].symbol] < UINT16_MAX);
  132. code_lengths_frequencies[encoded_lengths[i].symbol]++;
  133. }
  134. // generate optimal huffman code lengths code lengths
  135. Array<u8, 19> code_lengths_bit_lengths {};
  136. Compress::generate_huffman_lengths(code_lengths_bit_lengths, code_lengths_frequencies, 7); // deflate code length huffman can use up to 7 bits per symbol
  137. // calculate actual code length code lengths count (without trailing zeros)
  138. auto code_lengths_count = code_lengths_bit_lengths.size();
  139. while (code_lengths_bit_lengths[kCodeLengthCodeOrder[code_lengths_count - 1]] == 0)
  140. code_lengths_count--;
  141. TRY(bit_stream.write_bits(0u, 1u)); // Normal code length code.
  142. // This here isn't needed in Deflate because it always writes EndOfBlock. WebP does not have an EndOfBlock marker, so it needs this check.
  143. if (code_lengths_count < 4)
  144. code_lengths_count = 4;
  145. dbgln_if(WEBP_DEBUG, "writing code_lengths_count: {}", code_lengths_count);
  146. // WebP uses a different kCodeLengthCodeOrder than deflate. Other than that, the following is similar to a loop in Compress::write_dynamic_huffman().
  147. // "int num_code_lengths = 4 + ReadBits(4);"
  148. TRY(bit_stream.write_bits(code_lengths_count - 4u, 4u));
  149. for (size_t i = 0; i < code_lengths_count; i++) {
  150. TRY(bit_stream.write_bits(code_lengths_bit_lengths[kCodeLengthCodeOrder[i]], 3));
  151. }
  152. // Write code lengths. This is slightly different from deflate too -- deflate writes literal and distance lengths here,
  153. // while WebP writes one of these codes each for g, r, b, a, and distance.
  154. if (alphabet_size == encoded_lengths_count) {
  155. TRY(bit_stream.write_bits(0u, 1u)); // max_symbol is alphabet_size
  156. } else {
  157. TRY(bit_stream.write_bits(1u, 1u)); // max_symbol is explicitly coded
  158. // "int length_nbits = 2 + 2 * ReadBits(3);
  159. // int max_symbol = 2 + ReadBits(length_nbits);"
  160. // => length_nbits is at most 2 + 2*7 == 16
  161. unsigned needed_length_nbits = ceil(log2(encoded_lengths_count - 2));
  162. VERIFY(needed_length_nbits <= 16);
  163. needed_length_nbits = ceil_div(needed_length_nbits, 2) * 2;
  164. TRY(bit_stream.write_bits((needed_length_nbits - 2) / 2, 3u)); // length_nbits = 2 + 2 * 3 == 8
  165. TRY(bit_stream.write_bits(encoded_lengths_count - 2, needed_length_nbits)); // max_symbol = 2 + 254
  166. }
  167. // The rest is identical to write_dynamic_huffman() again. (Code 16 has different semantics, but that doesn't matter here.)
  168. auto code_lengths_code = MUST(CanonicalCode::from_bytes(code_lengths_bit_lengths));
  169. for (size_t i = 0; i < encoded_lengths_count; i++) {
  170. auto encoded_length = encoded_lengths[i];
  171. TRY(code_lengths_code.write_symbol(bit_stream, encoded_length.symbol));
  172. if (encoded_length.symbol == 16) {
  173. // "Code 16 repeats the previous non-zero value [3..6] times, i.e., 3 + ReadBits(2) times."
  174. TRY(bit_stream.write_bits<u8>(encoded_length.count - 3, 2));
  175. } else if (encoded_length.symbol == 17) {
  176. // "Code 17 emits a streak of zeros [3..10], i.e., 3 + ReadBits(3) times."
  177. TRY(bit_stream.write_bits<u8>(encoded_length.count - 3, 3));
  178. } else if (encoded_length.symbol == 18) {
  179. // "Code 18 emits a streak of zeros of length [11..138], i.e., 11 + ReadBits(7) times."
  180. TRY(bit_stream.write_bits<u8>(encoded_length.count - 11, 7));
  181. }
  182. }
  183. return CanonicalCode::from_bytes(bit_lengths.span().trim(code_count));
  184. }
  185. static ErrorOr<void> write_VP8L_image_data(Stream& stream, Bitmap const& bitmap)
  186. {
  187. LittleEndianOutputBitStream bit_stream { MaybeOwned<Stream>(stream) };
  188. // optional-transform = (%b1 transform optional-transform) / %b0
  189. TRY(bit_stream.write_bits(0u, 1u)); // No transform for now.
  190. // https://developers.google.com/speed/webp/docs/webp_lossless_bitstream_specification#5_image_data
  191. // spatially-coded-image = color-cache-info meta-prefix data
  192. // color-cache-info = %b0
  193. // color-cache-info =/ (%b1 4BIT) ; 1 followed by color cache size
  194. TRY(bit_stream.write_bits(0u, 1u)); // No color cache for now.
  195. // meta-prefix = %b0 / (%b1 entropy-image)
  196. TRY(bit_stream.write_bits(0u, 1u)); // No meta prefix for now.
  197. // data = prefix-codes lz77-coded-image
  198. // prefix-codes = prefix-code-group *prefix-codes
  199. // prefix-code-group =
  200. // 5prefix-code ; See "Interpretation of Meta Prefix Codes" to
  201. // ; understand what each of these five prefix
  202. // ; codes are for.
  203. // We're writing a single prefix-code-group.
  204. // "These codes are (in bitstream order):
  205. // Prefix code #1: Used for green channel, backward-reference length, and color cache.
  206. // Prefix code #2, #3, and #4: Used for red, blue, and alpha channels, respectively.
  207. // Prefix code #5: Used for backward-reference distance."
  208. // We use neither back-references not color cache entries yet.
  209. // We can make this smarter later on.
  210. size_t const color_cache_size = 0;
  211. constexpr Array alphabet_sizes = to_array<size_t>({ 256 + 24 + static_cast<size_t>(color_cache_size), 256, 256, 256, 40 });
  212. // If you add support for color cache: At the moment, CanonicalCodes does not support writing more than 288 symbols.
  213. if (alphabet_sizes[0] > 288)
  214. return Error::from_string_literal("Invalid alphabet size");
  215. // We do use huffman coding by writing a single prefix-code-group for the entire image.
  216. // FIXME: Consider using a meta-prefix image and using one prefix-code-group per tile.
  217. // FIXME: generate_huffman_lengths() currently halves a frequency cap if the maximum bit length is reached.
  218. // This has the effect of giving very frequent symbols a higher bit length than they would have otherwise.
  219. // Instead, try dividing frequencies by 2 if the maximum bit length is reached.
  220. // Then, low-frequency symbols will get a higher bit length than they would have otherwise, which might help
  221. // compressed size. (For deflate, it doesn't matter much since their blocks are 64kiB large, but for WebP
  222. // we currently use a single huffman tree per channel for the entire image.)
  223. Array<Array<u16, 256>, 4> symbol_frequencies {};
  224. for (ARGB32 pixel : bitmap) {
  225. static constexpr auto saturating_increment = [](u16& value) {
  226. if (value < UINT16_MAX)
  227. value++;
  228. };
  229. saturating_increment(symbol_frequencies[0][(pixel >> 8) & 0xff]); // green
  230. saturating_increment(symbol_frequencies[1][(pixel >> 16) & 0xff]); // red
  231. saturating_increment(symbol_frequencies[2][pixel & 0xff]); // blue
  232. saturating_increment(symbol_frequencies[3][pixel >> 24]); // alpha
  233. }
  234. Array<Array<u8, 256>, 4> code_lengths {};
  235. for (int i = 0; i < 4; ++i) {
  236. // "Code [0..15] indicates literal code lengths." => the maximum bit length is 15.
  237. Compress::generate_huffman_lengths(code_lengths[i], symbol_frequencies[i], 15);
  238. }
  239. PrefixCodeGroup prefix_code_group;
  240. for (int i = 0; i < 4; ++i) {
  241. u8 symbols[2];
  242. unsigned non_zero_symbol_count = 0;
  243. for (int j = 0; j < 256; ++j) {
  244. if (code_lengths[i][j] != 0) {
  245. if (non_zero_symbol_count < 2)
  246. symbols[non_zero_symbol_count] = j;
  247. non_zero_symbol_count++;
  248. }
  249. }
  250. if (non_zero_symbol_count <= 2)
  251. prefix_code_group[i] = TRY(write_simple_code_lengths(bit_stream, { symbols, non_zero_symbol_count }));
  252. else
  253. prefix_code_group[i] = TRY(write_normal_code_lengths(bit_stream, code_lengths[i], alphabet_sizes[i]));
  254. }
  255. // For code #5, use a simple empty code, since we don't use this yet.
  256. prefix_code_group[4] = TRY(write_simple_code_lengths(bit_stream, {}));
  257. // Image data.
  258. TRY(write_image_data(bit_stream, bitmap, prefix_code_group));
  259. // FIXME: Make ~LittleEndianOutputBitStream do this, or make it VERIFY() that it has happened at least.
  260. TRY(bit_stream.align_to_byte_boundary());
  261. TRY(bit_stream.flush_buffer_to_stream());
  262. return {};
  263. }
  264. ErrorOr<ByteBuffer> compress_VP8L_image_data(Bitmap const& bitmap)
  265. {
  266. AllocatingMemoryStream vp8l_data_stream;
  267. TRY(write_VP8L_image_data(vp8l_data_stream, bitmap));
  268. return vp8l_data_stream.read_until_eof();
  269. }
  270. }