WebPLoaderLossy.cpp 58 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264
  1. /*
  2. * Copyright (c) 2023, Nico Weber <thakis@chromium.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Debug.h>
  7. #include <AK/Endian.h>
  8. #include <AK/Format.h>
  9. #include <AK/MemoryStream.h>
  10. #include <AK/Vector.h>
  11. #include <LibGfx/ImageFormats/BooleanDecoder.h>
  12. #include <LibGfx/ImageFormats/WebPLoaderLossy.h>
  13. #include <LibGfx/ImageFormats/WebPLoaderLossyTables.h>
  14. // Lossy format: https://datatracker.ietf.org/doc/html/rfc6386
  15. // Summary:
  16. // A lossy webp image is a VP8 keyframe.
  17. // A VP8 keyframe consists of 16x16 pixel tiles called macroblocks. Each macroblock is subdivided into 4x4 pixel tiles called subblocks.
  18. // Pixel values are stored as YUV 4:2:0. That is, each 4x4 luma pixels are covered by 1 pixel U chroma and 1 pixel V chroma.
  19. // This means one macroblock is covered by 4x4 Y subblocks and 2x2 U and V subblocks each.
  20. // VP8 data consists of:
  21. // * A tiny bit of uncompressed data, storing image dimensions and the size of the first compressed chunk of data, called the first partition
  22. // * The first partition, which is a entropy-coded bitstream storing:
  23. // 1. A fixed-size header.
  24. // The main piece of data this stores is a probability distribution for how pixel values of each macroblock are predicted from previously decoded data.
  25. // It also stores how may independent entropy-coded bitstreams are used to store the actual pixel data (for all images I've seen so far, just one).
  26. // 2. For each macroblock, it stores how that macroblock's pixel values are predicted from previously decoded data (and some more per-macroblock metadata).
  27. // There are independent prediction modes for Y, U, V.
  28. // U and V store a single prediction mode per macroblock.
  29. // Y can store a single prediction mode per macroblock, or it can store one subblock prediction mode for each of the 4x4 luma subblocks.
  30. // * One or more additional entropy-coded bitstreams ("partitions") that store the discrete cosine transform ("DCT") coefficients for the actual pixel data for each macroblock.
  31. // Each macroblock is subdivided into 4x4 tiles called "subblocks". A 16x16 pixel macroblock consists of:
  32. // 0. If the macroblock stores 4x4 luma subblock prediction modes, the 4x4 DC coefficients of each subblock's DCT are stored at the start of the macroblock's data,
  33. // as coefficients of an inverse Walsh-Hadamard Transform (WHT).
  34. // 1. 4x4 luma subblocks
  35. // 2. 2x2 U chrome subblocks
  36. // 3. 2x2 U chrome subblocks
  37. // That is, each macroblock stores 24 or 25 sets of coefficients.
  38. // Each set of coefficients stores 16 numbers, using a combination of a custom prefix tree and dequantization.
  39. // The inverse DCT output is added to the output of the prediction.
  40. namespace Gfx {
  41. // https://developers.google.com/speed/webp/docs/riff_container#simple_file_format_lossy
  42. // https://datatracker.ietf.org/doc/html/rfc6386#section-19 "Annex A: Bitstream Syntax"
  43. ErrorOr<VP8Header> decode_webp_chunk_VP8_header(ReadonlyBytes vp8_data)
  44. {
  45. if (vp8_data.size() < 10)
  46. return Error::from_string_literal("WebPImageDecoderPlugin: 'VP8 ' chunk too small");
  47. // FIXME: Eventually, this should probably call into LibVideo/VP8,
  48. // and image decoders should move into LibImageDecoders which depends on both LibGfx and LibVideo.
  49. // (LibVideo depends on LibGfx, so LibGfx can't depend on LibVideo itself.)
  50. // https://datatracker.ietf.org/doc/html/rfc6386#section-4 "Overview of Compressed Data Format"
  51. // "The decoder is simply presented with a sequence of compressed frames [...]
  52. // The first frame presented to the decompressor is [...] a key frame. [...]
  53. // [E]very compressed frame has three or more pieces. It begins with an uncompressed data chunk comprising 10 bytes in the case of key frames"
  54. u8 const* data = vp8_data.data();
  55. // https://datatracker.ietf.org/doc/html/rfc6386#section-9.1 "Uncompressed Data Chunk"
  56. u32 frame_tag = data[0] | (data[1] << 8) | (data[2] << 16);
  57. bool is_key_frame = (frame_tag & 1) == 0; // https://www.rfc-editor.org/errata/eid5534
  58. u8 version = (frame_tag & 0xe) >> 1;
  59. bool show_frame = (frame_tag & 0x10) != 0;
  60. u32 size_of_first_partition = frame_tag >> 5;
  61. if (!is_key_frame)
  62. return Error::from_string_literal("WebPImageDecoderPlugin: 'VP8 ' chunk not a key frame");
  63. if (!show_frame)
  64. return Error::from_string_literal("WebPImageDecoderPlugin: 'VP8 ' chunk has invalid visibility for webp image");
  65. if (version > 3)
  66. return Error::from_string_literal("WebPImageDecoderPlugin: unknown version number in 'VP8 ' chunk");
  67. u32 start_code = data[3] | (data[4] << 8) | (data[5] << 16);
  68. if (start_code != 0x2a019d) // https://www.rfc-editor.org/errata/eid7370
  69. return Error::from_string_literal("WebPImageDecoderPlugin: 'VP8 ' chunk invalid start_code");
  70. // "The scaling specifications for each dimension are encoded as follows.
  71. // 0 | No upscaling (the most common case).
  72. // 1 | Upscale by 5/4.
  73. // 2 | Upscale by 5/3.
  74. // 3 | Upscale by 2."
  75. // This is a display-time operation and doesn't affect decoding."
  76. u16 width_and_horizontal_scale = data[6] | (data[7] << 8);
  77. u16 width = width_and_horizontal_scale & 0x3fff;
  78. u8 horizontal_scale = width_and_horizontal_scale >> 14;
  79. u16 heigth_and_vertical_scale = data[8] | (data[9] << 8);
  80. u16 height = heigth_and_vertical_scale & 0x3fff;
  81. u8 vertical_scale = heigth_and_vertical_scale >> 14;
  82. dbgln_if(WEBP_DEBUG, "version {}, show_frame {}, size_of_first_partition {}, width {}, horizontal_scale {}, height {}, vertical_scale {}",
  83. version, show_frame, size_of_first_partition, width, horizontal_scale, height, vertical_scale);
  84. if (vp8_data.size() < 10 + size_of_first_partition)
  85. return Error::from_string_literal("WebPImageDecoderPlugin: 'VP8 ' chunk too small for full first partition");
  86. return VP8Header { version, show_frame, size_of_first_partition, width, horizontal_scale, height, vertical_scale, vp8_data.slice(10, size_of_first_partition), vp8_data.slice(10 + size_of_first_partition) };
  87. }
  88. namespace {
  89. // Reads n bits followed by a sign bit (0: positive, 1: negative).
  90. ErrorOr<i8> read_signed_literal(BooleanDecoder& decoder, u8 n)
  91. {
  92. VERIFY(n <= 7);
  93. i8 i = TRY(decoder.read_literal(n));
  94. if (TRY(decoder.read_literal(1)))
  95. i = -i;
  96. return i;
  97. }
  98. // https://datatracker.ietf.org/doc/html/rfc6386#section-19 "Annex A: Bitstream Syntax"
  99. #define L(n) decoder.read_literal(n)
  100. #define B(prob) decoder.read_bool(prob)
  101. #define L_signed(n) read_signed_literal(decoder, n)
  102. // https://datatracker.ietf.org/doc/html/rfc6386#section-9.2 "Color Space and Pixel Type (Key Frames Only)"
  103. enum class ColorSpaceAndPixelType {
  104. YUV = 0,
  105. ReservedForFutureUse = 1,
  106. };
  107. enum class ClampingSpecification {
  108. DecoderMustClampTo0To255 = 0,
  109. NoClampingNecessary = 1,
  110. };
  111. // https://datatracker.ietf.org/doc/html/rfc6386#section-9.3 Segment-Based Adjustments"
  112. // https://datatracker.ietf.org/doc/html/rfc6386#section-19.2 "Frame Header"
  113. enum class SegmentFeatureMode {
  114. // Spec 19.2 says 0 is delta, 1 absolute; spec 9.3 has it the other way round. 19.2 is correct.
  115. // https://www.rfc-editor.org/errata/eid7519
  116. DeltaValueMode = 0,
  117. AbsoluteValueMode = 1,
  118. };
  119. struct Segmentation {
  120. bool update_macroblock_segmentation_map { false };
  121. SegmentFeatureMode segment_feature_mode { SegmentFeatureMode::DeltaValueMode };
  122. i8 quantizer_update_value[4] {};
  123. i8 loop_filter_update_value[4] {};
  124. u8 macroblock_segment_tree_probabilities[3] = { 255, 255, 255 };
  125. };
  126. ErrorOr<Segmentation> decode_VP8_frame_header_segmentation(BooleanDecoder&);
  127. // Also https://datatracker.ietf.org/doc/html/rfc6386#section-9.6 "Dequantization Indices"
  128. struct QuantizationIndices {
  129. u8 y_ac { 0 };
  130. i8 y_dc_delta { 0 };
  131. i8 y2_dc_delta { 0 };
  132. i8 y2_ac_delta { 0 };
  133. i8 uv_dc_delta { 0 };
  134. i8 uv_ac_delta { 0 };
  135. };
  136. ErrorOr<QuantizationIndices> decode_VP8_frame_header_quantization_indices(BooleanDecoder&);
  137. struct LoopFilterAdjustment {
  138. bool enable_loop_filter_adjustment { false };
  139. i8 ref_frame_delta[4] {};
  140. i8 mb_mode_delta[4] {};
  141. };
  142. ErrorOr<LoopFilterAdjustment> decode_VP8_frame_header_loop_filter_adjustment(BooleanDecoder&);
  143. using CoefficientProbabilities = Prob[4][8][3][num_dct_tokens - 1];
  144. ErrorOr<void> decode_VP8_frame_header_coefficient_probabilities(BooleanDecoder&, CoefficientProbabilities);
  145. // https://datatracker.ietf.org/doc/html/rfc6386#section-15 "Loop Filter"
  146. // "The first is a flag (filter_type) selecting the type of filter (normal or simple)"
  147. enum class FilterType {
  148. Normal = 0,
  149. Simple = 1,
  150. };
  151. // https://datatracker.ietf.org/doc/html/rfc6386#section-19.2 "Frame Header"
  152. struct FrameHeader {
  153. ColorSpaceAndPixelType color_space {};
  154. ClampingSpecification clamping_type {};
  155. bool is_segmentation_enabled {};
  156. Segmentation segmentation {};
  157. FilterType filter_type {};
  158. u8 loop_filter_level {};
  159. u8 sharpness_level {};
  160. LoopFilterAdjustment loop_filter_adjustment {};
  161. u8 number_of_dct_partitions {};
  162. QuantizationIndices quantization_indices {};
  163. CoefficientProbabilities coefficient_probabilities;
  164. bool enable_skipping_of_macroblocks_containing_only_zero_coefficients {};
  165. u8 probability_skip_false;
  166. };
  167. ErrorOr<FrameHeader> decode_VP8_frame_header(BooleanDecoder& decoder)
  168. {
  169. // https://datatracker.ietf.org/doc/html/rfc6386#section-19.2 "Frame Header"
  170. FrameHeader header;
  171. // In the VP8 spec, this is in an `if (key_frames)`, but webp files only have key frames.
  172. header.color_space = ColorSpaceAndPixelType { TRY(L(1)) };
  173. header.clamping_type = ClampingSpecification { TRY(L(1)) };
  174. dbgln_if(WEBP_DEBUG, "color_space {} clamping_type {}", (int)header.color_space, (int)header.clamping_type);
  175. // https://datatracker.ietf.org/doc/html/rfc6386#section-9.3 "Segment-Based Adjustments"
  176. header.is_segmentation_enabled = TRY(L(1));
  177. dbgln_if(WEBP_DEBUG, "segmentation_enabled {}", header.is_segmentation_enabled);
  178. if (header.is_segmentation_enabled)
  179. header.segmentation = TRY(decode_VP8_frame_header_segmentation(decoder));
  180. header.filter_type = FilterType { TRY(L(1)) };
  181. header.loop_filter_level = TRY(L(6));
  182. header.sharpness_level = TRY(L(3));
  183. dbgln_if(WEBP_DEBUG, "filter_type {} loop_filter_level {} sharpness_level {}", (int)header.filter_type, header.loop_filter_level, header.sharpness_level);
  184. header.loop_filter_adjustment = TRY(decode_VP8_frame_header_loop_filter_adjustment(decoder));
  185. u8 log2_nbr_of_dct_partitions = TRY(L(2));
  186. dbgln_if(WEBP_DEBUG, "log2_nbr_of_dct_partitions {}", log2_nbr_of_dct_partitions);
  187. header.number_of_dct_partitions = 1 << log2_nbr_of_dct_partitions;
  188. header.quantization_indices = TRY(decode_VP8_frame_header_quantization_indices(decoder));
  189. // In the VP8 spec, this is in an `if (key_frames)` followed by a lengthy `else`, but webp files only have key frames.
  190. u8 refresh_entropy_probs = TRY(L(1)); // Has no effect in webp files.
  191. dbgln_if(WEBP_DEBUG, "refresh_entropy_probs {}", refresh_entropy_probs);
  192. memcpy(header.coefficient_probabilities, DEFAULT_COEFFICIENT_PROBABILITIES, sizeof(header.coefficient_probabilities));
  193. TRY(decode_VP8_frame_header_coefficient_probabilities(decoder, header.coefficient_probabilities));
  194. // https://datatracker.ietf.org/doc/html/rfc6386#section-9.11 "Remaining Frame Header Data (Key Frame)"
  195. header.enable_skipping_of_macroblocks_containing_only_zero_coefficients = TRY(L(1));
  196. dbgln_if(WEBP_DEBUG, "mb_no_skip_coeff {}", header.enable_skipping_of_macroblocks_containing_only_zero_coefficients);
  197. if (header.enable_skipping_of_macroblocks_containing_only_zero_coefficients) {
  198. header.probability_skip_false = TRY(L(8));
  199. dbgln_if(WEBP_DEBUG, "prob_skip_false {}", header.probability_skip_false);
  200. }
  201. // In the VP8 spec, there is a length `if (!key_frames)` here, but webp files only have key frames.
  202. return header;
  203. }
  204. ErrorOr<Segmentation> decode_VP8_frame_header_segmentation(BooleanDecoder& decoder)
  205. {
  206. // Corresponds to "update_segmentation()" in section 19.2 of the spec.
  207. Segmentation segmentation;
  208. segmentation.update_macroblock_segmentation_map = TRY(L(1));
  209. u8 update_segment_feature_data = TRY(L(1));
  210. dbgln_if(WEBP_DEBUG, "update_mb_segmentation_map {} update_segment_feature_data {}",
  211. segmentation.update_macroblock_segmentation_map, update_segment_feature_data);
  212. if (update_segment_feature_data) {
  213. segmentation.segment_feature_mode = static_cast<SegmentFeatureMode>(TRY(L(1)));
  214. dbgln_if(WEBP_DEBUG, "segment_feature_mode {}", (int)segmentation.segment_feature_mode);
  215. for (int i = 0; i < 4; ++i) {
  216. u8 quantizer_update = TRY(L(1));
  217. dbgln_if(WEBP_DEBUG, "quantizer_update {}", quantizer_update);
  218. if (quantizer_update) {
  219. i8 quantizer_update_value = TRY(L_signed(7));
  220. dbgln_if(WEBP_DEBUG, "quantizer_update_value {}", quantizer_update_value);
  221. segmentation.quantizer_update_value[i] = quantizer_update_value;
  222. }
  223. }
  224. for (int i = 0; i < 4; ++i) {
  225. u8 loop_filter_update = TRY(L(1));
  226. dbgln_if(WEBP_DEBUG, "loop_filter_update {}", loop_filter_update);
  227. if (loop_filter_update) {
  228. i8 loop_filter_update_value = TRY(L_signed(6));
  229. dbgln_if(WEBP_DEBUG, "loop_filter_update_value {}", loop_filter_update_value);
  230. segmentation.loop_filter_update_value[i] = loop_filter_update_value;
  231. }
  232. }
  233. }
  234. if (segmentation.update_macroblock_segmentation_map) {
  235. // This reads mb_segment_tree_probs for https://datatracker.ietf.org/doc/html/rfc6386#section-10.
  236. for (int i = 0; i < 3; ++i) {
  237. u8 segment_prob_update = TRY(L(1));
  238. dbgln_if(WEBP_DEBUG, "segment_prob_update {}", segment_prob_update);
  239. if (segment_prob_update) {
  240. u8 segment_prob = TRY(L(8));
  241. dbgln_if(WEBP_DEBUG, "segment_prob {}", segment_prob);
  242. segmentation.macroblock_segment_tree_probabilities[i] = segment_prob;
  243. }
  244. }
  245. }
  246. return segmentation;
  247. }
  248. ErrorOr<QuantizationIndices> decode_VP8_frame_header_quantization_indices(BooleanDecoder& decoder)
  249. {
  250. // Corresponds to "quant_indices()" in section 19.2 of the spec.
  251. QuantizationIndices quantization_indices;
  252. // "The first 7-bit index gives the dequantization table index for
  253. // Y-plane AC coefficients, called yac_qi. It is always coded and acts
  254. // as a baseline for the other 5 quantization indices, each of which is
  255. // represented by a delta from this baseline index."
  256. quantization_indices.y_ac = TRY(L(7));
  257. dbgln_if(WEBP_DEBUG, "y_ac_qi {}", quantization_indices.y_ac);
  258. auto read_delta = [&decoder](StringView name, i8* destination) -> ErrorOr<void> {
  259. u8 is_present = TRY(L(1));
  260. dbgln_if(WEBP_DEBUG, "{}_present {}", name, is_present);
  261. if (is_present) {
  262. i8 delta = TRY(L_signed(4));
  263. dbgln_if(WEBP_DEBUG, "{} {}", name, delta);
  264. *destination = delta;
  265. }
  266. return {};
  267. };
  268. TRY(read_delta("y_dc_delta"sv, &quantization_indices.y_dc_delta));
  269. TRY(read_delta("y2_dc_delta"sv, &quantization_indices.y2_dc_delta));
  270. TRY(read_delta("y2_ac_delta"sv, &quantization_indices.y2_ac_delta));
  271. TRY(read_delta("uv_dc_delta"sv, &quantization_indices.uv_dc_delta));
  272. TRY(read_delta("uv_ac_delta"sv, &quantization_indices.uv_ac_delta));
  273. return quantization_indices;
  274. }
  275. ErrorOr<LoopFilterAdjustment> decode_VP8_frame_header_loop_filter_adjustment(BooleanDecoder& decoder)
  276. {
  277. // Corresponds to "mb_lf_adjustments()" in section 19.2 of the spec.
  278. LoopFilterAdjustment adjustment;
  279. adjustment.enable_loop_filter_adjustment = TRY(L(1));
  280. if (adjustment.enable_loop_filter_adjustment) {
  281. u8 mode_ref_lf_delta_update = TRY(L(1));
  282. dbgln_if(WEBP_DEBUG, "mode_ref_lf_delta_update {}", mode_ref_lf_delta_update);
  283. if (mode_ref_lf_delta_update) {
  284. for (int i = 0; i < 4; ++i) {
  285. u8 ref_frame_delta_update_flag = TRY(L(1));
  286. dbgln_if(WEBP_DEBUG, "ref_frame_delta_update_flag {}", ref_frame_delta_update_flag);
  287. if (ref_frame_delta_update_flag) {
  288. i8 delta = TRY(L_signed(6));
  289. dbgln_if(WEBP_DEBUG, "delta {}", delta);
  290. adjustment.ref_frame_delta[i] = delta;
  291. }
  292. }
  293. for (int i = 0; i < 4; ++i) {
  294. u8 mb_mode_delta_update_flag = TRY(L(1));
  295. dbgln_if(WEBP_DEBUG, "mb_mode_delta_update_flag {}", mb_mode_delta_update_flag);
  296. if (mb_mode_delta_update_flag) {
  297. i8 delta = TRY(L_signed(6));
  298. dbgln_if(WEBP_DEBUG, "delta {}", delta);
  299. adjustment.mb_mode_delta[i] = delta;
  300. }
  301. }
  302. }
  303. }
  304. return adjustment;
  305. }
  306. ErrorOr<void> decode_VP8_frame_header_coefficient_probabilities(BooleanDecoder& decoder, CoefficientProbabilities coefficient_probabilities)
  307. {
  308. // Corresponds to "token_prob_update()" in section 19.2 of the spec.
  309. for (int i = 0; i < 4; i++) {
  310. for (int j = 0; j < 8; j++) {
  311. for (int k = 0; k < 3; k++) {
  312. for (int l = 0; l < 11; l++) {
  313. // token_prob_update() says L(1) and L(8), but it's actually B(p) and L(8).
  314. // https://datatracker.ietf.org/doc/html/rfc6386#section-13.4 "Token Probability Updates" describes it correctly.
  315. if (TRY(B(COEFFICIENT_UPDATE_PROBABILITIES[i][j][k][l])))
  316. coefficient_probabilities[i][j][k][l] = TRY(L(8));
  317. }
  318. }
  319. }
  320. }
  321. return {};
  322. }
  323. // https://datatracker.ietf.org/doc/html/rfc6386#section-8.1 "Tree Coding Implementation"
  324. ErrorOr<u8> tree_decode(BooleanDecoder& decoder, ReadonlySpan<TreeIndex> tree, ReadonlyBytes probabilities, TreeIndex initial_i = 0)
  325. {
  326. TreeIndex i = initial_i;
  327. while (true) {
  328. u8 b = TRY(B(probabilities[i >> 1]));
  329. i = tree[i + b];
  330. if (i <= 0)
  331. return -i;
  332. }
  333. }
  334. // Similar to BlockContext in LibVideo/VP9/Context.h
  335. struct MacroblockMetadata {
  336. // https://datatracker.ietf.org/doc/html/rfc6386#section-10 "Segment-Based Feature Adjustments"
  337. // Read only if `update_mb_segmentation_map` is set.
  338. int segment_id { 0 }; // 0, 1, 2, or 3. Fits in two bits.
  339. // https://datatracker.ietf.org/doc/html/rfc6386#section-11.1 "mb_skip_coeff"
  340. bool skip_coefficients { false };
  341. IntraMacroblockMode intra_y_mode;
  342. IntraMacroblockMode uv_mode;
  343. IntraBlockMode intra_b_modes[16];
  344. };
  345. ErrorOr<Vector<MacroblockMetadata>> decode_VP8_macroblock_metadata(BooleanDecoder& decoder, FrameHeader const& header, int macroblock_width, int macroblock_height)
  346. {
  347. // https://datatracker.ietf.org/doc/html/rfc6386#section-19.3
  348. // Corresponds to "macroblock_header()" in section 19.3 of the spec.
  349. Vector<MacroblockMetadata> macroblock_metadata;
  350. // Key frames must use intra prediction, that is new macroblocks are predicted from old macroblocks in the same frame.
  351. // (Inter prediction on the other hand predicts new macroblocks from the corresponding macroblock in the previous frame.)
  352. // https://datatracker.ietf.org/doc/html/rfc6386#section-11.3 "Subblock Mode Contexts"
  353. // "For macroblocks on the top row or left edge of the image, some of
  354. // the predictors will be non-existent. Such predictors are taken
  355. // to have had the value B_DC_PRED, which, perhaps conveniently,
  356. // takes the value 0 in the enumeration above.
  357. // A simple management scheme for these contexts might maintain a row
  358. // of above predictors and four left predictors. Before decoding the
  359. // frame, the entire row is initialized to B_DC_PRED; before decoding
  360. // each row of macroblocks, the four left predictors are also set to
  361. // B_DC_PRED. After decoding a macroblock, the bottom four subblock
  362. // modes are copied into the row predictor (at the current position,
  363. // which then advances to be above the next macroblock), and the
  364. // right four subblock modes are copied into the left predictor."
  365. Vector<IntraBlockMode> above;
  366. TRY(above.try_resize(macroblock_width * 4)); // One per 4x4 subblock.
  367. // It's possible to not decode all macroblock metadata at once. Instead, this could for example decode one row of metadata,
  368. // then decode the coefficients for one row of macroblocks, convert that row to pixels, and then go on to the next row of macroblocks.
  369. // That'd require slightly less memory. But MacroblockMetadata is fairly small, and this way we can keep the context
  370. // (`above`, `left`) in stack variables instead of having to have a class for that. So keep it simple for now.
  371. for (int mb_y = 0; mb_y < macroblock_height; ++mb_y) {
  372. IntraBlockMode left[4] {};
  373. for (int mb_x = 0; mb_x < macroblock_width; ++mb_x) {
  374. MacroblockMetadata metadata;
  375. if (header.segmentation.update_macroblock_segmentation_map)
  376. metadata.segment_id = TRY(tree_decode(decoder, MACROBLOCK_SEGMENT_TREE, header.segmentation.macroblock_segment_tree_probabilities));
  377. if (header.enable_skipping_of_macroblocks_containing_only_zero_coefficients)
  378. metadata.skip_coefficients = TRY(B(header.probability_skip_false));
  379. int intra_y_mode = TRY(tree_decode(decoder, KEYFRAME_YMODE_TREE, KEYFRAME_YMODE_PROBABILITIES));
  380. metadata.intra_y_mode = (IntraMacroblockMode)intra_y_mode;
  381. // "If the Ymode is B_PRED, it is followed by a (tree-coded) mode for each of the 16 Y subblocks."
  382. if (intra_y_mode == B_PRED) {
  383. for (int y = 0; y < 4; ++y) {
  384. for (int x = 0; x < 4; ++x) {
  385. // "The outer two dimensions of this array are indexed by the already-
  386. // coded subblock modes above and to the left of the current block,
  387. // respectively."
  388. int A = above[mb_x * 4 + x];
  389. int L = left[y];
  390. auto intra_b_mode = static_cast<IntraBlockMode>(TRY(tree_decode(decoder, BLOCK_MODE_TREE, KEYFRAME_BLOCK_MODE_PROBABILITIES[A][L])));
  391. metadata.intra_b_modes[y * 4 + x] = intra_b_mode;
  392. above[mb_x * 4 + x] = intra_b_mode;
  393. left[y] = intra_b_mode;
  394. }
  395. }
  396. } else {
  397. VERIFY(intra_y_mode < B_PRED);
  398. constexpr IntraBlockMode b_mode_from_y_mode[] = { B_DC_PRED, B_VE_PRED, B_HE_PRED, B_TM_PRED };
  399. IntraBlockMode intra_b_mode = b_mode_from_y_mode[intra_y_mode];
  400. for (int i = 0; i < 4; ++i) {
  401. above[mb_x * 4 + i] = intra_b_mode;
  402. left[i] = intra_b_mode;
  403. }
  404. }
  405. metadata.uv_mode = (IntraMacroblockMode)TRY(tree_decode(decoder, UV_MODE_TREE, KEYFRAME_UV_MODE_PROBABILITIES));
  406. TRY(macroblock_metadata.try_append(metadata));
  407. }
  408. }
  409. return macroblock_metadata;
  410. }
  411. // Every macroblock stores:
  412. // - One optional set of coefficients for Y2
  413. // - 16 sets of Y coefficients for the 4x4 Y subblocks of the macroblock
  414. // - 4 sets of U coefficients for the 2x2 U subblocks of the macroblock
  415. // - 4 sets of V coefficients for the 2x2 V subblocks of the macroblock
  416. // That's 24 or 25 sets of coefficients total. This struct identifies one of these sets by index.
  417. // If a macroblock does not have Y2, then i goes from [1..25], else it goes [0..25].
  418. struct CoefficientBlockIndex {
  419. int i;
  420. CoefficientBlockIndex(int i)
  421. : i(i)
  422. {
  423. VERIFY(i >= 0);
  424. VERIFY(i <= 25);
  425. }
  426. bool is_y2() const { return i == 0; }
  427. bool is_y() const { return i >= 1 && i <= 16; }
  428. bool is_u() const { return i >= 17 && i <= 20; }
  429. bool is_v() const { return i >= 21; }
  430. u8 sub_x() const
  431. {
  432. VERIFY(i > 0);
  433. if (i <= 16)
  434. return (i - 1) % 4;
  435. if (i <= 20)
  436. return (i - 17) % 2;
  437. return (i - 21) % 2;
  438. }
  439. u8 sub_y() const
  440. {
  441. VERIFY(i > 0);
  442. if (i <= 16)
  443. return (i - 1) / 4;
  444. if (i <= 20)
  445. return (i - 17) / 2;
  446. return (i - 21) / 2;
  447. }
  448. };
  449. int plane_index(CoefficientBlockIndex index, bool have_y2)
  450. {
  451. // https://datatracker.ietf.org/doc/html/rfc6386#section-13.3 "Token Probabilities"
  452. // "o 0 - Y beginning at coefficient 1 (i.e., Y after Y2)
  453. // o 1 - Y2
  454. // o 2 - U or V
  455. // o 3 - Y beginning at coefficient 0 (i.e., Y in the absence of Y2)."
  456. if (index.is_y2())
  457. return 1;
  458. if (index.is_u() || index.is_v())
  459. return 2;
  460. if (have_y2)
  461. return 0;
  462. return 3;
  463. }
  464. ErrorOr<i16> coefficient_value_for_token(BooleanDecoder& decoder, u8 token)
  465. {
  466. // Implements the second half of https://datatracker.ietf.org/doc/html/rfc6386#section-13.2 "Coding of Individual Coefficient Values"
  467. i16 v = static_cast<i16>(token); // For DCT_0 to DCT4
  468. if (token >= dct_cat1 && token <= dct_cat6) {
  469. static int constexpr starts[] = { 5, 7, 11, 19, 35, 67 };
  470. static int constexpr bits[] = { 1, 2, 3, 4, 5, 11 };
  471. static Prob constexpr Pcat1[] = { 159 };
  472. static Prob constexpr Pcat2[] = { 165, 145 };
  473. static Prob constexpr Pcat3[] = { 173, 148, 140 };
  474. static Prob constexpr Pcat4[] = { 176, 155, 140, 135 };
  475. static Prob constexpr Pcat5[] = { 180, 157, 141, 134, 130 };
  476. static Prob constexpr Pcat6[] = { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129 };
  477. static Prob const* const Pcats[] = { Pcat1, Pcat2, Pcat3, Pcat4, Pcat5, Pcat6 };
  478. v = 0;
  479. // This loop corresponds to `DCTextra` in the spec in section 13.2.
  480. for (int i = 0; i < bits[token - dct_cat1]; ++i)
  481. v = (v << 1) | TRY(decoder.read_bool(Pcats[token - dct_cat1][i]));
  482. v += starts[token - dct_cat1];
  483. }
  484. if (v) {
  485. if (TRY(decoder.read_bool(128)))
  486. v = -v;
  487. }
  488. return v;
  489. }
  490. i16 dequantize_value(i16 value, bool is_dc, QuantizationIndices const& quantization_indices, Segmentation const& segmentation, int segment_id, CoefficientBlockIndex index)
  491. {
  492. // https://datatracker.ietf.org/doc/html/rfc6386#section-9.6 "Dequantization Indices"
  493. // "before inverting the transform, each decoded coefficient
  494. // is multiplied by one of six dequantization factors, the choice of
  495. // which depends on the plane (Y, chroma = U or V, Y2) and coefficient
  496. // position (DC = coefficient 0, AC = coefficients 1-15). The six
  497. // values are specified using 7-bit indices into six corresponding fixed
  498. // tables (the tables are given in Section 14)."
  499. // Section 14 then lists two (!) fixed tables (which are in WebPLoaderLossyTables.h)
  500. // "Lookup values from the above two tables are directly used in the DC
  501. // and AC coefficients in Y1, respectively. For Y2 and chroma, values
  502. // from the above tables undergo either scaling or clamping before the
  503. // multiplies. Details regarding these scaling and clamping processes
  504. // can be found in related lookup functions in dixie.c (Section 20.4)."
  505. // Apparently spec writing became too much work at this point. In section 20.4, in dequant_init():
  506. // * For y2, the output (!) of dc_qlookup is multiplied by 2, the output of ac_qlookup is multiplied by 155 / 100
  507. // * Also for y2, ac_qlookup is at least 8 for lower table entries (XXX!)
  508. // * For uv, the dc_qlookup index is clamped to 117 (instead of 127 for everything else)
  509. // (or, alternatively, the value is clamped to 132 at most)
  510. u8 y_ac_base = quantization_indices.y_ac;
  511. if (segmentation.update_macroblock_segmentation_map) {
  512. if (segmentation.segment_feature_mode == SegmentFeatureMode::DeltaValueMode)
  513. y_ac_base += segmentation.quantizer_update_value[segment_id];
  514. else
  515. y_ac_base = segmentation.quantizer_update_value[segment_id];
  516. }
  517. u8 dequantization_index;
  518. if (index.is_y2())
  519. dequantization_index = y_ac_base + (is_dc ? quantization_indices.y2_dc_delta : quantization_indices.y2_ac_delta);
  520. else if (index.is_u() || index.is_v())
  521. dequantization_index = y_ac_base + (is_dc ? quantization_indices.uv_dc_delta : quantization_indices.uv_ac_delta);
  522. else
  523. dequantization_index = is_dc ? (y_ac_base + quantization_indices.y_dc_delta) : y_ac_base;
  524. // clamp index
  525. if ((index.is_u() || index.is_v()) && is_dc)
  526. dequantization_index = min(dequantization_index, 117);
  527. else
  528. dequantization_index = min(dequantization_index, 127);
  529. // "the multiplies are computed and stored using 16-bit signed integers."
  530. i16 dequantization_factor;
  531. if (is_dc)
  532. dequantization_factor = (i16)dc_qlookup[dequantization_index];
  533. else
  534. dequantization_factor = (i16)ac_qlookup[dequantization_index];
  535. if (index.is_y2()) {
  536. if (is_dc)
  537. dequantization_factor *= 2;
  538. else
  539. dequantization_factor = (dequantization_factor * 155) / 100;
  540. }
  541. return dequantization_factor * value;
  542. }
  543. // Reading macroblock coefficients requires needing to know if the block to the left and above the current macroblock
  544. // has non-zero coefficients. This stores that state.
  545. struct CoefficientReadingContext {
  546. // Store if each plane has nonzero coefficients in the block above and to the left of the current block.
  547. Vector<bool> y2_above;
  548. Vector<bool> y_above;
  549. Vector<bool> u_above;
  550. Vector<bool> v_above;
  551. bool y2_left {};
  552. bool y_left[4] {};
  553. bool u_left[2] {};
  554. bool v_left[2] {};
  555. ErrorOr<void> initialize(int macroblock_width)
  556. {
  557. TRY(y2_above.try_resize(macroblock_width));
  558. TRY(y_above.try_resize(macroblock_width * 4));
  559. TRY(u_above.try_resize(macroblock_width * 2));
  560. TRY(v_above.try_resize(macroblock_width * 2));
  561. return {};
  562. }
  563. void start_new_row()
  564. {
  565. y2_left = false;
  566. for (bool& b : y_left)
  567. b = false;
  568. for (bool& b : u_left)
  569. b = false;
  570. for (bool& b : v_left)
  571. b = false;
  572. }
  573. bool& was_above_nonzero(CoefficientBlockIndex index, int mb_x)
  574. {
  575. if (index.is_y2())
  576. return y2_above[mb_x];
  577. if (index.is_u())
  578. return u_above[mb_x * 2 + index.sub_x()];
  579. if (index.is_v())
  580. return v_above[mb_x * 2 + index.sub_x()];
  581. return y_above[mb_x * 4 + index.sub_x()];
  582. }
  583. bool was_above_nonzero(CoefficientBlockIndex index, int mb_x) const { return const_cast<CoefficientReadingContext&>(*this).was_above_nonzero(index, mb_x); }
  584. bool& was_left_nonzero(CoefficientBlockIndex index)
  585. {
  586. if (index.is_y2())
  587. return y2_left;
  588. if (index.is_u())
  589. return u_left[index.sub_y()];
  590. if (index.is_v())
  591. return v_left[index.sub_y()];
  592. return y_left[index.sub_y()];
  593. }
  594. bool was_left_nonzero(CoefficientBlockIndex index) const { return const_cast<CoefficientReadingContext&>(*this).was_left_nonzero(index); }
  595. void update(CoefficientBlockIndex index, int mb_x, bool subblock_has_nonzero_coefficients)
  596. {
  597. was_above_nonzero(index, mb_x) = subblock_has_nonzero_coefficients;
  598. was_left_nonzero(index) = subblock_has_nonzero_coefficients;
  599. }
  600. };
  601. using Coefficients = i16[16];
  602. // Returns if any non-zero coefficients were read.
  603. ErrorOr<bool> read_coefficent_block(BooleanDecoder& decoder, Coefficients out_coefficients, CoefficientBlockIndex block_index, CoefficientReadingContext& coefficient_reading_context, int mb_x, bool have_y2, int segment_id, FrameHeader const& header)
  604. {
  605. // Corresponds to `residual_block()` in https://datatracker.ietf.org/doc/html/rfc6386#section-19.3,
  606. // but also does dequantization of the stored values.
  607. // "firstCoeff is 1 for luma blocks of macroblocks containing Y2 subblock; otherwise 0"
  608. int firstCoeff = have_y2 && block_index.is_y() ? 1 : 0;
  609. i16 last_decoded_value = num_dct_tokens; // Start with an invalid value
  610. bool subblock_has_nonzero_coefficients = false;
  611. for (int j = firstCoeff; j < 16; ++j) {
  612. // https://datatracker.ietf.org/doc/html/rfc6386#section-13.2 "Coding of Individual Coefficient Values"
  613. // https://datatracker.ietf.org/doc/html/rfc6386#section-13.3 "Token Probabilities"
  614. // "Working from the outside in, the outermost dimension is indexed by
  615. // the type of plane being decoded"
  616. int plane = plane_index(block_index, have_y2);
  617. // "The next dimension is selected by the position of the coefficient
  618. // being decoded. That position, c, steps by ones up to 15, starting
  619. // from zero for block types 1, 2, or 3 and starting from one for block
  620. // type 0. The second array index is then"
  621. // "block type" here seems to refer to the "type of plane" in the previous paragraph.
  622. static int constexpr coeff_bands[16] = { 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7 };
  623. int band = coeff_bands[j];
  624. // "The third dimension is the trickiest."
  625. int tricky = 0;
  626. // "For the first coefficient (DC, unless the block type is 0), we
  627. // consider the (already encoded) blocks within the same plane (Y2, Y,
  628. // U, or V) above and to the left of the current block. The context
  629. // index is then the number (0, 1, or 2) of these blocks that had at
  630. // least one non-zero coefficient in their residue record. Specifically
  631. // for Y2, because macroblocks above and to the left may or may not have
  632. // a Y2 block, the block above is determined by the most recent
  633. // macroblock in the same column that has a Y2 block, and the block to
  634. // the left is determined by the most recent macroblock in the same row
  635. // that has a Y2 block.
  636. // [...]
  637. // As with other contexts used by VP8, the "neighboring block" context
  638. // described here needs a special definition for subblocks lying along
  639. // the top row or left edge of the frame. These "non-existent"
  640. // predictors above and to the left of the image are simply taken to be
  641. // empty -- that is, taken to contain no non-zero coefficients."
  642. if (j == firstCoeff) {
  643. bool was_left_nonzero = coefficient_reading_context.was_left_nonzero(block_index);
  644. bool was_above_nonzero = coefficient_reading_context.was_above_nonzero(block_index, mb_x);
  645. tricky = static_cast<int>(was_left_nonzero) + static_cast<int>(was_above_nonzero);
  646. }
  647. // "Beyond the first coefficient, the context index is determined by the
  648. // absolute value of the most recently decoded coefficient (necessarily
  649. // within the current block) and is 0 if the last coefficient was a
  650. // zero, 1 if it was plus or minus one, and 2 if its absolute value
  651. // exceeded one."
  652. else {
  653. if (last_decoded_value == 0)
  654. tricky = 0;
  655. else if (last_decoded_value == 1 || last_decoded_value == -1)
  656. tricky = 1;
  657. else
  658. tricky = 2;
  659. }
  660. // "In general, all DCT coefficients are decoded using the same tree.
  661. // However, if the preceding coefficient is a DCT_0, decoding will skip
  662. // the first branch, since it is not possible for dct_eob to follow a
  663. // DCT_0."
  664. u8 token = TRY(tree_decode(decoder, COEFFICIENT_TREE, header.coefficient_probabilities[plane][band][tricky], last_decoded_value == DCT_0 ? 2 : 0));
  665. if (token == dct_eob)
  666. break;
  667. i16 v = TRY(coefficient_value_for_token(decoder, token));
  668. if (v) {
  669. // Subblock has non-0 coefficients. Store that, so that `tricky` on the next subblock is initialized correctly.
  670. subblock_has_nonzero_coefficients = true;
  671. }
  672. // last_decoded_value is used for setting `tricky`. It needs to be set to the last decoded token, not to the last dequantized value.
  673. last_decoded_value = v;
  674. i16 dequantized_value = dequantize_value(v, j == 0, header.quantization_indices, header.segmentation, segment_id, block_index);
  675. static int constexpr Zigzag[] = { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 };
  676. out_coefficients[Zigzag[j]] = dequantized_value;
  677. }
  678. return subblock_has_nonzero_coefficients;
  679. }
  680. struct MacroblockCoefficients {
  681. Coefficients y_coeffs[16] {};
  682. Coefficients u_coeffs[4] {};
  683. Coefficients v_coeffs[4] {};
  684. };
  685. ErrorOr<MacroblockCoefficients> read_macroblock_coefficients(BooleanDecoder& decoder, FrameHeader const& header, CoefficientReadingContext& coefficient_reading_context, MacroblockMetadata const& metadata, int mb_x)
  686. {
  687. // Corresponds to `residual_data()` in https://datatracker.ietf.org/doc/html/rfc6386#section-19.3,
  688. // but also does the inverse walsh-hadamard transform if a Y2 block is present.
  689. MacroblockCoefficients coefficients;
  690. Coefficients y2_coeffs {};
  691. // "firstCoeff is 1 for luma blocks of macroblocks containing Y2 subblock; otherwise 0"
  692. // https://datatracker.ietf.org/doc/html/rfc6386#section-13
  693. // "For all intra- and inter-prediction modes apart from B_PRED (intra:
  694. // whose Y subblocks are independently predicted) and SPLITMV (inter),
  695. // each macroblock's residue record begins with the Y2 component of the
  696. // residue, coded using a WHT. B_PRED and SPLITMV coded macroblocks
  697. // omit this WHT and specify the 0th DCT coefficient in each of the 16 Y
  698. // subblocks."
  699. bool have_y2 = metadata.intra_y_mode != B_PRED;
  700. // "for Y2, because macroblocks above and to the left may or may not have
  701. // a Y2 block, the block above is determined by the most recent
  702. // macroblock in the same column that has a Y2 block, and the block to
  703. // the left is determined by the most recent macroblock in the same row
  704. // that has a Y2 block."
  705. // We only write to y2_above / y2_left when it's present, so we don't need to do any explicit work to get the right behavior.
  706. // "After the optional Y2 block, the residue record continues with 16
  707. // DCTs for the Y subblocks, followed by 4 DCTs for the U subblocks,
  708. // ending with 4 DCTs for the V subblocks. The subblocks occur in the
  709. // usual order."
  710. /* (1 Y2)?, 16 Y, 4 U, 4 V */
  711. for (int i = have_y2 ? 0 : 1; i < 25; ++i) {
  712. CoefficientBlockIndex block_index { i };
  713. bool subblock_has_nonzero_coefficients = false;
  714. if (!metadata.skip_coefficients) {
  715. i16* to_read;
  716. if (block_index.is_y2())
  717. to_read = y2_coeffs;
  718. else if (block_index.is_u())
  719. to_read = coefficients.u_coeffs[i - 17];
  720. else if (block_index.is_v())
  721. to_read = coefficients.v_coeffs[i - 21];
  722. else // Y
  723. to_read = coefficients.y_coeffs[i - 1];
  724. subblock_has_nonzero_coefficients = TRY(read_coefficent_block(decoder, to_read, block_index, coefficient_reading_context, mb_x, have_y2, metadata.segment_id, header));
  725. }
  726. coefficient_reading_context.update(block_index, mb_x, subblock_has_nonzero_coefficients);
  727. }
  728. // https://datatracker.ietf.org/doc/html/rfc6386#section-14.2 "Inverse Transforms"
  729. // "If the Y2 residue block exists (i.e., the macroblock luma mode is not
  730. // SPLITMV or B_PRED), it is inverted first (using the inverse WHT) and
  731. // the element of the result at row i, column j is used as the 0th
  732. // coefficient of the Y subblock at position (i, j), that is, the Y
  733. // subblock whose index is (i * 4) + j."
  734. if (have_y2) {
  735. Coefficients wht_output;
  736. vp8_short_inv_walsh4x4_c(y2_coeffs, wht_output);
  737. for (size_t i = 0; i < 16; ++i)
  738. coefficients.y_coeffs[i][0] = wht_output[i];
  739. }
  740. return coefficients;
  741. }
  742. template<int N>
  743. void predict_macroblock(Span<i16> prediction, IntraMacroblockMode mode, int mb_x, int mb_y, ReadonlySpan<i16> left, ReadonlySpan<i16> above, i16 truemotion_corner)
  744. {
  745. // https://datatracker.ietf.org/doc/html/rfc6386#section-12.2 "Chroma Prediction"
  746. // (Also used for the DC_PRED, H_PRED, V_PRED, TM_PRED for luma prediction.)
  747. if (mode == DC_PRED) {
  748. if (mb_x == 0 && mb_y == 0) {
  749. for (size_t i = 0; i < N * N; ++i)
  750. prediction[i] = 128;
  751. } else {
  752. int sum = 0, n = 0;
  753. if (mb_x > 0) {
  754. for (int i = 0; i < N; ++i)
  755. sum += left[i];
  756. n += N;
  757. }
  758. if (mb_y > 0) {
  759. for (int i = 0; i < N; ++i)
  760. sum += above[mb_x * N + i];
  761. n += N;
  762. }
  763. i16 average = (sum + n / 2) / n;
  764. for (size_t i = 0; i < N * N; ++i)
  765. prediction[i] = average;
  766. }
  767. } else if (mode == H_PRED) {
  768. for (int y = 0; y < N; ++y)
  769. for (int x = 0; x < N; ++x)
  770. prediction[y * N + x] = left[y];
  771. } else if (mode == V_PRED) {
  772. for (int y = 0; y < N; ++y)
  773. for (int x = 0; x < N; ++x)
  774. prediction[y * N + x] = above[mb_x * N + x];
  775. } else {
  776. VERIFY(mode == TM_PRED);
  777. for (int y = 0; y < N; ++y)
  778. for (int x = 0; x < N; ++x)
  779. prediction[y * N + x] = left[y] + above[mb_x * N + x] - truemotion_corner;
  780. }
  781. }
  782. void predict_y_subblock(Span<i16> y_prediction, IntraBlockMode mode, int x, int y, ReadonlySpan<i16> left, ReadonlySpan<i16> above, i16 corner)
  783. {
  784. // https://datatracker.ietf.org/doc/html/rfc6386#section-12.3 "Luma Prediction"
  785. // Roughly corresponds to "subblock_intra_predict()" in the spec.
  786. auto weighted_average = [](i16 x, i16 y, i16 z) { return (x + 2 * y + z + 2) / 4; };
  787. auto average = [](i16 x, i16 y) { return (x + y + 1) / 2; };
  788. auto at = [&y_prediction, y, x](int px, int py) -> i16& { return y_prediction[(4 * y + py) * 16 + 4 * x + px]; };
  789. if (mode == B_DC_PRED) {
  790. // XXX spec text says this is like DC_PRED but predict_dc_nxn() in the sample impl looks like it doesn't do the "oob isn't read" part. what's right?
  791. // DC16NoTopLeft_C vs DC4_C in libwebp dec.c / common_dec.h suggests the spec text is incomplete :/
  792. int sum = 0, n = 8;
  793. for (int i = 0; i < 4; ++i)
  794. sum += left[i] + above[i];
  795. i16 average = (sum + n / 2) / n;
  796. for (int py = 0; py < 4; ++py)
  797. for (int px = 0; px < 4; ++px)
  798. y_prediction[(4 * y + py) * 16 + 4 * x + px] = average;
  799. } else if (mode == B_TM_PRED) {
  800. for (int py = 0; py < 4; ++py)
  801. for (int px = 0; px < 4; ++px)
  802. y_prediction[(4 * y + py) * 16 + 4 * x + px] = clamp(left[py] + above[px] - corner, 0, 255);
  803. } else if (mode == B_VE_PRED) {
  804. for (int py = 0; py < 4; ++py)
  805. for (int px = 0; px < 4; ++px) {
  806. auto top_left = (px > 0 ? above[px - 1] : corner);
  807. y_prediction[(4 * y + py) * 16 + 4 * x + px] = weighted_average(top_left, above[px], above[px + 1]);
  808. }
  809. } else if (mode == B_HE_PRED) {
  810. for (int py = 0; py < 4; ++py)
  811. for (int px = 0; px < 4; ++px) {
  812. if (py == 0) {
  813. y_prediction[(4 * y + py) * 16 + 4 * x + px] = weighted_average(corner, left[py], left[py + 1]);
  814. } else if (py == 3) {
  815. /* Bottom row is exceptional because L[4] does not exist */
  816. y_prediction[(4 * y + py) * 16 + 4 * x + px] = weighted_average(left[2], left[3], left[3]);
  817. } else {
  818. y_prediction[(4 * y + py) * 16 + 4 * x + px] = weighted_average(left[py - 1], left[py], left[py + 1]);
  819. }
  820. }
  821. } else if (mode == B_LD_PRED) {
  822. // this is 45-deg prediction from above, going left-down (i.e. isochromes on -1/+1 diags)
  823. at(0, 0) = weighted_average(above[0], above[1], above[2]);
  824. at(0, 1) = at(1, 0) = weighted_average(above[1], above[2], above[3]);
  825. at(0, 2) = at(1, 1) = at(2, 0) = weighted_average(above[2], above[3], above[4]);
  826. at(0, 3) = at(1, 2) = at(2, 1) = at(3, 0) = weighted_average(above[3], above[4], above[5]);
  827. at(1, 3) = at(2, 2) = at(3, 1) = weighted_average(above[4], above[5], above[6]);
  828. at(2, 3) = at(3, 2) = weighted_average(above[5], above[6], above[7]);
  829. at(3, 3) = weighted_average(above[6], above[7], above[7]); // intentionally 6, 7, 7
  830. } else if (mode == B_RD_PRED) {
  831. // this is 45-deg prediction from above / left, going right-down (i.e. isochromes on +1/+1 diags)
  832. at(0, 3) = weighted_average(left[3], left[2], left[1]);
  833. at(0, 2) = at(1, 3) = weighted_average(left[2], left[1], left[0]);
  834. at(0, 1) = at(1, 2) = at(2, 3) = weighted_average(left[1], left[0], corner);
  835. at(0, 0) = at(1, 1) = at(2, 2) = at(3, 3) = weighted_average(left[0], corner, above[0]);
  836. at(1, 0) = at(2, 1) = at(3, 2) = weighted_average(corner, above[0], above[1]);
  837. at(2, 0) = at(3, 1) = weighted_average(above[0], above[1], above[2]);
  838. at(3, 0) = weighted_average(above[1], above[2], above[3]);
  839. } else if (mode == B_VR_PRED) {
  840. // this is 22.5-deg prediction
  841. at(0, 3) = weighted_average(left[2], left[1], left[0]);
  842. at(0, 2) = weighted_average(left[1], left[0], corner);
  843. at(1, 3) = at(0, 1) = weighted_average(left[0], corner, above[0]);
  844. at(1, 2) = at(0, 0) = average(corner, above[0]);
  845. at(2, 3) = at(1, 1) = weighted_average(corner, above[0], above[1]);
  846. at(2, 2) = at(1, 0) = average(above[0], above[1]);
  847. at(3, 3) = at(2, 1) = weighted_average(above[0], above[1], above[2]);
  848. at(3, 2) = at(2, 0) = average(above[1], above[2]);
  849. at(3, 1) = weighted_average(above[1], above[2], above[3]);
  850. at(3, 0) = average(above[2], above[3]);
  851. } else if (mode == B_VL_PRED) {
  852. // this is 22.5-deg prediction
  853. at(0, 0) = average(above[0], above[1]);
  854. at(0, 1) = weighted_average(above[0], above[1], above[2]);
  855. at(0, 2) = at(1, 0) = average(above[1], above[2]);
  856. at(1, 1) = at(0, 3) = weighted_average(above[1], above[2], above[3]);
  857. at(1, 2) = at(2, 0) = average(above[2], above[3]);
  858. at(1, 3) = at(2, 1) = weighted_average(above[2], above[3], above[4]);
  859. at(2, 2) = at(3, 0) = average(above[3], above[4]);
  860. at(2, 3) = at(3, 1) = weighted_average(above[3], above[4], above[5]);
  861. /* Last two values do not strictly follow the pattern. */
  862. at(3, 2) = weighted_average(above[4], above[5], above[6]);
  863. at(3, 3) = weighted_average(above[5], above[6], above[7]);
  864. } else if (mode == B_HD_PRED) {
  865. // this is 22.5-deg prediction
  866. at(0, 3) = average(left[3], left[2]);
  867. at(1, 3) = weighted_average(left[3], left[2], left[1]);
  868. at(0, 2) = at(2, 3) = average(left[2], left[1]);
  869. at(1, 2) = at(3, 3) = weighted_average(left[2], left[1], left[0]);
  870. at(2, 2) = at(0, 1) = average(left[1], left[0]);
  871. at(3, 2) = at(1, 1) = weighted_average(left[1], left[0], corner);
  872. at(2, 1) = at(0, 0) = average(left[0], corner);
  873. at(3, 1) = at(1, 0) = weighted_average(left[0], corner, above[0]);
  874. at(2, 0) = weighted_average(corner, above[0], above[1]);
  875. at(3, 0) = weighted_average(above[0], above[1], above[2]);
  876. } else {
  877. VERIFY(mode == B_HU_PRED);
  878. // this is 22.5-deg prediction
  879. at(0, 0) = average(left[0], left[1]);
  880. at(1, 0) = weighted_average(left[0], left[1], left[2]);
  881. at(2, 0) = at(0, 1) = average(left[1], left[2]);
  882. at(3, 0) = at(1, 1) = weighted_average(left[1], left[2], left[3]);
  883. at(2, 1) = at(0, 2) = average(left[2], left[3]);
  884. at(3, 1) = at(1, 2) = weighted_average(left[2], left[3], left[3]); // Intentionally 2, 3, 3
  885. /* Not possible to follow pattern for much of the bottom
  886. row because no (nearby) already-constructed pixels lie
  887. on the diagonals in question. */
  888. at(2, 2) = at(3, 2) = at(0, 3) = at(1, 3) = at(2, 3) = at(3, 3) = left[3];
  889. }
  890. }
  891. template<int N>
  892. void add_idct_to_prediction(Span<i16> prediction, Coefficients coefficients, int x, int y)
  893. {
  894. Coefficients idct_output;
  895. short_idct4x4llm_c(coefficients, idct_output, 4 * sizeof(i16));
  896. // https://datatracker.ietf.org/doc/html/rfc6386#section-14.5 "Summation of Predictor and Residue"
  897. for (int py = 0; py < 4; ++py) { // Loop over 4x4 pixels in subblock
  898. for (int px = 0; px < 4; ++px) {
  899. // sum with prediction
  900. i16& p = prediction[(4 * y + py) * N + (4 * x + px)];
  901. p += idct_output[py * 4 + px];
  902. // p = clamp(p, 0, 255);
  903. }
  904. }
  905. }
  906. template<int N>
  907. void process_macroblock(Span<i16> output, IntraMacroblockMode mode, int mb_x, int mb_y, ReadonlySpan<i16> left, ReadonlySpan<i16> above, i16 truemotion_corner, Coefficients coefficients_array[])
  908. {
  909. predict_macroblock<4 * N>(output, mode, mb_x, mb_y, left, above, truemotion_corner);
  910. // https://datatracker.ietf.org/doc/html/rfc6386#section-14.4 "Implementation of the DCT Inversion"
  911. // Loop over the 4x4 subblocks
  912. for (int y = 0, i = 0; y < N; ++y)
  913. for (int x = 0; x < N; ++x, ++i)
  914. add_idct_to_prediction<4 * N>(output, coefficients_array[i], x, y);
  915. }
  916. void process_subblocks(Span<i16> y_output, MacroblockMetadata const& metadata, int mb_x, int mb_y, ReadonlySpan<i16> predicted_y_left, ReadonlySpan<i16> predicted_y_above, i16 y_truemotion_corner, Coefficients coefficients_array[], int macroblock_width)
  917. {
  918. // Loop over the 4x4 subblocks
  919. for (int y = 0, i = 0; y < 4; ++y) {
  920. for (int x = 0; x < 4; ++x, ++i) {
  921. i16 corner = y_truemotion_corner;
  922. if (x > 0 && y == 0)
  923. corner = predicted_y_above[mb_x * 16 + 4 * x - 1];
  924. else if (x > 0 && y > 0)
  925. corner = y_output[(4 * y - 1) * 16 + 4 * x - 1];
  926. else if (x == 0 && y > 0)
  927. corner = predicted_y_left[4 * y - 1];
  928. i16 left[4], above[8];
  929. for (int i = 0; i < 4; ++i) {
  930. if (x == 0)
  931. left[i] = predicted_y_left[4 * y + i];
  932. else
  933. left[i] = y_output[(4 * y + i) * 16 + 4 * x - 1];
  934. }
  935. // Subblock prediction can read 8 pixels above the block.
  936. // For rightmost subblocks, the right 4 pixels there aren't initialized yet, so those get the 4 pixels to the right above the macroblock.
  937. // For the rightmost macroblock, there's no macroblock to its right, so there they get the rightmost pixel above.
  938. // But in the 0th row, there's no pixel above, so there they become 127.
  939. for (int i = 0; i < 8; ++i) {
  940. if (x == 3 && i >= 4) { // rightmost subblock, 4 right pixels?
  941. if (mb_x == macroblock_width - 1) { // rightmost macroblock
  942. if (mb_y == 0) { // topmost macroblock row
  943. above[i] = 127;
  944. } else {
  945. above[i] = predicted_y_above[mb_x * 16 + 4 * x + 3];
  946. }
  947. } else {
  948. above[i] = predicted_y_above[mb_x * 16 + 4 * x + i];
  949. }
  950. } else if (y == 0) {
  951. above[i] = predicted_y_above[mb_x * 16 + 4 * x + i];
  952. } else {
  953. above[i] = y_output[(4 * y - 1) * 16 + 4 * x + i];
  954. }
  955. }
  956. predict_y_subblock(y_output, metadata.intra_b_modes[y * 4 + x], x, y, left, above, corner);
  957. // Have to do IDCT summation here, since its results affect prediction of next subblock already.
  958. add_idct_to_prediction<16>(y_output, coefficients_array[4 * y + x], x, y);
  959. }
  960. }
  961. }
  962. void convert_yuv_to_rgb(Bitmap& bitmap, int mb_x, int mb_y, ReadonlySpan<i16> y_data, ReadonlySpan<i16> u_data, ReadonlySpan<i16> v_data)
  963. {
  964. // Convert YUV to RGB.
  965. for (int y = 0; y < 16; ++y) {
  966. for (int x = 0; x < 16; ++x) {
  967. // "is then saturated to 8-bit unsigned range (using, say, the
  968. // clamp255 function defined above) before being stored as an 8-bit
  969. // unsigned pixel value."
  970. u8 Y = clamp(y_data[y * 16 + x], 0, 255);
  971. // FIXME: Could do nicer upsampling than just nearest neighbor
  972. u8 U = clamp(u_data[(y / 2) * 8 + x / 2], 0, 255);
  973. u8 V = clamp(v_data[(y / 2) * 8 + x / 2], 0, 255);
  974. // XXX: These numbers are from the fixed-point values in libwebp's yuv.h. There's probably a better reference somewhere.
  975. int r = 1.1655 * Y + 1.596 * V - 222.4;
  976. int g = 1.1655 * Y - 0.3917 * U - 0.8129 * V + 136.0625;
  977. int b = 1.1655 * Y + 2.0172 * U - 276.33;
  978. bitmap.scanline(mb_y * 16 + y)[mb_x * 16 + x] = Color(clamp(r, 0, 255), clamp(g, 0, 255), clamp(b, 0, 255)).value();
  979. }
  980. }
  981. }
  982. ErrorOr<void> decode_VP8_image_data(Gfx::Bitmap& bitmap, FrameHeader const& header, ReadonlyBytes data, int macroblock_width, int macroblock_height, Vector<MacroblockMetadata> const& macroblock_metadata)
  983. {
  984. FixedMemoryStream memory_stream { data };
  985. BigEndianInputBitStream bit_stream { MaybeOwned<Stream>(memory_stream) };
  986. auto decoder = TRY(BooleanDecoder::initialize(MaybeOwned { bit_stream }, data.size() * 8));
  987. CoefficientReadingContext coefficient_reading_context;
  988. TRY(coefficient_reading_context.initialize(macroblock_width));
  989. Vector<i16> predicted_y_above;
  990. TRY(predicted_y_above.try_resize(macroblock_width * 16));
  991. for (size_t i = 0; i < predicted_y_above.size(); ++i)
  992. predicted_y_above[i] = 127;
  993. Vector<i16> predicted_u_above;
  994. TRY(predicted_u_above.try_resize(macroblock_width * 8));
  995. for (size_t i = 0; i < predicted_u_above.size(); ++i)
  996. predicted_u_above[i] = 127;
  997. Vector<i16> predicted_v_above;
  998. TRY(predicted_v_above.try_resize(macroblock_width * 8));
  999. for (size_t i = 0; i < predicted_v_above.size(); ++i)
  1000. predicted_v_above[i] = 127;
  1001. for (int mb_y = 0, macroblock_index = 0; mb_y < macroblock_height; ++mb_y) {
  1002. coefficient_reading_context.start_new_row();
  1003. i16 predicted_y_left[16] { 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129 };
  1004. i16 predicted_u_left[8] { 129, 129, 129, 129, 129, 129, 129, 129 };
  1005. i16 predicted_v_left[8] { 129, 129, 129, 129, 129, 129, 129, 129 };
  1006. // The spec doesn't say if this should be 127, 129, or something else.
  1007. // But ReconstructRow in frame_dec.c in libwebp suggests 129.
  1008. i16 y_truemotion_corner = 129;
  1009. i16 u_truemotion_corner = 129;
  1010. i16 v_truemotion_corner = 129;
  1011. for (int mb_x = 0; mb_x < macroblock_width; ++mb_x, ++macroblock_index) {
  1012. auto const& metadata = macroblock_metadata[macroblock_index];
  1013. auto coefficients = TRY(read_macroblock_coefficients(decoder, header, coefficient_reading_context, metadata, mb_x));
  1014. i16 y_data[16 * 16] {};
  1015. if (metadata.intra_y_mode == B_PRED)
  1016. process_subblocks(y_data, metadata, mb_x, mb_y, predicted_y_left, predicted_y_above, y_truemotion_corner, coefficients.y_coeffs, macroblock_width);
  1017. else
  1018. process_macroblock<4>(y_data, metadata.intra_y_mode, mb_x, mb_y, predicted_y_left, predicted_y_above, y_truemotion_corner, coefficients.y_coeffs);
  1019. i16 u_data[8 * 8] {};
  1020. process_macroblock<2>(u_data, metadata.uv_mode, mb_x, mb_y, predicted_u_left, predicted_u_above, u_truemotion_corner, coefficients.u_coeffs);
  1021. i16 v_data[8 * 8] {};
  1022. process_macroblock<2>(v_data, metadata.uv_mode, mb_x, mb_y, predicted_v_left, predicted_v_above, v_truemotion_corner, coefficients.v_coeffs);
  1023. // FIXME: insert loop filtering here
  1024. convert_yuv_to_rgb(bitmap, mb_x, mb_y, y_data, u_data, v_data);
  1025. y_truemotion_corner = predicted_y_above[mb_x * 16 + 15];
  1026. for (int i = 0; i < 16; ++i)
  1027. predicted_y_left[i] = y_data[15 + i * 16];
  1028. for (int i = 0; i < 16; ++i)
  1029. predicted_y_above[mb_x * 16 + i] = y_data[15 * 16 + i];
  1030. u_truemotion_corner = predicted_u_above[mb_x * 8 + 7];
  1031. for (int i = 0; i < 8; ++i)
  1032. predicted_u_left[i] = u_data[7 + i * 8];
  1033. for (int i = 0; i < 8; ++i)
  1034. predicted_u_above[mb_x * 8 + i] = u_data[7 * 8 + i];
  1035. v_truemotion_corner = predicted_v_above[mb_x * 8 + 7];
  1036. for (int i = 0; i < 8; ++i)
  1037. predicted_v_left[i] = v_data[7 + i * 8];
  1038. for (int i = 0; i < 8; ++i)
  1039. predicted_v_above[mb_x * 8 + i] = v_data[7 * 8 + i];
  1040. }
  1041. }
  1042. return {};
  1043. }
  1044. }
  1045. ErrorOr<NonnullRefPtr<Bitmap>> decode_webp_chunk_VP8_contents(VP8Header const& vp8_header, bool include_alpha_channel)
  1046. {
  1047. // The first partition stores header, per-segment state, and macroblock metadata.
  1048. FixedMemoryStream memory_stream { vp8_header.first_partition };
  1049. BigEndianInputBitStream bit_stream { MaybeOwned<Stream>(memory_stream) };
  1050. auto decoder = TRY(BooleanDecoder::initialize(MaybeOwned { bit_stream }, vp8_header.first_partition.size() * 8));
  1051. auto header = TRY(decode_VP8_frame_header(decoder));
  1052. // https://datatracker.ietf.org/doc/html/rfc6386#section-2 "Format Overview"
  1053. // "Internally, VP8 decomposes each output frame into an array of
  1054. // macroblocks. A macroblock is a square array of pixels whose Y
  1055. // dimensions are 16x16 and whose U and V dimensions are 8x8."
  1056. int macroblock_width = ceil_div(vp8_header.width, 16);
  1057. int macroblock_height = ceil_div(vp8_header.height, 16);
  1058. auto macroblock_metadata = TRY(decode_VP8_macroblock_metadata(decoder, header, macroblock_width, macroblock_height));
  1059. // Done with the first partition!
  1060. if (header.number_of_dct_partitions > 1)
  1061. return Error::from_string_literal("WebPImageDecoderPlugin: decoding lossy webps with more than one dct partition not yet implemented");
  1062. auto bitmap_format = include_alpha_channel ? BitmapFormat::BGRA8888 : BitmapFormat::BGRx8888;
  1063. auto bitmap = TRY(Bitmap::create(bitmap_format, { macroblock_width * 16, macroblock_height * 16 }));
  1064. TRY(decode_VP8_image_data(*bitmap, header, vp8_header.second_partition, macroblock_width, macroblock_height, macroblock_metadata));
  1065. auto width = static_cast<int>(vp8_header.width);
  1066. auto height = static_cast<int>(vp8_header.height);
  1067. if (bitmap->physical_size() == IntSize { width, height })
  1068. return bitmap;
  1069. return bitmap->cropped({ 0, 0, width, height });
  1070. }
  1071. }