Decoder.cpp 104 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870
  1. /*
  2. * Copyright (c) 2021, Hunter Salyer <thefalsehonesty@gmail.com>
  3. * Copyright (c) 2022, Gregory Bertilson <zaggy1024@gmail.com>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/IntegralMath.h>
  8. #include <AK/TypedTransfer.h>
  9. #include <LibGfx/Size.h>
  10. #include <LibVideo/Color/CodingIndependentCodePoints.h>
  11. #include "Context.h"
  12. #include "Decoder.h"
  13. #include "Utilities.h"
  14. #if defined(AK_COMPILER_GCC)
  15. # pragma GCC optimize("O3")
  16. #endif
  17. namespace Video::VP9 {
// Constructs the decoder and creates its Parser, handing the parser a reference back to this
// decoder instance.
Decoder::Decoder()
    : m_parser(make<Parser>(*this))
{
}
  22. DecoderErrorOr<void> Decoder::receive_sample(ReadonlyBytes chunk_data)
  23. {
  24. auto superframe_sizes = m_parser->parse_superframe_sizes(chunk_data);
  25. if (superframe_sizes.is_empty()) {
  26. return decode_frame(chunk_data);
  27. }
  28. size_t offset = 0;
  29. for (auto superframe_size : superframe_sizes) {
  30. auto checked_size = Checked<size_t>(superframe_size);
  31. checked_size += offset;
  32. if (checked_size.has_overflow() || checked_size.value() > chunk_data.size())
  33. return DecoderError::with_description(DecoderErrorCategory::Corrupted, "Superframe size invalid"sv);
  34. auto frame_data = chunk_data.slice(offset, superframe_size);
  35. TRY(decode_frame(frame_data));
  36. offset = checked_size.value();
  37. }
  38. return {};
  39. }
  40. inline size_t index_from_row_and_column(u32 row, u32 column, u32 stride)
  41. {
  42. return row * stride + column;
  43. }
  44. DecoderErrorOr<void> Decoder::decode_frame(ReadonlyBytes frame_data)
  45. {
  46. // 1. The syntax elements for the coded frame are extracted as specified in sections 6 and 7. The syntax
  47. // tables include function calls indicating when the block decode processes should be triggered.
  48. auto frame_context = TRY(m_parser->parse_frame(frame_data));
  49. // 2. If loop_filter_level is not equal to 0, the loop filter process as specified in section 8.8 is invoked once the
  50. // coded frame has been decoded.
  51. // FIXME: Implement loop filtering.
  52. // 3. If all of the following conditions are true, PrevSegmentIds[ row ][ col ] is set equal to
  53. // SegmentIds[ row ][ col ] for row = 0..MiRows-1, for col = 0..MiCols-1:
  54. // − show_existing_frame is equal to 0,
  55. // − segmentation_enabled is equal to 1,
  56. // − segmentation_update_map is equal to 1.
  57. // This is handled by update_reference_frames.
  58. // 4. The output process as specified in section 8.9 is invoked.
  59. if (frame_context.shows_a_frame())
  60. TRY(create_video_frame(frame_context));
  61. // 5. The reference frame update process as specified in section 8.10 is invoked.
  62. TRY(update_reference_frames(frame_context));
  63. return {};
  64. }
  65. inline CodingIndependentCodePoints get_cicp_color_space(FrameContext const& frame_context)
  66. {
  67. ColorPrimaries color_primaries;
  68. TransferCharacteristics transfer_characteristics;
  69. MatrixCoefficients matrix_coefficients;
  70. switch (frame_context.color_config.color_space) {
  71. case ColorSpace::Unknown:
  72. color_primaries = ColorPrimaries::Unspecified;
  73. transfer_characteristics = TransferCharacteristics::Unspecified;
  74. matrix_coefficients = MatrixCoefficients::Unspecified;
  75. break;
  76. case ColorSpace::Bt601:
  77. color_primaries = ColorPrimaries::BT601;
  78. transfer_characteristics = TransferCharacteristics::BT601;
  79. matrix_coefficients = MatrixCoefficients::BT601;
  80. break;
  81. case ColorSpace::Bt709:
  82. color_primaries = ColorPrimaries::BT709;
  83. transfer_characteristics = TransferCharacteristics::BT709;
  84. matrix_coefficients = MatrixCoefficients::BT709;
  85. break;
  86. case ColorSpace::Smpte170:
  87. // https://www.kernel.org/doc/html/v4.9/media/uapi/v4l/pixfmt-007.html#colorspace-smpte-170m-v4l2-colorspace-smpte170m
  88. color_primaries = ColorPrimaries::BT601;
  89. transfer_characteristics = TransferCharacteristics::BT709;
  90. matrix_coefficients = MatrixCoefficients::BT601;
  91. break;
  92. case ColorSpace::Smpte240:
  93. color_primaries = ColorPrimaries::SMPTE240;
  94. transfer_characteristics = TransferCharacteristics::SMPTE240;
  95. matrix_coefficients = MatrixCoefficients::SMPTE240;
  96. break;
  97. case ColorSpace::Bt2020:
  98. color_primaries = ColorPrimaries::BT2020;
  99. // Bit depth doesn't actually matter to our transfer functions since we
  100. // convert in floats of range 0-1 (for now?), but just for correctness set
  101. // the TC to match the bit depth here.
  102. if (frame_context.color_config.bit_depth == 12)
  103. transfer_characteristics = TransferCharacteristics::BT2020BitDepth12;
  104. else if (frame_context.color_config.bit_depth == 10)
  105. transfer_characteristics = TransferCharacteristics::BT2020BitDepth10;
  106. else
  107. transfer_characteristics = TransferCharacteristics::BT709;
  108. matrix_coefficients = MatrixCoefficients::BT2020NonConstantLuminance;
  109. break;
  110. case ColorSpace::RGB:
  111. color_primaries = ColorPrimaries::BT709;
  112. transfer_characteristics = TransferCharacteristics::Linear;
  113. matrix_coefficients = MatrixCoefficients::Identity;
  114. break;
  115. case ColorSpace::Reserved:
  116. VERIFY_NOT_REACHED();
  117. break;
  118. }
  119. return { color_primaries, transfer_characteristics, matrix_coefficients, frame_context.color_config.color_range };
  120. }
  121. DecoderErrorOr<void> Decoder::create_video_frame(FrameContext const& frame_context)
  122. {
  123. // (8.9) Output process
  124. // FIXME: If show_existing_frame is set, output from FrameStore[frame_to_show_map_index] here instead.
  125. // FIXME: The math isn't entirely accurate to spec. output_uv_size is probably incorrect for certain
  126. // sizes, as the spec seems to prefer that the halved sizes be ceiled.
  127. u32 decoded_y_width = frame_context.columns() * 8;
  128. Gfx::Size<u32> output_y_size = frame_context.size();
  129. auto decoded_uv_width = decoded_y_width >> frame_context.color_config.subsampling_x;
  130. Gfx::Size<u32> output_uv_size = {
  131. output_y_size.width() >> frame_context.color_config.subsampling_x,
  132. output_y_size.height() >> frame_context.color_config.subsampling_y,
  133. };
  134. Array<FixedArray<u16>, 3> output_buffers = {
  135. DECODER_TRY_ALLOC(FixedArray<u16>::create(output_y_size.width() * output_y_size.height())),
  136. DECODER_TRY_ALLOC(FixedArray<u16>::create(output_uv_size.width() * output_uv_size.height())),
  137. DECODER_TRY_ALLOC(FixedArray<u16>::create(output_uv_size.width() * output_uv_size.height())),
  138. };
  139. for (u8 plane = 0; plane < 3; plane++) {
  140. auto& buffer = output_buffers[plane];
  141. auto decoded_width = plane == 0 ? decoded_y_width : decoded_uv_width;
  142. auto output_size = plane == 0 ? output_y_size : output_uv_size;
  143. auto const& decoded_buffer = get_output_buffer(plane);
  144. for (u32 row = 0; row < output_size.height(); row++) {
  145. memcpy(
  146. buffer.data() + row * output_size.width(),
  147. decoded_buffer.data() + row * decoded_width,
  148. output_size.width() * sizeof(*buffer.data()));
  149. }
  150. }
  151. auto frame = DECODER_TRY_ALLOC(adopt_nonnull_own_or_enomem(new (nothrow) SubsampledYUVFrame(
  152. { output_y_size.width(), output_y_size.height() },
  153. frame_context.color_config.bit_depth, get_cicp_color_space(frame_context),
  154. frame_context.color_config.subsampling_x, frame_context.color_config.subsampling_y,
  155. output_buffers[0], output_buffers[1], output_buffers[2])));
  156. m_video_frame_queue.enqueue(move(frame));
  157. return {};
  158. }
  159. inline size_t buffer_size(size_t width, size_t height)
  160. {
  161. return width * height;
  162. }
  163. inline size_t buffer_size(Gfx::Size<size_t> size)
  164. {
  165. return buffer_size(size.width(), size.height());
  166. }
  167. DecoderErrorOr<void> Decoder::allocate_buffers(FrameContext const& frame_context)
  168. {
  169. for (size_t plane = 0; plane < 3; plane++) {
  170. auto size = m_parser->get_decoded_size_for_plane(frame_context, plane);
  171. auto& output_buffer = get_output_buffer(plane);
  172. output_buffer.clear_with_capacity();
  173. DECODER_TRY_ALLOC(output_buffer.try_resize_and_keep_capacity(buffer_size(size)));
  174. }
  175. return {};
  176. }
  177. Vector<u16>& Decoder::get_output_buffer(u8 plane)
  178. {
  179. return m_output_buffers[plane];
  180. }
  181. DecoderErrorOr<NonnullOwnPtr<VideoFrame>> Decoder::get_decoded_frame()
  182. {
  183. if (m_video_frame_queue.is_empty())
  184. return DecoderError::format(DecoderErrorCategory::NeedsMoreInput, "No video frame in queue.");
  185. return m_video_frame_queue.dequeue();
  186. }
  187. u8 Decoder::merge_prob(u8 pre_prob, u32 count_0, u32 count_1, u8 count_sat, u8 max_update_factor)
  188. {
  189. auto total_decode_count = count_0 + count_1;
  190. u8 prob = 128;
  191. if (total_decode_count != 0) {
  192. prob = static_cast<u8>(clip_3(1u, 255u, (count_0 * 256 + (total_decode_count >> 1)) / total_decode_count));
  193. }
  194. auto count = min(total_decode_count, count_sat);
  195. auto factor = (max_update_factor * count) / count_sat;
  196. return rounded_right_shift(pre_prob * (256 - factor) + (prob * factor), 8);
  197. }
  198. u32 Decoder::merge_probs(int const* tree, int index, u8* probs, u32* counts, u8 count_sat, u8 max_update_factor)
  199. {
  200. auto s = tree[index];
  201. auto left_count = (s <= 0) ? counts[-s] : merge_probs(tree, s, probs, counts, count_sat, max_update_factor);
  202. auto r = tree[index + 1];
  203. auto right_count = (r <= 0) ? counts[-r] : merge_probs(tree, r, probs, counts, count_sat, max_update_factor);
  204. probs[index >> 1] = merge_prob(probs[index >> 1], left_count, right_count, count_sat, max_update_factor);
  205. return left_count + right_count;
  206. }
  207. DecoderErrorOr<void> Decoder::adapt_coef_probs(bool is_inter_predicted_frame)
  208. {
  209. u8 update_factor;
  210. if (!is_inter_predicted_frame || m_parser->m_previous_frame_type != FrameType::KeyFrame)
  211. update_factor = 112;
  212. else
  213. update_factor = 128;
  214. for (size_t t = 0; t < 4; t++) {
  215. for (size_t i = 0; i < 2; i++) {
  216. for (size_t j = 0; j < 2; j++) {
  217. for (size_t k = 0; k < 6; k++) {
  218. size_t max_l = (k == 0) ? 3 : 6;
  219. for (size_t l = 0; l < max_l; l++) {
  220. auto& coef_probs = m_parser->m_probability_tables->coef_probs()[t][i][j][k][l];
  221. merge_probs(small_token_tree, 2, coef_probs,
  222. m_parser->m_syntax_element_counter->m_counts_token[t][i][j][k][l],
  223. 24, update_factor);
  224. merge_probs(binary_tree, 0, coef_probs,
  225. m_parser->m_syntax_element_counter->m_counts_more_coefs[t][i][j][k][l],
  226. 24, update_factor);
  227. }
  228. }
  229. }
  230. }
  231. }
  232. return {};
  233. }
// ADAPT_PROB_TABLE(name, size): for each i in [0, size), stores
// adapt_prob(table[i], counter.m_counts_<name>[i]) into table[i], where table is the result of
// probs.<name>_prob(). Variables named `probs` and `counter` must be in scope at the expansion
// site.
// NOTE(review): `auto table` deduces by value, so the store only reaches the probability tables
// if <name>_prob() yields a pointer (or an array reference that decays to one) — confirm.
#define ADAPT_PROB_TABLE(name, size)                                     \
    do {                                                                 \
        for (size_t i = 0; i < (size); i++) {                            \
            auto table = probs.name##_prob();                            \
            table[i] = adapt_prob(table[i], counter.m_counts_##name[i]); \
        }                                                                \
    } while (0)
// ADAPT_TREE(tree_name, prob_name, count_name, size): for each i in [0, size), calls
// adapt_probs(<tree_name>_tree, probs.<prob_name>_probs()[i], counter.m_counts_<count_name>[i]).
// Variables named `probs` and `counter` must be in scope at the expansion site.
#define ADAPT_TREE(tree_name, prob_name, count_name, size)                                                 \
    do {                                                                                                   \
        for (size_t i = 0; i < (size); i++) {                                                              \
            adapt_probs(tree_name##_tree, probs.prob_name##_probs()[i], counter.m_counts_##count_name[i]); \
        }                                                                                                  \
    } while (0)
  247. DecoderErrorOr<void> Decoder::adapt_non_coef_probs(FrameContext const& frame_context)
  248. {
  249. auto& probs = *m_parser->m_probability_tables;
  250. auto& counter = *m_parser->m_syntax_element_counter;
  251. ADAPT_PROB_TABLE(is_inter, IS_INTER_CONTEXTS);
  252. ADAPT_PROB_TABLE(comp_mode, COMP_MODE_CONTEXTS);
  253. ADAPT_PROB_TABLE(comp_ref, REF_CONTEXTS);
  254. for (size_t i = 0; i < REF_CONTEXTS; i++) {
  255. for (size_t j = 0; j < 2; j++)
  256. probs.single_ref_prob()[i][j] = adapt_prob(probs.single_ref_prob()[i][j], counter.m_counts_single_ref[i][j]);
  257. }
  258. ADAPT_TREE(inter_mode, inter_mode, inter_mode, INTER_MODE_CONTEXTS);
  259. ADAPT_TREE(intra_mode, y_mode, intra_mode, BLOCK_SIZE_GROUPS);
  260. ADAPT_TREE(intra_mode, uv_mode, uv_mode, INTRA_MODES);
  261. ADAPT_TREE(partition, partition, partition, PARTITION_CONTEXTS);
  262. ADAPT_PROB_TABLE(skip, SKIP_CONTEXTS);
  263. if (frame_context.interpolation_filter == Switchable) {
  264. ADAPT_TREE(interp_filter, interp_filter, interp_filter, INTERP_FILTER_CONTEXTS);
  265. }
  266. if (frame_context.transform_mode == TransformMode::Select) {
  267. for (size_t i = 0; i < TX_SIZE_CONTEXTS; i++) {
  268. auto& tx_probs = probs.tx_probs();
  269. auto& tx_counts = counter.m_counts_tx_size;
  270. adapt_probs(tx_size_8_tree, tx_probs[Transform_8x8][i], tx_counts[Transform_8x8][i]);
  271. adapt_probs(tx_size_16_tree, tx_probs[Transform_16x16][i], tx_counts[Transform_16x16][i]);
  272. adapt_probs(tx_size_32_tree, tx_probs[Transform_32x32][i], tx_counts[Transform_32x32][i]);
  273. }
  274. }
  275. adapt_probs(mv_joint_tree, probs.mv_joint_probs(), counter.m_counts_mv_joint);
  276. for (size_t i = 0; i < 2; i++) {
  277. probs.mv_sign_prob()[i] = adapt_prob(probs.mv_sign_prob()[i], counter.m_counts_mv_sign[i]);
  278. adapt_probs(mv_class_tree, probs.mv_class_probs()[i], counter.m_counts_mv_class[i]);
  279. probs.mv_class0_bit_prob()[i] = adapt_prob(probs.mv_class0_bit_prob()[i], counter.m_counts_mv_class0_bit[i]);
  280. for (size_t j = 0; j < MV_OFFSET_BITS; j++)
  281. probs.mv_bits_prob()[i][j] = adapt_prob(probs.mv_bits_prob()[i][j], counter.m_counts_mv_bits[i][j]);
  282. for (size_t j = 0; j < CLASS0_SIZE; j++)
  283. adapt_probs(mv_fr_tree, probs.mv_class0_fr_probs()[i][j], counter.m_counts_mv_class0_fr[i][j]);
  284. adapt_probs(mv_fr_tree, probs.mv_fr_probs()[i], counter.m_counts_mv_fr[i]);
  285. if (frame_context.high_precision_motion_vectors_allowed) {
  286. probs.mv_class0_hp_prob()[i] = adapt_prob(probs.mv_class0_hp_prob()[i], counter.m_counts_mv_class0_hp[i]);
  287. probs.mv_hp_prob()[i] = adapt_prob(probs.mv_hp_prob()[i], counter.m_counts_mv_hp[i]);
  288. }
  289. }
  290. return {};
  291. }
  292. void Decoder::adapt_probs(int const* tree, u8* probs, u32* counts)
  293. {
  294. merge_probs(tree, 0, probs, counts, COUNT_SAT, MAX_UPDATE_FACTOR);
  295. }
  296. u8 Decoder::adapt_prob(u8 prob, u32 counts[2])
  297. {
  298. return merge_prob(prob, counts[0], counts[1], COUNT_SAT, MAX_UPDATE_FACTOR);
  299. }
  300. DecoderErrorOr<void> Decoder::predict_intra(u8 plane, BlockContext const& block_context, u32 x, u32 y, bool have_left, bool have_above, bool not_on_right, TransformSize tx_size, u32 block_index)
  301. {
  302. auto& frame_buffer = get_output_buffer(plane);
  303. // 8.5.1 Intra prediction process
  304. // The intra prediction process is invoked for intra coded blocks to predict a part of the block corresponding to a
  305. // transform block. When the transform size is smaller than the block size, this process can be invoked multiple
  306. // times within a single block for the same plane, and the invocations are in raster order within the block.
  307. // The variable mode is specified by:
  308. // 1. If plane is greater than 0, mode is set equal to uv_mode.
  309. // 2. Otherwise, if MiSize is greater than or equal to BLOCK_8X8, mode is set equal to y_mode.
  310. // 3. Otherwise, mode is set equal to sub_modes[ blockIdx ].
  311. PredictionMode mode;
  312. if (plane > 0)
  313. mode = block_context.uv_prediction_mode;
  314. else if (block_context.size >= Block_8x8)
  315. mode = block_context.y_prediction_mode();
  316. else
  317. mode = block_context.sub_block_prediction_modes[block_index];
  318. // The variable log2Size specifying the base 2 logarithm of the width of the transform block is set equal to txSz + 2.
  319. u8 log2_of_block_size = tx_size + 2;
  320. // The variable size is set equal to 1 << log2Size.
  321. u8 block_size = 1 << log2_of_block_size;
  322. // The variable maxX is set equal to (MiCols * 8) - 1.
  323. // The variable maxY is set equal to (MiRows * 8) - 1.
  324. // If plane is greater than 0, then:
  325. // − maxX is set equal to ((MiCols * 8) >> subsampling_x) - 1.
  326. // − maxY is set equal to ((MiRows * 8) >> subsampling_y) - 1.
  327. auto subsampling_x = plane > 0 ? block_context.frame_context.color_config.subsampling_x : false;
  328. auto subsampling_y = plane > 0 ? block_context.frame_context.color_config.subsampling_y : false;
  329. auto max_x = ((block_context.frame_context.columns() * 8u) >> subsampling_x) - 1u;
  330. auto max_y = ((block_context.frame_context.rows() * 8u) >> subsampling_y) - 1u;
  331. auto const frame_buffer_at = [&](u32 row, u32 column) -> u16& {
  332. const auto frame_stride = max_x + 1u;
  333. return frame_buffer[index_from_row_and_column(row, column, frame_stride)];
  334. };
  335. // The array aboveRow[ i ] for i = 0..size-1 is specified by:
  336. // ..
  337. // The array aboveRow[ i ] for i = size..2*size-1 is specified by:
  338. // ..
  339. // The array aboveRow[ i ] for i = -1 is specified by:
  340. // ..
  341. // NOTE: above_row is an array ranging from 0 to (2*block_size).
  342. // There are three sections to the array:
  343. // - [0]
  344. // - [1 .. block_size]
  345. // - [block_size + 1 .. block_size * 2]
  346. // The array indices must be offset by 1 to accommodate index -1.
  347. Array<Intermediate, maximum_block_dimensions * 2 + 1> above_row;
  348. auto above_row_at = [&](i32 index) -> Intermediate& {
  349. return above_row[index + 1];
  350. };
  351. // NOTE: This value is pre-calculated since it is reused in spec below.
  352. // Use this to replace spec text "(1<<(BitDepth-1))".
  353. Intermediate half_sample_value = (1 << (block_context.frame_context.color_config.bit_depth - 1));
  354. // The array aboveRow[ i ] for i = 0..size-1 is specified by:
  355. if (!have_above) {
  356. // 1. If haveAbove is equal to 0, aboveRow[ i ] is set equal to (1<<(BitDepth-1)) - 1.
  357. // FIXME: Use memset?
  358. for (auto i = 0u; i < block_size; i++)
  359. above_row_at(i) = half_sample_value - 1;
  360. } else {
  361. // 2. Otherwise, aboveRow[ i ] is set equal to CurrFrame[ plane ][ y-1 ][ Min(maxX, x+i) ].
  362. for (auto i = 0u; i < block_size; i++)
  363. above_row_at(i) = frame_buffer_at(y - 1, min(max_x, x + i));
  364. }
  365. // The array aboveRow[ i ] for i = size..2*size-1 is specified by:
  366. if (have_above && not_on_right && tx_size == Transform_4x4) {
  367. // 1. If haveAbove is equal to 1 and notOnRight is equal to 1 and txSz is equal to 0,
  368. // aboveRow[ i ] is set equal to CurrFrame[ plane ][ y-1 ][ Min(maxX, x+i) ].
  369. for (auto i = block_size; i < block_size * 2; i++)
  370. above_row_at(i) = frame_buffer_at(y - 1, min(max_x, x + i));
  371. } else {
  372. // 2. Otherwise, aboveRow[ i ] is set equal to aboveRow[ size-1 ].
  373. for (auto i = block_size; i < block_size * 2; i++)
  374. above_row_at(i) = above_row_at(block_size - 1);
  375. }
  376. // The array aboveRow[ i ] for i = -1 is specified by:
  377. if (have_above && have_left) {
  378. // 1. If haveAbove is equal to 1 and haveLeft is equal to 1, aboveRow[ -1 ] is set equal to
  379. // CurrFrame[ plane ][ y-1 ][ Min(maxX, x-1) ].
  380. above_row_at(-1) = frame_buffer_at(y - 1, min(max_x, x - 1));
  381. } else if (have_above) {
  382. // 2. Otherwise if haveAbove is equal to 1, aboveRow[ -1] is set equal to (1<<(BitDepth-1)) + 1.
  383. above_row_at(-1) = half_sample_value + 1;
  384. } else {
  385. // 3. Otherwise, aboveRow[ -1 ] is set equal to (1<<(BitDepth-1)) - 1
  386. above_row_at(-1) = half_sample_value - 1;
  387. }
  388. // The array leftCol[ i ] for i = 0..size-1 is specified by:
  389. Array<Intermediate, maximum_block_dimensions> left_column;
  390. if (have_left) {
  391. // − If haveLeft is equal to 1, leftCol[ i ] is set equal to CurrFrame[ plane ][ Min(maxY, y+i) ][ x-1 ].
  392. for (auto i = 0u; i < block_size; i++)
  393. left_column[i] = frame_buffer_at(min(max_y, y + i), x - 1);
  394. } else {
  395. // − Otherwise, leftCol[ i ] is set equal to (1<<(BitDepth-1)) + 1.
  396. for (auto i = 0u; i < block_size; i++)
  397. left_column[i] = half_sample_value + 1;
  398. }
  399. // A 2D array named pred containing the intra predicted samples is constructed as follows:
  400. Array<Intermediate, maximum_block_size> predicted_samples;
  401. auto const predicted_sample_at = [&](u32 row, u32 column) -> Intermediate& {
  402. return predicted_samples[index_from_row_and_column(row, column, block_size)];
  403. };
  404. // FIXME: One of the two below should be a simple memcpy of 1D arrays.
  405. switch (mode) {
  406. case PredictionMode::VPred:
  407. // − If mode is equal to V_PRED, pred[ i ][ j ] is set equal to aboveRow[ j ] with j = 0..size-1 and i = 0..size-1
  408. // (each row of the block is filled with a copy of aboveRow).
  409. for (auto j = 0u; j < block_size; j++) {
  410. for (auto i = 0u; i < block_size; i++)
  411. predicted_sample_at(i, j) = above_row_at(j);
  412. }
  413. break;
  414. case PredictionMode::HPred:
  415. // − Otherwise if mode is equal to H_PRED, pred[ i ][ j ] is set equal to leftCol[ i ] with j = 0..size-1 and i =
  416. // 0..size-1 (each column of the block is filled with a copy of leftCol).
  417. for (auto j = 0u; j < block_size; j++) {
  418. for (auto i = 0u; i < block_size; i++)
  419. predicted_sample_at(i, j) = left_column[i];
  420. }
  421. break;
  422. case PredictionMode::D207Pred:
  423. // − Otherwise if mode is equal to D207_PRED, the following applies:
  424. // 1. pred[ size - 1 ][ j ] = leftCol[ size - 1] for j = 0..size-1
  425. for (auto j = 0u; j < block_size; j++)
  426. predicted_sample_at(block_size - 1, j) = left_column[block_size - 1];
  427. // 2. pred[ i ][ 0 ] = Round2( leftCol[ i ] + leftCol[ i + 1 ], 1 ) for i = 0..size-2
  428. for (auto i = 0u; i < block_size - 1u; i++)
  429. predicted_sample_at(i, 0) = rounded_right_shift(left_column[i] + left_column[i + 1], 1);
  430. // 3. pred[ i ][ 1 ] = Round2( leftCol[ i ] + 2 * leftCol[ i + 1 ] + leftCol[ i + 2 ], 2 ) for i = 0..size-3
  431. for (auto i = 0u; i < block_size - 2u; i++)
  432. predicted_sample_at(i, 1) = rounded_right_shift(left_column[i] + (2 * left_column[i + 1]) + left_column[i + 2], 2);
  433. // 4. pred[ size - 2 ][ 1 ] = Round2( leftCol[ size - 2 ] + 3 * leftCol[ size - 1 ], 2 )
  434. predicted_sample_at(block_size - 2, 1) = rounded_right_shift(left_column[block_size - 2] + (3 * left_column[block_size - 1]), 2);
  435. // 5. pred[ i ][ j ] = pred[ i + 1 ][ j - 2 ] for i = (size-2)..0, for j = 2..size-1
  436. // NOTE – In the last step i iterates in reverse order.
  437. for (auto i = block_size - 2u;;) {
  438. for (auto j = 2u; j < block_size; j++)
  439. predicted_sample_at(i, j) = predicted_sample_at(i + 1, j - 2);
  440. if (i == 0)
  441. break;
  442. i--;
  443. }
  444. break;
  445. case PredictionMode::D45Pred:
  446. // Otherwise if mode is equal to D45_PRED,
  447. // for i = 0..size-1, for j = 0..size-1.
  448. for (auto i = 0u; i < block_size; i++) {
  449. for (auto j = 0; j < block_size; j++) {
  450. // pred[ i ][ j ] is set equal to (i + j + 2 < size * 2) ?
  451. if (i + j + 2 < block_size * 2)
  452. // Round2( aboveRow[ i + j ] + aboveRow[ i + j + 1 ] * 2 + aboveRow[ i + j + 2 ], 2 ) :
  453. predicted_sample_at(i, j) = rounded_right_shift(above_row_at(i + j) + above_row_at(i + j + 1) * 2 + above_row_at(i + j + 2), 2);
  454. else
  455. // aboveRow[ 2 * size - 1 ]
  456. predicted_sample_at(i, j) = above_row_at(2 * block_size - 1);
  457. }
  458. }
  459. break;
  460. case PredictionMode::D63Pred:
  461. // Otherwise if mode is equal to D63_PRED,
  462. for (auto i = 0u; i < block_size; i++) {
  463. for (auto j = 0u; j < block_size; j++) {
  464. // i/2 + j
  465. auto row_index = (i / 2) + j;
  466. // pred[ i ][ j ] is set equal to (i & 1) ?
  467. if (i & 1)
  468. // Round2( aboveRow[ i/2 + j ] + aboveRow[ i/2 + j + 1 ] * 2 + aboveRow[ i/2 + j + 2 ], 2 ) :
  469. predicted_sample_at(i, j) = rounded_right_shift(above_row_at(row_index) + above_row_at(row_index + 1) * 2 + above_row_at(row_index + 2), 2);
  470. else
  471. // Round2( aboveRow[ i/2 + j ] + aboveRow[ i/2 + j + 1 ], 1 ) for i = 0..size-1, for j = 0..size-1.
  472. predicted_sample_at(i, j) = rounded_right_shift(above_row_at(row_index) + above_row_at(row_index + 1), 1);
  473. }
  474. }
  475. break;
  476. case PredictionMode::D117Pred:
  477. // Otherwise if mode is equal to D117_PRED, the following applies:
  478. // 1. pred[ 0 ][ j ] = Round2( aboveRow[ j - 1 ] + aboveRow[ j ], 1 ) for j = 0..size-1
  479. for (auto j = 0; j < block_size; j++)
  480. predicted_sample_at(0, j) = rounded_right_shift(above_row_at(j - 1) + above_row_at(j), 1);
  481. // 2. pred[ 1 ][ 0 ] = Round2( leftCol[ 0 ] + 2 * aboveRow[ -1 ] + aboveRow[ 0 ], 2 )
  482. predicted_sample_at(1, 0) = rounded_right_shift(left_column[0] + 2 * above_row_at(-1) + above_row_at(0), 2);
  483. // 3. pred[ 1 ][ j ] = Round2( aboveRow[ j - 2 ] + 2 * aboveRow[ j - 1 ] + aboveRow[ j ], 2 ) for j = 1..size-1
  484. for (auto j = 1; j < block_size; j++)
  485. predicted_sample_at(1, j) = rounded_right_shift(above_row_at(j - 2) + 2 * above_row_at(j - 1) + above_row_at(j), 2);
  486. // 4. pred[ 2 ][ 0 ] = Round2( aboveRow[ -1 ] + 2 * leftCol[ 0 ] + leftCol[ 1 ], 2 )
  487. predicted_sample_at(2, 0) = rounded_right_shift(above_row_at(-1) + 2 * left_column[0] + left_column[1], 2);
  488. // 5. pred[ i ][ 0 ] = Round2( leftCol[ i - 3 ] + 2 * leftCol[ i - 2 ] + leftCol[ i - 1 ], 2 ) for i = 3..size-1
  489. for (auto i = 3u; i < block_size; i++)
  490. predicted_sample_at(i, 0) = rounded_right_shift(left_column[i - 3] + 2 * left_column[i - 2] + left_column[i - 1], 2);
  491. // 6. pred[ i ][ j ] = pred[ i - 2 ][ j - 1 ] for i = 2..size-1, for j = 1..size-1
  492. for (auto i = 2u; i < block_size; i++) {
  493. for (auto j = 1u; j < block_size; j++)
  494. predicted_sample_at(i, j) = predicted_sample_at(i - 2, j - 1);
  495. }
  496. break;
  497. case PredictionMode::D135Pred:
  498. // Otherwise if mode is equal to D135_PRED, the following applies:
  499. // 1. pred[ 0 ][ 0 ] = Round2( leftCol[ 0 ] + 2 * aboveRow[ -1 ] + aboveRow[ 0 ], 2 )
  500. predicted_sample_at(0, 0) = rounded_right_shift(left_column[0] + 2 * above_row_at(-1) + above_row_at(0), 2);
  501. // 2. pred[ 0 ][ j ] = Round2( aboveRow[ j - 2 ] + 2 * aboveRow[ j - 1 ] + aboveRow[ j ], 2 ) for j = 1..size-1
  502. for (auto j = 1; j < block_size; j++)
  503. predicted_sample_at(0, j) = rounded_right_shift(above_row_at(j - 2) + 2 * above_row_at(j - 1) + above_row_at(j), 2);
  504. // 3. pred[ 1 ][ 0 ] = Round2( aboveRow [ -1 ] + 2 * leftCol[ 0 ] + leftCol[ 1 ], 2 ) for i = 1..size-1
  505. predicted_sample_at(1, 0) = rounded_right_shift(above_row_at(-1) + 2 * left_column[0] + left_column[1], 2);
  506. // 4. pred[ i ][ 0 ] = Round2( leftCol[ i - 2 ] + 2 * leftCol[ i - 1 ] + leftCol[ i ], 2 ) for i = 2..size-1
  507. for (auto i = 2u; i < block_size; i++)
  508. predicted_sample_at(i, 0) = rounded_right_shift(left_column[i - 2] + 2 * left_column[i - 1] + left_column[i], 2);
  509. // 5. pred[ i ][ j ] = pred[ i - 1 ][ j - 1 ] for i = 1..size-1, for j = 1..size-1
  510. for (auto i = 1u; i < block_size; i++) {
  511. for (auto j = 1; j < block_size; j++)
  512. predicted_sample_at(i, j) = predicted_sample_at(i - 1, j - 1);
  513. }
  514. break;
  515. case PredictionMode::D153Pred:
  516. // Otherwise if mode is equal to D153_PRED, the following applies:
  517. // 1. pred[ 0 ][ 0 ] = Round2( leftCol[ 0 ] + aboveRow[ -1 ], 1 )
  518. predicted_sample_at(0, 0) = rounded_right_shift(left_column[0] + above_row_at(-1), 1);
  519. // 2. pred[ i ][ 0 ] = Round2( leftCol[ i - 1] + leftCol[ i ], 1 ) for i = 1..size-1
  520. for (auto i = 1u; i < block_size; i++)
  521. predicted_sample_at(i, 0) = rounded_right_shift(left_column[i - 1] + left_column[i], 1);
  522. // 3. pred[ 0 ][ 1 ] = Round2( leftCol[ 0 ] + 2 * aboveRow[ -1 ] + aboveRow[ 0 ], 2 )
  523. predicted_sample_at(0, 1) = rounded_right_shift(left_column[0] + 2 * above_row_at(-1) + above_row_at(0), 2);
  524. // 4. pred[ 1 ][ 1 ] = Round2( aboveRow[ -1 ] + 2 * leftCol [ 0 ] + leftCol [ 1 ], 2 )
  525. predicted_sample_at(1, 1) = rounded_right_shift(above_row_at(-1) + 2 * left_column[0] + left_column[1], 2);
  526. // 5. pred[ i ][ 1 ] = Round2( leftCol[ i - 2 ] + 2 * leftCol[ i - 1 ] + leftCol[ i ], 2 ) for i = 2..size-1
  527. for (auto i = 2u; i < block_size; i++)
  528. predicted_sample_at(i, 1) = rounded_right_shift(left_column[i - 2] + 2 * left_column[i - 1] + left_column[i], 2);
  529. // 6. pred[ 0 ][ j ] = Round2( aboveRow[ j - 3 ] + 2 * aboveRow[ j - 2 ] + aboveRow[ j - 1 ], 2 ) for j = 2..size-1
  530. for (auto j = 2; j < block_size; j++)
  531. predicted_sample_at(0, j) = rounded_right_shift(above_row_at(j - 3) + 2 * above_row_at(j - 2) + above_row_at(j - 1), 2);
  532. // 7. pred[ i ][ j ] = pred[ i - 1 ][ j - 2 ] for i = 1..size-1, for j = 2..size-1
  533. for (auto i = 1u; i < block_size; i++) {
  534. for (auto j = 2u; j < block_size; j++)
  535. predicted_sample_at(i, j) = predicted_sample_at(i - 1, j - 2);
  536. }
  537. break;
  538. case PredictionMode::TmPred:
  539. // Otherwise if mode is equal to TM_PRED,
  540. // pred[ i ][ j ] is set equal to Clip1( aboveRow[ j ] + leftCol[ i ] - aboveRow[ -1 ] )
  541. // for i = 0..size-1, for j = 0..size-1.
  542. for (auto i = 0u; i < block_size; i++) {
  543. for (auto j = 0u; j < block_size; j++)
  544. predicted_sample_at(i, j) = clip_1(block_context.frame_context.color_config.bit_depth, above_row_at(j) + left_column[i] - above_row_at(-1));
  545. }
  546. break;
  547. case PredictionMode::DcPred: {
  548. Intermediate average = 0;
  549. if (have_left && have_above) {
  550. // Otherwise if mode is equal to DC_PRED and haveLeft is equal to 1 and haveAbove is equal to 1,
  551. // The variable avg (the average of the samples in union of aboveRow and leftCol)
  552. // is specified as follows:
  553. // sum = 0
  554. // for ( k = 0; k < size; k++ ) {
  555. // sum += leftCol[ k ]
  556. // sum += aboveRow[ k ]
  557. // }
  558. // avg = (sum + size) >> (log2Size + 1)
  559. Intermediate sum = 0;
  560. for (auto k = 0u; k < block_size; k++) {
  561. sum += left_column[k];
  562. sum += above_row_at(k);
  563. }
  564. average = (sum + block_size) >> (log2_of_block_size + 1);
  565. } else if (have_left && !have_above) {
  566. // Otherwise if mode is equal to DC_PRED and haveLeft is equal to 1 and haveAbove is equal to 0,
  567. // The variable leftAvg is specified as follows:
  568. // sum = 0
  569. // for ( k = 0; k < size; k++ ) {
  570. // sum += leftCol[ k ]
  571. // }
  572. // leftAvg = (sum + (1 << (log2Size - 1) ) ) >> log2Size
  573. Intermediate sum = 0;
  574. for (auto k = 0u; k < block_size; k++)
  575. sum += left_column[k];
  576. average = (sum + (1 << (log2_of_block_size - 1))) >> log2_of_block_size;
  577. } else if (!have_left && have_above) {
  578. // Otherwise if mode is equal to DC_PRED and haveLeft is equal to 0 and haveAbove is equal to 1,
  579. // The variable aboveAvg is specified as follows:
  580. // sum = 0
  581. // for ( k = 0; k < size; k++ ) {
  582. // sum += aboveRow[ k ]
  583. // }
  584. // aboveAvg = (sum + (1 << (log2Size - 1) ) ) >> log2Size
  585. Intermediate sum = 0;
  586. for (auto k = 0u; k < block_size; k++)
  587. sum += above_row_at(k);
  588. average = (sum + (1 << (log2_of_block_size - 1))) >> log2_of_block_size;
  589. } else {
  590. // Otherwise (mode is DC_PRED),
  591. // pred[ i ][ j ] is set equal to 1<<(BitDepth - 1) with i = 0..size-1 and j = 0..size-1.
  592. average = 1 << (block_context.frame_context.color_config.bit_depth - 1);
  593. }
  594. // pred[ i ][ j ] is set equal to avg with i = 0..size-1 and j = 0..size-1.
  595. for (auto i = 0u; i < block_size; i++) {
  596. for (auto j = 0u; j < block_size; j++)
  597. predicted_sample_at(i, j) = average;
  598. }
  599. break;
  600. }
  601. default:
  602. dbgln("Unknown prediction mode {}", static_cast<u8>(mode));
  603. VERIFY_NOT_REACHED();
  604. }
  605. // The current frame is updated as follows:
  606. // − CurrFrame[ plane ][ y + i ][ x + j ] is set equal to pred[ i ][ j ] for i = 0..size-1 and j = 0..size-1.
  607. auto width_in_frame_buffer = min(static_cast<u32>(block_size), max_x - x + 1);
  608. auto height_in_frame_buffer = min(static_cast<u32>(block_size), max_y - y + 1);
  609. for (auto i = 0u; i < height_in_frame_buffer; i++) {
  610. for (auto j = 0u; j < width_in_frame_buffer; j++)
  611. frame_buffer_at(y + i, x + j) = predicted_sample_at(i, j);
  612. }
  613. return {};
  614. }
  615. MotionVector Decoder::select_motion_vector(u8 plane, BlockContext const& block_context, ReferenceIndex reference_index, u32 block_index)
  616. {
  617. // The inputs to this process are:
  618. // − a variable plane specifying which plane is being predicted,
  619. // − a variable refList specifying that we should select the motion vector from BlockMvs[ refList ],
  620. // − a variable blockIdx, specifying how much of the block has already been predicted in units of 4x4 samples.
  621. // The output of this process is a 2 element array called mv containing the motion vector for this block.
  622. // The purpose of this process is to find the motion vector for this block. Motion vectors are specified for each
  623. // luma block, but a chroma block may cover more than one luma block due to subsampling. In this case, an
  624. // average motion vector is constructed for the chroma block.
  625. // The functions round_mv_comp_q2 and round_mv_comp_q4 perform division with rounding to the nearest
  626. // integer and are specified as:
  627. auto round_mv_comp_q2 = [&](MotionVector in) {
  628. // return (value < 0 ? value - 1 : value + 1) / 2
  629. return MotionVector {
  630. (in.row() < 0 ? in.row() - 1 : in.row() + 1) >> 1,
  631. (in.column() < 0 ? in.column() - 1 : in.column() + 1) >> 1
  632. };
  633. };
  634. auto round_mv_comp_q4 = [&](MotionVector in) {
  635. // return (value < 0 ? value - 2 : value + 2) / 4
  636. return MotionVector {
  637. (in.row() < 0 ? in.row() - 2 : in.row() + 2) >> 2,
  638. (in.column() < 0 ? in.column() - 2 : in.column() + 2) >> 2
  639. };
  640. };
  641. auto vectors = block_context.sub_block_motion_vectors;
  642. // The motion vector array mv is derived as follows:
  643. // − If plane is equal to 0, or MiSize is greater than or equal to BLOCK_8X8, mv is set equal to
  644. // BlockMvs[ refList ][ blockIdx ].
  645. if (plane == 0 || block_context.size >= Block_8x8)
  646. return vectors[block_index][reference_index];
  647. // − Otherwise, if subsampling_x is equal to 0 and subsampling_y is equal to 0, mv is set equal to
  648. // BlockMvs[ refList ][ blockIdx ].
  649. if (!block_context.frame_context.color_config.subsampling_x && !block_context.frame_context.color_config.subsampling_y)
  650. return vectors[block_index][reference_index];
  651. // − Otherwise, if subsampling_x is equal to 0 and subsampling_y is equal to 1, mv[ comp ] is set equal to
  652. // round_mv_comp_q2( BlockMvs[ refList ][ blockIdx ][ comp ] + BlockMvs[ refList ][ blockIdx + 2 ][ comp ] )
  653. // for comp = 0..1.
  654. if (!block_context.frame_context.color_config.subsampling_x && block_context.frame_context.color_config.subsampling_y)
  655. return round_mv_comp_q2(vectors[block_index][reference_index] + vectors[block_index + 2][reference_index]);
  656. // − Otherwise, if subsampling_x is equal to 1 and subsampling_y is equal to 0, mv[ comp ] is set equal to
  657. // round_mv_comp_q2( BlockMvs[ refList ][ blockIdx ][ comp ] + BlockMvs[ refList ][ blockIdx + 1 ][ comp ] )
  658. // for comp = 0..1.
  659. if (block_context.frame_context.color_config.subsampling_x && !block_context.frame_context.color_config.subsampling_y)
  660. return round_mv_comp_q2(vectors[block_index][reference_index] + vectors[block_index + 1][reference_index]);
  661. // − Otherwise, (subsampling_x is equal to 1 and subsampling_y is equal to 1), mv[ comp ] is set equal to
  662. // round_mv_comp_q4( BlockMvs[ refList ][ 0 ][ comp ] + BlockMvs[ refList ][ 1 ][ comp ] +
  663. // BlockMvs[ refList ][ 2 ][ comp ] + BlockMvs[ refList ][ 3 ][ comp ] ) for comp = 0..1.
  664. VERIFY(block_context.frame_context.color_config.subsampling_x && block_context.frame_context.color_config.subsampling_y);
  665. return round_mv_comp_q4(vectors[0][reference_index] + vectors[1][reference_index]
  666. + vectors[2][reference_index] + vectors[3][reference_index]);
  667. }
  668. MotionVector Decoder::clamp_motion_vector(u8 plane, BlockContext const& block_context, u32 block_row, u32 block_column, MotionVector vector)
  669. {
  670. // FIXME: This function is named very similarly to Parser::clamp_mv. Rename one or the other?
  671. // The purpose of this process is to change the motion vector into the appropriate precision for the current plane
  672. // and to clamp motion vectors that go too far off the edge of the frame.
  673. // The variables sx and sy are set equal to the subsampling for the current plane as follows:
  674. // − If plane is equal to 0, sx is set equal to 0 and sy is set equal to 0.
  675. // − Otherwise, sx is set equal to subsampling_x and sy is set equal to subsampling_y.
  676. bool subsampling_x = plane > 0 ? block_context.frame_context.color_config.subsampling_x : false;
  677. bool subsampling_y = plane > 0 ? block_context.frame_context.color_config.subsampling_y : false;
  678. // The output array clampedMv is specified by the following steps:
  679. i32 blocks_high = num_8x8_blocks_high_lookup[block_context.size];
  680. // Casts must be done here to prevent subtraction underflow from wrapping the values.
  681. i32 mb_to_top_edge = -(static_cast<i32>(block_row * MI_SIZE) * 16) >> subsampling_y;
  682. i32 mb_to_bottom_edge = (((static_cast<i32>(block_context.frame_context.rows()) - blocks_high - static_cast<i32>(block_row)) * MI_SIZE) * 16) >> subsampling_y;
  683. i32 blocks_wide = num_8x8_blocks_wide_lookup[block_context.size];
  684. i32 mb_to_left_edge = -(static_cast<i32>(block_column * MI_SIZE) * 16) >> subsampling_x;
  685. i32 mb_to_right_edge = (((static_cast<i32>(block_context.frame_context.columns()) - blocks_wide - static_cast<i32>(block_column)) * MI_SIZE) * 16) >> subsampling_x;
  686. i32 subpel_left = (INTERP_EXTEND + ((blocks_wide * MI_SIZE) >> subsampling_x)) << SUBPEL_BITS;
  687. i32 subpel_right = subpel_left - SUBPEL_SHIFTS;
  688. i32 subpel_top = (INTERP_EXTEND + ((blocks_high * MI_SIZE) >> subsampling_y)) << SUBPEL_BITS;
  689. i32 subpel_bottom = subpel_top - SUBPEL_SHIFTS;
  690. return {
  691. clip_3(mb_to_top_edge - subpel_top, mb_to_bottom_edge + subpel_bottom, (2 * vector.row()) >> subsampling_y),
  692. clip_3(mb_to_left_edge - subpel_left, mb_to_right_edge + subpel_right, (2 * vector.column()) >> subsampling_x)
  693. };
  694. }
  695. static constexpr i32 maximum_scaled_step = 80;
  696. DecoderErrorOr<void> Decoder::prepare_referenced_frame(Gfx::Size<u32> frame_size, u8 reference_frame_index)
  697. {
  698. ReferenceFrame& reference_frame = m_parser->m_reference_frames[reference_frame_index];
  699. // 8.5.2.3 Motion vector scaling process
  700. // The inputs to this process are:
  701. // − a variable plane specifying which plane is being predicted,
  702. // − a variable refList specifying that we should scale to match reference frame ref_frame[ refList ],
  703. // − variables x and y specifying the location of the top left sample in the CurrFrame[ plane ] array of the region
  704. // to be predicted,
  705. // − a variable clampedMv specifying the clamped motion vector.
  706. // The outputs of this process are the variables startX and startY giving the reference block location in units of
  707. // 1/16 th of a sample, and variables xStep and yStep giving the step size in units of 1/16 th of a sample.
  708. // This process is responsible for computing the sampling locations in the reference frame based on the motion
  709. // vector. The sampling locations are also adjusted to compensate for any difference in the size of the reference
  710. // frame compared to the current frame.
  711. // It is a requirement of bitstream conformance that all the following conditions are satisfied:
  712. // − 2 * FrameWidth >= RefFrameWidth[ refIdx ]
  713. // − 2 * FrameHeight >= RefFrameHeight[ refIdx ]
  714. // − FrameWidth <= 16 * RefFrameWidth[ refIdx ]
  715. // − FrameHeight <= 16 * RefFrameHeight[ refIdx ]
  716. if (!reference_frame.is_valid())
  717. return DecoderError::format(DecoderErrorCategory::Corrupted, "Attempted to use reference frame {} that has not been saved", reference_frame_index);
  718. auto double_frame_size = frame_size.scaled_by(2);
  719. if (double_frame_size.width() < reference_frame.size.width() || double_frame_size.height() < reference_frame.size.height())
  720. return DecoderError::format(DecoderErrorCategory::Corrupted, "Inter frame size is too small relative to reference frame {}", reference_frame_index);
  721. if (!reference_frame.size.scaled_by(16).contains(frame_size))
  722. return DecoderError::format(DecoderErrorCategory::Corrupted, "Inter frame size is too large relative to reference frame {}", reference_frame_index);
  723. // FIXME: Convert all the operations in this function to vector operations supported by
  724. // MotionVector.
  725. // A variable xScale is set equal to (RefFrameWidth[ refIdx ] << REF_SCALE_SHIFT) / FrameWidth.
  726. // A variable yScale is set equal to (RefFrameHeight[ refIdx ] << REF_SCALE_SHIFT) / FrameHeight.
  727. // (xScale and yScale specify the size of the reference frame relative to the current frame in units where 16 is
  728. // equivalent to the reference frame having the same size.)
  729. i32 x_scale = (reference_frame.size.width() << REF_SCALE_SHIFT) / frame_size.width();
  730. i32 y_scale = (reference_frame.size.height() << REF_SCALE_SHIFT) / frame_size.height();
  731. // The output variable stepX is set equal to (16 * xScale) >> REF_SCALE_SHIFT.
  732. // The output variable stepY is set equal to (16 * yScale) >> REF_SCALE_SHIFT.
  733. i32 scaled_step_x = (16 * x_scale) >> REF_SCALE_SHIFT;
  734. i32 scaled_step_y = (16 * y_scale) >> REF_SCALE_SHIFT;
  735. // 5. The block inter prediction process in section 8.5.2.4 is invoked with plane, refList, startX, startY, stepX,
  736. // stepY, w, h as inputs and the output is assigned to the 2D array preds[ refList ].
  737. // 8.5.2.4 Block inter prediction process
  738. // The inputs to this process are:
  739. // − a variable plane,
  740. // − a variable refList specifying that we should predict from ref_frame[ refList ],
  741. // − variables x and y giving the block location in units of 1/16 th of a sample,
  742. // − variables xStep and yStep giving the step size in units of 1/16 th of a sample. (These will be at most equal
  743. // to 80 due to the restrictions on scaling between reference frames.)
  744. VERIFY(scaled_step_x <= maximum_scaled_step && scaled_step_y <= maximum_scaled_step);
  745. // − variables w and h giving the width and height of the block in units of samples
  746. // The output from this process is the 2D array named pred containing inter predicted samples.
  747. reference_frame.x_scale = x_scale;
  748. reference_frame.y_scale = x_scale;
  749. reference_frame.scaled_step_x = scaled_step_x;
  750. reference_frame.scaled_step_y = scaled_step_y;
  751. return {};
  752. }
  753. DecoderErrorOr<void> Decoder::predict_inter_block(u8 plane, BlockContext const& block_context, ReferenceIndex reference_index, u32 block_row, u32 block_column, u32 x, u32 y, u32 width, u32 height, u32 block_index, Span<u16> block_buffer)
  754. {
  755. VERIFY(width <= maximum_block_dimensions && height <= maximum_block_dimensions);
  756. // 2. The motion vector selection process in section 8.5.2.1 is invoked with plane, refList, blockIdx as inputs
  757. // and the output being the motion vector mv.
  758. auto motion_vector = select_motion_vector(plane, block_context, reference_index, block_index);
  759. // 3. The motion vector clamping process in section 8.5.2.2 is invoked with plane, mv as inputs and the output
  760. // being the clamped motion vector clampedMv
  761. auto clamped_vector = clamp_motion_vector(plane, block_context, block_row, block_column, motion_vector);
  762. // 4. The motion vector scaling process in section 8.5.2.3 is invoked with plane, refList, x, y, clampedMv as
  763. // inputs and the output being the initial location startX, startY, and the step sizes stepX, stepY.
  764. // 8.5.2.3 Motion vector scaling process
  765. // The inputs to this process are:
  766. // − a variable plane specifying which plane is being predicted,
  767. // − a variable refList specifying that we should scale to match reference frame ref_frame[ refList ],
  768. // − variables x and y specifying the location of the top left sample in the CurrFrame[ plane ] array of the region
  769. // to be predicted,
  770. // − a variable clampedMv specifying the clamped motion vector.
  771. // The outputs of this process are the variables startX and startY giving the reference block location in units of
  772. // 1/16 th of a sample, and variables xStep and yStep giving the step size in units of 1/16 th of a sample.
  773. // This process is responsible for computing the sampling locations in the reference frame based on the motion
  774. // vector. The sampling locations are also adjusted to compensate for any difference in the size of the reference
  775. // frame compared to the current frame.
  776. // NOTE: Some of this is done in advance by Decoder::prepare_referenced_frame().
  777. // A variable refIdx specifying which reference frame is being used is set equal to
  778. // ref_frame_idx[ ref_frame[ refList ] - LAST_FRAME ].
  779. auto reference_frame_index = block_context.frame_context.reference_frame_indices[block_context.reference_frame_types[reference_index] - ReferenceFrameType::LastFrame];
  780. auto const& reference_frame = m_parser->m_reference_frames[reference_frame_index];
  781. auto x_scale = reference_frame.x_scale;
  782. auto y_scale = reference_frame.x_scale;
  783. auto scaled_step_x = reference_frame.scaled_step_x;
  784. auto scaled_step_y = reference_frame.scaled_step_y;
  785. // The variable baseX is set equal to (x * xScale) >> REF_SCALE_SHIFT.
  786. // The variable baseY is set equal to (y * yScale) >> REF_SCALE_SHIFT.
  787. // (baseX and baseY specify the location of the block in the reference frame if a zero motion vector is used).
  788. i32 base_x = (x * x_scale) >> REF_SCALE_SHIFT;
  789. i32 base_y = (y * y_scale) >> REF_SCALE_SHIFT;
  790. // The variable lumaX is set equal to (plane > 0) ? x << subsampling_x : x.
  791. // The variable lumaY is set equal to (plane > 0) ? y << subsampling_y : y.
  792. // (lumaX and lumaY specify the location of the block to be predicted in the current frame in units of luma
  793. // samples.)
  794. bool subsampling_x = plane > 0 ? block_context.frame_context.color_config.subsampling_x : false;
  795. bool subsampling_y = plane > 0 ? block_context.frame_context.color_config.subsampling_y : false;
  796. i32 luma_x = x << subsampling_x;
  797. i32 luma_y = y << subsampling_y;
  798. // The variable fracX is set equal to ( (16 * lumaX * xScale) >> REF_SCALE_SHIFT) & SUBPEL_MASK.
  799. // The variable fracY is set equal to ( (16 * lumaY * yScale) >> REF_SCALE_SHIFT) & SUBPEL_MASK.
  800. i32 frac_x = ((16 * luma_x * x_scale) >> REF_SCALE_SHIFT) & SUBPEL_MASK;
  801. i32 frac_y = ((16 * luma_y * y_scale) >> REF_SCALE_SHIFT) & SUBPEL_MASK;
  802. // The variable dX is set equal to ( (clampedMv[ 1 ] * xScale) >> REF_SCALE_SHIFT) + fracX.
  803. // The variable dY is set equal to ( (clampedMv[ 0 ] * yScale) >> REF_SCALE_SHIFT) + fracY.
  804. // (dX and dY specify a scaled motion vector.)
  805. i32 scaled_vector_x = ((clamped_vector.column() * x_scale) >> REF_SCALE_SHIFT) + frac_x;
  806. i32 scaled_vector_y = ((clamped_vector.row() * y_scale) >> REF_SCALE_SHIFT) + frac_y;
  807. // The output variable startX is set equal to (baseX << SUBPEL_BITS) + dX.
  808. // The output variable startY is set equal to (baseY << SUBPEL_BITS) + dY.
  809. i32 offset_scaled_block_x = (base_x << SUBPEL_BITS) + scaled_vector_x;
  810. i32 offset_scaled_block_y = (base_y << SUBPEL_BITS) + scaled_vector_y;
  811. // A variable ref specifying the reference frame contents is set equal to FrameStore[ refIdx ].
  812. auto& reference_frame_buffer = reference_frame.frame_planes[plane];
  813. auto reference_frame_width = (reference_frame.size.width() >> subsampling_x) + (MV_BORDER * 2);
  814. auto block_buffer_at = [&](u32 row, u32 column) -> u16& {
  815. return block_buffer[row * width + column];
  816. };
  817. // The variable lastX is set equal to ( (RefFrameWidth[ refIdx ] + subX) >> subX) - 1.
  818. // The variable lastY is set equal to ( (RefFrameHeight[ refIdx ] + subY) >> subY) - 1.
  819. // (lastX and lastY specify the coordinates of the bottom right sample of the reference plane.)
  820. // Ad-hoc: These variables are not needed, since the reference frame is expanded to contain the samples that
  821. // may be referenced by motion vectors on the edge of the frame.
  822. // The variable intermediateHeight specifying the height required for the intermediate array is set equal to (((h -
  823. // 1) * yStep + 15) >> 4) + 8.
  824. static constexpr auto maximum_intermediate_height = (((maximum_block_dimensions - 1) * maximum_scaled_step + 15) >> 4) + 8;
  825. auto intermediate_height = (((height - 1) * scaled_step_y + 15) >> 4) + 8;
  826. VERIFY(intermediate_height <= maximum_intermediate_height);
  827. // The sub-sample interpolation is effected via two one-dimensional convolutions. First a horizontal filter is used
  828. // to build up a temporary array, and then this array is vertically filtered to obtain the final prediction. The
  829. // fractional parts of the motion vectors determine the filtering process. If the fractional part is zero, then the
  830. // filtering is equivalent to a straight sample copy.
  831. // The filtering is applied as follows:
  832. // The array intermediate is specified as follows:
  833. // Note: Height is specified by `intermediate_height`, width is specified by `width`
  834. Array<u16, maximum_intermediate_height * maximum_block_dimensions> intermediate_buffer;
  835. auto intermediate_buffer_at = [&](u32 row, u32 column) -> u16& {
  836. return intermediate_buffer[row * width + column];
  837. };
  838. // Check our reference frame bounds before starting the loop.
  839. auto last_possible_reference = (MV_BORDER + (offset_scaled_block_y >> 4) + static_cast<i32>(intermediate_height - 1) - 3) * reference_frame_width;
  840. VERIFY(reference_frame_buffer.size() >= last_possible_reference);
  841. for (auto row = 0u; row < intermediate_height; row++) {
  842. auto reference_row = (offset_scaled_block_y >> 4) + static_cast<i32>(row) - 3;
  843. u16 const* scan_line = &reference_frame_buffer[static_cast<size_t>(MV_BORDER + reference_row) * reference_frame_width];
  844. for (auto column = 0u; column < width; column++) {
  845. auto samples_start = offset_scaled_block_x + static_cast<i32>(scaled_step_x * column);
  846. i32 accumulated_samples = 0;
  847. for (auto t = 0u; t < 8u; t++) {
  848. auto sample = scan_line[MV_BORDER + (samples_start >> 4) + static_cast<i32>(t) - 3];
  849. accumulated_samples += subpel_filters[block_context.interpolation_filter][samples_start & 15][t] * sample;
  850. }
  851. intermediate_buffer_at(row, column) = clip_1(block_context.frame_context.color_config.bit_depth, rounded_right_shift(accumulated_samples, 7));
  852. }
  853. }
  854. for (auto row = 0u; row < height; row++) {
  855. for (auto column = 0u; column < width; column++) {
  856. auto samples_start = (offset_scaled_block_y & 15) + static_cast<i32>(scaled_step_y * row);
  857. auto const* scan_column = &intermediate_buffer_at(samples_start >> 4, column);
  858. auto const* subpel_filters_for_samples = subpel_filters[block_context.interpolation_filter][samples_start & 15];
  859. i32 accumulated_samples = 0;
  860. for (auto t = 0u; t < 8u; t++) {
  861. auto sample = *scan_column;
  862. accumulated_samples += subpel_filters_for_samples[t] * sample;
  863. scan_column += width;
  864. }
  865. block_buffer_at(row, column) = clip_1(block_context.frame_context.color_config.bit_depth, rounded_right_shift(accumulated_samples, 7));
  866. }
  867. }
  868. return {};
  869. }
  870. DecoderErrorOr<void> Decoder::predict_inter(u8 plane, BlockContext const& block_context, u32 x, u32 y, u32 width, u32 height, u32 block_index)
  871. {
  872. // The inter prediction process is invoked for inter coded blocks. When MiSize is smaller than BLOCK_8X8, the
  873. // prediction is done with a granularity of 4x4 samples, otherwise the whole plane is predicted at the same time.
  874. // The inputs to this process are:
  875. // − a variable plane specifying which plane is being predicted,
  876. // − variables x and y specifying the location of the top left sample in the CurrFrame[ plane ] array of the region
  877. // to be predicted,
  878. // − variables w and h specifying the width and height of the region to be predicted,
  879. // − a variable blockIdx, specifying how much of the block has already been predicted in units of 4x4 samples.
  880. // The outputs of this process are inter predicted samples in the current frame CurrFrame.
  881. // The prediction arrays are formed by the following ordered steps:
  882. // 1. The variable refList is set equal to 0.
  883. // 2. through 5.
  884. Array<u16, maximum_block_size> predicted_buffer;
  885. auto predicted_span = predicted_buffer.span().trim(width * height);
  886. TRY(predict_inter_block(plane, block_context, ReferenceIndex::Primary, block_context.row, block_context.column, x, y, width, height, block_index, predicted_span));
  887. auto predicted_buffer_at = [&](Span<u16> buffer, u32 row, u32 column) -> u16& {
  888. return buffer[row * width + column];
  889. };
  890. // 6. If isCompound is equal to 1, then the variable refList is set equal to 1 and steps 2, 3, 4 and 5 are repeated
  891. // to form the prediction for the second reference.
  892. // The inter predicted samples are then derived as follows:
  893. auto& frame_buffer = get_output_buffer(plane);
  894. VERIFY(!frame_buffer.is_empty());
  895. auto frame_width = (block_context.frame_context.columns() * 8u) >> (plane > 0 ? block_context.frame_context.color_config.subsampling_x : false);
  896. auto frame_height = (block_context.frame_context.rows() * 8u) >> (plane > 0 ? block_context.frame_context.color_config.subsampling_y : false);
  897. auto frame_buffer_at = [&](u32 row, u32 column) -> u16& {
  898. return frame_buffer[row * frame_width + column];
  899. };
  900. auto width_in_frame_buffer = min(width, frame_width - x);
  901. auto height_in_frame_buffer = min(height, frame_height - y);
  902. // The variable isCompound is set equal to ref_frame[ 1 ] > NONE.
  903. // − If isCompound is equal to 0, CurrFrame[ plane ][ y + i ][ x + j ] is set equal to preds[ 0 ][ i ][ j ] for i = 0..h-1
  904. // and j = 0..w-1.
  905. if (!block_context.is_compound()) {
  906. for (auto i = 0u; i < height_in_frame_buffer; i++) {
  907. for (auto j = 0u; j < width_in_frame_buffer; j++)
  908. frame_buffer_at(y + i, x + j) = predicted_buffer_at(predicted_span, i, j);
  909. }
  910. return {};
  911. }
  912. // − Otherwise, CurrFrame[ plane ][ y + i ][ x + j ] is set equal to Round2( preds[ 0 ][ i ][ j ] + preds[ 1 ][ i ][ j ], 1 )
  913. // for i = 0..h-1 and j = 0..w-1.
  914. Array<u16, maximum_block_size> second_predicted_buffer;
  915. auto second_predicted_span = second_predicted_buffer.span().trim(width * height);
  916. TRY(predict_inter_block(plane, block_context, ReferenceIndex::Secondary, block_context.row, block_context.column, x, y, width, height, block_index, second_predicted_span));
  917. for (auto i = 0u; i < height_in_frame_buffer; i++) {
  918. for (auto j = 0u; j < width_in_frame_buffer; j++)
  919. frame_buffer_at(y + i, x + j) = rounded_right_shift(predicted_buffer_at(predicted_span, i, j) + predicted_buffer_at(second_predicted_span, i, j), 1);
  920. }
  921. return {};
  922. }
  923. inline u16 dc_q(u8 bit_depth, u8 b)
  924. {
  925. // The function dc_q( b ) is specified as dc_qlookup[ (BitDepth-8) >> 1 ][ Clip3( 0, 255, b ) ] where dc_lookup is
  926. // defined as follows:
  927. constexpr u16 dc_qlookup[3][256] = {
  928. { 4, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15, 16, 17, 18, 19, 19, 20, 21, 22, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32, 32, 33, 34, 35, 36, 37, 38, 38, 39, 40, 41, 42, 43, 43, 44, 45, 46, 47, 48, 48, 49, 50, 51, 52, 53, 53, 54, 55, 56, 57, 57, 58, 59, 60, 61, 62, 62, 63, 64, 65, 66, 66, 67, 68, 69, 70, 70, 71, 72, 73, 74, 74, 75, 76, 77, 78, 78, 79, 80, 81, 81, 82, 83, 84, 85, 85, 87, 88, 90, 92, 93, 95, 96, 98, 99, 101, 102, 104, 105, 107, 108, 110, 111, 113, 114, 116, 117, 118, 120, 121, 123, 125, 127, 129, 131, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 161, 164, 166, 169, 172, 174, 177, 180, 182, 185, 187, 190, 192, 195, 199, 202, 205, 208, 211, 214, 217, 220, 223, 226, 230, 233, 237, 240, 243, 247, 250, 253, 257, 261, 265, 269, 272, 276, 280, 284, 288, 292, 296, 300, 304, 309, 313, 317, 322, 326, 330, 335, 340, 344, 349, 354, 359, 364, 369, 374, 379, 384, 389, 395, 400, 406, 411, 417, 423, 429, 435, 441, 447, 454, 461, 467, 475, 482, 489, 497, 505, 513, 522, 530, 539, 549, 559, 569, 579, 590, 602, 614, 626, 640, 654, 668, 684, 700, 717, 736, 755, 775, 796, 819, 843, 869, 896, 925, 955, 988, 1022, 1058, 1098, 1139, 1184, 1232, 1282, 1336 },
  929. { 4, 9, 10, 13, 15, 17, 20, 22, 25, 28, 31, 34, 37, 40, 43, 47, 50, 53, 57, 60, 64, 68, 71, 75, 78, 82, 86, 90, 93, 97, 101, 105, 109, 113, 116, 120, 124, 128, 132, 136, 140, 143, 147, 151, 155, 159, 163, 166, 170, 174, 178, 182, 185, 189, 193, 197, 200, 204, 208, 212, 215, 219, 223, 226, 230, 233, 237, 241, 244, 248, 251, 255, 259, 262, 266, 269, 273, 276, 280, 283, 287, 290, 293, 297, 300, 304, 307, 310, 314, 317, 321, 324, 327, 331, 334, 337, 343, 350, 356, 362, 369, 375, 381, 387, 394, 400, 406, 412, 418, 424, 430, 436, 442, 448, 454, 460, 466, 472, 478, 484, 490, 499, 507, 516, 525, 533, 542, 550, 559, 567, 576, 584, 592, 601, 609, 617, 625, 634, 644, 655, 666, 676, 687, 698, 708, 718, 729, 739, 749, 759, 770, 782, 795, 807, 819, 831, 844, 856, 868, 880, 891, 906, 920, 933, 947, 961, 975, 988, 1001, 1015, 1030, 1045, 1061, 1076, 1090, 1105, 1120, 1137, 1153, 1170, 1186, 1202, 1218, 1236, 1253, 1271, 1288, 1306, 1323, 1342, 1361, 1379, 1398, 1416, 1436, 1456, 1476, 1496, 1516, 1537, 1559, 1580, 1601, 1624, 1647, 1670, 1692, 1717, 1741, 1766, 1791, 1817, 1844, 1871, 1900, 1929, 1958, 1990, 2021, 2054, 2088, 2123, 2159, 2197, 2236, 2276, 2319, 2363, 2410, 2458, 2508, 2561, 2616, 2675, 2737, 2802, 2871, 2944, 3020, 3102, 3188, 3280, 3375, 3478, 3586, 3702, 3823, 3953, 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347 },
  930. { 4, 12, 18, 25, 33, 41, 50, 60, 70, 80, 91, 103, 115, 127, 140, 153, 166, 180, 194, 208, 222, 237, 251, 266, 281, 296, 312, 327, 343, 358, 374, 390, 405, 421, 437, 453, 469, 484, 500, 516, 532, 548, 564, 580, 596, 611, 627, 643, 659, 674, 690, 706, 721, 737, 752, 768, 783, 798, 814, 829, 844, 859, 874, 889, 904, 919, 934, 949, 964, 978, 993, 1008, 1022, 1037, 1051, 1065, 1080, 1094, 1108, 1122, 1136, 1151, 1165, 1179, 1192, 1206, 1220, 1234, 1248, 1261, 1275, 1288, 1302, 1315, 1329, 1342, 1368, 1393, 1419, 1444, 1469, 1494, 1519, 1544, 1569, 1594, 1618, 1643, 1668, 1692, 1717, 1741, 1765, 1789, 1814, 1838, 1862, 1885, 1909, 1933, 1957, 1992, 2027, 2061, 2096, 2130, 2165, 2199, 2233, 2267, 2300, 2334, 2367, 2400, 2434, 2467, 2499, 2532, 2575, 2618, 2661, 2704, 2746, 2788, 2830, 2872, 2913, 2954, 2995, 3036, 3076, 3127, 3177, 3226, 3275, 3324, 3373, 3421, 3469, 3517, 3565, 3621, 3677, 3733, 3788, 3843, 3897, 3951, 4005, 4058, 4119, 4181, 4241, 4301, 4361, 4420, 4479, 4546, 4612, 4677, 4742, 4807, 4871, 4942, 5013, 5083, 5153, 5222, 5291, 5367, 5442, 5517, 5591, 5665, 5745, 5825, 5905, 5984, 6063, 6149, 6234, 6319, 6404, 6495, 6587, 6678, 6769, 6867, 6966, 7064, 7163, 7269, 7376, 7483, 7599, 7715, 7832, 7958, 8085, 8214, 8352, 8492, 8635, 8788, 8945, 9104, 9275, 9450, 9639, 9832, 10031, 10245, 10465, 10702, 10946, 11210, 11482, 11776, 12081, 12409, 12750, 13118, 13501, 13913, 14343, 14807, 15290, 15812, 16356, 16943, 17575, 18237, 18949, 19718, 20521, 21387 }
  931. };
  932. return dc_qlookup[(bit_depth - 8) >> 1][clip_3<u8>(0, 255, b)];
  933. }
  934. inline u16 ac_q(u8 bit_depth, u8 b)
  935. {
  936. // The function ac_q( b ) is specified as ac_qlookup[ (BitDepth-8) >> 1 ][ Clip3( 0, 255, b ) ] where ac_lookup is
  937. // defined as follows:
  938. constexpr u16 ac_qlookup[3][256] = {
  939. { 4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 155, 158, 161, 164, 167, 170, 173, 176, 179, 182, 185, 188, 191, 194, 197, 200, 203, 207, 211, 215, 219, 223, 227, 231, 235, 239, 243, 247, 251, 255, 260, 265, 270, 275, 280, 285, 290, 295, 300, 305, 311, 317, 323, 329, 335, 341, 347, 353, 359, 366, 373, 380, 387, 394, 401, 408, 416, 424, 432, 440, 448, 456, 465, 474, 483, 492, 501, 510, 520, 530, 540, 550, 560, 571, 582, 593, 604, 615, 627, 639, 651, 663, 676, 689, 702, 715, 729, 743, 757, 771, 786, 801, 816, 832, 848, 864, 881, 898, 915, 933, 951, 969, 988, 1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151, 1173, 1196, 1219, 1243, 1267, 1292, 1317, 1343, 1369, 1396, 1423, 1451, 1479, 1508, 1537, 1567, 1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828 },
  940. { 4, 9, 11, 13, 16, 18, 21, 24, 27, 30, 33, 37, 40, 44, 48, 51, 55, 59, 63, 67, 71, 75, 79, 83, 88, 92, 96, 100, 105, 109, 114, 118, 122, 127, 131, 136, 140, 145, 149, 154, 158, 163, 168, 172, 177, 181, 186, 190, 195, 199, 204, 208, 213, 217, 222, 226, 231, 235, 240, 244, 249, 253, 258, 262, 267, 271, 275, 280, 284, 289, 293, 297, 302, 306, 311, 315, 319, 324, 328, 332, 337, 341, 345, 349, 354, 358, 362, 367, 371, 375, 379, 384, 388, 392, 396, 401, 409, 417, 425, 433, 441, 449, 458, 466, 474, 482, 490, 498, 506, 514, 523, 531, 539, 547, 555, 563, 571, 579, 588, 596, 604, 616, 628, 640, 652, 664, 676, 688, 700, 713, 725, 737, 749, 761, 773, 785, 797, 809, 825, 841, 857, 873, 889, 905, 922, 938, 954, 970, 986, 1002, 1018, 1038, 1058, 1078, 1098, 1118, 1138, 1158, 1178, 1198, 1218, 1242, 1266, 1290, 1314, 1338, 1362, 1386, 1411, 1435, 1463, 1491, 1519, 1547, 1575, 1603, 1631, 1663, 1695, 1727, 1759, 1791, 1823, 1859, 1895, 1931, 1967, 2003, 2039, 2079, 2119, 2159, 2199, 2239, 2283, 2327, 2371, 2415, 2459, 2507, 2555, 2603, 2651, 2703, 2755, 2807, 2859, 2915, 2971, 3027, 3083, 3143, 3203, 3263, 3327, 3391, 3455, 3523, 3591, 3659, 3731, 3803, 3876, 3952, 4028, 4104, 4184, 4264, 4348, 4432, 4516, 4604, 4692, 4784, 4876, 4972, 5068, 5168, 5268, 5372, 5476, 5584, 5692, 5804, 5916, 6032, 6148, 6268, 6388, 6512, 6640, 6768, 6900, 7036, 7172, 7312 },
  941. { 4, 13, 19, 27, 35, 44, 54, 64, 75, 87, 99, 112, 126, 139, 154, 168, 183, 199, 214, 230, 247, 263, 280, 297, 314, 331, 349, 366, 384, 402, 420, 438, 456, 475, 493, 511, 530, 548, 567, 586, 604, 623, 642, 660, 679, 698, 716, 735, 753, 772, 791, 809, 828, 846, 865, 884, 902, 920, 939, 957, 976, 994, 1012, 1030, 1049, 1067, 1085, 1103, 1121, 1139, 1157, 1175, 1193, 1211, 1229, 1246, 1264, 1282, 1299, 1317, 1335, 1352, 1370, 1387, 1405, 1422, 1440, 1457, 1474, 1491, 1509, 1526, 1543, 1560, 1577, 1595, 1627, 1660, 1693, 1725, 1758, 1791, 1824, 1856, 1889, 1922, 1954, 1987, 2020, 2052, 2085, 2118, 2150, 2183, 2216, 2248, 2281, 2313, 2346, 2378, 2411, 2459, 2508, 2556, 2605, 2653, 2701, 2750, 2798, 2847, 2895, 2943, 2992, 3040, 3088, 3137, 3185, 3234, 3298, 3362, 3426, 3491, 3555, 3619, 3684, 3748, 3812, 3876, 3941, 4005, 4069, 4149, 4230, 4310, 4390, 4470, 4550, 4631, 4711, 4791, 4871, 4967, 5064, 5160, 5256, 5352, 5448, 5544, 5641, 5737, 5849, 5961, 6073, 6185, 6297, 6410, 6522, 6650, 6778, 6906, 7034, 7162, 7290, 7435, 7579, 7723, 7867, 8011, 8155, 8315, 8475, 8635, 8795, 8956, 9132, 9308, 9484, 9660, 9836, 10028, 10220, 10412, 10604, 10812, 11020, 11228, 11437, 11661, 11885, 12109, 12333, 12573, 12813, 13053, 13309, 13565, 13821, 14093, 14365, 14637, 14925, 15213, 15502, 15806, 16110, 16414, 16734, 17054, 17390, 17726, 18062, 18414, 18766, 19134, 19502, 19886, 20270, 20670, 21070, 21486, 21902, 22334, 22766, 23214, 23662, 24126, 24590, 25070, 25551, 26047, 26559, 27071, 27599, 28143, 28687, 29247 }
  942. };
  943. return ac_qlookup[(bit_depth - 8) >> 1][clip_3<u8>(0, 255, b)];
  944. }
  945. u8 Decoder::get_base_quantizer_index(BlockContext const& block_context)
  946. {
  947. // The function get_qindex( ) returns the quantizer index for the current block and is specified by the following:
  948. // − If seg_feature_active( SEG_LVL_ALT_Q ) is equal to 1 the following ordered steps apply:
  949. if (Parser::seg_feature_active(block_context, SEG_LVL_ALT_Q)) {
  950. // 1. Set the variable data equal to FeatureData[ segment_id ][ SEG_LVL_ALT_Q ].
  951. auto data = block_context.frame_context.segmentation_features[block_context.segment_id][SEG_LVL_ALT_Q].value;
  952. // 2. If segmentation_abs_or_delta_update is equal to 0, set data equal to base_q_idx + data
  953. if (!block_context.frame_context.should_use_absolute_segment_base_quantizer) {
  954. data += block_context.frame_context.base_quantizer_index;
  955. }
  956. // 3. Return Clip3( 0, 255, data ).
  957. return clip_3<u8>(0, 255, data);
  958. }
  959. // − Otherwise, return base_q_idx.
  960. return block_context.frame_context.base_quantizer_index;
  961. }
  962. u16 Decoder::get_dc_quantizer(BlockContext const& block_context, u8 plane)
  963. {
  964. // FIXME: The result of this function can be cached. This does not change per frame.
  965. // The function get_dc_quant( plane ) returns the quantizer value for the dc coefficient for a particular plane and
  966. // is derived as follows:
  967. // − If plane is equal to 0, return dc_q( get_qindex( ) + delta_q_y_dc ).
  968. // − Otherwise, return dc_q( get_qindex( ) + delta_q_uv_dc ).
  969. // Instead of if { return }, select the value to add and return.
  970. i8 offset = plane == 0 ? block_context.frame_context.y_dc_quantizer_index_delta : block_context.frame_context.uv_dc_quantizer_index_delta;
  971. return dc_q(block_context.frame_context.color_config.bit_depth, static_cast<u8>(get_base_quantizer_index(block_context) + offset));
  972. }
  973. u16 Decoder::get_ac_quantizer(BlockContext const& block_context, u8 plane)
  974. {
  975. // FIXME: The result of this function can be cached. This does not change per frame.
  976. // The function get_ac_quant( plane ) returns the quantizer value for the ac coefficient for a particular plane and
  977. // is derived as follows:
  978. // − If plane is equal to 0, return ac_q( get_qindex( ) ).
  979. // − Otherwise, return ac_q( get_qindex( ) + delta_q_uv_ac ).
  980. // Instead of if { return }, select the value to add and return.
  981. i8 offset = plane == 0 ? 0 : block_context.frame_context.uv_ac_quantizer_index_delta;
  982. return ac_q(block_context.frame_context.color_config.bit_depth, static_cast<u8>(get_base_quantizer_index(block_context) + offset));
  983. }
  984. DecoderErrorOr<void> Decoder::reconstruct(u8 plane, BlockContext const& block_context, u32 transform_block_x, u32 transform_block_y, TransformSize transform_block_size, TransformSet transform_set)
  985. {
  986. // 8.6.2 Reconstruct process
  987. // The variable dqDenom is set equal to 2 if txSz is equal to Transform_32X32, otherwise dqDenom is set equal to 1.
  988. Intermediate dq_denominator = transform_block_size == Transform_32x32 ? 2 : 1;
  989. // The variable n (specifying the base 2 logarithm of the width of the transform block) is set equal to 2 + txSz.
  990. u8 log2_of_block_size = 2u + transform_block_size;
  991. // The variable n0 (specifying the width of the transform block) is set equal to 1 << n.
  992. auto block_size = 1u << log2_of_block_size;
  993. // 1. Dequant[ i ][ j ] is set equal to ( Tokens[ i * n0 + j ] * get_ac_quant( plane ) ) / dqDenom
  994. // for i = 0..(n0-1), for j = 0..(n0-1)
  995. Array<Intermediate, maximum_transform_size> dequantized;
  996. Intermediate ac_quant = get_ac_quantizer(block_context, plane);
  997. for (auto i = 0u; i < block_size; i++) {
  998. for (auto j = 0u; j < block_size; j++) {
  999. auto index = index_from_row_and_column(i, j, block_size);
  1000. if (index == 0)
  1001. continue;
  1002. dequantized[index] = (block_context.residual_tokens[index] * ac_quant) / dq_denominator;
  1003. }
  1004. }
  1005. // 2. Dequant[ 0 ][ 0 ] is set equal to ( Tokens[ 0 ] * get_dc_quant( plane ) ) / dqDenom
  1006. dequantized[0] = (block_context.residual_tokens[0] * get_dc_quantizer(block_context, plane)) / dq_denominator;
  1007. // It is a requirement of bitstream conformance that the values written into the Dequant array in steps 1 and 2
  1008. // are representable by a signed integer with 8 + BitDepth bits.
  1009. // Note: Since bounds checks just ensure that we will not have resulting values that will overflow, it's non-fatal
  1010. // to allow these bounds to be violated. Therefore, we can avoid the performance cost here.
  1011. // 3. Invoke the 2D inverse transform block process defined in section 8.7.2 with the variable n as input.
  1012. // The inverse transform outputs are stored back to the Dequant buffer.
  1013. TRY(inverse_transform_2d(block_context, dequantized, log2_of_block_size, transform_set));
  1014. // 4. CurrFrame[ plane ][ y + i ][ x + j ] is set equal to Clip1( CurrFrame[ plane ][ y + i ][ x + j ] + Dequant[ i ][ j ] )
  1015. // for i = 0..(n0-1) and j = 0..(n0-1).
  1016. auto& current_buffer = get_output_buffer(plane);
  1017. auto subsampling_x = (plane > 0 ? block_context.frame_context.color_config.subsampling_x : 0);
  1018. auto subsampling_y = (plane > 0 ? block_context.frame_context.color_config.subsampling_y : 0);
  1019. auto frame_width = (block_context.frame_context.columns() * 8) >> subsampling_x;
  1020. auto frame_height = (block_context.frame_context.rows() * 8) >> subsampling_y;
  1021. auto width_in_frame_buffer = min(block_size, frame_width - transform_block_x);
  1022. auto height_in_frame_buffer = min(block_size, frame_height - transform_block_y);
  1023. for (auto i = 0u; i < height_in_frame_buffer; i++) {
  1024. for (auto j = 0u; j < width_in_frame_buffer; j++) {
  1025. auto index = index_from_row_and_column(transform_block_y + i, transform_block_x + j, frame_width);
  1026. auto dequantized_value = dequantized[index_from_row_and_column(i, j, block_size)];
  1027. current_buffer[index] = clip_1(block_context.frame_context.color_config.bit_depth, current_buffer[index] + dequantized_value);
  1028. }
  1029. }
  1030. return {};
  1031. }
  1032. inline DecoderErrorOr<void> Decoder::inverse_walsh_hadamard_transform(Span<Intermediate> data, u8 log2_of_block_size, u8 shift)
  1033. {
  1034. (void)data;
  1035. (void)shift;
  1036. // The input to this process is a variable shift that specifies the amount of pre-scaling.
  1037. // This process does an in-place transform of the array T (of length 4) by the following ordered steps:
  1038. if (1 << log2_of_block_size != 4)
  1039. return DecoderError::corrupted("Block size was not 4"sv);
  1040. return DecoderError::not_implemented();
  1041. }
  1042. inline i32 Decoder::cos64(u8 angle)
  1043. {
  1044. const i32 cos64_lookup[33] = { 16384, 16364, 16305, 16207, 16069, 15893, 15679, 15426, 15137, 14811, 14449, 14053, 13623, 13160, 12665, 12140, 11585, 11003, 10394, 9760, 9102, 8423, 7723, 7005, 6270, 5520, 4756, 3981, 3196, 2404, 1606, 804, 0 };
  1045. // 1. Set a variable angle2 equal to angle & 127.
  1046. angle &= 127;
  1047. // 2. If angle2 is greater than or equal to 0 and less than or equal to 32, return cos64_lookup[ angle2 ].
  1048. if (angle <= 32)
  1049. return cos64_lookup[angle];
  1050. // 3. If angle2 is greater than 32 and less than or equal to 64, return cos64_lookup[ 64 - angle2 ] * -1.
  1051. if (angle <= 64)
  1052. return -cos64_lookup[64 - angle];
  1053. // 4. If angle2 is greater than 64 and less than or equal to 96, return cos64_lookup[ angle2 - 64 ] * -1.
  1054. if (angle <= 96)
  1055. return -cos64_lookup[angle - 64];
  1056. // 5. Otherwise (if angle2 is greater than 96 and less than 128), return cos64_lookup[ 128 - angle2 ].
  1057. return cos64_lookup[128 - angle];
  1058. }
  1059. inline i32 Decoder::sin64(u8 angle)
  1060. {
  1061. if (angle < 32)
  1062. angle += 128;
  1063. return cos64(angle - 32u);
  1064. }
  1065. template<typename T>
  1066. inline i32 Decoder::rounded_right_shift(T value, u8 bits)
  1067. {
  1068. value = (value + static_cast<T>(1u << (bits - 1u))) >> bits;
  1069. return static_cast<i32>(value);
  1070. }
  1071. // (8.7.1.1) The function B( a, b, angle, 0 ) performs a butterfly rotation.
  1072. inline void Decoder::butterfly_rotation_in_place(Span<Intermediate> data, size_t index_a, size_t index_b, u8 angle, bool flip)
  1073. {
  1074. auto cos = cos64(angle);
  1075. auto sin = sin64(angle);
  1076. // 1. The variable x is set equal to T[ a ] * cos64( angle ) - T[ b ] * sin64( angle ).
  1077. i64 rotated_a = data[index_a] * cos - data[index_b] * sin;
  1078. // 2. The variable y is set equal to T[ a ] * sin64( angle ) + T[ b ] * cos64( angle ).
  1079. i64 rotated_b = data[index_a] * sin + data[index_b] * cos;
  1080. // 3. T[ a ] is set equal to Round2( x, 14 ).
  1081. data[index_a] = rounded_right_shift(rotated_a, 14);
  1082. // 4. T[ b ] is set equal to Round2( y, 14 ).
  1083. data[index_b] = rounded_right_shift(rotated_b, 14);
  1084. // The function B( a ,b, angle, 1 ) performs a butterfly rotation and flip specified by the following ordered steps:
  1085. // 1. The function B( a, b, angle, 0 ) is invoked.
  1086. // 2. The contents of T[ a ] and T[ b ] are exchanged.
  1087. if (flip)
  1088. swap(data[index_a], data[index_b]);
  1089. // It is a requirement of bitstream conformance that the values saved into the array T by this function are
  1090. // representable by a signed integer using 8 + BitDepth bits of precision.
  1091. // Note: Since bounds checks just ensure that we will not have resulting values that will overflow, it's non-fatal
  1092. // to allow these bounds to be violated. Therefore, we can avoid the performance cost here.
  1093. }
  1094. // (8.7.1.1) The function H( a, b, 0 ) performs a Hadamard rotation.
  1095. inline void Decoder::hadamard_rotation_in_place(Span<Intermediate> data, size_t index_a, size_t index_b, bool flip)
  1096. {
  1097. // The function H( a, b, 1 ) performs a Hadamard rotation with flipped indices and is specified as follows:
  1098. // 1. The function H( b, a, 0 ) is invoked.
  1099. if (flip)
  1100. swap(index_a, index_b);
  1101. // The function H( a, b, 0 ) performs a Hadamard rotation specified by the following ordered steps:
  1102. // 1. The variable x is set equal to T[ a ].
  1103. auto a_value = data[index_a];
  1104. // 2. The variable y is set equal to T[ b ].
  1105. auto b_value = data[index_b];
  1106. // 3. T[ a ] is set equal to x + y.
  1107. data[index_a] = a_value + b_value;
  1108. // 4. T[ b ] is set equal to x - y.
  1109. data[index_b] = a_value - b_value;
  1110. // It is a requirement of bitstream conformance that the values saved into the array T by this function are
  1111. // representable by a signed integer using 8 + BitDepth bits of precision.
  1112. // Note: Since bounds checks just ensure that we will not have resulting values that will overflow, it's non-fatal
  1113. // to allow these bounds to be violated. Therefore, we can avoid the performance cost here.
  1114. }
  1115. inline DecoderErrorOr<void> Decoder::inverse_discrete_cosine_transform_array_permutation(Span<Intermediate> data, u8 log2_of_block_size)
  1116. {
  1117. u8 block_size = 1 << log2_of_block_size;
  1118. // This process performs an in-place permutation of the array T of length 2^n for 2 ≤ n ≤ 5 which is required before
  1119. // execution of the inverse DCT process.
  1120. if (log2_of_block_size < 2 || log2_of_block_size > 5)
  1121. return DecoderError::corrupted("Block size was out of range"sv);
  1122. // 1.1. A temporary array named copyT is set equal to T.
  1123. Array<Intermediate, maximum_transform_size> data_copy;
  1124. AK::TypedTransfer<Intermediate>::copy(data_copy.data(), data.data(), block_size);
  1125. // 1.2. T[ i ] is set equal to copyT[ brev( n, i ) ] for i = 0..((1<<n) - 1).
  1126. for (auto i = 0u; i < block_size; i++)
  1127. data[i] = data_copy[brev(log2_of_block_size, i)];
  1128. return {};
  1129. }
  1130. inline DecoderErrorOr<void> Decoder::inverse_discrete_cosine_transform(Span<Intermediate> data, u8 log2_of_block_size)
  1131. {
  1132. // 2.1. The variable n0 is set equal to 1<<n.
  1133. u8 block_size = 1 << log2_of_block_size;
  1134. // 8.7.1.3 Inverse DCT process
  1135. // 2.2. The variable n1 is set equal to 1<<(n-1).
  1136. u8 half_block_size = block_size >> 1;
  1137. // 2.3 The variable n2 is set equal to 1<<(n-2).
  1138. u8 quarter_block_size = half_block_size >> 1;
  1139. // 2.4 The variable n3 is set equal to 1<<(n-3).
  1140. u8 eighth_block_size = quarter_block_size >> 1;
  1141. // 2.5 If n is equal to 2, invoke B( 0, 1, 16, 1 ), otherwise recursively invoke the inverse DCT defined in this
  1142. // section with the variable n set equal to n - 1.
  1143. if (log2_of_block_size == 2)
  1144. butterfly_rotation_in_place(data, 0, 1, 16, true);
  1145. else
  1146. TRY(inverse_discrete_cosine_transform(data, log2_of_block_size - 1));
  1147. // 2.6 Invoke B( n1+i, n0-1-i, 32-brev( 5, n1+i), 0 ) for i = 0..(n2-1).
  1148. for (auto i = 0u; i < quarter_block_size; i++) {
  1149. auto index = half_block_size + i;
  1150. butterfly_rotation_in_place(data, index, block_size - 1 - i, 32 - brev(5, index), false);
  1151. }
  1152. // 2.7 If n is greater than or equal to 3:
  1153. if (log2_of_block_size >= 3) {
  1154. // a. Invoke H( n1+4*i+2*j, n1+1+4*i+2*j, j ) for i = 0..(n3-1), j = 0..1.
  1155. for (auto i = 0u; i < eighth_block_size; i++) {
  1156. for (auto j = 0u; j < 2; j++) {
  1157. auto index = half_block_size + (4 * i) + (2 * j);
  1158. hadamard_rotation_in_place(data, index, index + 1, j);
  1159. }
  1160. }
  1161. }
  1162. // 4. If n is equal to 5:
  1163. if (log2_of_block_size == 5) {
  1164. // a. Invoke B( n0-n+3-n2*j-4*i, n1+n-4+n2*j+4*i, 28-16*i+56*j, 1 ) for i = 0..1, j = 0..1.
  1165. for (auto i = 0u; i < 2; i++) {
  1166. for (auto j = 0u; j < 2; j++) {
  1167. auto index_a = block_size - log2_of_block_size + 3 - (quarter_block_size * j) - (4 * i);
  1168. auto index_b = half_block_size + log2_of_block_size - 4 + (quarter_block_size * j) + (4 * i);
  1169. auto angle = 28 - (16 * i) + (56 * j);
  1170. butterfly_rotation_in_place(data, index_a, index_b, angle, true);
  1171. }
  1172. }
  1173. // b. Invoke H( n1+n3*j+i, n1+n2-5+n3*j-i, j&1 ) for i = 0..1, j = 0..3.
  1174. for (auto i = 0u; i < 2; i++) {
  1175. for (auto j = 0u; j < 4; j++) {
  1176. auto index_a = half_block_size + (eighth_block_size * j) + i;
  1177. auto index_b = half_block_size + quarter_block_size - 5 + (eighth_block_size * j) - i;
  1178. hadamard_rotation_in_place(data, index_a, index_b, (j & 1) != 0);
  1179. }
  1180. }
  1181. }
  1182. // 5. If n is greater than or equal to 4:
  1183. if (log2_of_block_size >= 4) {
  1184. // a. Invoke B( n0-n+2-i-n2*j, n1+n-3+i+n2*j, 24+48*j, 1 ) for i = 0..(n==5), j = 0..1.
  1185. for (auto i = 0u; i <= (log2_of_block_size == 5); i++) {
  1186. for (auto j = 0u; j < 2; j++) {
  1187. auto index_a = block_size - log2_of_block_size + 2 - i - (quarter_block_size * j);
  1188. auto index_b = half_block_size + log2_of_block_size - 3 + i + (quarter_block_size * j);
  1189. butterfly_rotation_in_place(data, index_a, index_b, 24 + (48 * j), true);
  1190. }
  1191. }
  1192. // b. Invoke H( n1+n2*j+i, n1+n2-1+n2*j-i, j&1 ) for i = 0..(2n-7), j = 0..1.
  1193. for (auto i = 0u; i < (2 * log2_of_block_size) - 6u; i++) {
  1194. for (auto j = 0u; j < 2; j++) {
  1195. auto index_a = half_block_size + (quarter_block_size * j) + i;
  1196. auto index_b = half_block_size + quarter_block_size - 1 + (quarter_block_size * j) - i;
  1197. hadamard_rotation_in_place(data, index_a, index_b, (j & 1) != 0);
  1198. }
  1199. }
  1200. }
  1201. // 6. If n is greater than or equal to 3:
  1202. if (log2_of_block_size >= 3) {
  1203. // a. Invoke B( n0-n3-1-i, n1+n3+i, 16, 1 ) for i = 0..(n3-1).
  1204. for (auto i = 0u; i < eighth_block_size; i++) {
  1205. auto index_a = block_size - eighth_block_size - 1 - i;
  1206. auto index_b = half_block_size + eighth_block_size + i;
  1207. butterfly_rotation_in_place(data, index_a, index_b, 16, true);
  1208. }
  1209. }
  1210. // 7. Invoke H( i, n0-1-i, 0 ) for i = 0..(n1-1).
  1211. for (auto i = 0u; i < half_block_size; i++)
  1212. hadamard_rotation_in_place(data, i, block_size - 1 - i, false);
  1213. return {};
  1214. }
  1215. inline void Decoder::inverse_asymmetric_discrete_sine_transform_input_array_permutation(Span<Intermediate> data, u8 log2_of_block_size)
  1216. {
  1217. // The variable n0 is set equal to 1<<n.
  1218. auto block_size = 1u << log2_of_block_size;
  1219. // The variable n1 is set equal to 1<<(n-1).
  1220. // We can iterate by 2 at a time instead of taking half block size.
  1221. // A temporary array named copyT is set equal to T.
  1222. Array<Intermediate, maximum_transform_size> data_copy;
  1223. AK::TypedTransfer<Intermediate>::copy(data_copy.data(), data.data(), block_size);
  1224. // The values at even locations T[ 2 * i ] are set equal to copyT[ n0 - 1 - 2 * i ] for i = 0..(n1-1).
  1225. // The values at odd locations T[ 2 * i + 1 ] are set equal to copyT[ 2 * i ] for i = 0..(n1-1).
  1226. for (auto i = 0u; i < block_size; i += 2) {
  1227. data[i] = data_copy[block_size - 1 - i];
  1228. data[i + 1] = data_copy[i];
  1229. }
  1230. }
  1231. inline void Decoder::inverse_asymmetric_discrete_sine_transform_output_array_permutation(Span<Intermediate> data, u8 log2_of_block_size)
  1232. {
  1233. auto block_size = 1u << log2_of_block_size;
  1234. // A temporary array named copyT is set equal to T.
  1235. Array<Intermediate, maximum_transform_size> data_copy;
  1236. AK::TypedTransfer<Intermediate>::copy(data_copy.data(), data.data(), block_size);
  1237. // The permutation depends on n as follows:
  1238. if (log2_of_block_size == 4) {
  1239. // − If n is equal to 4,
  1240. // T[ 8*a + 4*b + 2*c + d ] is set equal to copyT[ 8*(d^c) + 4*(c^b) + 2*(b^a) + a ] for a = 0..1
  1241. // and b = 0..1 and c = 0..1 and d = 0..1.
  1242. for (auto a = 0u; a < 2; a++)
  1243. for (auto b = 0u; b < 2; b++)
  1244. for (auto c = 0u; c < 2; c++)
  1245. for (auto d = 0u; d < 2; d++)
  1246. data[(8 * a) + (4 * b) + (2 * c) + d] = data_copy[8 * (d ^ c) + 4 * (c ^ b) + 2 * (b ^ a) + a];
  1247. } else {
  1248. VERIFY(log2_of_block_size == 3);
  1249. // − Otherwise (n is equal to 3),
  1250. // T[ 4*a + 2*b + c ] is set equal to copyT[ 4*(c^b) + 2*(b^a) + a ] for a = 0..1 and
  1251. // b = 0..1 and c = 0..1.
  1252. for (auto a = 0u; a < 2; a++)
  1253. for (auto b = 0u; b < 2; b++)
  1254. for (auto c = 0u; c < 2; c++)
  1255. data[4 * a + 2 * b + c] = data_copy[4 * (c ^ b) + 2 * (b ^ a) + a];
  1256. }
  1257. }
  1258. inline void Decoder::inverse_asymmetric_discrete_sine_transform_4(Span<Intermediate> data)
  1259. {
  1260. VERIFY(data.size() == 4);
  1261. const i64 sinpi_1_9 = 5283;
  1262. const i64 sinpi_2_9 = 9929;
  1263. const i64 sinpi_3_9 = 13377;
  1264. const i64 sinpi_4_9 = 15212;
  1265. // Steps are derived from pseudocode in (8.7.1.6):
  1266. // s0 = SINPI_1_9 * T[ 0 ]
  1267. i64 s0 = sinpi_1_9 * data[0];
  1268. // s1 = SINPI_2_9 * T[ 0 ]
  1269. i64 s1 = sinpi_2_9 * data[0];
  1270. // s2 = SINPI_3_9 * T[ 1 ]
  1271. i64 s2 = sinpi_3_9 * data[1];
  1272. // s3 = SINPI_4_9 * T[ 2 ]
  1273. i64 s3 = sinpi_4_9 * data[2];
  1274. // s4 = SINPI_1_9 * T[ 2 ]
  1275. i64 s4 = sinpi_1_9 * data[2];
  1276. // s5 = SINPI_2_9 * T[ 3 ]
  1277. i64 s5 = sinpi_2_9 * data[3];
  1278. // s6 = SINPI_4_9 * T[ 3 ]
  1279. i64 s6 = sinpi_4_9 * data[3];
  1280. // v = T[ 0 ] - T[ 2 ] + T[ 3 ]
  1281. // s7 = SINPI_3_9 * v
  1282. i64 s7 = sinpi_3_9 * (data[0] - data[2] + data[3]);
  1283. // x0 = s0 + s3 + s5
  1284. auto x0 = s0 + s3 + s5;
  1285. // x1 = s1 - s4 - s6
  1286. auto x1 = s1 - s4 - s6;
  1287. // x2 = s7
  1288. auto x2 = s7;
  1289. // x3 = s2
  1290. auto x3 = s2;
  1291. // s0 = x0 + x3
  1292. s0 = x0 + x3;
  1293. // s1 = x1 + x3
  1294. s1 = x1 + x3;
  1295. // s2 = x2
  1296. s2 = x2;
  1297. // s3 = x0 + x1 - x3
  1298. s3 = x0 + x1 - x3;
  1299. // T[ 0 ] = Round2( s0, 14 )
  1300. data[0] = rounded_right_shift(s0, 14);
  1301. // T[ 1 ] = Round2( s1, 14 )
  1302. data[1] = rounded_right_shift(s1, 14);
  1303. // T[ 2 ] = Round2( s2, 14 )
  1304. data[2] = rounded_right_shift(s2, 14);
  1305. // T[ 3 ] = Round2( s3, 14 )
  1306. data[3] = rounded_right_shift(s3, 14);
  1307. // (8.7.1.1) The inverse asymmetric discrete sine transforms also make use of an intermediate array named S.
  1308. // The values in this array require higher precision to avoid overflow. Using signed integers with 24 +
  1309. // BitDepth bits of precision is enough to avoid overflow.
  1310. // Note: Since bounds checks just ensure that we will not have resulting values that will overflow, it's non-fatal
  1311. // to allow these bounds to be violated. Therefore, we can avoid the performance cost here.
  1312. }
  1313. // The function SB( a, b, angle, 0 ) performs a butterfly rotation.
  1314. // Spec defines the source as array T, and the destination array as S.
  1315. template<typename S, typename D>
  1316. inline void Decoder::butterfly_rotation(Span<S> source, Span<D> destination, size_t index_a, size_t index_b, u8 angle, bool flip)
  1317. {
  1318. // The function SB( a, b, angle, 0 ) performs a butterfly rotation according to the following ordered steps:
  1319. auto cos = cos64(angle);
  1320. auto sin = sin64(angle);
  1321. // Expand to the destination buffer's precision.
  1322. D a = source[index_a];
  1323. D b = source[index_b];
  1324. // 1. S[ a ] is set equal to T[ a ] * cos64( angle ) - T[ b ] * sin64( angle ).
  1325. destination[index_a] = a * cos - b * sin;
  1326. // 2. S[ b ] is set equal to T[ a ] * sin64( angle ) + T[ b ] * cos64( angle ).
  1327. destination[index_b] = a * sin + b * cos;
  1328. // The function SB( a, b, angle, 1 ) performs a butterfly rotation and flip according to the following ordered steps:
  1329. // 1. The function SB( a, b, angle, 0 ) is invoked.
  1330. // 2. The contents of S[ a ] and S[ b ] are exchanged.
  1331. if (flip)
  1332. swap(destination[index_a], destination[index_b]);
  1333. }
  1334. // The function SH( a, b ) performs a Hadamard rotation and rounding.
  1335. // Spec defines the source array as S, and the destination array as T.
  1336. template<typename S, typename D>
  1337. inline void Decoder::hadamard_rotation(Span<S> source, Span<D> destination, size_t index_a, size_t index_b)
  1338. {
  1339. // Keep the source buffer's precision until rounding.
  1340. S a = source[index_a];
  1341. S b = source[index_b];
  1342. // 1. T[ a ] is set equal to Round2( S[ a ] + S[ b ], 14 ).
  1343. destination[index_a] = rounded_right_shift(a + b, 14);
  1344. // 2. T[ b ] is set equal to Round2( S[ a ] - S[ b ], 14 ).
  1345. destination[index_b] = rounded_right_shift(a - b, 14);
  1346. }
  1347. inline DecoderErrorOr<void> Decoder::inverse_asymmetric_discrete_sine_transform_8(Span<Intermediate> data)
  1348. {
  1349. VERIFY(data.size() == 8);
  1350. // This process does an in-place transform of the array T using:
  1351. // A higher precision array S for intermediate results.
  1352. // (8.7.1.1) NOTE - The values in array S require higher precision to avoid overflow. Using signed integers with
  1353. // 24 + BitDepth bits of precision is enough to avoid overflow.
  1354. Array<i64, 8> high_precision_temp;
  1355. // The following ordered steps apply:
  1356. // 1. Invoke the ADST input array permutation process specified in section 8.7.1.4 with the input variable n set
  1357. // equal to 3.
  1358. inverse_asymmetric_discrete_sine_transform_input_array_permutation(data, 3);
  1359. // 2. Invoke SB( 2*i, 1+2*i, 30-8*i, 1 ) for i = 0..3.
  1360. for (auto i = 0u; i < 4; i++)
  1361. butterfly_rotation(data, high_precision_temp.span(), 2 * i, 1 + (2 * i), 30 - (8 * i), true);
  1362. // 3. Invoke SH( i, 4+i ) for i = 0..3.
  1363. for (auto i = 0u; i < 4; i++)
  1364. hadamard_rotation(high_precision_temp.span(), data, i, 4 + i);
  1365. // 4. Invoke SB( 4+3*i, 5+i, 24-16*i, 1 ) for i = 0..1.
  1366. for (auto i = 0u; i < 2; i++)
  1367. butterfly_rotation(data, high_precision_temp.span(), 4 + (3 * i), 5 + i, 24 - (16 * i), true);
  1368. // 5. Invoke SH( 4+i, 6+i ) for i = 0..1.
  1369. for (auto i = 0u; i < 2; i++)
  1370. hadamard_rotation(high_precision_temp.span(), data, 4 + i, 6 + i);
  1371. // 6. Invoke H( i, 2+i, 0 ) for i = 0..1.
  1372. for (auto i = 0u; i < 2; i++)
  1373. hadamard_rotation_in_place(data, i, 2 + i, false);
  1374. // 7. Invoke B( 2+4*i, 3+4*i, 16, 1 ) for i = 0..1.
  1375. for (auto i = 0u; i < 2; i++)
  1376. butterfly_rotation_in_place(data, 2 + (4 * i), 3 + (4 * i), 16, true);
  1377. // 8. Invoke the ADST output array permutation process specified in section 8.7.1.5 with the input variable n
  1378. // set equal to 3.
  1379. inverse_asymmetric_discrete_sine_transform_output_array_permutation(data, 3);
  1380. // 9. Set T[ 1+2*i ] equal to -T[ 1+2*i ] for i = 0..3.
  1381. for (auto i = 0u; i < 4; i++) {
  1382. auto index = 1 + (2 * i);
  1383. data[index] = -data[index];
  1384. }
  1385. return {};
  1386. }
  1387. inline DecoderErrorOr<void> Decoder::inverse_asymmetric_discrete_sine_transform_16(Span<Intermediate> data)
  1388. {
  1389. VERIFY(data.size() == 16);
  1390. // This process does an in-place transform of the array T using:
  1391. // A higher precision array S for intermediate results.
  1392. // (8.7.1.1) The inverse asymmetric discrete sine transforms also make use of an intermediate array named S.
  1393. // The values in this array require higher precision to avoid overflow. Using signed integers with 24 +
  1394. // BitDepth bits of precision is enough to avoid overflow.
  1395. Array<i64, 16> high_precision_temp;
  1396. // The following ordered steps apply:
  1397. // 1. Invoke the ADST input array permutation process specified in section 8.7.1.4 with the input variable n set
  1398. // equal to 4.
  1399. inverse_asymmetric_discrete_sine_transform_input_array_permutation(data, 4);
  1400. // 2. Invoke SB( 2*i, 1+2*i, 31-4*i, 1 ) for i = 0..7.
  1401. for (auto i = 0u; i < 8; i++)
  1402. butterfly_rotation(data, high_precision_temp.span(), 2 * i, 1 + (2 * i), 31 - (4 * i), true);
  1403. // 3. Invoke SH( i, 8+i ) for i = 0..7.
  1404. for (auto i = 0u; i < 8; i++)
  1405. hadamard_rotation(high_precision_temp.span(), data, i, 8 + i);
  1406. // 4. Invoke SB( 8+2*i, 9+2*i, 28-16*i, 1 ) for i = 0..3.
  1407. for (auto i = 0u; i < 4; i++)
  1408. butterfly_rotation(data, high_precision_temp.span(), 8 + (2 * i), 9 + (2 * i), 128 + 28 - (16 * i), true);
  1409. // 5. Invoke SH( 8+i, 12+i ) for i = 0..3.
  1410. for (auto i = 0u; i < 4; i++)
  1411. hadamard_rotation(high_precision_temp.span(), data, 8 + i, 12 + i);
  1412. // 6. Invoke H( i, 4+i, 0 ) for i = 0..3.
  1413. for (auto i = 0u; i < 4; i++)
  1414. hadamard_rotation_in_place(data, i, 4 + i, false);
  1415. // 7. Invoke SB( 4+8*i+3*j, 5+8*i+j, 24-16*j, 1 ) for i = 0..1, for j = 0..1.
  1416. for (auto i = 0u; i < 2; i++)
  1417. for (auto j = 0u; j < 2; j++)
  1418. butterfly_rotation(data, high_precision_temp.span(), 4 + (8 * i) + (3 * j), 5 + (8 * i) + j, 24 - (16 * j), true);
  1419. // 8. Invoke SH( 4+8*j+i, 6+8*j+i ) for i = 0..1, j = 0..1.
  1420. for (auto i = 0u; i < 2; i++)
  1421. for (auto j = 0u; j < 2; j++)
  1422. hadamard_rotation(high_precision_temp.span(), data, 4 + (8 * j) + i, 6 + (8 * j) + i);
  1423. // 9. Invoke H( 8*j+i, 2+8*j+i, 0 ) for i = 0..1, for j = 0..1.
  1424. for (auto i = 0u; i < 2; i++)
  1425. for (auto j = 0u; j < 2; j++)
  1426. hadamard_rotation_in_place(data, (8 * j) + i, 2 + (8 * j) + i, false);
  1427. // 10. Invoke B( 2+4*j+8*i, 3+4*j+8*i, 48+64*(i^j), 0 ) for i = 0..1, for j = 0..1.
  1428. for (auto i = 0u; i < 2; i++)
  1429. for (auto j = 0u; j < 2; j++)
  1430. butterfly_rotation_in_place(data, 2 + (4 * j) + (8 * i), 3 + (4 * j) + (8 * i), 48 + (64 * (i ^ j)), false);
  1431. // 11. Invoke the ADST output array permutation process specified in section 8.7.1.5 with the input variable n
  1432. // set equal to 4.
  1433. inverse_asymmetric_discrete_sine_transform_output_array_permutation(data, 4);
  1434. // 12. Set T[ 1+12*j+2*i ] equal to -T[ 1+12*j+2*i ] for i = 0..1, for j = 0..1.
  1435. for (auto i = 0u; i < 2; i++) {
  1436. for (auto j = 0u; j < 2; j++) {
  1437. auto index = 1 + (12 * j) + (2 * i);
  1438. data[index] = -data[index];
  1439. }
  1440. }
  1441. return {};
  1442. }
  1443. inline DecoderErrorOr<void> Decoder::inverse_asymmetric_discrete_sine_transform(Span<Intermediate> data, u8 log2_of_block_size)
  1444. {
  1445. // 8.7.1.9 Inverse ADST Process
  1446. // This process performs an in-place inverse ADST process on the array T of size 2^n for 2 ≤ n ≤ 4.
  1447. if (log2_of_block_size < 2 || log2_of_block_size > 4)
  1448. return DecoderError::corrupted("Block size was out of range"sv);
  1449. // The process to invoke depends on n as follows:
  1450. if (log2_of_block_size == 2) {
  1451. // − If n is equal to 2, invoke the Inverse ADST4 process specified in section 8.7.1.6.
  1452. inverse_asymmetric_discrete_sine_transform_4(data);
  1453. return {};
  1454. }
  1455. if (log2_of_block_size == 3) {
  1456. // − Otherwise if n is equal to 3, invoke the Inverse ADST8 process specified in section 8.7.1.7.
  1457. return inverse_asymmetric_discrete_sine_transform_8(data);
  1458. }
  1459. // − Otherwise (n is equal to 4), invoke the Inverse ADST16 process specified in section 8.7.1.8.
  1460. return inverse_asymmetric_discrete_sine_transform_16(data);
  1461. }
  1462. DecoderErrorOr<void> Decoder::inverse_transform_2d(BlockContext const& block_context, Span<Intermediate> dequantized, u8 log2_of_block_size, TransformSet transform_set)
  1463. {
  1464. // This process performs a 2D inverse transform for an array of size 2^n by 2^n stored in the 2D array Dequant.
  1465. // The input to this process is a variable n (log2_of_block_size) that specifies the base 2 logarithm of the width of the transform.
  1466. // 1. Set the variable n0 (block_size) equal to 1 << n.
  1467. auto block_size = 1u << log2_of_block_size;
  1468. Array<Intermediate, maximum_transform_size> row_array;
  1469. Span<Intermediate> row = row_array.span().trim(block_size);
  1470. // 2. The row transforms with i = 0..(n0-1) are applied as follows:
  1471. for (auto i = 0u; i < block_size; i++) {
  1472. // 1. Set T[ j ] equal to Dequant[ i ][ j ] for j = 0..(n0-1).
  1473. for (auto j = 0u; j < block_size; j++)
  1474. row[j] = dequantized[index_from_row_and_column(i, j, block_size)];
  1475. // 2. If Lossless is equal to 1, invoke the Inverse WHT process as specified in section 8.7.1.10 with shift equal
  1476. // to 2.
  1477. if (block_context.frame_context.is_lossless()) {
  1478. TRY(inverse_walsh_hadamard_transform(row, log2_of_block_size, 2));
  1479. continue;
  1480. }
  1481. switch (transform_set.second_transform) {
  1482. case TransformType::DCT:
  1483. // Otherwise, if TxType is equal to DCT_DCT or TxType is equal to ADST_DCT, apply an inverse DCT as
  1484. // follows:
  1485. // 1. Invoke the inverse DCT permutation process as specified in section 8.7.1.2 with the input variable n.
  1486. TRY(inverse_discrete_cosine_transform_array_permutation(row, log2_of_block_size));
  1487. // 2. Invoke the inverse DCT process as specified in section 8.7.1.3 with the input variable n.
  1488. TRY(inverse_discrete_cosine_transform(row, log2_of_block_size));
  1489. break;
  1490. case TransformType::ADST:
  1491. // 4. Otherwise (TxType is equal to DCT_ADST or TxType is equal to ADST_ADST), invoke the inverse ADST
  1492. // process as specified in section 8.7.1.9 with input variable n.
  1493. TRY(inverse_asymmetric_discrete_sine_transform(row, log2_of_block_size));
  1494. break;
  1495. default:
  1496. return DecoderError::corrupted("Unknown tx_type"sv);
  1497. }
  1498. // 5. Set Dequant[ i ][ j ] equal to T[ j ] for j = 0..(n0-1).
  1499. for (auto j = 0u; j < block_size; j++)
  1500. dequantized[index_from_row_and_column(i, j, block_size)] = row[j];
  1501. }
  1502. Array<Intermediate, maximum_transform_size> column_array;
  1503. auto column = column_array.span().trim(block_size);
  1504. // 3. The column transforms with j = 0..(n0-1) are applied as follows:
  1505. for (auto j = 0u; j < block_size; j++) {
  1506. // 1. Set T[ i ] equal to Dequant[ i ][ j ] for i = 0..(n0-1).
  1507. for (auto i = 0u; i < block_size; i++)
  1508. column[i] = dequantized[index_from_row_and_column(i, j, block_size)];
  1509. // 2. If Lossless is equal to 1, invoke the Inverse WHT process as specified in section 8.7.1.10 with shift equal
  1510. // to 0.
  1511. if (block_context.frame_context.is_lossless()) {
  1512. TRY(inverse_walsh_hadamard_transform(column, log2_of_block_size, 2));
  1513. continue;
  1514. }
  1515. switch (transform_set.first_transform) {
  1516. case TransformType::DCT:
  1517. // Otherwise, if TxType is equal to DCT_DCT or TxType is equal to DCT_ADST, apply an inverse DCT as
  1518. // follows:
  1519. // 1. Invoke the inverse DCT permutation process as specified in section 8.7.1.2 with the input variable n.
  1520. TRY(inverse_discrete_cosine_transform_array_permutation(column, log2_of_block_size));
  1521. // 2. Invoke the inverse DCT process as specified in section 8.7.1.3 with the input variable n.
  1522. TRY(inverse_discrete_cosine_transform(column, log2_of_block_size));
  1523. break;
  1524. case TransformType::ADST:
  1525. // 4. Otherwise (TxType is equal to ADST_DCT or TxType is equal to ADST_ADST), invoke the inverse ADST
  1526. // process as specified in section 8.7.1.9 with input variable n.
  1527. TRY(inverse_asymmetric_discrete_sine_transform(column, log2_of_block_size));
  1528. break;
  1529. default:
  1530. VERIFY_NOT_REACHED();
  1531. }
  1532. // 5. If Lossless is equal to 1, set Dequant[ i ][ j ] equal to T[ i ] for i = 0..(n0-1).
  1533. for (auto i = 0u; i < block_size; i++)
  1534. dequantized[index_from_row_and_column(i, j, block_size)] = column[i];
  1535. // 6. Otherwise (Lossless is equal to 0), set Dequant[ i ][ j ] equal to Round2( T[ i ], Min( 6, n + 2 ) )
  1536. // for i = 0..(n0-1).
  1537. if (!block_context.frame_context.is_lossless()) {
  1538. for (auto i = 0u; i < block_size; i++) {
  1539. auto index = index_from_row_and_column(i, j, block_size);
  1540. dequantized[index] = rounded_right_shift(dequantized[index], min(6, log2_of_block_size + 2));
  1541. }
  1542. }
  1543. }
  1544. return {};
  1545. }
  1546. DecoderErrorOr<void> Decoder::update_reference_frames(FrameContext const& frame_context)
  1547. {
  1548. // This process is invoked as the final step in decoding a frame.
  1549. // The inputs to this process are the samples in the current frame CurrFrame[ plane ][ x ][ y ].
  1550. // The output from this process is an updated set of reference frames and previous motion vectors.
  1551. // The following ordered steps apply:
  1552. // 1. For each value of i from 0 to NUM_REF_FRAMES - 1, the following applies if bit i of refresh_frame_flags
  1553. // is equal to 1 (i.e. if (refresh_frame_flags>>i)&1 is equal to 1):
  1554. for (u8 i = 0; i < NUM_REF_FRAMES; i++) {
  1555. if (frame_context.should_update_reference_frame_at_index(i)) {
  1556. auto& reference_frame = m_parser->m_reference_frames[i];
  1557. // − RefFrameWidth[ i ] is set equal to FrameWidth.
  1558. // − RefFrameHeight[ i ] is set equal to FrameHeight.
  1559. reference_frame.size = frame_context.size();
  1560. // − RefSubsamplingX[ i ] is set equal to subsampling_x.
  1561. reference_frame.subsampling_x = frame_context.color_config.subsampling_x;
  1562. // − RefSubsamplingY[ i ] is set equal to subsampling_y.
  1563. reference_frame.subsampling_y = frame_context.color_config.subsampling_y;
  1564. // − RefBitDepth[ i ] is set equal to BitDepth.
  1565. reference_frame.bit_depth = frame_context.color_config.bit_depth;
  1566. // − FrameStore[ i ][ 0 ][ y ][ x ] is set equal to CurrFrame[ 0 ][ y ][ x ] for x = 0..FrameWidth-1, for y =
  1567. // 0..FrameHeight-1.
  1568. // − FrameStore[ i ][ plane ][ y ][ x ] is set equal to CurrFrame[ plane ][ y ][ x ] for plane = 1..2, for x =
  1569. // 0..((FrameWidth+subsampling_x) >> subsampling_x)-1, for y = 0..((FrameHeight+subsampling_y) >>
  1570. // subsampling_y)-1.
  1571. // FIXME: Frame width is not equal to the buffer's stride. If we store the stride of the buffer with the reference
  1572. // frame, we can just copy the framebuffer data instead. Alternatively, we should crop the output framebuffer.
  1573. for (auto plane = 0u; plane < 3; plane++) {
  1574. auto width = frame_context.size().width();
  1575. auto height = frame_context.size().height();
  1576. auto stride = frame_context.columns() * 8;
  1577. if (plane > 0) {
  1578. width = (width + frame_context.color_config.subsampling_x) >> frame_context.color_config.subsampling_x;
  1579. height = (height + frame_context.color_config.subsampling_y) >> frame_context.color_config.subsampling_y;
  1580. stride >>= frame_context.color_config.subsampling_x;
  1581. }
  1582. auto const& original_buffer = get_output_buffer(plane);
  1583. auto& frame_store_buffer = reference_frame.frame_planes[plane];
  1584. auto frame_store_width = width + MV_BORDER * 2;
  1585. auto frame_store_height = height + MV_BORDER * 2;
  1586. frame_store_buffer.resize_and_keep_capacity(frame_store_width * frame_store_height);
  1587. VERIFY(original_buffer.size() >= width * height);
  1588. for (auto destination_y = 0u; destination_y < frame_store_height; destination_y++) {
  1589. // Offset the source row by the motion vector border and then clamp it to the range of 0...height.
  1590. // This will create an extended border on the top and bottom of the reference frame to avoid having to bounds check
  1591. // inter-prediction.
  1592. auto source_y = min(destination_y >= MV_BORDER ? destination_y - MV_BORDER : 0, height - 1);
  1593. auto const* source = &original_buffer[index_from_row_and_column(source_y, 0, stride)];
  1594. auto* destination = &frame_store_buffer[index_from_row_and_column(destination_y, MV_BORDER, frame_store_width)];
  1595. AK::TypedTransfer<RemoveReference<decltype(*destination)>>::copy(destination, source, width);
  1596. }
  1597. for (auto destination_y = 0u; destination_y < frame_store_height; destination_y++) {
  1598. // Stretch the leftmost samples out into the border.
  1599. auto sample = frame_store_buffer[index_from_row_and_column(destination_y, MV_BORDER, frame_store_width)];
  1600. for (auto destination_x = 0u; destination_x < MV_BORDER; destination_x++) {
  1601. frame_store_buffer[index_from_row_and_column(destination_y, destination_x, frame_store_width)] = sample;
  1602. }
  1603. // Stretch the rightmost samples out into the border.
  1604. sample = frame_store_buffer[index_from_row_and_column(destination_y, MV_BORDER + width - 1, frame_store_width)];
  1605. for (auto destination_x = MV_BORDER + width; destination_x < frame_store_width; destination_x++) {
  1606. frame_store_buffer[index_from_row_and_column(destination_y, destination_x, frame_store_width)] = sample;
  1607. }
  1608. }
  1609. }
  1610. }
  1611. }
  1612. // 2. If show_existing_frame is equal to 0, the following applies:
  1613. if (!frame_context.shows_existing_frame()) {
  1614. DECODER_TRY_ALLOC(m_parser->m_previous_block_contexts.try_resize_to_match_other_vector2d(frame_context.block_contexts()));
  1615. // − PrevRefFrames[ row ][ col ][ list ] is set equal to RefFrames[ row ][ col ][ list ] for row = 0..MiRows-1,
  1616. // for col = 0..MiCols-1, for list = 0..1.
  1617. // − PrevMvs[ row ][ col ][ list ][ comp ] is set equal to Mvs[ row ][ col ][ list ][ comp ] for row = 0..MiRows-1,
  1618. // for col = 0..MiCols-1, for list = 0..1, for comp = 0..1.
  1619. // And from decode_frame():
  1620. // - If all of the following conditions are true, PrevSegmentIds[ row ][ col ] is set equal to
  1621. // SegmentIds[ row ][ col ] for row = 0..MiRows-1, for col = 0..MiCols-1:
  1622. // − show_existing_frame is equal to 0,
  1623. // − segmentation_enabled is equal to 1,
  1624. // − segmentation_update_map is equal to 1.
  1625. bool keep_segment_ids = !frame_context.shows_existing_frame() && frame_context.segmentation_enabled && frame_context.use_full_segment_id_tree;
  1626. frame_context.block_contexts().copy_to(m_parser->m_previous_block_contexts, [keep_segment_ids](FrameBlockContext context) {
  1627. auto persistent_context = PersistentBlockContext(context);
  1628. if (!keep_segment_ids)
  1629. persistent_context.segment_id = 0;
  1630. return persistent_context;
  1631. });
  1632. }
  1633. return {};
  1634. }
  1635. }