Sampler.cpp 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. /*
  2. * Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/SIMDExtras.h>
  7. #include <AK/SIMDMath.h>
  8. #include <LibSoftGPU/Config.h>
  9. #include <LibSoftGPU/Image.h>
  10. #include <LibSoftGPU/SIMD.h>
  11. #include <LibSoftGPU/Sampler.h>
  12. #include <math.h>
  13. namespace SoftGPU {
  14. using AK::SIMD::f32x4;
  15. using AK::SIMD::i32x4;
  16. using AK::SIMD::u32x4;
  17. using AK::SIMD::clamp;
  18. using AK::SIMD::expand4;
  19. using AK::SIMD::floor_int_range;
  20. using AK::SIMD::frac_int_range;
  21. using AK::SIMD::maskbits;
  22. using AK::SIMD::to_f32x4;
  23. using AK::SIMD::to_i32x4;
  24. using AK::SIMD::to_u32x4;
  25. using AK::SIMD::truncate_int_range;
  26. static f32x4 wrap_repeat(f32x4 value)
  27. {
  28. return frac_int_range(value);
  29. }
  30. [[maybe_unused]] static f32x4 wrap_clamp(f32x4 value)
  31. {
  32. return clamp(value, expand4(0.0f), expand4(1.0f));
  33. }
  34. static f32x4 wrap_clamp_to_edge(f32x4 value, u32x4 num_texels)
  35. {
  36. f32x4 const clamp_limit = 1.f / to_f32x4(2 * num_texels);
  37. return clamp(value, clamp_limit, 1.0f - clamp_limit);
  38. }
  39. static f32x4 wrap_mirrored_repeat(f32x4 value, u32x4 num_texels)
  40. {
  41. f32x4 integer = floor_int_range(value);
  42. f32x4 frac = value - integer;
  43. auto is_odd = to_i32x4(integer) & 1;
  44. return wrap_clamp_to_edge(is_odd ? 1 - frac : frac, num_texels);
  45. }
  46. static f32x4 wrap(f32x4 value, TextureWrapMode mode, u32x4 num_texels)
  47. {
  48. switch (mode) {
  49. case TextureWrapMode::Repeat:
  50. return wrap_repeat(value);
  51. case TextureWrapMode::MirroredRepeat:
  52. return wrap_mirrored_repeat(value, num_texels);
  53. case TextureWrapMode::Clamp:
  54. if constexpr (CLAMP_DEPRECATED_BEHAVIOR) {
  55. return wrap_clamp(value);
  56. }
  57. return wrap_clamp_to_edge(value, num_texels);
  58. case TextureWrapMode::ClampToBorder:
  59. case TextureWrapMode::ClampToEdge:
  60. return wrap_clamp_to_edge(value, num_texels);
  61. default:
  62. VERIFY_NOT_REACHED();
  63. }
  64. }
  65. ALWAYS_INLINE static Vector4<f32x4> texel4(Image const& image, u32x4 layer, u32x4 level, u32x4 x, u32x4 y, u32x4 z)
  66. {
  67. auto t0 = image.texel(layer[0], level[0], x[0], y[0], z[0]);
  68. auto t1 = image.texel(layer[1], level[1], x[1], y[1], z[1]);
  69. auto t2 = image.texel(layer[2], level[2], x[2], y[2], z[2]);
  70. auto t3 = image.texel(layer[3], level[3], x[3], y[3], z[3]);
  71. return Vector4<f32x4> {
  72. f32x4 { t0.x(), t1.x(), t2.x(), t3.x() },
  73. f32x4 { t0.y(), t1.y(), t2.y(), t3.y() },
  74. f32x4 { t0.z(), t1.z(), t2.z(), t3.z() },
  75. f32x4 { t0.w(), t1.w(), t2.w(), t3.w() },
  76. };
  77. }
  78. ALWAYS_INLINE static Vector4<f32x4> texel4border(Image const& image, u32x4 layer, u32x4 level, u32x4 x, u32x4 y, u32x4 z, FloatVector4 const& border, u32x4 w, u32x4 h)
  79. {
  80. auto border_mask = maskbits(x < 0 || x >= w || y < 0 || y >= h);
  81. auto t0 = border_mask & 1 ? border : image.texel(layer[0], level[0], x[0], y[0], z[0]);
  82. auto t1 = border_mask & 2 ? border : image.texel(layer[1], level[1], x[1], y[1], z[1]);
  83. auto t2 = border_mask & 4 ? border : image.texel(layer[2], level[2], x[2], y[2], z[2]);
  84. auto t3 = border_mask & 8 ? border : image.texel(layer[3], level[3], x[3], y[3], z[3]);
  85. return Vector4<f32x4> {
  86. f32x4 { t0.x(), t1.x(), t2.x(), t3.x() },
  87. f32x4 { t0.y(), t1.y(), t2.y(), t3.y() },
  88. f32x4 { t0.z(), t1.z(), t2.z(), t3.z() },
  89. f32x4 { t0.w(), t1.w(), t2.w(), t3.w() },
  90. };
  91. }
  92. Vector4<AK::SIMD::f32x4> Sampler::sample_2d(Vector2<AK::SIMD::f32x4> const& uv) const
  93. {
  94. if (m_config.bound_image.is_null())
  95. return expand4(FloatVector4 { 1, 0, 0, 1 });
  96. auto const& image = *m_config.bound_image;
  97. // FIXME: Make base level configurable with glTexParameteri(GL_TEXTURE_BASE_LEVEL, base_level)
  98. constexpr unsigned base_level = 0;
  99. // Determine the texture scale factor. See OpenGL 1.5 spec chapter 3.8.8.
  100. // FIXME: Static casting from u32 to float could silently truncate here.
  101. // u16 should be plenty enough for texture dimensions and would allow textures of up to 65536x65536x65536 pixels.
  102. auto texel_coordinates = uv;
  103. texel_coordinates.set_x(texel_coordinates.x() * static_cast<float>(image.level_width(base_level)));
  104. texel_coordinates.set_y(texel_coordinates.y() * static_cast<float>(image.level_height(base_level)));
  105. auto dtdx = ddx(texel_coordinates);
  106. auto dtdy = ddy(texel_coordinates);
  107. auto scale_factor = max(dtdx.dot(dtdx), dtdy.dot(dtdy));
  108. // FIXME: Here we simply determine the filter based on the single scale factor of the upper left pixel.
  109. // Actually, we could end up with different scale factors for each pixel. This however would break our
  110. // parallelisation as we could also end up with different filter modes per pixel.
  111. auto filter = scale_factor[0] > 1 ? m_config.texture_mag_filter : m_config.texture_min_filter;
  112. if (m_config.mipmap_filter == MipMapFilter::None)
  113. return sample_2d_lod(uv, expand4(base_level), filter);
  114. // FIXME: Instead of clamping to num_levels - 1, actually make the max mipmap level configurable with glTexParameteri(GL_TEXTURE_MAX_LEVEL, max_level)
  115. auto min_level = expand4(static_cast<float>(base_level));
  116. auto max_level = expand4(image.num_levels() - 1.0f);
  117. auto level = min(max(log2_approximate(scale_factor) * 0.5f, min_level), max_level);
  118. auto lower_level_texel = sample_2d_lod(uv, to_u32x4(level), filter);
  119. if (m_config.mipmap_filter == MipMapFilter::Nearest)
  120. return lower_level_texel;
  121. auto higher_level_texel = sample_2d_lod(uv, to_u32x4(min(level + 1.f, max_level)), filter);
  122. return mix(lower_level_texel, higher_level_texel, frac_int_range(level));
  123. }
  124. Vector4<AK::SIMD::f32x4> Sampler::sample_2d_lod(Vector2<AK::SIMD::f32x4> const& uv, AK::SIMD::u32x4 level, TextureFilter filter) const
  125. {
  126. auto const& image = *m_config.bound_image;
  127. u32x4 const layer = expand4(0u);
  128. u32x4 const width = {
  129. image.level_width(level[0]),
  130. image.level_width(level[1]),
  131. image.level_width(level[2]),
  132. image.level_width(level[3]),
  133. };
  134. u32x4 const height = {
  135. image.level_height(level[0]),
  136. image.level_height(level[1]),
  137. image.level_height(level[2]),
  138. image.level_height(level[3]),
  139. };
  140. u32x4 width_mask = width - 1;
  141. u32x4 height_mask = height - 1;
  142. f32x4 s = wrap(uv.x(), m_config.texture_wrap_u, width);
  143. f32x4 t = wrap(uv.y(), m_config.texture_wrap_v, height);
  144. f32x4 u = s * to_f32x4(width);
  145. f32x4 v = t * to_f32x4(height);
  146. if (filter == TextureFilter::Nearest) {
  147. u32x4 i = to_u32x4(u);
  148. u32x4 j = to_u32x4(v);
  149. u32x4 k = expand4(0u);
  150. i = image.width_is_power_of_two() ? i & width_mask : i % width;
  151. j = image.height_is_power_of_two() ? j & height_mask : j % height;
  152. return texel4(image, layer, level, i, j, k);
  153. }
  154. u -= 0.5f;
  155. v -= 0.5f;
  156. i32x4 i0 = to_i32x4(floor_int_range(u));
  157. i32x4 i1 = i0 + 1;
  158. i32x4 j0 = to_i32x4(floor_int_range(v));
  159. i32x4 j1 = j0 + 1;
  160. if (m_config.texture_wrap_u == TextureWrapMode::Repeat) {
  161. if (image.width_is_power_of_two()) {
  162. i0 = (i32x4)(i0 & width_mask);
  163. i1 = (i32x4)(i1 & width_mask);
  164. } else {
  165. i0 = (i32x4)(i0 % width);
  166. i1 = (i32x4)(i1 % width);
  167. }
  168. }
  169. if (m_config.texture_wrap_v == TextureWrapMode::Repeat) {
  170. if (image.height_is_power_of_two()) {
  171. j0 = (i32x4)(j0 & height_mask);
  172. j1 = (i32x4)(j1 & height_mask);
  173. } else {
  174. j0 = (i32x4)(j0 % height);
  175. j1 = (i32x4)(j1 % height);
  176. }
  177. }
  178. u32x4 k = expand4(0u);
  179. Vector4<f32x4> t0, t1, t2, t3;
  180. if (m_config.texture_wrap_u == TextureWrapMode::Repeat && m_config.texture_wrap_v == TextureWrapMode::Repeat) {
  181. t0 = texel4(image, layer, level, to_u32x4(i0), to_u32x4(j0), k);
  182. t1 = texel4(image, layer, level, to_u32x4(i1), to_u32x4(j0), k);
  183. t2 = texel4(image, layer, level, to_u32x4(i0), to_u32x4(j1), k);
  184. t3 = texel4(image, layer, level, to_u32x4(i1), to_u32x4(j1), k);
  185. } else {
  186. t0 = texel4border(image, layer, level, to_u32x4(i0), to_u32x4(j0), k, m_config.border_color, width, height);
  187. t1 = texel4border(image, layer, level, to_u32x4(i1), to_u32x4(j0), k, m_config.border_color, width, height);
  188. t2 = texel4border(image, layer, level, to_u32x4(i0), to_u32x4(j1), k, m_config.border_color, width, height);
  189. t3 = texel4border(image, layer, level, to_u32x4(i1), to_u32x4(j1), k, m_config.border_color, width, height);
  190. }
  191. f32x4 const alpha = frac_int_range(u);
  192. f32x4 const beta = frac_int_range(v);
  193. auto const lerp_0 = mix(t0, t1, alpha);
  194. auto const lerp_1 = mix(t2, t3, alpha);
  195. return mix(lerp_0, lerp_1, beta);
  196. }
  197. }