SIMD.h 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. /*
  2. * Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org>
  3. * Copyright (c) 2023, Jelle Raaijmakers <jelle@gmta.nl>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #pragma once
  8. #include <AK/SIMDExtras.h>
  9. #include <AK/SIMDMath.h>
  10. #include <LibGfx/Vector2.h>
  11. #include <LibGfx/Vector3.h>
  12. #include <LibGfx/Vector4.h>
  13. namespace SoftGPU {
  14. ALWAYS_INLINE static constexpr Vector2<AK::SIMD::f32x4> expand4(Vector2<float> const& v)
  15. {
  16. return Vector2<AK::SIMD::f32x4> {
  17. AK::SIMD::expand4(v.x()),
  18. AK::SIMD::expand4(v.y()),
  19. };
  20. }
  21. ALWAYS_INLINE static constexpr Vector3<AK::SIMD::f32x4> expand4(Vector3<float> const& v)
  22. {
  23. return Vector3<AK::SIMD::f32x4> {
  24. AK::SIMD::expand4(v.x()),
  25. AK::SIMD::expand4(v.y()),
  26. AK::SIMD::expand4(v.z()),
  27. };
  28. }
  29. ALWAYS_INLINE static constexpr Vector4<AK::SIMD::f32x4> expand4(Vector4<float> const& v)
  30. {
  31. return Vector4<AK::SIMD::f32x4> {
  32. AK::SIMD::expand4(v.x()),
  33. AK::SIMD::expand4(v.y()),
  34. AK::SIMD::expand4(v.z()),
  35. AK::SIMD::expand4(v.w()),
  36. };
  37. }
  38. ALWAYS_INLINE static constexpr Vector2<AK::SIMD::i32x4> expand4(Vector2<int> const& v)
  39. {
  40. return Vector2<AK::SIMD::i32x4> {
  41. AK::SIMD::expand4(v.x()),
  42. AK::SIMD::expand4(v.y()),
  43. };
  44. }
  45. ALWAYS_INLINE static constexpr Vector3<AK::SIMD::i32x4> expand4(Vector3<int> const& v)
  46. {
  47. return Vector3<AK::SIMD::i32x4> {
  48. AK::SIMD::expand4(v.x()),
  49. AK::SIMD::expand4(v.y()),
  50. AK::SIMD::expand4(v.z()),
  51. };
  52. }
  53. ALWAYS_INLINE static constexpr Vector4<AK::SIMD::i32x4> expand4(Vector4<int> const& v)
  54. {
  55. return Vector4<AK::SIMD::i32x4> {
  56. AK::SIMD::expand4(v.x()),
  57. AK::SIMD::expand4(v.y()),
  58. AK::SIMD::expand4(v.z()),
  59. AK::SIMD::expand4(v.w()),
  60. };
  61. }
  62. ALWAYS_INLINE static AK::SIMD::f32x4 ddx(AK::SIMD::f32x4 v)
  63. {
  64. return AK::SIMD::f32x4 {
  65. v[1] - v[0],
  66. v[1] - v[0],
  67. v[3] - v[2],
  68. v[3] - v[2],
  69. };
  70. }
  71. ALWAYS_INLINE static AK::SIMD::f32x4 ddy(AK::SIMD::f32x4 v)
  72. {
  73. return AK::SIMD::f32x4 {
  74. v[2] - v[0],
  75. v[3] - v[1],
  76. v[2] - v[0],
  77. v[3] - v[1],
  78. };
  79. }
  80. ALWAYS_INLINE static Vector2<AK::SIMD::f32x4> ddx(Vector2<AK::SIMD::f32x4> const& v)
  81. {
  82. return {
  83. ddx(v.x()),
  84. ddx(v.y()),
  85. };
  86. }
  87. ALWAYS_INLINE static Vector2<AK::SIMD::f32x4> ddy(Vector2<AK::SIMD::f32x4> const& v)
  88. {
  89. return {
  90. ddy(v.x()),
  91. ddy(v.y()),
  92. };
  93. }
  94. ALWAYS_INLINE static AK::SIMD::f32x4 length(Vector2<AK::SIMD::f32x4> const& v)
  95. {
  96. return AK::SIMD::sqrt(v.dot(v));
  97. }
  98. // Calculates a quadratic approximation of log2, exploiting the fact that IEEE754 floats are represented as mantissa * 2^exponent.
  99. // See https://stackoverflow.com/questions/9411823/fast-log2float-x-implementation-c
  100. ALWAYS_INLINE static AK::SIMD::f32x4 log2_approximate(AK::SIMD::f32x4 v)
  101. {
  102. union {
  103. AK::SIMD::f32x4 float_val;
  104. AK::SIMD::i32x4 int_val;
  105. } u { v };
  106. // Extract just the exponent minus 1, giving a lower integral bound for log2.
  107. auto log = AK::SIMD::to_f32x4(((u.int_val >> 23) & 255) - 128);
  108. // Replace the exponent with 0, giving a value between 1 and 2.
  109. u.int_val &= ~(255 << 23);
  110. u.int_val |= 127 << 23;
  111. // Approximate log2 by adding a quadratic function of u to the integral part.
  112. log += (-0.34484843f * u.float_val + 2.02466578f) * u.float_val - 0.67487759f;
  113. return log;
  114. }
  115. ALWAYS_INLINE static Vector2<AK::SIMD::f32x4> to_vec2_f32x4(Vector2<AK::SIMD::i32x4> const& v)
  116. {
  117. return {
  118. AK::SIMD::to_f32x4(v.x()),
  119. AK::SIMD::to_f32x4(v.y()),
  120. };
  121. }
  122. ALWAYS_INLINE static constexpr Vector4<AK::SIMD::f32x4> to_vec4(AK::SIMD::f32x4 v)
  123. {
  124. return { v, v, v, v };
  125. }
  126. }