SIMD.h 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127
  1. /*
  2. * Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #pragma once
  7. #include <AK/SIMDExtras.h>
  8. #include <LibGfx/Vector2.h>
  9. #include <LibGfx/Vector3.h>
  10. #include <LibGfx/Vector4.h>
  11. namespace SoftGPU {
  12. ALWAYS_INLINE static constexpr Vector2<AK::SIMD::f32x4> expand4(Vector2<float> const& v)
  13. {
  14. return Vector2<AK::SIMD::f32x4> {
  15. AK::SIMD::expand4(v.x()),
  16. AK::SIMD::expand4(v.y()),
  17. };
  18. }
  19. ALWAYS_INLINE static constexpr Vector3<AK::SIMD::f32x4> expand4(Vector3<float> const& v)
  20. {
  21. return Vector3<AK::SIMD::f32x4> {
  22. AK::SIMD::expand4(v.x()),
  23. AK::SIMD::expand4(v.y()),
  24. AK::SIMD::expand4(v.z()),
  25. };
  26. }
  27. ALWAYS_INLINE static constexpr Vector4<AK::SIMD::f32x4> expand4(Vector4<float> const& v)
  28. {
  29. return Vector4<AK::SIMD::f32x4> {
  30. AK::SIMD::expand4(v.x()),
  31. AK::SIMD::expand4(v.y()),
  32. AK::SIMD::expand4(v.z()),
  33. AK::SIMD::expand4(v.w()),
  34. };
  35. }
  36. ALWAYS_INLINE static constexpr Vector2<AK::SIMD::i32x4> expand4(Vector2<int> const& v)
  37. {
  38. return Vector2<AK::SIMD::i32x4> {
  39. AK::SIMD::expand4(v.x()),
  40. AK::SIMD::expand4(v.y()),
  41. };
  42. }
  43. ALWAYS_INLINE static constexpr Vector3<AK::SIMD::i32x4> expand4(Vector3<int> const& v)
  44. {
  45. return Vector3<AK::SIMD::i32x4> {
  46. AK::SIMD::expand4(v.x()),
  47. AK::SIMD::expand4(v.y()),
  48. AK::SIMD::expand4(v.z()),
  49. };
  50. }
  51. ALWAYS_INLINE static constexpr Vector4<AK::SIMD::i32x4> expand4(Vector4<int> const& v)
  52. {
  53. return Vector4<AK::SIMD::i32x4> {
  54. AK::SIMD::expand4(v.x()),
  55. AK::SIMD::expand4(v.y()),
  56. AK::SIMD::expand4(v.z()),
  57. AK::SIMD::expand4(v.w()),
  58. };
  59. }
  60. ALWAYS_INLINE static AK::SIMD::f32x4 ddx(AK::SIMD::f32x4 v)
  61. {
  62. return AK::SIMD::f32x4 {
  63. v[1] - v[0],
  64. v[1] - v[0],
  65. v[3] - v[2],
  66. v[3] - v[2],
  67. };
  68. }
  69. ALWAYS_INLINE static AK::SIMD::f32x4 ddy(AK::SIMD::f32x4 v)
  70. {
  71. return AK::SIMD::f32x4 {
  72. v[2] - v[0],
  73. v[3] - v[1],
  74. v[2] - v[0],
  75. v[3] - v[1],
  76. };
  77. }
  78. ALWAYS_INLINE static Vector2<AK::SIMD::f32x4> ddx(Vector2<AK::SIMD::f32x4> const& v)
  79. {
  80. return {
  81. ddx(v.x()),
  82. ddx(v.y()),
  83. };
  84. }
  85. ALWAYS_INLINE static Vector2<AK::SIMD::f32x4> ddy(Vector2<AK::SIMD::f32x4> const& v)
  86. {
  87. return {
  88. ddy(v.x()),
  89. ddy(v.y()),
  90. };
  91. }
  92. // Calculates a quadratic approximation of log2, exploiting the fact that IEEE754 floats are represented as mantissa * 2^exponent.
  93. // See https://stackoverflow.com/questions/9411823/fast-log2float-x-implementation-c
  94. ALWAYS_INLINE static AK::SIMD::f32x4 log2_approximate(AK::SIMD::f32x4 v)
  95. {
  96. union {
  97. AK::SIMD::f32x4 float_val;
  98. AK::SIMD::i32x4 int_val;
  99. } u { v };
  100. // Extract just the exponent minus 1, giving a lower integral bound for log2.
  101. auto log = AK::SIMD::to_f32x4(((u.int_val >> 23) & 255) - 128);
  102. // Replace the exponent with 0, giving a value between 1 and 2.
  103. u.int_val &= ~(255 << 23);
  104. u.int_val |= 127 << 23;
  105. // Approximate log2 by adding a quadratic function of u to the integral part.
  106. log += (-0.34484843f * u.float_val + 2.02466578f) * u.float_val - 0.67487759f;
  107. return log;
  108. }
  109. }