diff --git a/AK/SIMDExtras.h b/AK/SIMDExtras.h
new file mode 100644
index 00000000000..4b4a4061166
--- /dev/null
+++ b/AK/SIMDExtras.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/SIMD.h>
+
+// Returning a vector on i686 target generates warning "psabi".
+// This prevents the CI, treating this as an error, from running to completion.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic warning "-Wpsabi"
+
+namespace AK::SIMD {
+
+// SIMD Vector Expansion
+
+ALWAYS_INLINE static constexpr f32x4 expand4(float f)
+{
+    return f32x4 { f, f, f, f };
+}
+
+ALWAYS_INLINE static constexpr i32x4 expand4(i32 i)
+{
+    return i32x4 { i, i, i, i };
+}
+
+ALWAYS_INLINE static constexpr u32x4 expand4(u32 u)
+{
+    return u32x4 { u, u, u, u };
+}
+
+// Casting
+
+template<typename TSrc>
+ALWAYS_INLINE static u32x4 to_u32x4(TSrc v)
+{
+    return __builtin_convertvector(v, u32x4);
+}
+
+template<typename TSrc>
+ALWAYS_INLINE static i32x4 to_i32x4(TSrc v)
+{
+    return __builtin_convertvector(v, i32x4);
+}
+
+template<typename TSrc>
+ALWAYS_INLINE static f32x4 to_f32x4(TSrc v)
+{
+    return __builtin_convertvector(v, f32x4);
+}
+
+// Masking
+
+ALWAYS_INLINE static i32 maskbits(i32x4 mask)
+{
+#if defined(__SSE__)
+    return __builtin_ia32_movmskps((f32x4)mask);
+#else
+    return ((mask[0] & 0x80000000) >> 31) | ((mask[1] & 0x80000000) >> 30) | ((mask[2] & 0x80000000) >> 29) | ((mask[3] & 0x80000000) >> 28);
+#endif
+}
+
+ALWAYS_INLINE static bool all(i32x4 mask)
+{
+    return maskbits(mask) == 15;
+}
+
+ALWAYS_INLINE static bool any(i32x4 mask)
+{
+    return maskbits(mask) != 0;
+}
+
+ALWAYS_INLINE static bool none(i32x4 mask)
+{
+    return maskbits(mask) == 0;
+}
+
+ALWAYS_INLINE static int maskcount(i32x4 mask)
+{
+    constexpr static int count_lut[16] { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
+    return count_lut[maskbits(mask)];
+}
+
+// Load / Store
+
+ALWAYS_INLINE static f32x4 load4(float const* a, float const* b, float const* c, float const* d)
+{
+    return f32x4 { *a, *b, *c, *d };
+}
+
+ALWAYS_INLINE static u32x4 load4(u32 const* a, u32 const* b, u32 const* c, u32 const* d)
+{
+    return u32x4 { *a, *b, *c, *d };
+}
+
+ALWAYS_INLINE static f32x4 load4_masked(float const* a, float const* b, float const* c, float const* d, i32x4 mask)
+{
+    int bits = maskbits(mask);
+    return f32x4 {
+        bits & 1 ? *a : 0.f,
+        bits & 2 ? *b : 0.f,
+        bits & 4 ? *c : 0.f,
+        bits & 8 ? *d : 0.f,
+    };
+}
+
+ALWAYS_INLINE static u32x4 load4_masked(u32 const* a, u32 const* b, u32 const* c, u32 const* d, i32x4 mask)
+{
+    int bits = maskbits(mask);
+    return u32x4 {
+        bits & 1 ? *a : 0u,
+        bits & 2 ? *b : 0u,
+        bits & 4 ? *c : 0u,
+        bits & 8 ? *d : 0u,
+    };
+}
+
+template<typename VectorType, typename UnderlyingType = decltype(declval<VectorType>()[0])>
+ALWAYS_INLINE static void store4(VectorType v, UnderlyingType* a, UnderlyingType* b, UnderlyingType* c, UnderlyingType* d)
+{
+    *a = v[0];
+    *b = v[1];
+    *c = v[2];
+    *d = v[3];
+}
+
+template<typename VectorType, typename UnderlyingType = decltype(declval<VectorType>()[0])>
+ALWAYS_INLINE static void store4_masked(VectorType v, UnderlyingType* a, UnderlyingType* b, UnderlyingType* c, UnderlyingType* d, i32x4 mask)
+{
+    int bits = maskbits(mask);
+    if (bits & 1)
+        *a = v[0];
+    if (bits & 2)
+        *b = v[1];
+    if (bits & 4)
+        *c = v[2];
+    if (bits & 8)
+        *d = v[3];
+}
+
+#pragma GCC diagnostic pop
+
+}