mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-22 07:30:19 +00:00
AK: Add SIMDExtras.h with SIMD related functions
Adds a header to AK with helper functions for writing vectorized code. Co-authored-by: Hendiadyoin <leon2002.la@gmail.com>
This commit is contained in:
parent
75e31a4749
commit
7adcdecc7b
Notes:
sideshowbarker
2024-07-17 21:20:18 +09:00
Author: https://github.com/sunverwerth Commit: https://github.com/SerenityOS/serenity/commit/7adcdecc7bf Pull-request: https://github.com/SerenityOS/serenity/pull/11568 Reviewed-by: https://github.com/Hendiadyoin1 ✅ Reviewed-by: https://github.com/Quaker762 ✅ Reviewed-by: https://github.com/gmta
1 changed files with 146 additions and 0 deletions
146
AK/SIMDExtras.h
Normal file
146
AK/SIMDExtras.h
Normal file
|
@ -0,0 +1,146 @@
|
|||
/*
|
||||
* Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/SIMD.h>
|
||||
|
||||
// Returning a vector on an i686 target triggers the "-Wpsabi" warning.
// CI treats that warning as an error, which would stop the build from running to completion,
// so the diagnostic is explicitly kept at warning level for the code below.
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic warning "-Wpsabi"
|
||||
|
||||
namespace AK::SIMD {
|
||||
|
||||
// SIMD Vector Expansion
|
||||
|
||||
// Broadcasts a single float into all four lanes of an f32x4.
ALWAYS_INLINE static constexpr f32x4 expand4(float f)
{
    f32x4 const broadcast { f, f, f, f };
    return broadcast;
}
|
||||
|
||||
// Broadcasts a single signed 32-bit integer into all four lanes of an i32x4.
ALWAYS_INLINE static constexpr i32x4 expand4(i32 i)
{
    i32x4 const broadcast { i, i, i, i };
    return broadcast;
}
|
||||
|
||||
// Broadcasts a single unsigned 32-bit integer into all four lanes of a u32x4.
ALWAYS_INLINE static constexpr u32x4 expand4(u32 u)
{
    u32x4 const broadcast { u, u, u, u };
    return broadcast;
}
|
||||
|
||||
// Casting
|
||||
|
||||
template<typename TSrc>
|
||||
ALWAYS_INLINE static u32x4 to_u32x4(TSrc v)
|
||||
{
|
||||
return __builtin_convertvector(v, u32x4);
|
||||
}
|
||||
|
||||
template<typename TSrc>
|
||||
ALWAYS_INLINE static i32x4 to_i32x4(TSrc v)
|
||||
{
|
||||
return __builtin_convertvector(v, i32x4);
|
||||
}
|
||||
|
||||
template<typename TSrc>
|
||||
ALWAYS_INLINE static f32x4 to_f32x4(TSrc v)
|
||||
{
|
||||
return __builtin_convertvector(v, f32x4);
|
||||
}
|
||||
|
||||
// Masking
|
||||
|
||||
// Packs the sign bit (bit 31) of each lane of `mask` into the low four bits of the
// result: lane 0 -> bit 0, lane 1 -> bit 1, lane 2 -> bit 2, lane 3 -> bit 3.
ALWAYS_INLINE static i32 maskbits(i32x4 mask)
{
#if defined(__SSE__)
    // Reinterpret the lanes as floats so the SSE sign-mask builtin can be used.
    auto const lanes_as_floats = (f32x4)mask;
    return __builtin_ia32_movmskps(lanes_as_floats);
#else
    // Portable fallback: isolate each lane's top bit and move it to the lane's bit position.
    // 0x80000000 is unsigned, so the right shift is logical and yields exactly 0 or 1.
    u32 packed = 0;
    for (int lane = 0; lane < 4; ++lane)
        packed |= ((mask[lane] & 0x80000000) >> 31) << lane;
    return packed;
#endif
}
|
||||
|
||||
// Returns true when every one of the four lanes of `mask` has its sign bit set.
ALWAYS_INLINE static bool all(i32x4 mask)
{
    constexpr i32 every_lane_set = 0b1111;
    return maskbits(mask) == every_lane_set;
}
|
||||
|
||||
// Returns true when at least one lane of `mask` has its sign bit set.
ALWAYS_INLINE static bool any(i32x4 mask)
{
    i32 const set_lanes = maskbits(mask);
    return set_lanes != 0;
}
|
||||
|
||||
// Returns true when no lane of `mask` has its sign bit set.
ALWAYS_INLINE static bool none(i32x4 mask)
{
    i32 const set_lanes = maskbits(mask);
    return !set_lanes;
}
|
||||
|
||||
// Returns how many of the four lanes of `mask` have their sign bit set.
ALWAYS_INLINE static int maskcount(i32x4 mask)
{
    // Number of set bits for every possible 4-bit value, indexed by that value.
    constexpr static int set_bits_per_value[16] {
        0, 1, 1, 2, // 0b0000 .. 0b0011
        1, 2, 2, 3, // 0b0100 .. 0b0111
        1, 2, 2, 3, // 0b1000 .. 0b1011
        2, 3, 3, 4, // 0b1100 .. 0b1111
    };
    auto const lane_bits = maskbits(mask);
    return set_bits_per_value[lane_bits];
}
|
||||
|
||||
// Load / Store
|
||||
|
||||
// Gathers four floats from four independent addresses into one vector:
// lane 0 <- *a, lane 1 <- *b, lane 2 <- *c, lane 3 <- *d.
// All four pointers are dereferenced unconditionally.
ALWAYS_INLINE static f32x4 load4(float const* a, float const* b, float const* c, float const* d)
{
    float const lane0 = *a;
    float const lane1 = *b;
    float const lane2 = *c;
    float const lane3 = *d;
    return f32x4 { lane0, lane1, lane2, lane3 };
}
|
||||
|
||||
// Gathers four u32 values from four independent addresses into one vector:
// lane 0 <- *a, lane 1 <- *b, lane 2 <- *c, lane 3 <- *d.
// All four pointers are dereferenced unconditionally.
ALWAYS_INLINE static u32x4 load4(u32 const* a, u32 const* b, u32 const* c, u32 const* d)
{
    u32 const lane0 = *a;
    u32 const lane1 = *b;
    u32 const lane2 = *c;
    u32 const lane3 = *d;
    return u32x4 { lane0, lane1, lane2, lane3 };
}
|
||||
|
||||
// Masked gather of four floats. A lane is loaded from its pointer only when the
// corresponding lane of `mask` has its sign bit set; otherwise the lane is 0.f and
// the pointer for that lane is never dereferenced.
ALWAYS_INLINE static f32x4 load4_masked(float const* a, float const* b, float const* c, float const* d, i32x4 mask)
{
    int const enabled = maskbits(mask);
    f32x4 gathered { 0.f, 0.f, 0.f, 0.f };
    if (enabled & 1)
        gathered[0] = *a;
    if (enabled & 2)
        gathered[1] = *b;
    if (enabled & 4)
        gathered[2] = *c;
    if (enabled & 8)
        gathered[3] = *d;
    return gathered;
}
|
||||
|
||||
// Masked gather of four u32 values. A lane is loaded from its pointer only when the
// corresponding lane of `mask` has its sign bit set; otherwise the lane is 0 and
// the pointer for that lane is never dereferenced.
ALWAYS_INLINE static u32x4 load4_masked(u32 const* a, u32 const* b, u32 const* c, u32 const* d, i32x4 mask)
{
    int const enabled = maskbits(mask);
    u32x4 gathered { 0u, 0u, 0u, 0u };
    if (enabled & 1)
        gathered[0] = *a;
    if (enabled & 2)
        gathered[1] = *b;
    if (enabled & 4)
        gathered[2] = *c;
    if (enabled & 8)
        gathered[3] = *d;
    return gathered;
}
|
||||
|
||||
template<typename VectorType, typename UnderlyingType = decltype(declval<VectorType>()[0])>
|
||||
ALWAYS_INLINE static void store4(VectorType v, UnderlyingType* a, UnderlyingType* b, UnderlyingType* c, UnderlyingType* d)
|
||||
{
|
||||
*a = v[0];
|
||||
*b = v[1];
|
||||
*c = v[2];
|
||||
*d = v[3];
|
||||
}
|
||||
|
||||
template<typename VectorType, typename UnderlyingType = decltype(declval<VectorType>()[0])>
|
||||
ALWAYS_INLINE static void store4_masked(VectorType v, UnderlyingType* a, UnderlyingType* b, UnderlyingType* c, UnderlyingType* d, i32x4 mask)
|
||||
{
|
||||
int bits = maskbits(mask);
|
||||
if (bits & 1)
|
||||
*a = v[0];
|
||||
if (bits & 2)
|
||||
*b = v[1];
|
||||
if (bits & 4)
|
||||
*c = v[2];
|
||||
if (bits & 8)
|
||||
*d = v[3];
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
}
|
Loading…
Reference in a new issue