AK: Reorder AK/Math after the removal of i686

Without i686 we can remove some implementations for sqrt and round_to.
This commit is contained in:
Hendiadyoin1 2022-12-28 22:30:08 +01:00 committed by Jelle Raaijmakers
parent ce0f41b9fb
commit 1bdc4e6b29
Notes: sideshowbarker 2024-07-17 09:56:35 +09:00

226
AK/Math.h
View file

@ -123,6 +123,20 @@ constexpr T sqrt(T x)
CONSTEXPR_STATE(sqrt, x);
#if ARCH(X86_64)
if constexpr (IsSame<T, float>) {
float res;
asm("sqrtss %1, %0"
: "=x"(res)
: "x"(x));
return res;
}
if constexpr (IsSame<T, double>) {
double res;
asm("sqrtsd %1, %0"
: "=x"(res)
: "x"(x));
return res;
}
T res;
asm("fsqrt"
: "=t"(res)
@ -140,45 +154,8 @@ constexpr T rsqrt(T x)
{
#if ARCH(AARCH64)
AARCH64_INSTRUCTION(frsqrte, x);
#endif
return (T)1. / sqrt(x);
}
#if ARCH(x86_64)
// Single-precision specialization of sqrt().
// When is_constant_evaluated() reports true the compiler builtin is used;
// otherwise the scalar SSE instruction `sqrtss` is emitted directly.
template<>
constexpr float sqrt(float x)
{
    if (!is_constant_evaluated()) {
        float root;
        asm("sqrtss %1, %0"
            : "=x"(root)
            : "x"(x));
        return root;
    }
    return __builtin_sqrtf(x);
}
# ifdef __SSE2__
// Double-precision specialization of sqrt().
// When is_constant_evaluated() reports true the compiler builtin is used;
// otherwise the scalar SSE2 instruction `sqrtsd` is emitted directly.
template<>
constexpr double sqrt(double x)
{
    if (!is_constant_evaluated()) {
        double root;
        asm("sqrtsd %1, %0"
            : "=x"(root)
            : "x"(x));
        return root;
    }
    return __builtin_sqrt(x);
}
# endif
template<>
constexpr float rsqrt(float x)
{
if (is_constant_evaluated())
return 1.f / __builtin_sqrtf(x);
#elif ARCH(X86_64)
if constexpr (IsSame<T, float>) {
float res;
asm("rsqrtss %1, %0"
: "=x"(res)
@ -186,6 +163,8 @@ constexpr float rsqrt(float x)
return res;
}
#endif
return (T)1. / sqrt(x);
}
template<FloatingPoint T>
constexpr T cbrt(T x)
@ -583,9 +562,12 @@ using Hyperbolic::sinh;
using Hyperbolic::tanh;
template<Integral I, FloatingPoint P>
ALWAYS_INLINE I round_to(P value)
{
ALWAYS_INLINE I round_to(P value);
#if ARCH(X86_64)
template<Integral I>
ALWAYS_INLINE I round_to(long double value)
{
// Note: fistps outputs into a signed integer location (i16, i32, i64),
// so let's be nice and tell the compiler that.
Conditional<sizeof(I) >= sizeof(i16), MakeSigned<I>, i16> ret;
@ -606,115 +588,129 @@ ALWAYS_INLINE I round_to(P value)
: "st");
}
return static_cast<I>(ret);
}
// Converts a float to the integral type I with the SSE instruction `cvtss2si`.
// The instruction writes into a 64-bit register when I is 64 bits wide or is u32
// (presumably so the whole u32 range can be represented), and into a 32-bit
// register otherwise. The register value is then cast to I.
template<Integral I>
ALWAYS_INLINE I round_to(float value)
{
    // FIXME: round_to<u64> may cause issues (the "indefinite" value being produced)
    //        if the value exceeds the i64 limit, even though the result could fit into a u64.
    //        To solve this we would either need to detect that value, or do a range check and
    //        then a more specialized conversion, which might include a (expensive) division.
    constexpr bool use_64bit_result = IsSame<I, u32> || sizeof(I) == sizeof(i64);
    if constexpr (use_64bit_result) {
        i64 wide;
        asm("cvtss2si %1, %0"
            : "=r"(wide)
            : "xm"(value));
        return static_cast<I>(wide);
    } else {
        i32 narrow;
        asm("cvtss2si %1, %0"
            : "=r"(narrow)
            : "xm"(value));
        return static_cast<I>(narrow);
    }
}
// Converts a double to the integral type I with the SSE2 instruction `cvtsd2si`.
// The instruction writes into a 64-bit register when I is 64 bits wide or is u32
// (presumably so the whole u32 range can be represented), and into a 32-bit
// register otherwise. The register value is then cast to I.
template<Integral I>
ALWAYS_INLINE I round_to(double value)
{
    // FIXME: round_to<u64> may cause issues (the "indefinite" value being produced)
    //        if the value exceeds the i64 limit, even though the result could fit into a u64.
    //        To solve this we would either need to detect that value, or do a range check and
    //        then a more specialized conversion, which might include a (expensive) division.
    constexpr bool use_64bit_result = IsSame<I, u32> || sizeof(I) == sizeof(i64);
    if constexpr (use_64bit_result) {
        i64 wide;
        asm("cvtsd2si %1, %0"
            : "=r"(wide)
            : "xm"(value));
        return static_cast<I>(wide);
    } else {
        i32 narrow;
        asm("cvtsd2si %1, %0"
            : "=r"(narrow)
            : "xm"(value));
        return static_cast<I>(narrow);
    }
}
#elif ARCH(AARCH64)
if constexpr (IsSigned<I>) {
if constexpr (sizeof(I) <= sizeof(i32)) {
template<Signed I>
ALWAYS_INLINE I round_to(float value)
{
if constexpr (sizeof(I) <= sizeof(u32)) {
i32 res;
if constexpr (IsSame<P, float>) {
asm("fcvtns %w0, %s1"
: "=r"(res)
: "w"(value));
} else if constexpr (IsSame<P, double>) {
asm("fcvtns %w0, %d1"
: "=r"(res)
: "w"(value));
} else if constexpr (IsSame<P, long double>) {
TODO();
}
return static_cast<I>(res);
}
// either long or long long aka i64
i64 res;
if constexpr (IsSame<P, float>) {
asm("fcvtns %0, %s1"
: "=r"(res)
: "w"(value));
} else if constexpr (IsSame<P, double>) {
return static_cast<I>(res);
}
template<Signed I>
ALWAYS_INLINE I round_to(double value)
{
if constexpr (sizeof(I) <= sizeof(u32)) {
i32 res;
asm("fcvtns %w0, %d1"
: "=r"(res)
: "w"(value));
return static_cast<I>(res);
}
i64 res;
asm("fcvtns %0, %d1"
: "=r"(res)
: "w"(value));
} else if constexpr (IsSame<P, long double>) {
TODO();
}
return static_cast<I>(res);
}
if constexpr (sizeof(I) <= sizeof(u32)) {
template<Unsigned U>
ALWAYS_INLINE U round_to(float value)
{
if constexpr (sizeof(U) <= sizeof(u32)) {
u32 res;
if constexpr (IsSame<P, float>) {
asm("fcvtnu %w0, %s1"
: "=r"(res)
: "w"(value));
} else if constexpr (IsSame<P, double>) {
asm("fcvtnu %w0, %d1"
: "=r"(res)
: "w"(value));
} else if constexpr (IsSame<P, long double>) {
TODO();
return static_cast<U>(res);
}
return static_cast<I>(res);
}
// either unsigned long or unsigned long long aka u64
u64 res;
if constexpr (IsSame<P, float>) {
i64 res;
asm("fcvtnu %0, %s1"
: "=r"(res)
: "w"(value));
} else if constexpr (IsSame<P, double>) {
asm("fcvtnu %0, %d1"
return static_cast<U>(res);
}
// Converts a double to the unsigned integral type U on AArch64.
//
// Uses FCVTNU (floating-point convert to *unsigned* integer, rounding to
// nearest), matching the float overload of this function. The previous body
// used the signed FCVTNS and an i64 temporary, so values above the signed
// maximum of the destination register could not be represented, and negative
// inputs produced a wrapped-around result instead of the unsigned conversion's
// result. It also still carried stale branches referring to the template
// parameters `P` and `I`, which this overload does not declare.
template<Unsigned U>
ALWAYS_INLINE U round_to(double value)
{
    if constexpr (sizeof(U) <= sizeof(u32)) {
        // %w0 selects the 32-bit view of the destination register.
        u32 res;
        asm("fcvtnu %w0, %d1"
            : "=r"(res)
            : "w"(value));
        return static_cast<U>(res);
    }
    // Either unsigned long or unsigned long long, i.e. u64.
    u64 res;
    asm("fcvtnu %0, %d1"
        : "=r"(res)
        : "w"(value));
    return static_cast<U>(res);
}
#else
// Generic fallback for round_to(): picks the compiler's llrint builtin that
// matches the floating-point type P (long double, double or float) and casts
// its result to the requested integral type I.
template<Integral I, FloatingPoint P>
ALWAYS_INLINE I round_to(P value)
{
// Exactly one of the following branches applies for a given P.
if constexpr (IsSame<P, long double>)
return static_cast<I>(__builtin_llrintl(value));
if constexpr (IsSame<P, double>)
return static_cast<I>(__builtin_llrint(value));
if constexpr (IsSame<P, float>)
return static_cast<I>(__builtin_llrintf(value));
// NOTE(review): this #endif sits inside the function body while the matching
// #else precedes the template header - looks like diff residue; confirm the
// intended placement (after the closing brace) against the real file.
#endif
}
#if ARCH(x86_64)
// Converts a float to the integral type I with the SSE instruction `cvtss2si`.
// A 64-bit destination register is used only when I itself is 64 bits wide;
// every other I goes through a 32-bit register. The result is then cast to I.
template<Integral I>
ALWAYS_INLINE I round_to(float value)
{
    constexpr bool use_64bit_result = sizeof(I) == sizeof(i64);
    if constexpr (use_64bit_result) {
        // Note: Outputting into 64-bit registers or memory locations requires
        //       the REX prefix.
        i64 wide;
        asm("cvtss2si %1, %0"
            : "=r"(wide)
            : "xm"(value));
        return static_cast<I>(wide);
    } else {
        i32 narrow;
        asm("cvtss2si %1, %0"
            : "=r"(narrow)
            : "xm"(value));
        return static_cast<I>(narrow);
    }
}
#endif
#ifdef __SSE2__
// Converts a double to the integral type I with the SSE2 instruction `cvtsd2si`.
// A 64-bit destination register is used only when I itself is 64 bits wide;
// every other I goes through a 32-bit register. The result is then cast to I.
template<Integral I>
ALWAYS_INLINE I round_to(double value)
{
    constexpr bool use_64bit_result = sizeof(I) == sizeof(i64);
    if constexpr (use_64bit_result) {
        i64 wide;
        asm("cvtsd2si %1, %0"
            : "=r"(wide)
            : "xm"(value));
        return static_cast<I>(wide);
    } else {
        i32 narrow;
        asm("cvtsd2si %1, %0"
            : "=r"(narrow)
            : "xm"(value));
        return static_cast<I>(narrow);
    }
}
#endif