AK: Add an helper for quick hardware based rounding

This uses the `fistp` and `cvts[sd]2si` respectively, to potentially
round floating point values with just one instruction.

This falls back to `llrint[fl]?` on aarch64 for now.
This commit is contained in:
Hendiadyoin1 2022-04-14 17:33:01 +02:00 committed by Linus Groh
parent 71b175d4ed
commit 37ff2b9bd2
Notes: sideshowbarker 2024-07-17 11:12:14 +09:00

View file

@ -265,7 +265,7 @@ constexpr T tan(T angle)
CONSTEXPR_STATE(tan, angle);
#if ARCH(I386) || ARCH(X86_64)
double ret, one;
T ret, one;
asm(
"fptan"
: "=t"(one), "=u"(ret)
@ -536,6 +536,89 @@ using Hyperbolic::cosh;
using Hyperbolic::sinh;
using Hyperbolic::tanh;
template<Integral I, FloatingPoint P>
ALWAYS_INLINE I round_to(P value)
{
#if ARCH(I386) || ARCH(X86_64)
// Note: fistps outputs into a signed integer location (i16, i32, i64),
// so lets be nice and tell the compiler that.
Conditional<sizeof(I) >= sizeof(i16), MakeSigned<I>, i16> ret;
if constexpr (sizeof(I) == sizeof(i64)) {
asm("fistpll %0"
: "=m"(ret)
: "t"(value)
: "st");
} else if constexpr (sizeof(I) == sizeof(i32)) {
asm("fistpl %0"
: "=m"(ret)
: "t"(value)
: "st");
} else {
asm("fistps %0"
: "=m"(ret)
: "t"(value)
: "st");
}
return static_cast<I>(ret);
#else
if constexpr (IsSame<P, long double>)
return static_cast<I>(llrintl(value));
if constexpr (IsSame<P, double>)
return static_cast<I>(llrint(value));
if constexpr (IsSame<P, float>)
return static_cast<I>(llrintf(value));
#endif
}
#ifdef __SSE__
template<Integral I>
ALWAYS_INLINE I round_to(float value)
{
if constexpr (sizeof(I) == sizeof(i64)) {
// Note: Outputting into 64-bit registers or memory locations requires the
// REX prefix, so we have to fall back to long doubles on i686
# if ARCH(X86_64)
i64 ret;
asm("cvtss2si %1, %0"
: "=r"(ret)
: "xm"(value));
return static_cast<I>(ret);
# else
return round_to<I, long double>(value);
# endif
}
i32 ret;
asm("cvtss2si %1, %0"
: "=r"(ret)
: "xm"(value));
return static_cast<I>(ret);
}
#endif
#ifdef __SSE2__
template<Integral I>
ALWAYS_INLINE I round_to(double value)
{
if constexpr (sizeof(I) == sizeof(i64)) {
// Note: Outputting into 64-bit registers or memory locations requires the
// REX prefix, so we have to fall back to long doubles on i686
# if ARCH(X86_64)
i64 ret;
asm("cvtsd2si %1, %0"
: "=r"(ret)
: "xm"(value));
return static_cast<I>(ret);
# else
return round_to<I, long double>(value);
# endif
}
i32 ret;
asm("cvtsd2si %1, %0"
: "=r"(ret)
: "xm"(value));
return static_cast<I>(ret);
}
#endif
template<FloatingPoint T>
constexpr T pow(T x, T y)
{
@ -563,5 +646,6 @@ constexpr T pow(T x, T y)
}
#undef CONSTEXPR_STATE
}
using AK::round_to;