mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-24 00:20:21 +00:00
AK/SIMDExtras: Fix masking logic in shuffle_or_0
This commit is contained in:
parent
48f1861ce9
commit
a168bec7ef
Notes:
github-actions[bot]
2024-07-27 13:03:24 +00:00
Author: https://github.com/dzfrias Commit: https://github.com/LadybirdBrowser/ladybird/commit/a168bec7efc Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/851
1 changed file with 4 additions and 5 deletions
|
@ -218,11 +218,10 @@ ALWAYS_INLINE static T shuffle_or_0_impl(T a, Control control, IndexSequence<Idx
|
|||
using E = ElementOf<T>;
|
||||
|
||||
if constexpr (__has_builtin(__builtin_shuffle)) {
|
||||
// GCC does a very bad job at optimizing the masking, while not recognizing the shuffle idiom
|
||||
// So we jinx its __builtin_shuffle to work with out of bounds indices
|
||||
// TODO: verify that this masking logic is correct (for machines with __builtin_shuffle)
|
||||
auto mask = (control >= 0) | (control < N);
|
||||
return __builtin_shuffle(a, control & mask) & ~mask;
|
||||
auto vector = __builtin_shuffle(a, control);
|
||||
for (size_t i = 0; i < N; ++i)
|
||||
vector[i] = control[i] < 0 || control[i] >= N ? 0 : vector[i];
|
||||
return vector;
|
||||
}
|
||||
// 1. Set all out of bounds values to ~0
|
||||
// Note: This is done so that the optimization mentioned down below works
|
||||
|
|
Loading…
Reference in a new issue