memset.cpp 1.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. /*
  2. * Copyright (c) 2022, Daniel Bertalan <dani@danielbertalan.dev>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Types.h>
  7. #include <cpuid.h>
  8. #include <string.h>
  9. extern "C" {
  10. extern void* memset_sse2(void*, int, size_t);
  11. extern void* memset_sse2_erms(void*, int, size_t);
  12. constexpr u32 tcg_signature_ebx = 0x54474354;
  13. constexpr u32 tcg_signature_ecx = 0x43544743;
  14. constexpr u32 tcg_signature_edx = 0x47435447;
  15. // Bit 9 of ebx in cpuid[eax = 7] indicates support for "Enhanced REP MOVSB/STOSB"
  16. constexpr u32 cpuid_7_ebx_bit_erms = 1 << 9;
  17. namespace {
  18. [[gnu::used]] decltype(&memset) resolve_memset()
  19. {
  20. u32 eax, ebx, ecx, edx;
  21. __cpuid(0x40000000, eax, ebx, ecx, edx);
  22. bool is_tcg = ebx == tcg_signature_ebx && ecx == tcg_signature_ecx && edx == tcg_signature_edx;
  23. // Although TCG reports ERMS support, testing shows that rep stosb performs strictly worse than
  24. // SSE copies on all data sizes except <= 4 bytes.
  25. if (is_tcg)
  26. return memset_sse2;
  27. __cpuid_count(7, 0, eax, ebx, ecx, edx);
  28. if (ebx & cpuid_7_ebx_bit_erms)
  29. return memset_sse2_erms;
  30. return memset_sse2;
  31. }
  32. }
  33. #if !defined(AK_COMPILER_CLANG) && !defined(_DYNAMIC_LOADER)
  34. [[gnu::ifunc("resolve_memset")]] void* memset(void*, int, size_t);
  35. #else
  36. // DynamicLoader can't self-relocate IFUNCs.
  37. // FIXME: There's a circular dependency between LibC and libunwind when built with Clang,
  38. // so the IFUNC resolver could be called before LibC has been relocated, returning bogus addresses.
  39. void* memset(void* dest_ptr, int c, size_t n)
  40. {
  41. static decltype(&memset) s_impl = nullptr;
  42. if (s_impl == nullptr)
  43. s_impl = resolve_memset();
  44. return s_impl(dest_ptr, c, n);
  45. }
  46. #endif
  47. }