NanBoxedValue.h 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. /*
  2. * Copyright (c) 2024, Shannon Booth <shannon@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #pragma once
  7. #include <AK/BitCast.h>
  8. #include <AK/Types.h>
  9. namespace JS {
  10. static_assert(sizeof(double) == 8);
  11. static_assert(sizeof(void*) == sizeof(double) || sizeof(void*) == sizeof(u32));
  12. // To make our Value representation compact we can use the fact that IEEE
  13. // doubles have a lot (2^52 - 2) of NaN bit patterns. The canonical form being
  14. // just 0x7FF8000000000000 i.e. sign = 0 exponent is all ones and the top most
  15. // bit of the mantissa set.
  16. static constexpr u64 CANON_NAN_BITS = bit_cast<u64>(__builtin_nan(""));
  17. static_assert(CANON_NAN_BITS == 0x7FF8000000000000);
  18. // (Unfortunately all the other values are valid so we have to convert any
  19. // incoming NaNs to this pattern although in practice it seems only the negative
  20. // version of these CANON_NAN_BITS)
  21. // +/- Infinity are represented by a full exponent but without any bits of the
  22. // mantissa set.
  23. static constexpr u64 POSITIVE_INFINITY_BITS = bit_cast<u64>(__builtin_huge_val());
  24. static constexpr u64 NEGATIVE_INFINITY_BITS = bit_cast<u64>(-__builtin_huge_val());
  25. static_assert(POSITIVE_INFINITY_BITS == 0x7FF0000000000000);
  26. static_assert(NEGATIVE_INFINITY_BITS == 0xFFF0000000000000);
  27. // However as long as any bit is set in the mantissa with the exponent of all
  28. // ones this value is a NaN, and it even ignores the sign bit.
  29. // (NOTE: we have to use __builtin_isnan here since some isnan implementations are not constexpr)
  30. static_assert(__builtin_isnan(bit_cast<double>(0x7FF0000000000001)));
  31. static_assert(__builtin_isnan(bit_cast<double>(0xFFF0000000040000)));
  32. // This means we can use all of these NaNs to store all other options for Value.
  33. // To make sure all of these other representations we use 0x7FF8 as the base top
  34. // 2 bytes which ensures the value is always a NaN.
  35. static constexpr u64 BASE_TAG = 0x7FF8;
  36. // This leaves the sign bit and the three lower bits for tagging a value and then
  37. // 48 bits of potential payload.
  38. // First the pointer backed types (Object, String etc.), to signify this category
  39. // and make stack scanning easier we use the sign bit (top most bit) of 1 to
  40. // signify that it is a pointer backed type.
  41. static constexpr u64 IS_CELL_BIT = 0x8000 | BASE_TAG;
  42. // On all current 64-bit systems this code runs pointer actually only use the
  43. // lowest 6 bytes which fits neatly into our NaN payload with the top two bytes
  44. // left over for marking it as a NaN and tagging the type.
  45. // Note that we do need to take care when extracting the pointer value but this
  46. // is explained in the extract_pointer method.
  47. static constexpr u64 IS_CELL_PATTERN = 0xFFF8ULL;
  48. static constexpr u64 TAG_SHIFT = 48;
  49. static constexpr u64 TAG_EXTRACTION = 0xFFFF000000000000;
  50. static constexpr u64 SHIFTED_IS_CELL_PATTERN = IS_CELL_PATTERN << TAG_SHIFT;
  51. class NanBoxedValue {
  52. public:
  53. bool is_cell() const { return (m_value.tag & IS_CELL_PATTERN) == IS_CELL_PATTERN; }
  54. static constexpr FlatPtr extract_pointer_bits(u64 encoded)
  55. {
  56. #ifdef AK_ARCH_32_BIT
  57. // For 32-bit system the pointer fully fits so we can just return it directly.
  58. static_assert(sizeof(void*) == sizeof(u32));
  59. return static_cast<FlatPtr>(encoded & 0xffff'ffff);
  60. #elif ARCH(X86_64) || ARCH(RISCV64)
  61. // For x86_64 and riscv64 the top 16 bits should be sign extending the "real" top bit (47th).
  62. // So first shift the top 16 bits away then using the right shift it sign extends the top 16 bits.
  63. return static_cast<FlatPtr>((static_cast<i64>(encoded << 16)) >> 16);
  64. #elif ARCH(AARCH64) || ARCH(PPC64) || ARCH(PPC64LE)
  65. // For AArch64 the top 16 bits of the pointer should be zero.
  66. // For PPC64: all 64 bits can be used for pointers, however on Linux only
  67. // the lower 43 bits are used for user-space addresses, so
  68. // masking off the top 16 bits should match the rest of LibJS.
  69. return static_cast<FlatPtr>(encoded & 0xffff'ffff'ffffULL);
  70. #else
  71. # error "Unknown architecture. Don't know whether pointers need to be sign-extended."
  72. #endif
  73. }
  74. template<typename PointerType>
  75. PointerType* extract_pointer() const
  76. {
  77. VERIFY(is_cell());
  78. return reinterpret_cast<PointerType*>(extract_pointer_bits(m_value.encoded));
  79. }
  80. CellImpl& as_cell()
  81. {
  82. VERIFY(is_cell());
  83. return *extract_pointer<CellImpl>();
  84. }
  85. CellImpl& as_cell() const
  86. {
  87. VERIFY(is_cell());
  88. return *extract_pointer<CellImpl>();
  89. }
  90. bool is_nan() const
  91. {
  92. return m_value.encoded == CANON_NAN_BITS;
  93. }
  94. protected:
  95. union {
  96. double as_double;
  97. struct {
  98. u64 payload : 48;
  99. u64 tag : 16;
  100. };
  101. u64 encoded;
  102. } m_value { .encoded = 0 };
  103. };
  104. static_assert(sizeof(NanBoxedValue) == sizeof(double));
  105. }