LibJS: Make Value inherit from a NanBoxedValue

NanBoxedValue is intended to be a GC-allocatable type which is not
specific to JavaScript, as part of the effort to factor the GC
implementation out of LibJS.
author Shannon Booth 2024-11-14 19:40:36 +13:00, committed by Andreas Kling
parent c0bcebeb08
commit 0bf2a8362a
6 changed files with 132 additions and 108 deletions
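At a glance, the resulting type split looks like this. This is a minimal compilable sketch, not the real headers: the stand-in typedef and the abbreviated member lists exist only to keep it self-contained, and the real classes carry many more members (see the diffs below).

#include <cstdint>

namespace Sketch {

using u64 = std::uint64_t;

// Mirrors the new LibJS/Heap/NanBoxedValue.h: only the bit-level representation,
// nothing JavaScript-specific.
class NanBoxedValue {
public:
    bool is_cell() const { return (m_value.tag & 0xFFF8ULL) == 0xFFF8ULL; }
    bool is_nan() const { return m_value.encoded == 0x7FF8'0000'0000'0000ULL; }

protected:
    union {
        double as_double;
        struct {
            u64 payload : 48;
            u64 tag : 16;
        };
        u64 encoded;
    } m_value { .encoded = 0 };
};

// Mirrors LibJS/Runtime/Value.h: the JS-specific tags, predicates and conversions
// stay here, layered on top of the shared representation.
class Value : public NanBoxedValue {
public:
    // is_nullish(), is_accessor(), is_bigint(), to_i32(), invoke(), ... live here,
    // all implemented on top of the inherited m_value bits.
};

}

int main()
{
    Sketch::Value value;
    return value.is_cell() ? 1 : 0; // the base class's bit logic is usable directly on a Value
}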

LibJS/Forward.h

@@ -194,6 +194,7 @@ class MemberExpression;
class MetaProperty;
class Module;
struct ModuleRequest;
class NanBoxedValue;
class NativeFunction;
class ObjectEnvironment;
class Parser;

LibJS/Heap/Cell.cpp

@@ -6,7 +6,7 @@
#include <LibJS/Heap/Cell.h>
#include <LibJS/Heap/Heap.h>
#include <LibJS/Runtime/Value.h>
#include <LibJS/Heap/NanBoxedValue.h>
namespace JS {
@@ -14,7 +14,7 @@ void JS::Cell::initialize(JS::Realm&)
{
}
void JS::Cell::Visitor::visit(JS::Value value)
void JS::Cell::Visitor::visit(NanBoxedValue const& value)
{
if (value.is_cell())
visit_impl(value.as_cell());

LibJS/Heap/Cell.h

@@ -150,7 +150,7 @@ public:
}
}
void visit(Value value);
void visit(NanBoxedValue const& value);
// Allow explicitly ignoring a GC-allocated member in a visit_edges implementation instead
// of just not using it.
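Put differently, a Value handed to the visitor now binds through its NanBoxedValue base, so the GC-facing interface no longer has to name the JS-specific type. A minimal standalone sketch of that overload resolution (stand-in structs, not the real LibJS classes):

#include <cstdint>
#include <iostream>

// Stand-ins shaped like the types in this commit.
struct NanBoxedValue {
    std::uint64_t encoded { 0 };
    bool is_cell() const { return ((encoded >> 48) & 0xFFF8ULL) == 0xFFF8ULL; }
};

struct Value : NanBoxedValue { }; // the JS layer adds its tags and conversions on top

struct Visitor {
    // Mirrors the new Cell::Visitor::visit(NanBoxedValue const&) signature.
    void visit(NanBoxedValue const& value)
    {
        if (value.is_cell())
            std::cout << "would mark the referenced cell\n";
    }
};

int main()
{
    Visitor visitor;
    Value value;          // a JS-level Value held by some cell...
    visitor.visit(value); // ...is accepted via its NanBoxedValue base, no conversion needed
    return 0;
}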

LibJS/Heap/Heap.cpp

@@ -18,8 +18,8 @@
#include <LibJS/Heap/Handle.h>
#include <LibJS/Heap/Heap.h>
#include <LibJS/Heap/HeapBlock.h>
#include <LibJS/Heap/NanBoxedValue.h>
#include <LibJS/Runtime/VM.h>
#include <LibJS/Runtime/Value.h>
#include <setjmp.h>
#ifdef HAS_ADDRESS_SANITIZER
@@ -61,23 +61,23 @@ void Heap::will_allocate(size_t size)
static void add_possible_value(HashMap<FlatPtr, HeapRoot>& possible_pointers, FlatPtr data, HeapRoot origin, FlatPtr min_block_address, FlatPtr max_block_address)
{
if constexpr (sizeof(FlatPtr*) == sizeof(Value)) {
// Because Value stores pointers in non-canonical form we have to check if the top bytes
if constexpr (sizeof(FlatPtr*) == sizeof(NanBoxedValue)) {
// Because NanBoxedValue stores pointers in non-canonical form we have to check if the top bytes
// match any pointer-backed tag, in that case we have to extract the pointer to its
// canonical form and add that as a possible pointer.
FlatPtr possible_pointer;
if ((data & SHIFTED_IS_CELL_PATTERN) == SHIFTED_IS_CELL_PATTERN)
possible_pointer = Value::extract_pointer_bits(data);
possible_pointer = NanBoxedValue::extract_pointer_bits(data);
else
possible_pointer = data;
if (possible_pointer < min_block_address || possible_pointer > max_block_address)
return;
possible_pointers.set(possible_pointer, move(origin));
} else {
static_assert((sizeof(Value) % sizeof(FlatPtr*)) == 0);
static_assert((sizeof(NanBoxedValue) % sizeof(FlatPtr*)) == 0);
if (data < min_block_address || data > max_block_address)
return;
// In the 32-bit case we will look at the top and bottom part of Value separately we just
// In the 32-bit case we will look at the top and bottom part of NanBoxedValue separately we just
// add both the upper and lower bytes as possible pointers.
possible_pointers.set(data, move(origin));
}
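To make the check concrete, here is a small standalone sketch. It copies the two constants and the x86_64/riscv64 flavour of extract_pointer_bits from the new header below; the stack word and address are made up for illustration:

#include <cassert>
#include <cstdint>

static constexpr std::uint64_t IS_CELL_PATTERN = 0xFFF8ULL;
static constexpr std::uint64_t TAG_SHIFT = 48;
static constexpr std::uint64_t SHIFTED_IS_CELL_PATTERN = IS_CELL_PATTERN << TAG_SHIFT;

// x86_64/riscv64 variant: shift the 16 tag bits out, then let the arithmetic
// right shift sign-extend bit 47 back over the top.
static constexpr std::uint64_t extract_pointer_bits(std::uint64_t encoded)
{
    return static_cast<std::uint64_t>(static_cast<std::int64_t>(encoded << 16) >> 16);
}

int main()
{
    // A hypothetical word found on the stack: an Object-tagged (0xFFF9) NaN box
    // wrapping the address 0x0000'1234'5678'9abc.
    std::uint64_t data = 0xFFF9'1234'5678'9abcULL;
    assert((data & SHIFTED_IS_CELL_PATTERN) == SHIFTED_IS_CELL_PATTERN); // looks pointer-backed
    assert(extract_pointer_bits(data) == 0x0000'1234'5678'9abcULL);      // canonical pointer recovered

    // A plain double (1.0 == 0x3FF0'0000'0000'0000) never matches the cell pattern,
    // so it is not recorded as a possible pointer.
    std::uint64_t plain = 0x3FF0'0000'0000'0000ULL;
    assert((plain & SHIFTED_IS_CELL_PATTERN) != SHIFTED_IS_CELL_PATTERN);
    return 0;
}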

LibJS/Heap/NanBoxedValue.h (new file)

@@ -0,0 +1,119 @@
/*
* Copyright (c) 2024, Shannon Booth <shannon@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/BitCast.h>
#include <AK/Types.h>
namespace JS {
static_assert(sizeof(double) == 8);
static_assert(sizeof(void*) == sizeof(double) || sizeof(void*) == sizeof(u32));
// To make our Value representation compact we can use the fact that IEEE
// doubles have a lot (2^52 - 2) of NaN bit patterns. The canonical form is
// just 0x7FF8000000000000, i.e. sign = 0, exponent all ones and the topmost
// bit of the mantissa set.
static constexpr u64 CANON_NAN_BITS = bit_cast<u64>(__builtin_nan(""));
static_assert(CANON_NAN_BITS == 0x7FF8000000000000);
// (Unfortunately all the other NaN bit patterns are also valid, so we have to
// convert any incoming NaNs to this canonical pattern; in practice it seems only
// the negative version of CANON_NAN_BITS actually shows up.)
// +/- Infinity are represented by a full exponent but without any bits of the
// mantissa set.
static constexpr u64 POSITIVE_INFINITY_BITS = bit_cast<u64>(__builtin_huge_val());
static constexpr u64 NEGATIVE_INFINITY_BITS = bit_cast<u64>(-__builtin_huge_val());
static_assert(POSITIVE_INFINITY_BITS == 0x7FF0000000000000);
static_assert(NEGATIVE_INFINITY_BITS == 0xFFF0000000000000);
// However as long as any bit is set in the mantissa with the exponent of all
// ones this value is a NaN, and it even ignores the sign bit.
// (NOTE: we have to use __builtin_isnan here since some isnan implementations are not constexpr)
static_assert(__builtin_isnan(bit_cast<double>(0x7FF0000000000001)));
static_assert(__builtin_isnan(bit_cast<double>(0xFFF0000000040000)));
// This means we can use all of these NaNs to store all other options for Value.
// To make sure all of these other representations stay NaNs, we use 0x7FF8 as the
// base for the top 2 bytes, which ensures the value is always a NaN.
static constexpr u64 BASE_TAG = 0x7FF8;
// This leaves the sign bit and the three lower bits for tagging a value and then
// 48 bits of potential payload.
// First, the pointer-backed types (Object, String etc.): to mark this category
// and make stack scanning easier, we set the sign bit (the topmost bit) to 1 to
// signify that it is a pointer-backed type.
static constexpr u64 IS_CELL_BIT = 0x8000 | BASE_TAG;
// On all current 64-bit systems this code runs on, pointers actually only use the
// lowest 6 bytes, which fit neatly into our NaN payload with the top two bytes
// left over for marking it as a NaN and tagging the type.
// Note that we do need to take care when extracting the pointer value but this
// is explained in the extract_pointer method.
static constexpr u64 IS_CELL_PATTERN = 0xFFF8ULL;
static constexpr u64 TAG_SHIFT = 48;
static constexpr u64 TAG_EXTRACTION = 0xFFFF000000000000;
static constexpr u64 SHIFTED_IS_CELL_PATTERN = IS_CELL_PATTERN << TAG_SHIFT;
class NanBoxedValue {
public:
bool is_cell() const { return (m_value.tag & IS_CELL_PATTERN) == IS_CELL_PATTERN; }
static constexpr FlatPtr extract_pointer_bits(u64 encoded)
{
#ifdef AK_ARCH_32_BIT
// For 32-bit systems the pointer fully fits, so we can just return it directly.
static_assert(sizeof(void*) == sizeof(u32));
return static_cast<FlatPtr>(encoded & 0xffff'ffff);
#elif ARCH(X86_64) || ARCH(RISCV64)
// For x86_64 and riscv64 the top 16 bits should sign-extend the "real" top bit (the 47th).
// So we first shift the top 16 bits away, then the arithmetic right shift sign-extends them back from bit 47.
return static_cast<FlatPtr>((static_cast<i64>(encoded << 16)) >> 16);
#elif ARCH(AARCH64) || ARCH(PPC64) || ARCH(PPC64LE)
// For AArch64 the top 16 bits of the pointer should be zero.
// For PPC64: all 64 bits can be used for pointers, however on Linux only
// the lower 43 bits are used for user-space addresses, so
// masking off the top 16 bits should match the rest of LibJS.
return static_cast<FlatPtr>(encoded & 0xffff'ffff'ffffULL);
#else
# error "Unknown architecture. Don't know whether pointers need to be sign-extended."
#endif
}
template<typename PointerType>
PointerType* extract_pointer() const
{
VERIFY(is_cell());
return reinterpret_cast<PointerType*>(extract_pointer_bits(m_value.encoded));
}
Cell& as_cell()
{
VERIFY(is_cell());
return *extract_pointer<Cell>();
}
Cell& as_cell() const
{
VERIFY(is_cell());
return *extract_pointer<Cell>();
}
bool is_nan() const
{
return m_value.encoded == CANON_NAN_BITS;
}
protected:
union {
double as_double;
struct {
u64 payload : 48;
u64 tag : 16;
};
u64 encoded;
} m_value { .encoded = 0 };
};
static_assert(sizeof(NanBoxedValue) == sizeof(double));
}
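To make the layout described above concrete, here is a standalone sketch of the encode/extract round trip using the same constants. The encode_cell helper and the pointer value are purely illustrative (in LibJS the encoding happens inside Value's constructors), and the extractor is the x86_64/riscv64 variant:

#include <cassert>
#include <cstdint>
#include <cstring>

static constexpr std::uint64_t BASE_TAG = 0x7FF8;
static constexpr std::uint64_t IS_CELL_BIT = 0x8000 | BASE_TAG; // 0xFFF8
static constexpr std::uint64_t IS_CELL_PATTERN = 0xFFF8ULL;
static constexpr std::uint64_t TAG_SHIFT = 48;
static constexpr std::uint64_t CANON_NAN_BITS = 0x7FF8'0000'0000'0000ULL;

// Illustrative helper: pack a 48-bit user-space pointer under a cell tag.
static std::uint64_t encode_cell(std::uint64_t tag, std::uint64_t pointer_bits)
{
    return (tag << TAG_SHIFT) | (pointer_bits & 0x0000'FFFF'FFFF'FFFFULL);
}

static std::uint64_t extract_pointer_bits(std::uint64_t encoded)
{
    return static_cast<std::uint64_t>(static_cast<std::int64_t>(encoded << 16) >> 16);
}

static bool is_cell(std::uint64_t encoded)
{
    return ((encoded >> TAG_SHIFT) & IS_CELL_PATTERN) == IS_CELL_PATTERN;
}

int main()
{
    // 1. Tag 0xFFF9 (IS_CELL_BIT | 0b001, i.e. an Object) wrapping a made-up address.
    std::uint64_t encoded = encode_cell(IS_CELL_BIT | 0b001, 0x0000'7F00'DEAD'BEEFULL);
    assert(is_cell(encoded));
    assert(extract_pointer_bits(encoded) == 0x0000'7F00'DEAD'BEEFULL);

    // 2. Viewed as a double, every such encoding is a NaN, so it can never collide
    //    with a real numeric value.
    double as_double;
    std::memcpy(&as_double, &encoded, sizeof(double));
    assert(as_double != as_double); // NaN is the only value unequal to itself

    // 3. The canonical NaN itself does not look like a cell.
    assert(!is_cell(CANON_NAN_BITS));
    return 0;
}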

LibJS/Runtime/Value.h

@@ -21,6 +21,7 @@
#include <LibJS/Forward.h>
#include <LibJS/Heap/GCPtr.h>
#include <LibJS/Heap/Handle.h>
#include <LibJS/Heap/NanBoxedValue.h>
#include <math.h>
namespace JS {
@@ -30,44 +31,6 @@ static constexpr double MAX_ARRAY_LIKE_INDEX = 9007199254740991.0;
// Unique bit representation of negative zero (only sign bit set)
static constexpr u64 NEGATIVE_ZERO_BITS = ((u64)1 << 63);
static_assert(sizeof(double) == 8);
static_assert(sizeof(void*) == sizeof(double) || sizeof(void*) == sizeof(u32));
// To make our Value representation compact we can use the fact that IEEE
// doubles have a lot (2^52 - 2) of NaN bit patterns. The canonical form is
// just 0x7FF8000000000000, i.e. sign = 0, exponent all ones and the topmost
// bit of the mantissa set.
static constexpr u64 CANON_NAN_BITS = bit_cast<u64>(__builtin_nan(""));
static_assert(CANON_NAN_BITS == 0x7FF8000000000000);
// (Unfortunately all the other NaN bit patterns are also valid, so we have to
// convert any incoming NaNs to this canonical pattern; in practice it seems only
// the negative version of CANON_NAN_BITS actually shows up.)
// +/- Infinity are represented by a full exponent but without any bits of the
// mantissa set.
static constexpr u64 POSITIVE_INFINITY_BITS = bit_cast<u64>(__builtin_huge_val());
static constexpr u64 NEGATIVE_INFINITY_BITS = bit_cast<u64>(-__builtin_huge_val());
static_assert(POSITIVE_INFINITY_BITS == 0x7FF0000000000000);
static_assert(NEGATIVE_INFINITY_BITS == 0xFFF0000000000000);
// However as long as any bit is set in the mantissa with the exponent of all
// ones this value is a NaN, and it even ignores the sign bit.
// (NOTE: we have to use __builtin_isnan here since some isnan implementations are not constexpr)
static_assert(__builtin_isnan(bit_cast<double>(0x7FF0000000000001)));
static_assert(__builtin_isnan(bit_cast<double>(0xFFF0000000040000)));
// This means we can use all of these NaNs to store all other options for Value.
// To make sure all of these other representations stay NaNs, we use 0x7FF8 as the
// base for the top 2 bytes, which ensures the value is always a NaN.
static constexpr u64 BASE_TAG = 0x7FF8;
// This leaves the sign bit and the three lower bits for tagging a value and then
// 48 bits of potential payload.
// First, the pointer-backed types (Object, String etc.): to mark this category
// and make stack scanning easier, we set the sign bit (the topmost bit) to 1 to
// signify that it is a pointer-backed type.
static constexpr u64 IS_CELL_BIT = 0x8000 | BASE_TAG;
// On all current 64-bit systems this code runs on, pointers actually only use the
// lowest 6 bytes, which fit neatly into our NaN payload with the top two bytes
// left over for marking it as a NaN and tagging the type.
// Note that we do need to take care when extracting the pointer value but this
// is explained in the extract_pointer method.
// This leaves us 3 bits to tag the type of pointer:
static constexpr u64 OBJECT_TAG = 0b001 | IS_CELL_BIT;
static constexpr u64 STRING_TAG = 0b010 | IS_CELL_BIT;
@@ -77,7 +40,6 @@ static constexpr u64 BIGINT_TAG = 0b101 | IS_CELL_BIT;
// By extracting the top 13 bits we can then quickly check whether a Value is
// pointer backed.
static constexpr u64 IS_CELL_PATTERN = 0xFFF8ULL;
static_assert((OBJECT_TAG & IS_CELL_PATTERN) == IS_CELL_PATTERN);
static_assert((STRING_TAG & IS_CELL_PATTERN) == IS_CELL_PATTERN);
static_assert((CANON_NAN_BITS & IS_CELL_PATTERN) != IS_CELL_PATTERN);
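These asserts are easy to check by hand; a tiny standalone sketch of the same bit math, with the constants copied from above:

#include <cassert>
#include <cstdint>

static constexpr std::uint64_t BASE_TAG = 0x7FF8;
static constexpr std::uint64_t IS_CELL_BIT = 0x8000 | BASE_TAG;  // 0xFFF8
static constexpr std::uint64_t OBJECT_TAG = 0b001 | IS_CELL_BIT; // 0xFFF9
static constexpr std::uint64_t STRING_TAG = 0b010 | IS_CELL_BIT; // 0xFFFA
static constexpr std::uint64_t BIGINT_TAG = 0b101 | IS_CELL_BIT; // 0xFFFD
static constexpr std::uint64_t IS_CELL_PATTERN = 0xFFF8ULL;

int main()
{
    // Every pointer-backed tag keeps the top 13 bits set (0xFFF8 = 1111'1111'1111'1000),
    // so one mask-and-compare separates cells from every non-cell encoding.
    assert((OBJECT_TAG & IS_CELL_PATTERN) == IS_CELL_PATTERN);
    assert((STRING_TAG & IS_CELL_PATTERN) == IS_CELL_PATTERN);
    assert((BIGINT_TAG & IS_CELL_PATTERN) == IS_CELL_PATTERN);
    assert((BASE_TAG & IS_CELL_PATTERN) != IS_CELL_PATTERN); // the canonical NaN is not a cell
    return 0;
}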
@@ -104,11 +66,8 @@ static_assert((EMPTY_TAG & IS_NULLISH_EXTRACT_PATTERN) != IS_NULLISH_PATTERN);
// values are not valid anywhere else we can use this "value" to our advantage
// in Optional<Value> to represent the empty optional.
static constexpr u64 TAG_EXTRACTION = 0xFFFF000000000000;
static constexpr u64 TAG_SHIFT = 48;
static constexpr u64 SHIFTED_BOOLEAN_TAG = BOOLEAN_TAG << TAG_SHIFT;
static constexpr u64 SHIFTED_INT32_TAG = INT32_TAG << TAG_SHIFT;
static constexpr u64 SHIFTED_IS_CELL_PATTERN = IS_CELL_PATTERN << TAG_SHIFT;
// Summary:
// To pack all the different values into doubles we use the following schema:
@@ -125,7 +84,7 @@ static constexpr u64 SHIFTED_IS_CELL_PATTERN = IS_CELL_PATTERN << TAG_SHIFT;
// options from 8 tags to 15 but since we currently only use 5 for both sign bits
// this is not needed.
class Value {
class Value : public NanBoxedValue {
public:
enum class PreferredType {
Default,
@@ -146,18 +105,12 @@ public:
bool is_accessor() const { return m_value.tag == ACCESSOR_TAG; }
bool is_bigint() const { return m_value.tag == BIGINT_TAG; }
bool is_nullish() const { return (m_value.tag & IS_NULLISH_EXTRACT_PATTERN) == IS_NULLISH_PATTERN; }
bool is_cell() const { return (m_value.tag & IS_CELL_PATTERN) == IS_CELL_PATTERN; }
ThrowCompletionOr<bool> is_array(VM&) const;
bool is_function() const;
bool is_constructor() const;
bool is_error() const;
ThrowCompletionOr<bool> is_regexp(VM&) const;
bool is_nan() const
{
return m_value.encoded == CANON_NAN_BITS;
}
bool is_infinity() const
{
static_assert(NEGATIVE_INFINITY_BITS == (0x1ULL << 63 | POSITIVE_INFINITY_BITS));
@@ -353,18 +306,6 @@ public:
return *extract_pointer<Symbol>();
}
Cell& as_cell()
{
VERIFY(is_cell());
return *extract_pointer<Cell>();
}
Cell& as_cell() const
{
VERIFY(is_cell());
return *extract_pointer<Cell>();
}
Accessor& as_accessor()
{
VERIFY(is_accessor());
@@ -434,27 +375,6 @@ public:
template<typename... Args>
[[nodiscard]] ALWAYS_INLINE ThrowCompletionOr<Value> invoke(VM&, PropertyKey const& property_key, Args... args);
static constexpr FlatPtr extract_pointer_bits(u64 encoded)
{
#ifdef AK_ARCH_32_BIT
// For 32-bit systems the pointer fully fits, so we can just return it directly.
static_assert(sizeof(void*) == sizeof(u32));
return static_cast<FlatPtr>(encoded & 0xffff'ffff);
#elif ARCH(X86_64) || ARCH(RISCV64)
// For x86_64 and riscv64 the top 16 bits should sign-extend the "real" top bit (the 47th).
// So we first shift the top 16 bits away, then the arithmetic right shift sign-extends them back from bit 47.
return static_cast<FlatPtr>((static_cast<i64>(encoded << 16)) >> 16);
#elif ARCH(AARCH64) || ARCH(PPC64) || ARCH(PPC64LE)
// For AArch64 the top 16 bits of the pointer should be zero.
// For PPC64: all 64 bits can be used for pointers, however on Linux only
// the lower 43 bits are used for user-space addresses, so
// masking off the top 16 bits should match the rest of LibJS.
return static_cast<FlatPtr>(encoded & 0xffff'ffff'ffffULL);
#else
# error "Unknown architecture. Don't know whether pointers need to be sign-extended."
#endif
}
// A double is any Value which does not have the full exponent and top mantissa bit set or has
// exactly only those bits set.
bool is_double() const { return (m_value.encoded & CANON_NAN_BITS) != CANON_NAN_BITS || (m_value.encoded == CANON_NAN_BITS); }
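A short standalone sketch of that predicate over a few raw bit patterns (the helper simply mirrors the expression above on a bare u64):

#include <cassert>
#include <cstdint>

static constexpr std::uint64_t CANON_NAN_BITS = 0x7FF8'0000'0000'0000ULL;

static bool is_double(std::uint64_t encoded)
{
    return (encoded & CANON_NAN_BITS) != CANON_NAN_BITS || encoded == CANON_NAN_BITS;
}

int main()
{
    assert(is_double(0x3FF0'0000'0000'0000ULL));  // 1.0: exponent is not all ones
    assert(is_double(0xFFF0'0000'0000'0000ULL));  // -infinity: top mantissa bit clear
    assert(is_double(CANON_NAN_BITS));            // the one NaN pattern stored as a plain double
    assert(!is_double(0xFFF9'0000'0000'1000ULL)); // an Object-tagged cell encoding
    assert(!is_double(0x7FF8'0000'0000'0001ULL)); // other quiet-NaN payloads are reserved for tagged values
    return 0;
}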
@@ -511,31 +431,15 @@ private:
// This means that all bits above the 47th should be the same as
// the 47th. When storing a pointer we thus drop the top 16 bits, as
// we can recover them when extracting the pointer again.
// See also: Value::extract_pointer.
// See also: NanBoxedValue::extract_pointer.
m_value.encoded = tag | (reinterpret_cast<u64>(ptr) & 0x0000ffffffffffffULL);
}
}
template<typename PointerType>
PointerType* extract_pointer() const
{
VERIFY(is_cell());
return reinterpret_cast<PointerType*>(extract_pointer_bits(m_value.encoded));
}
[[nodiscard]] ThrowCompletionOr<Value> invoke_internal(VM&, PropertyKey const&, Optional<MarkedVector<Value>> arguments);
ThrowCompletionOr<i32> to_i32_slow_case(VM&) const;
union {
double as_double;
struct {
u64 payload : 48;
u64 tag : 16;
};
u64 encoded;
} m_value { .encoded = 0 };
friend Value js_undefined();
friend Value js_null();
friend ThrowCompletionOr<Value> greater_than(VM&, Value lhs, Value rhs);