Kaynağa Gözat

LibJS/Bytecode: Don't reparse regular expressions on instantiation

The RegExpLiteral AST node already has the parsed regex::Parser::Result,
so let's plumb that over to the bytecode executable instead of reparsing
the regex every time NewRegExp is executed.

~12% speed-up on language/literals/regexp/S7.8.5_A2.1_T2.js in test262.
Andreas Kling 2 yıl önce
ebeveyn
işleme
c0f985ffcf

+ 6 - 1
Userland/Libraries/LibJS/Bytecode/ASTCodegen.cpp

@@ -289,7 +289,12 @@ Bytecode::CodeGenerationErrorOr<void> RegExpLiteral::generate_bytecode(Bytecode:
 {
     auto source_index = generator.intern_string(m_pattern);
     auto flags_index = generator.intern_string(m_flags);
-    generator.emit<Bytecode::Op::NewRegExp>(source_index, flags_index);
+    auto regex_index = generator.intern_regex(Bytecode::ParsedRegex {
+        .regex = m_parsed_regex,
+        .pattern = m_parsed_pattern,
+        .flags = m_parsed_flags,
+    });
+    generator.emit<Bytecode::Op::NewRegExp>(source_index, flags_index, regex_index);
     return {};
 }
 

+ 2 - 0
Userland/Libraries/LibJS/Bytecode/Executable.h

@@ -11,6 +11,7 @@
 #include <AK/WeakPtr.h>
 #include <LibJS/Bytecode/BasicBlock.h>
 #include <LibJS/Bytecode/IdentifierTable.h>
+#include <LibJS/Bytecode/RegexTable.h>
 #include <LibJS/Bytecode/StringTable.h>
 
 namespace JS::Bytecode {
@@ -32,6 +33,7 @@ struct Executable {
     Vector<NonnullOwnPtr<BasicBlock>> basic_blocks;
     NonnullOwnPtr<StringTable> string_table;
     NonnullOwnPtr<IdentifierTable> identifier_table;
+    NonnullOwnPtr<RegexTable> regex_table;
     size_t number_of_registers { 0 };
     bool is_strict_mode { false };
 

+ 2 - 0
Userland/Libraries/LibJS/Bytecode/Generator.cpp

@@ -16,6 +16,7 @@ namespace JS::Bytecode {
 Generator::Generator()
     : m_string_table(make<StringTable>())
     , m_identifier_table(make<IdentifierTable>())
+    , m_regex_table(make<RegexTable>())
 {
 }
 
@@ -67,6 +68,7 @@ CodeGenerationErrorOr<NonnullOwnPtr<Executable>> Generator::generate(ASTNode con
         .basic_blocks = move(generator.m_root_basic_blocks),
         .string_table = move(generator.m_string_table),
         .identifier_table = move(generator.m_identifier_table),
+        .regex_table = move(generator.m_regex_table),
         .number_of_registers = generator.m_next_register,
         .is_strict_mode = is_strict_mode,
     });

+ 7 - 0
Userland/Libraries/LibJS/Bytecode/Generator.h

@@ -18,6 +18,7 @@
 #include <LibJS/Bytecode/StringTable.h>
 #include <LibJS/Forward.h>
 #include <LibJS/Runtime/FunctionKind.h>
+#include <LibRegex/Regex.h>
 
 namespace JS::Bytecode {
 
@@ -131,6 +132,11 @@ public:
         return m_string_table->insert(move(string));
     }
 
+    RegexTableIndex intern_regex(ParsedRegex regex)
+    {
+        return m_regex_table->insert(move(regex));
+    }
+
     IdentifierTableIndex intern_identifier(DeprecatedFlyString string)
     {
         return m_identifier_table->insert(move(string));
@@ -220,6 +226,7 @@ private:
     Vector<NonnullOwnPtr<BasicBlock>> m_root_basic_blocks;
     NonnullOwnPtr<StringTable> m_string_table;
     NonnullOwnPtr<IdentifierTable> m_identifier_table;
+    NonnullOwnPtr<RegexTable> m_regex_table;
 
     u32 m_next_register { 2 };
     u32 m_next_block { 1 };

+ 18 - 2
Userland/Libraries/LibJS/Bytecode/Op.cpp

@@ -328,14 +328,30 @@ ThrowCompletionOr<void> NewObject::execute_impl(Bytecode::Interpreter& interpret
     return {};
 }
 
+// 13.2.7.3 Runtime Semantics: Evaluation, https://tc39.es/ecma262/#sec-regular-expression-literals-runtime-semantics-evaluation
 ThrowCompletionOr<void> NewRegExp::execute_impl(Bytecode::Interpreter& interpreter) const
 {
     auto& vm = interpreter.vm();
 
-    auto source = interpreter.current_executable().get_string(m_source_index);
+    auto& realm = *vm.current_realm();
+
+    // 1. Let pattern be CodePointsToString(BodyText of RegularExpressionLiteral).
+    auto pattern = interpreter.current_executable().get_string(m_source_index);
+
+    // 2. Let flags be CodePointsToString(FlagText of RegularExpressionLiteral).
     auto flags = interpreter.current_executable().get_string(m_flags_index);
 
-    interpreter.accumulator() = TRY(regexp_create(vm, PrimitiveString::create(vm, source), PrimitiveString::create(vm, flags)));
+    // 3. Return ! RegExpCreate(pattern, flags).
+    auto& parsed_regex = interpreter.current_executable().regex_table->get(m_regex_index);
+    Regex<ECMA262> regex(parsed_regex.regex, parsed_regex.pattern, parsed_regex.flags);
+    // NOTE: We bypass RegExpCreate and subsequently RegExpAlloc as an optimization to use the already parsed values.
+    auto regexp_object = RegExpObject::create(realm, move(regex), move(pattern), move(flags));
+    // RegExpAlloc has these two steps from the 'Legacy RegExp features' proposal.
+    regexp_object->set_realm(*vm.current_realm());
+    // We don't need to check 'If SameValue(newTarget, thisRealm.[[Intrinsics]].[[%RegExp%]]) is true'
+    // here as we know RegExpCreate calls RegExpAlloc with %RegExp% for newTarget.
+    regexp_object->set_legacy_features_enabled(true);
+    interpreter.accumulator() = regexp_object;
     return {};
 }
 

+ 4 - 1
Userland/Libraries/LibJS/Bytecode/Op.h

@@ -13,6 +13,7 @@
 #include <LibJS/Bytecode/IdentifierTable.h>
 #include <LibJS/Bytecode/Instruction.h>
 #include <LibJS/Bytecode/Label.h>
+#include <LibJS/Bytecode/RegexTable.h>
 #include <LibJS/Bytecode/Register.h>
 #include <LibJS/Bytecode/StringTable.h>
 #include <LibJS/Heap/Cell.h>
@@ -196,10 +197,11 @@ public:
 
 class NewRegExp final : public Instruction {
 public:
-    NewRegExp(StringTableIndex source_index, StringTableIndex flags_index)
+    NewRegExp(StringTableIndex source_index, StringTableIndex flags_index, RegexTableIndex regex_index)
         : Instruction(Type::NewRegExp)
         , m_source_index(source_index)
         , m_flags_index(flags_index)
+        , m_regex_index(regex_index)
     {
     }
 
@@ -211,6 +213,7 @@ public:
 private:
     StringTableIndex m_source_index;
     StringTableIndex m_flags_index;
+    RegexTableIndex m_regex_index;
 };
 
 #define JS_ENUMERATE_NEW_BUILTIN_ERROR_OPS(O) \

+ 29 - 0
Userland/Libraries/LibJS/Bytecode/RegexTable.cpp

@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2023, Andreas Kling <kling@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <LibJS/Bytecode/RegexTable.h>
+
+namespace JS::Bytecode {
+
+RegexTableIndex RegexTable::insert(ParsedRegex regex)
+{
+    m_regexes.append(move(regex));
+    return m_regexes.size() - 1;
+}
+
+ParsedRegex const& RegexTable::get(RegexTableIndex index) const
+{
+    return m_regexes[index.value()];
+}
+
+void RegexTable::dump() const
+{
+    outln("Regex Table:");
+    for (size_t i = 0; i < m_regexes.size(); i++)
+        outln("{}: {}", i, m_regexes[i].pattern);
+}
+
+}

+ 40 - 0
Userland/Libraries/LibJS/Bytecode/RegexTable.h

@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2023, Andreas Kling <kling@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/DeprecatedString.h>
+#include <AK/DistinctNumeric.h>
+#include <AK/Vector.h>
+#include <LibRegex/RegexParser.h>
+
+namespace JS::Bytecode {
+
+AK_TYPEDEF_DISTINCT_NUMERIC_GENERAL(size_t, RegexTableIndex, Comparison);
+
+struct ParsedRegex {
+    regex::Parser::Result regex;
+    DeprecatedString pattern;
+    regex::RegexOptions<ECMAScriptFlags> flags;
+};
+
+class RegexTable {
+    AK_MAKE_NONMOVABLE(RegexTable);
+    AK_MAKE_NONCOPYABLE(RegexTable);
+
+public:
+    RegexTable() = default;
+
+    RegexTableIndex insert(ParsedRegex);
+    ParsedRegex const& get(RegexTableIndex) const;
+    void dump() const;
+    bool is_empty() const { return m_regexes.is_empty(); }
+
+private:
+    Vector<ParsedRegex> m_regexes;
+};
+
+}

+ 1 - 0
Userland/Libraries/LibJS/CMakeLists.txt

@@ -15,6 +15,7 @@ set(SOURCES
     Bytecode/Pass/MergeBlocks.cpp
     Bytecode/Pass/PlaceBlocks.cpp
     Bytecode/Pass/UnifySameBlocks.cpp
+    Bytecode/RegexTable.cpp
     Bytecode/StringTable.cpp
     Console.cpp
     Contrib/Test262/$262Object.cpp