LibJS/Bytecode: Don't reparse regular expressions on instantiation

The RegExpLiteral AST node already has the parsed regex::Parser::Result
so let's plumb that over to the bytecode executable instead of reparsing
the regex every time NewRegExp is executed.

~12% speed-up on language/literals/regexp/S7.8.5_A2.1_T2.js in test262.
This commit is contained in:
Andreas Kling 2023-07-13 10:49:07 +02:00
parent a098f38cab
commit c0f985ffcf
Notes: sideshowbarker 2024-07-16 22:11:09 +09:00
9 changed files with 109 additions and 4 deletions

View file

@ -289,7 +289,12 @@ Bytecode::CodeGenerationErrorOr<void> RegExpLiteral::generate_bytecode(Bytecode:
{
auto source_index = generator.intern_string(m_pattern);
auto flags_index = generator.intern_string(m_flags);
generator.emit<Bytecode::Op::NewRegExp>(source_index, flags_index);
auto regex_index = generator.intern_regex(Bytecode::ParsedRegex {
.regex = m_parsed_regex,
.pattern = m_parsed_pattern,
.flags = m_parsed_flags,
});
generator.emit<Bytecode::Op::NewRegExp>(source_index, flags_index, regex_index);
return {};
}

View file

@ -11,6 +11,7 @@
#include <AK/WeakPtr.h>
#include <LibJS/Bytecode/BasicBlock.h>
#include <LibJS/Bytecode/IdentifierTable.h>
#include <LibJS/Bytecode/RegexTable.h>
#include <LibJS/Bytecode/StringTable.h>
namespace JS::Bytecode {
@ -32,6 +33,7 @@ struct Executable {
Vector<NonnullOwnPtr<BasicBlock>> basic_blocks;
NonnullOwnPtr<StringTable> string_table;
NonnullOwnPtr<IdentifierTable> identifier_table;
NonnullOwnPtr<RegexTable> regex_table;
size_t number_of_registers { 0 };
bool is_strict_mode { false };

View file

@ -16,6 +16,7 @@ namespace JS::Bytecode {
// Constructs a generator that owns freshly made string, identifier and regex tables.
// Generator::generate() later moves these tables into the resulting Executable.
Generator::Generator()
: m_string_table(make<StringTable>())
, m_identifier_table(make<IdentifierTable>())
, m_regex_table(make<RegexTable>())
{
}
@ -67,6 +68,7 @@ CodeGenerationErrorOr<NonnullOwnPtr<Executable>> Generator::generate(ASTNode con
.basic_blocks = move(generator.m_root_basic_blocks),
.string_table = move(generator.m_string_table),
.identifier_table = move(generator.m_identifier_table),
.regex_table = move(generator.m_regex_table),
.number_of_registers = generator.m_next_register,
.is_strict_mode = is_strict_mode,
});

View file

@ -18,6 +18,7 @@
#include <LibJS/Bytecode/StringTable.h>
#include <LibJS/Forward.h>
#include <LibJS/Runtime/FunctionKind.h>
#include <LibRegex/Regex.h>
namespace JS::Bytecode {
@ -131,6 +132,11 @@ public:
return m_string_table->insert(move(string));
}
// Hands the parsed regex over to this generator's regex table and
// returns the index under which the table stored it.
RegexTableIndex intern_regex(ParsedRegex regex)
{
    auto const table_index = m_regex_table->insert(move(regex));
    return table_index;
}
IdentifierTableIndex intern_identifier(DeprecatedFlyString string)
{
return m_identifier_table->insert(move(string));
@ -220,6 +226,7 @@ private:
Vector<NonnullOwnPtr<BasicBlock>> m_root_basic_blocks;
NonnullOwnPtr<StringTable> m_string_table;
NonnullOwnPtr<IdentifierTable> m_identifier_table;
NonnullOwnPtr<RegexTable> m_regex_table;
u32 m_next_register { 2 };
u32 m_next_block { 1 };

View file

@ -328,14 +328,30 @@ ThrowCompletionOr<void> NewObject::execute_impl(Bytecode::Interpreter& interpret
return {};
}
// 13.2.7.3 Runtime Semantics: Evaluation, https://tc39.es/ecma262/#sec-regular-expression-literals-runtime-semantics-evaluation
// Executes the NewRegExp instruction: builds a RegExpObject from the pattern/flags strings and the
// pre-parsed regex stored in the current executable's tables, and leaves it in the accumulator.
// NOTE(review): `source` reads the same string-table entry as `pattern`, and the accumulator is
// assigned via regexp_create() before being overwritten at the end — presumably these are the
// pre-change lines shown alongside their replacements; confirm against the actual file.
ThrowCompletionOr<void> NewRegExp::execute_impl(Bytecode::Interpreter& interpreter) const
{
auto& vm = interpreter.vm();
auto source = interpreter.current_executable().get_string(m_source_index);
auto& realm = *vm.current_realm();
// 1. Let pattern be CodePointsToString(BodyText of RegularExpressionLiteral).
auto pattern = interpreter.current_executable().get_string(m_source_index);
// 2. Let flags be CodePointsToString(FlagText of RegularExpressionLiteral).
auto flags = interpreter.current_executable().get_string(m_flags_index);
interpreter.accumulator() = TRY(regexp_create(vm, PrimitiveString::create(vm, source), PrimitiveString::create(vm, flags)));
// 3. Return ! RegExpCreate(pattern, flags).
// Look up the entry that code generation stored at m_regex_index in the executable's regex table.
auto& parsed_regex = interpreter.current_executable().regex_table->get(m_regex_index);
// Construct the Regex directly from the stored parse result, pattern and flags.
Regex<ECMA262> regex(parsed_regex.regex, parsed_regex.pattern, parsed_regex.flags);
// NOTE: We bypass RegExpCreate and subsequently RegExpAlloc as an optimization to use the already parsed values.
auto regexp_object = RegExpObject::create(realm, move(regex), move(pattern), move(flags));
// RegExpAlloc has these two steps from the 'Legacy RegExp features' proposal.
regexp_object->set_realm(*vm.current_realm());
// We don't need to check 'If SameValue(newTarget, thisRealm.[[Intrinsics]].[[%RegExp%]]) is true'
// here as we know RegExpCreate calls RegExpAlloc with %RegExp% for newTarget.
regexp_object->set_legacy_features_enabled(true);
// The freshly created object is the instruction's result.
interpreter.accumulator() = regexp_object;
return {};
}

View file

@ -13,6 +13,7 @@
#include <LibJS/Bytecode/IdentifierTable.h>
#include <LibJS/Bytecode/Instruction.h>
#include <LibJS/Bytecode/Label.h>
#include <LibJS/Bytecode/RegexTable.h>
#include <LibJS/Bytecode/Register.h>
#include <LibJS/Bytecode/StringTable.h>
#include <LibJS/Heap/Cell.h>
@ -196,10 +197,11 @@ public:
class NewRegExp final : public Instruction {
public:
NewRegExp(StringTableIndex source_index, StringTableIndex flags_index)
NewRegExp(StringTableIndex source_index, StringTableIndex flags_index, RegexTableIndex regex_index)
: Instruction(Type::NewRegExp)
, m_source_index(source_index)
, m_flags_index(flags_index)
, m_regex_index(regex_index)
{
}
@ -211,6 +213,7 @@ public:
private:
StringTableIndex m_source_index;
StringTableIndex m_flags_index;
RegexTableIndex m_regex_index;
};
#define JS_ENUMERATE_NEW_BUILTIN_ERROR_OPS(O) \

View file

@ -0,0 +1,29 @@
/*
* Copyright (c) 2023, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibJS/Bytecode/RegexTable.h>
namespace JS::Bytecode {
// Appends the given parsed regex to the table and returns the index of the new entry.
RegexTableIndex RegexTable::insert(ParsedRegex regex)
{
    // The entry is appended at the end, so its index equals the size before the append.
    auto const new_index = m_regexes.size();
    m_regexes.append(move(regex));
    return new_index;
}
// Returns a const reference to the entry stored at the given table index.
ParsedRegex const& RegexTable::get(RegexTableIndex index) const
{
    auto const position = index.value();
    return m_regexes[position];
}
void RegexTable::dump() const
{
outln("Regex Table:");
for (size_t i = 0; i < m_regexes.size(); i++)
outln("{}: {}", i, m_regexes[i].pattern);
}
}

View file

@ -0,0 +1,40 @@
/*
* Copyright (c) 2023, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/DeprecatedString.h>
#include <AK/DistinctNumeric.h>
#include <AK/Vector.h>
#include <LibRegex/RegexParser.h>
namespace JS::Bytecode {
AK_TYPEDEF_DISTINCT_NUMERIC_GENERAL(size_t, RegexTableIndex, Comparison);
// One regex table entry: the three values NewRegExp passes to the Regex<ECMA262> constructor
// so that the regular expression does not have to be parsed again at execution time.
struct ParsedRegex {
// Parser output for the regex.
regex::Parser::Result regex;
// Pattern string handed to the Regex constructor; also what RegexTable::dump() prints.
DeprecatedString pattern;
// Regex option flags handed to the Regex constructor.
regex::RegexOptions<ECMAScriptFlags> flags;
};
// Append-only list of ParsedRegex entries addressed by RegexTableIndex.
// The table can be neither copied nor moved.
class RegexTable {
AK_MAKE_NONMOVABLE(RegexTable);
AK_MAKE_NONCOPYABLE(RegexTable);
public:
RegexTable() = default;
// Appends an entry and returns its index.
RegexTableIndex insert(ParsedRegex);
// Returns the entry stored at the given index.
ParsedRegex const& get(RegexTableIndex) const;
// Prints the index and pattern of every entry.
void dump() const;
// True when no entry has been inserted.
bool is_empty() const { return m_regexes.is_empty(); }
private:
// Entries in insertion order; an entry's position is its RegexTableIndex.
Vector<ParsedRegex> m_regexes;
};
}

View file

@ -15,6 +15,7 @@ set(SOURCES
Bytecode/Pass/MergeBlocks.cpp
Bytecode/Pass/PlaceBlocks.cpp
Bytecode/Pass/UnifySameBlocks.cpp
Bytecode/RegexTable.cpp
Bytecode/StringTable.cpp
Console.cpp
Contrib/Test262/$262Object.cpp