LibJS: Store strings in a string table

Instead of using Strings in the bytecode ops, this adds a global string
table to the Executable struct which individual operations can refer
to using indices. This brings bytecode ops one step closer to being
pointer-free.
This commit is contained in:
Gunnar Beutner 2021-06-09 10:02:01 +02:00 committed by Andreas Kling
parent 4efccbd030
commit 6a0d1fa259
Notes: sideshowbarker 2024-07-18 12:33:32 +09:00
16 changed files with 173 additions and 82 deletions

View file

@ -208,12 +208,12 @@ void BigIntLiteral::generate_bytecode(Bytecode::Generator& generator) const
void StringLiteral::generate_bytecode(Bytecode::Generator& generator) const
{
generator.emit<Bytecode::Op::NewString>(m_value);
generator.emit<Bytecode::Op::NewString>(generator.intern_string(m_value));
}
void Identifier::generate_bytecode(Bytecode::Generator& generator) const
{
generator.emit<Bytecode::Op::GetVariable>(m_string);
generator.emit<Bytecode::Op::GetVariable>(generator.intern_string(m_string));
}
void AssignmentExpression::generate_bytecode(Bytecode::Generator& generator) const
@ -223,7 +223,7 @@ void AssignmentExpression::generate_bytecode(Bytecode::Generator& generator) con
if (m_op == AssignmentOp::Assignment) {
m_rhs->generate_bytecode(generator);
generator.emit<Bytecode::Op::SetVariable>(identifier.string());
generator.emit<Bytecode::Op::SetVariable>(generator.intern_string(identifier.string()));
return;
}
@ -273,7 +273,7 @@ void AssignmentExpression::generate_bytecode(Bytecode::Generator& generator) con
TODO();
}
generator.emit<Bytecode::Op::SetVariable>(identifier.string());
generator.emit<Bytecode::Op::SetVariable>(generator.intern_string(identifier.string()));
return;
}
@ -289,7 +289,8 @@ void AssignmentExpression::generate_bytecode(Bytecode::Generator& generator) con
} else {
VERIFY(is<Identifier>(expression.property()));
m_rhs->generate_bytecode(generator);
generator.emit<Bytecode::Op::PutById>(object_reg, static_cast<Identifier const&>(expression.property()).string());
auto identifier_table_ref = generator.intern_string(static_cast<Identifier const&>(expression.property()).string());
generator.emit<Bytecode::Op::PutById>(object_reg, identifier_table_ref);
return;
}
}
@ -487,7 +488,8 @@ void MemberExpression::generate_bytecode(Bytecode::Generator& generator) const
TODO();
} else {
VERIFY(is<Identifier>(property()));
generator.emit<Bytecode::Op::GetById>(static_cast<Identifier const&>(property()).string());
auto identifier_table_ref = generator.intern_string(static_cast<Identifier const&>(property()).string());
generator.emit<Bytecode::Op::GetById>(identifier_table_ref);
}
}
@ -643,7 +645,7 @@ void UpdateExpression::generate_bytecode(Bytecode::Generator& generator) const
{
if (is<Identifier>(*m_argument)) {
auto& identifier = static_cast<Identifier const&>(*m_argument);
generator.emit<Bytecode::Op::GetVariable>(identifier.string());
generator.emit<Bytecode::Op::GetVariable>(generator.intern_string(identifier.string()));
Optional<Bytecode::Register> previous_value_for_postfix_reg;
if (!m_prefixed) {
@ -656,7 +658,7 @@ void UpdateExpression::generate_bytecode(Bytecode::Generator& generator) const
else
generator.emit<Bytecode::Op::Decrement>();
generator.emit<Bytecode::Op::SetVariable>(identifier.string());
generator.emit<Bytecode::Op::SetVariable>(generator.intern_string(identifier.string()));
if (!m_prefixed)
generator.emit<Bytecode::Op::Load>(*previous_value_for_postfix_reg);

View file

@ -47,13 +47,13 @@ void BasicBlock::seal()
// It also doesn't work because instructions that have String members use RefPtr internally which must be in writable memory.
}
void BasicBlock::dump() const
void BasicBlock::dump(Bytecode::Executable const& executable) const
{
Bytecode::InstructionStreamIterator it(instruction_stream());
if (!m_name.is_empty())
warnln("{}:", m_name);
while (!it.at_end()) {
warnln("[{:4x}] {}", it.offset(), (*it).to_string());
warnln("[{:4x}] {}", it.offset(), (*it).to_string(executable));
++it;
}
}

View file

@ -45,7 +45,7 @@ public:
void seal();
void dump() const;
void dump(Executable const&) const;
ReadonlyBytes instruction_stream() const { return ReadonlyBytes { m_buffer, m_buffer_size }; }
void* next_slot() { return m_buffer + m_buffer_size; }

View file

@ -14,6 +14,7 @@
namespace JS::Bytecode {
// Constructs a generator that owns a freshly allocated, initially empty
// StringTable. Ownership of that table is handed to the resulting Executable
// by Generator::generate().
Generator::Generator()
: m_string_table(make<StringTable>())
{
}
@ -26,7 +27,7 @@ Executable Generator::generate(ASTNode const& node)
Generator generator;
generator.switch_to_basic_block(generator.make_block());
node.generate_bytecode(generator);
return { move(generator.m_root_basic_blocks), generator.m_next_register };
return { move(generator.m_root_basic_blocks), move(generator.m_string_table), generator.m_next_register };
}
void Generator::grow(size_t additional_size)

View file

@ -12,13 +12,17 @@
#include <LibJS/Bytecode/BasicBlock.h>
#include <LibJS/Bytecode/Label.h>
#include <LibJS/Bytecode/Register.h>
#include <LibJS/Bytecode/StringTable.h>
#include <LibJS/Forward.h>
namespace JS::Bytecode {
// The result of bytecode generation: the basic blocks to run plus the data
// the instructions in those blocks refer to by index.
struct Executable {
// Basic blocks produced by the generator.
NonnullOwnPtrVector<BasicBlock> basic_blocks;
// Strings referenced by StringTableIndex from instructions such as
// NewString, GetVariable, SetVariable, GetById and PutById.
NonnullOwnPtr<StringTable> string_table;
// Filled in from the generator's next-register counter when generation ends.
size_t number_of_registers { 0 };

// Resolves a string table index stored in an instruction back to its string.
String const& get_string(StringTableIndex index) const { return string_table->get(index); }
};
class Generator {
@ -74,6 +78,11 @@ public:
return m_current_basic_block->is_terminated();
}
// Registers `string` in this generator's string table and returns the index
// that emitted instructions should store instead of the string itself.
// Equal strings share one entry, because StringTable::insert() returns the
// existing index when the string is already present.
StringTableIndex intern_string(StringView const& string)
{
return m_string_table->insert(string);
}
private:
Generator();
~Generator();
@ -83,6 +92,7 @@ private:
BasicBlock* m_current_basic_block { nullptr };
NonnullOwnPtrVector<BasicBlock> m_root_basic_blocks;
NonnullOwnPtr<StringTable> m_string_table;
u32 m_next_register { 1 };
u32 m_next_block { 1 };

View file

@ -73,7 +73,7 @@ public:
Type type() const { return m_type; }
size_t length() const;
String to_string() const;
String to_string(Bytecode::Executable const&) const;
void execute(Bytecode::Interpreter&) const;
static void destroy(Instruction&);

View file

@ -5,6 +5,7 @@
*/
#include <AK/Debug.h>
#include <AK/TemporaryChange.h>
#include <LibJS/Bytecode/BasicBlock.h>
#include <LibJS/Bytecode/Instruction.h>
#include <LibJS/Bytecode/Interpreter.h>
@ -38,6 +39,8 @@ Value Interpreter::run(Executable const& executable)
{
dbgln_if(JS_BYTECODE_DEBUG, "Bytecode::Interpreter will run unit {:p}", &executable);
TemporaryChange restore_executable { m_current_executable, &executable };
CallFrame global_call_frame;
if (vm().call_stack().is_empty()) {
global_call_frame.this_value = &global_object();

View file

@ -39,6 +39,8 @@ public:
}
void do_return(Value return_value) { m_return_value = return_value; }
// Returns the executable currently being run. m_current_executable defaults
// to nullptr and is only set for the duration of run(), so this must not be
// called outside of run(): the pointer is dereferenced unconditionally.
Executable const& current_executable() { return *m_current_executable; }
private:
RegisterWindow& registers() { return m_register_windows.last(); }
@ -47,6 +49,7 @@ private:
NonnullOwnPtrVector<RegisterWindow> m_register_windows;
Optional<BasicBlock const*> m_pending_jump;
Value m_return_value;
Executable const* m_current_executable { nullptr };
};
}

View file

@ -16,11 +16,11 @@
namespace JS::Bytecode {
String Instruction::to_string() const
String Instruction::to_string(Bytecode::Executable const& executable) const
{
#define __BYTECODE_OP(op) \
case Instruction::Type::op: \
return static_cast<Bytecode::Op::op const&>(*this).to_string();
return static_cast<Bytecode::Op::op const&>(*this).to_string(executable);
switch (type()) {
ENUMERATE_BYTECODE_OPS(__BYTECODE_OP)
@ -77,7 +77,7 @@ static Value typed_equals(GlobalObject&, Value src1, Value src2)
auto rhs = interpreter.accumulator(); \
interpreter.accumulator() = op_snake_case(interpreter.global_object(), lhs, rhs); \
} \
String OpTitleCase::to_string() const \
String OpTitleCase::to_string(Bytecode::Executable const&) const \
{ \
return String::formatted(#OpTitleCase " {}", m_lhs_reg); \
}
@ -99,7 +99,7 @@ static Value typeof_(GlobalObject& global_object, Value value)
{ \
interpreter.accumulator() = op_snake_case(interpreter.global_object(), interpreter.accumulator()); \
} \
String OpTitleCase::to_string() const \
String OpTitleCase::to_string(Bytecode::Executable const&) const \
{ \
return #OpTitleCase; \
}
@ -122,7 +122,7 @@ void NewArray::execute(Bytecode::Interpreter& interpreter) const
void NewString::execute(Bytecode::Interpreter& interpreter) const
{
interpreter.accumulator() = js_string(interpreter.vm(), m_string);
interpreter.accumulator() = js_string(interpreter.vm(), interpreter.current_executable().get_string(m_string));
}
void NewObject::execute(Bytecode::Interpreter& interpreter) const
@ -137,24 +137,24 @@ void ConcatString::execute(Bytecode::Interpreter& interpreter) const
void GetVariable::execute(Bytecode::Interpreter& interpreter) const
{
interpreter.accumulator() = interpreter.vm().get_variable(m_identifier, interpreter.global_object());
interpreter.accumulator() = interpreter.vm().get_variable(interpreter.current_executable().get_string(m_identifier), interpreter.global_object());
}
void SetVariable::execute(Bytecode::Interpreter& interpreter) const
{
interpreter.vm().set_variable(m_identifier, interpreter.accumulator(), interpreter.global_object());
interpreter.vm().set_variable(interpreter.current_executable().get_string(m_identifier), interpreter.accumulator(), interpreter.global_object());
}
void GetById::execute(Bytecode::Interpreter& interpreter) const
{
if (auto* object = interpreter.accumulator().to_object(interpreter.global_object()))
interpreter.accumulator() = object->get(m_property);
interpreter.accumulator() = object->get(interpreter.current_executable().get_string(m_property));
}
void PutById::execute(Bytecode::Interpreter& interpreter) const
{
if (auto* object = interpreter.reg(m_base).to_object(interpreter.global_object()))
object->put(m_property, interpreter.accumulator());
object->put(interpreter.current_executable().get_string(m_property), interpreter.accumulator());
}
void Jump::execute(Bytecode::Interpreter& interpreter) const
@ -255,27 +255,27 @@ void Decrement::execute(Bytecode::Interpreter& interpreter) const
interpreter.accumulator() = js_bigint(interpreter.vm().heap(), old_value.as_bigint().big_integer().minus(Crypto::SignedBigInteger { 1 }));
}
String Load::to_string() const
String Load::to_string(Bytecode::Executable const&) const
{
return String::formatted("Load {}", m_src);
}
String LoadImmediate::to_string() const
String LoadImmediate::to_string(Bytecode::Executable const&) const
{
return String::formatted("LoadImmediate {}", m_value);
}
String Store::to_string() const
String Store::to_string(Bytecode::Executable const&) const
{
return String::formatted("Store {}", m_dst);
}
String NewBigInt::to_string() const
String NewBigInt::to_string(Bytecode::Executable const&) const
{
return String::formatted("NewBigInt \"{}\"", m_bigint.to_base10());
}
String NewArray::to_string() const
String NewArray::to_string(Bytecode::Executable const&) const
{
StringBuilder builder;
builder.append("NewArray");
@ -291,63 +291,63 @@ String NewArray::to_string() const
return builder.to_string();
}
String NewString::to_string() const
String NewString::to_string(Bytecode::Executable const& executable) const
{
return String::formatted("NewString \"{}\"", m_string);
return String::formatted("NewString {} (\"{}\")", m_string, executable.string_table->get(m_string));
}
String NewObject::to_string() const
String NewObject::to_string(Bytecode::Executable const&) const
{
return "NewObject";
}
String ConcatString::to_string() const
String ConcatString::to_string(Bytecode::Executable const&) const
{
return String::formatted("ConcatString {}", m_lhs);
}
String GetVariable::to_string() const
String GetVariable::to_string(Bytecode::Executable const& executable) const
{
return String::formatted("GetVariable {}", m_identifier);
return String::formatted("GetVariable {} ({})", m_identifier, executable.string_table->get(m_identifier));
}
String SetVariable::to_string() const
String SetVariable::to_string(Bytecode::Executable const& executable) const
{
return String::formatted("SetVariable {}", m_identifier);
return String::formatted("SetVariable {} ({})", m_identifier, executable.string_table->get(m_identifier));
}
String PutById::to_string() const
String PutById::to_string(Bytecode::Executable const& executable) const
{
return String::formatted("PutById base:{}, property:{}", m_base, m_property);
return String::formatted("PutById base:{}, property:{} ({})", m_base, m_property, executable.string_table->get(m_property));
}
String GetById::to_string() const
String GetById::to_string(Bytecode::Executable const& executable) const
{
return String::formatted("GetById {}", m_property);
return String::formatted("GetById {} ({})", m_property, executable.string_table->get(m_property));
}
String Jump::to_string() const
String Jump::to_string(Bytecode::Executable const&) const
{
if (m_true_target.has_value())
return String::formatted("Jump {}", *m_true_target);
return String::formatted("Jump <empty>");
}
String JumpConditional::to_string() const
String JumpConditional::to_string(Bytecode::Executable const&) const
{
auto true_string = m_true_target.has_value() ? String::formatted("{}", *m_true_target) : "<empty>";
auto false_string = m_false_target.has_value() ? String::formatted("{}", *m_false_target) : "<empty>";
return String::formatted("JumpConditional true:{} false:{}", true_string, false_string);
}
String JumpNullish::to_string() const
String JumpNullish::to_string(Bytecode::Executable const&) const
{
auto true_string = m_true_target.has_value() ? String::formatted("{}", *m_true_target) : "<empty>";
auto false_string = m_false_target.has_value() ? String::formatted("{}", *m_false_target) : "<empty>";
return String::formatted("JumpNullish null:{} nonnull:{}", true_string, false_string);
}
String Call::to_string() const
String Call::to_string(Bytecode::Executable const&) const
{
StringBuilder builder;
builder.appendff("Call callee:{}, this:{}", m_callee, m_this_value);
@ -363,22 +363,22 @@ String Call::to_string() const
return builder.to_string();
}
String EnterScope::to_string() const
String EnterScope::to_string(Bytecode::Executable const&) const
{
return "EnterScope";
}
String Return::to_string() const
String Return::to_string(Bytecode::Executable const&) const
{
return "Return";
}
String Increment::to_string() const
String Increment::to_string(Bytecode::Executable const&) const
{
return "Increment";
}
String Decrement::to_string() const
String Decrement::to_string(Bytecode::Executable const&) const
{
return "Decrement";
}

View file

@ -7,11 +7,11 @@
#pragma once
#include <AK/FlyString.h>
#include <LibCrypto/BigInt/SignedBigInteger.h>
#include <LibJS/Bytecode/Instruction.h>
#include <LibJS/Bytecode/Label.h>
#include <LibJS/Bytecode/Register.h>
#include <LibJS/Bytecode/StringTable.h>
#include <LibJS/Heap/Cell.h>
#include <LibJS/Runtime/Value.h>
@ -26,7 +26,7 @@ public:
}
void execute(Bytecode::Interpreter&) const;
String to_string() const;
String to_string(Bytecode::Executable const&) const;
private:
Register m_src;
@ -41,7 +41,7 @@ public:
}
void execute(Bytecode::Interpreter&) const;
String to_string() const;
String to_string(Bytecode::Executable const&) const;
private:
Value m_value;
@ -56,7 +56,7 @@ public:
}
void execute(Bytecode::Interpreter&) const;
String to_string() const;
String to_string(Bytecode::Executable const&) const;
private:
Register m_dst;
@ -96,7 +96,7 @@ private:
} \
\
void execute(Bytecode::Interpreter&) const; \
String to_string() const; \
String to_string(Bytecode::Executable const&) const; \
\
private: \
Register m_lhs_reg; \
@ -121,7 +121,7 @@ JS_ENUMERATE_COMMON_BINARY_OPS(JS_DECLARE_COMMON_BINARY_OP)
} \
\
void execute(Bytecode::Interpreter&) const; \
String to_string() const; \
String to_string(Bytecode::Executable const&) const; \
};
JS_ENUMERATE_COMMON_UNARY_OPS(JS_DECLARE_COMMON_UNARY_OP)
@ -129,17 +129,17 @@ JS_ENUMERATE_COMMON_UNARY_OPS(JS_DECLARE_COMMON_UNARY_OP)
class NewString final : public Instruction {
public:
NewString(String string)
NewString(StringTableIndex string)
: Instruction(Type::NewString)
, m_string(move(string))
{
}
void execute(Bytecode::Interpreter&) const;
String to_string() const;
String to_string(Bytecode::Executable const&) const;
private:
String m_string;
StringTableIndex m_string;
};
class NewObject final : public Instruction {
@ -150,7 +150,7 @@ public:
}
void execute(Bytecode::Interpreter&) const;
String to_string() const;
String to_string(Bytecode::Executable const&) const;
};
class NewBigInt final : public Instruction {
@ -162,7 +162,7 @@ public:
}
void execute(Bytecode::Interpreter&) const;
String to_string() const;
String to_string(Bytecode::Executable const&) const;
private:
Crypto::SignedBigInteger m_bigint;
@ -180,7 +180,7 @@ public:
}
void execute(Bytecode::Interpreter&) const;
String to_string() const;
String to_string(Bytecode::Executable const&) const;
size_t length() const { return sizeof(*this) + sizeof(Register) * m_element_count; }
@ -198,7 +198,7 @@ public:
}
void execute(Bytecode::Interpreter&) const;
String to_string() const;
String to_string(Bytecode::Executable const&) const;
private:
Register m_lhs;
@ -206,52 +206,52 @@ private:
class SetVariable final : public Instruction {
public:
SetVariable(FlyString identifier)
SetVariable(StringTableIndex identifier)
: Instruction(Type::SetVariable)
, m_identifier(move(identifier))
{
}
void execute(Bytecode::Interpreter&) const;
String to_string() const;
String to_string(Bytecode::Executable const&) const;
private:
FlyString m_identifier;
StringTableIndex m_identifier;
};
class GetVariable final : public Instruction {
public:
GetVariable(FlyString identifier)
GetVariable(StringTableIndex identifier)
: Instruction(Type::GetVariable)
, m_identifier(move(identifier))
{
}
void execute(Bytecode::Interpreter&) const;
String to_string() const;
String to_string(Bytecode::Executable const&) const;
private:
FlyString m_identifier;
StringTableIndex m_identifier;
};
class GetById final : public Instruction {
public:
GetById(FlyString property)
GetById(StringTableIndex property)
: Instruction(Type::GetById)
, m_property(move(property))
{
}
void execute(Bytecode::Interpreter&) const;
String to_string() const;
String to_string(Bytecode::Executable const&) const;
private:
FlyString m_property;
StringTableIndex m_property;
};
class PutById final : public Instruction {
public:
PutById(Register base, FlyString property)
PutById(Register base, StringTableIndex property)
: Instruction(Type::PutById)
, m_base(base)
, m_property(move(property))
@ -259,11 +259,11 @@ public:
}
void execute(Bytecode::Interpreter&) const;
String to_string() const;
String to_string(Bytecode::Executable const&) const;
private:
Register m_base;
FlyString m_property;
StringTableIndex m_property;
};
class Jump : public Instruction {
@ -291,7 +291,7 @@ public:
}
void execute(Bytecode::Interpreter&) const;
String to_string() const;
String to_string(Bytecode::Executable const&) const;
protected:
Optional<Label> m_true_target;
@ -306,7 +306,7 @@ public:
}
void execute(Bytecode::Interpreter&) const;
String to_string() const;
String to_string(Bytecode::Executable const&) const;
};
class JumpNullish final : public Jump {
@ -317,7 +317,7 @@ public:
}
void execute(Bytecode::Interpreter&) const;
String to_string() const;
String to_string(Bytecode::Executable const&) const;
};
// NOTE: This instruction is variable-width depending on the number of arguments!
@ -334,7 +334,7 @@ public:
}
void execute(Bytecode::Interpreter&) const;
String to_string() const;
String to_string(Bytecode::Executable const&) const;
size_t length() const { return sizeof(*this) + sizeof(Register) * m_argument_count; }
@ -354,7 +354,7 @@ public:
}
void execute(Bytecode::Interpreter&) const;
String to_string() const;
String to_string(Bytecode::Executable const&) const;
private:
ScopeNode const& m_scope_node;
@ -370,7 +370,7 @@ public:
}
void execute(Bytecode::Interpreter&) const;
String to_string() const;
String to_string(Bytecode::Executable const&) const;
};
class Increment final : public Instruction {
@ -381,7 +381,7 @@ public:
}
void execute(Bytecode::Interpreter&) const;
String to_string() const;
String to_string(Bytecode::Executable const&) const;
};
class Decrement final : public Instruction {
@ -392,7 +392,7 @@ public:
}
void execute(Bytecode::Interpreter&) const;
String to_string() const;
String to_string(Bytecode::Executable const&) const;
};
}

View file

@ -0,0 +1,33 @@
/*
* Copyright (c) 2021, Gunnar Beutner <gbeutner@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibJS/Bytecode/StringTable.h>
namespace JS::Bytecode {
// Interns `string`: returns the index of an equal string that is already in
// the table, or appends a new entry and returns the index of that entry.
// Lookup is a linear scan over all stored strings.
StringTableIndex StringTable::insert(StringView string)
{
    size_t index = 0;
    for (auto const& existing : m_strings) {
        if (existing == string)
            return index;
        ++index;
    }

    // No match: `index` now equals the old size, which is exactly the slot
    // the appended string will occupy.
    m_strings.append(string);
    return index;
}
// Returns the string stored at `index`, as previously handed out by insert().
// The index value is used directly as a position into m_strings.
// NOTE(review): out-of-range handling is presumably left to Vector's
// operator[] — confirm.
String const& StringTable::get(StringTableIndex index) const
{
return m_strings[index.value()];
}
void StringTable::dump() const
{
outln("String Table:");
for (size_t i = 0; i < m_strings.size(); i++)
outln("{}: {}", i, m_strings[i]);
}
}

View file

@ -0,0 +1,33 @@
/*
* Copyright (c) 2021, Gunnar Beutner <gbeutner@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/DistinctNumeric.h>
#include <AK/String.h>
#include <AK/Vector.h>
namespace JS::Bytecode {
TYPEDEF_DISTINCT_NUMERIC_GENERAL(size_t, false, true, false, false, false, false, StringTableIndex);
// A table of strings addressed by StringTableIndex. Bytecode instructions
// store an index into this table rather than holding a string themselves.
class StringTable {
// The table is neither movable nor copyable.
AK_MAKE_NONMOVABLE(StringTable);
AK_MAKE_NONCOPYABLE(StringTable);
public:
StringTable() = default;
// Returns the index of `string`, appending it first if no equal entry exists.
StringTableIndex insert(StringView string);
// Returns the string stored at the given index.
String const& get(StringTableIndex) const;
// Prints every entry together with its index.
void dump() const;
// True when no string has been inserted yet.
bool is_empty() const { return m_strings.is_empty(); }
private:
// Entries in insertion order; an entry's position is its StringTableIndex.
Vector<String> m_strings;
};
}

View file

@ -6,6 +6,7 @@ set(SOURCES
Bytecode/Instruction.cpp
Bytecode/Interpreter.cpp
Bytecode/Op.cpp
Bytecode/StringTable.cpp
Console.cpp
Heap/CellAllocator.cpp
Heap/BlockAllocator.cpp

View file

@ -166,6 +166,7 @@ class Handle;
namespace Bytecode {
class BasicBlock;
struct Executable;
class Generator;
class Instruction;
class Interpreter;

View file

@ -156,7 +156,7 @@ Value ScriptFunction::execute_function_body()
if constexpr (JS_BYTECODE_DEBUG) {
dbgln("Compiled Bytecode::Block for function '{}':", m_name);
for (auto& block : m_bytecode_executable->basic_blocks)
block.dump();
block.dump(*m_bytecode_executable);
}
}
return bytecode_interpreter->run(*m_bytecode_executable);

View file

@ -516,7 +516,11 @@ static bool parse_and_run(JS::Interpreter& interpreter, const StringView& source
auto unit = JS::Bytecode::Generator::generate(*program);
if (s_dump_bytecode) {
for (auto& block : unit.basic_blocks)
block.dump();
block.dump(unit);
if (!unit.string_table->is_empty()) {
outln();
unit.string_table->dump();
}
}
if (s_run_bytecode) {