LibJS: Reduce AST memory usage by shrink-wrapping source range info

Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:

    filename:       StringView (16 bytes)
    start:          Position (24 bytes)
    end:            Position (24 bytes)

The Position structs have { line, column, offset }, all members size_t.

To reduce memory consumption, AST nodes now only store the following:

    source_code:    NonnullRefPtr<SourceCode> (8 bytes)
    start_offset:   u32 (4 bytes)
    end_offset:     u32 (4 bytes)

SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.

The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.

With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
This commit is contained in:
Andreas Kling 2022-11-21 17:37:38 +01:00
parent 3d74d72bcb
commit b0b022507b
Notes: sideshowbarker 2024-07-17 06:45:52 +09:00
16 changed files with 315 additions and 179 deletions

View file

@ -71,7 +71,7 @@ Sheet::Sheet(Workbook& workbook)
for (auto& traceback_frame : error.traceback()) {
auto& function_name = traceback_frame.function_name;
auto& source_range = traceback_frame.source_range;
dbgln(" {} at {}:{}:{}", function_name, source_range.filename, source_range.start.line, source_range.start.column);
dbgln(" {} at {}:{}:{}", function_name, source_range.filename(), source_range.start.line, source_range.start.column);
}
} else {
warnln();

View file

@ -122,13 +122,13 @@ GUI::Variant SheetModel::data(const GUI::ModelIndex& index, GUI::ModelRole role)
StringBuilder builder;
builder.appendff("{}\n", error.get_without_side_effects(object.vm().names.message).to_string_without_side_effects());
for (auto const& frame : trace.in_reverse()) {
if (frame.source_range.filename.contains("runtime.js"sv)) {
if (frame.source_range.filename().contains("runtime.js"sv)) {
if (frame.function_name == "<unknown>")
builder.appendff(" in a builtin function at line {}, column {}\n", frame.source_range.start.line, frame.source_range.start.column);
else
builder.appendff(" while evaluating builtin '{}'\n", frame.function_name);
} else if (frame.source_range.filename.starts_with("cell "sv)) {
builder.appendff(" in cell '{}', at line {}, column {}\n", frame.source_range.filename.substring_view(5), frame.source_range.start.line, frame.source_range.start.column);
} else if (frame.source_range.filename().starts_with("cell "sv)) {
builder.appendff(" in cell '{}', at line {}, column {}\n", frame.source_range.filename().substring_view(5), frame.source_range.start.line, frame.source_range.start.column);
}
}
return builder.to_string();

View file

@ -61,6 +61,18 @@ private:
ExecutingASTNodeChain m_chain_node;
};
ASTNode::ASTNode(SourceRange source_range)
: m_source_code(source_range.code)
, m_start_offset(source_range.start.offset)
, m_end_offset(source_range.end.offset)
{
}
// Rebuilds the full SourceRange for this node on demand by asking the owning
// SourceCode to resolve the stored start/end offsets.
SourceRange ASTNode::source_range() const
{
    auto const& source_code = *m_source_code;
    return source_code.range_from_offsets(m_start_offset, m_end_offset);
}
String ASTNode::class_name() const
{
// NOTE: We strip the "JS::" prefix.
@ -4794,4 +4806,9 @@ ModuleRequest::ModuleRequest(FlyString module_specifier_, Vector<Assertion> asse
});
}
// Returns the filename of the SourceCode this range refers to.
String const& SourceRange::filename() const
{
    auto const& source_code = *code;
    return source_code.filename();
}
}

View file

@ -52,8 +52,7 @@ public:
virtual Bytecode::CodeGenerationErrorOr<void> generate_bytecode(Bytecode::Generator&) const;
virtual void dump(int indent) const;
SourceRange const& source_range() const { return m_source_range; }
SourceRange& source_range() { return m_source_range; }
SourceRange source_range() const;
String class_name() const;
@ -84,13 +83,12 @@ public:
virtual bool is_class_method() const { return false; }
protected:
explicit ASTNode(SourceRange source_range)
: m_source_range(source_range)
{
}
explicit ASTNode(SourceRange);
private:
SourceRange m_source_range;
RefPtr<SourceCode> m_source_code;
u32 m_start_offset { 0 };
u32 m_end_offset { 0 };
};
class Statement : public ASTNode {

View file

@ -244,6 +244,7 @@ set(SOURCES
Runtime/WeakSetPrototype.cpp
Runtime/WrappedFunction.cpp
Script.cpp
SourceCode.cpp
SourceTextModule.cpp
SyntaxHighlighter.cpp
SyntheticModule.cpp

View file

@ -188,6 +188,8 @@ class Script;
class Shape;
class Statement;
class StringOrSymbol;
class SourceCode;
struct SourceRange;
class SourceTextModule;
class Symbol;
class Token;

View file

@ -20,8 +20,8 @@ public:
Token next();
StringView source() const { return m_source; };
StringView filename() const { return m_filename; };
String const& source() const { return m_source; };
String const& filename() const { return m_filename; };
void disallow_html_comments() { m_allow_html_comments = false; };
@ -57,13 +57,13 @@ private:
TokenType consume_regex_literal();
StringView m_source;
String m_source;
size_t m_position { 0 };
Token m_current_token;
char m_current_char { 0 };
bool m_eof { false };
StringView m_filename;
String m_filename;
size_t m_line_number { 1 };
size_t m_line_column { 0 };

View file

@ -150,7 +150,7 @@ void MarkupGenerator::trace_to_html(TracebackFrame const& traceback_frame, Strin
auto last_slash_index = filename.find_last('/');
return last_slash_index.has_value() ? filename.substring_view(*last_slash_index + 1) : filename;
};
auto filename = escape_html_entities(get_filename_from_path(traceback_frame.source_range.filename));
auto filename = escape_html_entities(get_filename_from_path(traceback_frame.source_range.filename()));
auto trace = String::formatted("at {} ({}:{}:{})", function_name, filename, line, column);
html_output.appendff("&nbsp;&nbsp;{}<br>", trace);

File diff suppressed because it is too large Load diff

View file

@ -343,6 +343,7 @@ private:
}
};
NonnullRefPtr<SourceCode> m_source_code;
Vector<Position> m_rule_starts;
ParserState m_state;
FlyString m_filename;

View file

@ -54,6 +54,8 @@ ThrowCompletionOr<void> Error::install_error_cause(Value options)
void Error::populate_stack()
{
static auto dummy_source_range = SourceRange { .code = SourceCode::create("", ""), .start = {}, .end = {} };
auto& vm = this->vm();
m_traceback.ensure_capacity(vm.execution_context_stack().size());
for (ssize_t i = vm.execution_context_stack().size() - 1; i >= 0; i--) {
@ -67,7 +69,7 @@ void Error::populate_stack()
// reaction jobs (which aren't called anywhere from the source code).
// They're not going to generate any _unhandled_ exceptions though, so a meaningless
// source range is fine.
context->current_node ? context->current_node->source_range() : SourceRange {});
context->current_node ? context->current_node->source_range() : dummy_source_range);
}
}
@ -82,12 +84,12 @@ String Error::stack_string() const
auto const& frame = m_traceback[i];
auto function_name = frame.function_name;
// Note: Since we don't know whether we have a valid SourceRange here we just check for some default values.
if (!frame.source_range.filename.is_null() || frame.source_range.start.offset != 0 || frame.source_range.end.offset != 0) {
if (!frame.source_range.filename().is_null() || frame.source_range.start.offset != 0 || frame.source_range.end.offset != 0) {
if (function_name == "<unknown>"sv)
stack_string_builder.appendff(" at {}:{}:{}\n", frame.source_range.filename, frame.source_range.start.line, frame.source_range.start.column);
stack_string_builder.appendff(" at {}:{}:{}\n", frame.source_range.filename(), frame.source_range.start.line, frame.source_range.start.column);
else
stack_string_builder.appendff(" at {} ({}:{}:{})\n", function_name, frame.source_range.filename, frame.source_range.start.line, frame.source_range.start.column);
stack_string_builder.appendff(" at {} ({}:{}:{})\n", function_name, frame.source_range.filename(), frame.source_range.start.line, frame.source_range.start.column);
} else {
stack_string_builder.appendff(" at {}\n", function_name.is_empty() ? "<unknown>"sv : function_name.view());
}

View file

@ -728,8 +728,8 @@ void VM::dump_backtrace() const
for (ssize_t i = m_execution_context_stack.size() - 1; i >= 0; --i) {
auto& frame = m_execution_context_stack[i];
if (frame->current_node) {
auto& source_range = frame->current_node->source_range();
dbgln("-> {} @ {}:{},{}", frame->function_name, source_range.filename, source_range.start.line, source_range.start.column);
auto source_range = frame->current_node->source_range();
dbgln("-> {} @ {}:{},{}", frame->function_name, source_range.filename(), source_range.start.line, source_range.start.column);
} else {
dbgln("-> {}", frame->function_name);
}

View file

@ -0,0 +1,81 @@
/*
* Copyright (c) 2022, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Utf8View.h>
#include <LibJS/SourceCode.h>
#include <LibJS/SourceRange.h>
#include <LibJS/Token.h>
namespace JS {
// Factory: allocates a SourceCode that takes ownership of the given filename
// and source text, and hands it back as an adopted reference.
NonnullRefPtr<SourceCode> SourceCode::create(String filename, String code)
{
    auto* source_code = new SourceCode(move(filename), move(code));
    return adopt_ref(*source_code);
}
SourceCode::SourceCode(String filename, String code)
: m_filename(move(filename))
, m_code(move(code))
{
}
// Accessor for the filename this source code was created with.
String const& SourceCode::filename() const
{
    auto const& name = m_filename;
    return name;
}
// Accessor for the source text this object was created with.
String const& SourceCode::code() const
{
    auto const& text = m_code;
    return text;
}
// Converts a pair of byte offsets into a SourceRange carrying (line, column)
// positions, by walking the source text code point by code point.
//
// - start_offset / end_offset: byte offsets into code().
// - Lines and columns are 1-based; columns count code points, not bytes.
//
// An offset that falls inside a multi-byte code point resolves to the next
// code point boundary. An offset at or past the end of the source resolves to
// the position just after the last code point, instead of being left as a
// default-constructed Position.
//
// This is a linear scan on every call.
SourceRange SourceCode::range_from_offsets(u32 start_offset, u32 end_offset) const
{
    Position start;
    Position end;
    bool found_start = false;
    bool found_end = false;

    size_t line = 1;
    size_t column = 1;
    bool previous_code_point_was_carriage_return = false;

    Utf8View view(m_code);
    for (auto it = view.begin(); it != view.end(); ++it) {
        auto const byte_offset = view.byte_offset_of(it);

        // ">=" rather than "==" so that an offset pointing into the middle of a
        // multi-byte sequence still resolves (to the following code point).
        if (!found_start && byte_offset >= start_offset) {
            start = Position {
                .line = line,
                .column = column,
                .offset = start_offset,
            };
            found_start = true;
        }
        if (byte_offset >= end_offset) {
            end = Position {
                .line = line,
                .column = column,
                .offset = end_offset,
            };
            found_end = true;
            break;
        }

        u32 code_point = *it;

        bool const is_second_half_of_crlf = code_point == '\n' && previous_code_point_was_carriage_return;
        bool const is_line_terminator = code_point == '\r' || code_point == '\n' || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
        previous_code_point_was_carriage_return = code_point == '\r';

        // <CR><LF> counts as a single line terminator: the line was already
        // advanced at the CR, so the LF must change neither line nor column.
        if (is_second_half_of_crlf)
            continue;

        if (is_line_terminator) {
            ++line;
            column = 1;
            continue;
        }
        ++column;
    }

    // Offsets at (or beyond) the end of the source are never visited by the
    // loop above; map them to the position right after the last code point.
    if (!found_start) {
        start = Position {
            .line = line,
            .column = column,
            .offset = start_offset,
        };
    }
    if (!found_end) {
        end = Position {
            .line = line,
            .column = column,
            .offset = end_offset,
        };
    }

    return SourceRange { *this, start, end };
}
}

View file

@ -0,0 +1,30 @@
/*
* Copyright (c) 2022, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/String.h>
#include <LibJS/Forward.h>
namespace JS {
// Ref-counted holder for a filename together with the source text associated
// with it. Instances are obtained through create(); the constructor is private.
class SourceCode : public RefCounted<SourceCode> {
public:
    // Creates a new SourceCode from the given filename and source text.
    static NonnullRefPtr<SourceCode> create(String filename, String code);

    // The source text passed to create().
    String const& code() const;

    // The filename passed to create().
    String const& filename() const;

    // Resolves a pair of offsets into code() to a SourceRange.
    SourceRange range_from_offsets(u32 start_offset, u32 end_offset) const;

private:
    SourceCode(String filename, String code);

    String m_filename;
    String m_code;
};
}

View file

@ -8,6 +8,7 @@
#include <AK/StringView.h>
#include <AK/Types.h>
#include <LibJS/SourceCode.h>
namespace JS {
@ -20,9 +21,11 @@ struct Position {
struct SourceRange {
[[nodiscard]] bool contains(Position const& position) const { return position.offset <= end.offset && position.offset >= start.offset; }
StringView filename;
NonnullRefPtr<SourceCode> code;
Position start;
Position end;
String const& filename() const;
};
}

View file

@ -34,7 +34,7 @@ void report_exception_to_console(JS::Value value, JS::Realm& realm, ErrorInPromi
for (auto& traceback_frame : error_value.traceback()) {
auto& function_name = traceback_frame.function_name;
auto& source_range = traceback_frame.source_range;
dbgln(" {} at {}:{}:{}", function_name, source_range.filename, source_range.start.line, source_range.start.column);
dbgln(" {} at {}:{}:{}", function_name, source_range.filename(), source_range.start.line, source_range.start.column);
}
console.report_exception(error_value, error_in_promise == ErrorInPromise::Yes);