LibJS: Cache source code positions more often
Some checks are pending
Lint Code / lint (push) Waiting to run
CI / Lagom (false, FUZZ, ubuntu-24.04, Linux, Clang) (push) Waiting to run
CI / Lagom (false, NO_FUZZ, macos-15, macOS, Clang) (push) Waiting to run
CI / Lagom (false, NO_FUZZ, ubuntu-24.04, Linux, GNU) (push) Waiting to run
CI / Lagom (true, NO_FUZZ, ubuntu-24.04, Linux, Clang) (push) Waiting to run
Package the js repl as a binary artifact / build-and-package (macos-14, macOS, macOS-universal2) (push) Waiting to run
Package the js repl as a binary artifact / build-and-package (ubuntu-24.04, Linux, Linux-x86_64) (push) Waiting to run
Run test262 and test-wasm / run_and_update_results (push) Waiting to run
Push notes / build (push) Waiting to run

The source code position cache was moved from a line based approach
to a "chunk"-based approach to improve performance on large, minified
JavaScript files with few lines, but this has had an adverse effect
on _multi-line_ source files.

Reintroduce some of the old behaviour by caching lines again, with
some added sanity limits to avoid caching empty/overly small lines.

Source code positions in files with few lines will still be cached
less often, since minified JavaScript files can be assumed to be
unusually large, and since stack traces for minified JavaScript
are less useful as well.

On WPT tests with large JavaScript dependencies like
`css/css-masking/animations/clip-interpolation.html` this reduces the
amount of time spent in `SourceCode::range_from_offsets` by as much as
99.98%, for the small small price of 80KB extra memory usage.
This commit is contained in:
Jonne Ransijn 2024-12-01 22:21:18 +01:00 committed by Andreas Kling
parent 079fdf50d4
commit 1b3f8e1e9a
Notes: github-actions[bot] 2024-12-02 10:43:02 +00:00
2 changed files with 18 additions and 9 deletions

View file

@ -6,6 +6,8 @@
#pragma once
#include <AK/Types.h>
namespace JS {
struct Position {

View file

@ -35,26 +35,31 @@ String const& SourceCode::code() const
void SourceCode::fill_position_cache() const
{
constexpr size_t minimum_distance_between_cached_positions = 10000;
constexpr size_t predicted_mimimum_cached_positions = 8;
constexpr size_t minimum_distance_between_cached_positions = 32;
constexpr size_t maximum_distance_between_cached_positions = 8192;
if (m_code.is_empty())
return;
bool previous_code_point_was_carriage_return = false;
u32 previous_code_point = 0;
size_t line = 1;
size_t column = 1;
size_t offset_of_last_starting_point = 0;
m_cached_positions.ensure_capacity(m_code.bytes().size() / minimum_distance_between_cached_positions);
m_cached_positions.ensure_capacity(predicted_mimimum_cached_positions + m_code.bytes().size() / maximum_distance_between_cached_positions);
m_cached_positions.append({ .line = 1, .column = 1, .offset = 0 });
Utf8View const view(m_code);
for (auto it = view.begin(); it != view.end(); ++it) {
u32 code_point = *it;
bool is_line_terminator = code_point == '\r' || (code_point == '\n' && !previous_code_point_was_carriage_return) || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
previous_code_point_was_carriage_return = code_point == '\r';
bool is_line_terminator = code_point == '\r' || (code_point == '\n' && previous_code_point != '\r') || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
auto byte_offset = view.byte_offset_of(it);
if ((byte_offset - offset_of_last_starting_point) >= minimum_distance_between_cached_positions) {
bool is_nonempty_line = is_line_terminator && previous_code_point != '\n' && previous_code_point != LINE_SEPARATOR && previous_code_point != PARAGRAPH_SEPARATOR && (code_point == '\n' || previous_code_point != '\r');
auto distance_between_cached_position = byte_offset - offset_of_last_starting_point;
if ((distance_between_cached_position >= minimum_distance_between_cached_positions && is_nonempty_line) || distance_between_cached_position >= maximum_distance_between_cached_positions) {
m_cached_positions.append({ .line = line, .column = column, .offset = byte_offset });
offset_of_last_starting_point = byte_offset;
}
@ -65,6 +70,8 @@ void SourceCode::fill_position_cache() const
} else {
column += 1;
}
previous_code_point = code_point;
}
}
@ -93,7 +100,7 @@ SourceRange SourceCode::range_from_offsets(u32 start_offset, u32 end_offset) con
Optional<Position> start;
Optional<Position> end;
bool previous_code_point_was_carriage_return = false;
u32 previous_code_point = 0;
Utf8View const view(m_code);
for (auto it = view.iterator_at_byte_offset_without_validation(current.offset); it != view.end(); ++it) {
@ -119,8 +126,8 @@ SourceRange SourceCode::range_from_offsets(u32 start_offset, u32 end_offset) con
u32 code_point = *it;
bool const is_line_terminator = code_point == '\r' || (code_point == '\n' && !previous_code_point_was_carriage_return) || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
previous_code_point_was_carriage_return = code_point == '\r';
bool const is_line_terminator = code_point == '\r' || (code_point == '\n' && previous_code_point != '\r') || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
previous_code_point = code_point;
if (is_line_terminator) {
current.line += 1;