123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141 |
- /*
- * Copyright (c) 2022-2023, Andreas Kling <kling@serenityos.org>
- *
- * SPDX-License-Identifier: BSD-2-Clause
- */
- #include <AK/BinarySearch.h>
- #include <AK/Utf8View.h>
- #include <LibJS/SourceCode.h>
- #include <LibJS/SourceRange.h>
- #include <LibJS/Token.h>
- namespace JS {
- NonnullRefPtr<SourceCode const> SourceCode::create(String filename, String code)
- {
- return adopt_ref(*new SourceCode(move(filename), move(code)));
- }
- SourceCode::SourceCode(String filename, String code)
- : m_filename(move(filename))
- , m_code(move(code))
- {
- }
- String const& SourceCode::filename() const
- {
- return m_filename;
- }
- String const& SourceCode::code() const
- {
- return m_code;
- }
- void SourceCode::fill_position_cache() const
- {
- constexpr size_t minimum_distance_between_cached_positions = 10000;
- if (m_code.is_empty())
- return;
- bool previous_code_point_was_carriage_return = false;
- size_t line = 1;
- size_t column = 1;
- size_t offset_of_last_starting_point = 0;
- m_cached_positions.ensure_capacity(m_code.bytes().size() / minimum_distance_between_cached_positions);
- m_cached_positions.append({ .line = 1, .column = 1, .offset = 0 });
- Utf8View const view(m_code);
- for (auto it = view.begin(); it != view.end(); ++it) {
- u32 code_point = *it;
- bool is_line_terminator = code_point == '\r' || (code_point == '\n' && !previous_code_point_was_carriage_return) || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
- previous_code_point_was_carriage_return = code_point == '\r';
- auto byte_offset = view.byte_offset_of(it);
- if ((byte_offset - offset_of_last_starting_point) >= minimum_distance_between_cached_positions) {
- m_cached_positions.append({ .line = line, .column = column, .offset = byte_offset });
- offset_of_last_starting_point = byte_offset;
- }
- if (is_line_terminator) {
- line += 1;
- column = 1;
- } else {
- column += 1;
- }
- }
- }
- SourceRange SourceCode::range_from_offsets(u32 start_offset, u32 end_offset) const
- {
- // If the underlying code is an empty string, the range is 1,1 - 1,1 no matter what.
- if (m_code.is_empty())
- return { *this, { .line = 1, .column = 1, .offset = 0 }, { .line = 1, .column = 1, .offset = 0 } };
- if (m_cached_positions.is_empty())
- fill_position_cache();
- Position current { .line = 1, .column = 1, .offset = 0 };
- if (!m_cached_positions.is_empty()) {
- Position const dummy;
- size_t nearest_index = 0;
- binary_search(m_cached_positions, dummy, &nearest_index,
- [&](auto&, auto& starting_point) {
- return start_offset - starting_point.offset;
- });
- current = m_cached_positions[nearest_index];
- }
- Optional<Position> start;
- Optional<Position> end;
- bool previous_code_point_was_carriage_return = false;
- Utf8View const view(m_code);
- for (auto it = view.iterator_at_byte_offset_without_validation(current.offset); it != view.end(); ++it) {
- // If we're on or after the start offset, this is the start position.
- if (!start.has_value() && view.byte_offset_of(it) >= start_offset) {
- start = Position {
- .line = current.line,
- .column = current.column,
- .offset = start_offset,
- };
- }
- // If we're on or after the end offset, this is the end position.
- if (!end.has_value() && view.byte_offset_of(it) >= end_offset) {
- end = Position {
- .line = current.line,
- .column = current.column,
- .offset = end_offset,
- };
- break;
- }
- u32 code_point = *it;
- bool const is_line_terminator = code_point == '\r' || (code_point == '\n' && !previous_code_point_was_carriage_return) || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
- previous_code_point_was_carriage_return = code_point == '\r';
- if (is_line_terminator) {
- current.line += 1;
- current.column = 1;
- continue;
- }
- current.column += 1;
- }
- // If we didn't find both a start and end position, just return 1,1-1,1.
- // FIXME: This is a hack. Find a way to return the nicest possible values here.
- if (!start.has_value() || !end.has_value())
- return SourceRange { *this, { .line = 1, .column = 1 }, { .line = 1, .column = 1 } };
- return SourceRange { *this, *start, *end };
- }
- }
|