SourceCode.cpp 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. /*
  2. * Copyright (c) 2022-2023, Andreas Kling <kling@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/BinarySearch.h>
  7. #include <AK/Utf8View.h>
  8. #include <LibJS/SourceCode.h>
  9. #include <LibJS/SourceRange.h>
  10. #include <LibJS/Token.h>
  11. namespace JS {
  12. NonnullRefPtr<SourceCode const> SourceCode::create(String filename, String code)
  13. {
  14. return adopt_ref(*new SourceCode(move(filename), move(code)));
  15. }
  16. SourceCode::SourceCode(String filename, String code)
  17. : m_filename(move(filename))
  18. , m_code(move(code))
  19. {
  20. }
  21. String const& SourceCode::filename() const
  22. {
  23. return m_filename;
  24. }
  25. String const& SourceCode::code() const
  26. {
  27. return m_code;
  28. }
  29. void SourceCode::fill_position_cache() const
  30. {
  31. constexpr size_t minimum_distance_between_cached_positions = 10000;
  32. if (m_code.is_empty())
  33. return;
  34. bool previous_code_point_was_carriage_return = false;
  35. size_t line = 1;
  36. size_t column = 1;
  37. size_t offset_of_last_starting_point = 0;
  38. m_cached_positions.ensure_capacity(m_code.bytes().size() / minimum_distance_between_cached_positions);
  39. m_cached_positions.append({ .line = 1, .column = 1, .offset = 0 });
  40. Utf8View const view(m_code);
  41. for (auto it = view.begin(); it != view.end(); ++it) {
  42. u32 code_point = *it;
  43. bool is_line_terminator = code_point == '\r' || (code_point == '\n' && !previous_code_point_was_carriage_return) || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
  44. previous_code_point_was_carriage_return = code_point == '\r';
  45. auto byte_offset = view.byte_offset_of(it);
  46. if ((byte_offset - offset_of_last_starting_point) >= minimum_distance_between_cached_positions) {
  47. m_cached_positions.append({ .line = line, .column = column, .offset = byte_offset });
  48. offset_of_last_starting_point = byte_offset;
  49. }
  50. if (is_line_terminator) {
  51. line += 1;
  52. column = 1;
  53. } else {
  54. column += 1;
  55. }
  56. }
  57. }
  58. SourceRange SourceCode::range_from_offsets(u32 start_offset, u32 end_offset) const
  59. {
  60. // If the underlying code is an empty string, the range is 1,1 - 1,1 no matter what.
  61. if (m_code.is_empty())
  62. return { *this, { .line = 1, .column = 1, .offset = 0 }, { .line = 1, .column = 1, .offset = 0 } };
  63. if (m_cached_positions.is_empty())
  64. fill_position_cache();
  65. Position current { .line = 1, .column = 1, .offset = 0 };
  66. if (!m_cached_positions.is_empty()) {
  67. Position const dummy;
  68. size_t nearest_index = 0;
  69. binary_search(m_cached_positions, dummy, &nearest_index,
  70. [&](auto&, auto& starting_point) {
  71. return start_offset - starting_point.offset;
  72. });
  73. current = m_cached_positions[nearest_index];
  74. }
  75. Optional<Position> start;
  76. Optional<Position> end;
  77. bool previous_code_point_was_carriage_return = false;
  78. Utf8View const view(m_code);
  79. for (auto it = view.iterator_at_byte_offset_without_validation(current.offset); it != view.end(); ++it) {
  80. // If we're on or after the start offset, this is the start position.
  81. if (!start.has_value() && view.byte_offset_of(it) >= start_offset) {
  82. start = Position {
  83. .line = current.line,
  84. .column = current.column,
  85. .offset = start_offset,
  86. };
  87. }
  88. // If we're on or after the end offset, this is the end position.
  89. if (!end.has_value() && view.byte_offset_of(it) >= end_offset) {
  90. end = Position {
  91. .line = current.line,
  92. .column = current.column,
  93. .offset = end_offset,
  94. };
  95. break;
  96. }
  97. u32 code_point = *it;
  98. bool const is_line_terminator = code_point == '\r' || (code_point == '\n' && !previous_code_point_was_carriage_return) || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
  99. previous_code_point_was_carriage_return = code_point == '\r';
  100. if (is_line_terminator) {
  101. current.line += 1;
  102. current.column = 1;
  103. continue;
  104. }
  105. current.column += 1;
  106. }
  107. // If we didn't find both a start and end position, just return 1,1-1,1.
  108. // FIXME: This is a hack. Find a way to return the nicest possible values here.
  109. if (!start.has_value() || !end.has_value())
  110. return SourceRange { *this, { .line = 1, .column = 1 }, { .line = 1, .column = 1 } };
  111. return SourceRange { *this, *start, *end };
  112. }
  113. }