SourceCode.cpp 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. /*
  2. * Copyright (c) 2022-2023, Andreas Kling <andreas@ladybird.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/BinarySearch.h>
  7. #include <AK/Utf8View.h>
  8. #include <LibJS/SourceCode.h>
  9. #include <LibJS/SourceRange.h>
  10. #include <LibJS/Token.h>
  11. namespace JS {
  12. NonnullRefPtr<SourceCode const> SourceCode::create(String filename, String code)
  13. {
  14. return adopt_ref(*new SourceCode(move(filename), move(code)));
  15. }
  16. SourceCode::SourceCode(String filename, String code)
  17. : m_filename(move(filename))
  18. , m_code(move(code))
  19. {
  20. }
  21. String const& SourceCode::filename() const
  22. {
  23. return m_filename;
  24. }
  25. String const& SourceCode::code() const
  26. {
  27. return m_code;
  28. }
  29. void SourceCode::fill_position_cache() const
  30. {
  31. constexpr size_t predicted_mimimum_cached_positions = 8;
  32. constexpr size_t minimum_distance_between_cached_positions = 32;
  33. constexpr size_t maximum_distance_between_cached_positions = 8192;
  34. if (m_code.is_empty())
  35. return;
  36. u32 previous_code_point = 0;
  37. size_t line = 1;
  38. size_t column = 1;
  39. size_t offset_of_last_starting_point = 0;
  40. m_cached_positions.ensure_capacity(predicted_mimimum_cached_positions + m_code.bytes().size() / maximum_distance_between_cached_positions);
  41. m_cached_positions.append({ .line = 1, .column = 1, .offset = 0 });
  42. Utf8View const view(m_code);
  43. for (auto it = view.begin(); it != view.end(); ++it) {
  44. u32 code_point = *it;
  45. bool is_line_terminator = code_point == '\r' || (code_point == '\n' && previous_code_point != '\r') || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
  46. auto byte_offset = view.byte_offset_of(it);
  47. bool is_nonempty_line = is_line_terminator && previous_code_point != '\n' && previous_code_point != LINE_SEPARATOR && previous_code_point != PARAGRAPH_SEPARATOR && (code_point == '\n' || previous_code_point != '\r');
  48. auto distance_between_cached_position = byte_offset - offset_of_last_starting_point;
  49. if ((distance_between_cached_position >= minimum_distance_between_cached_positions && is_nonempty_line) || distance_between_cached_position >= maximum_distance_between_cached_positions) {
  50. m_cached_positions.append({ .line = line, .column = column, .offset = byte_offset });
  51. offset_of_last_starting_point = byte_offset;
  52. }
  53. if (is_line_terminator) {
  54. line += 1;
  55. column = 1;
  56. } else {
  57. column += 1;
  58. }
  59. previous_code_point = code_point;
  60. }
  61. }
  62. SourceRange SourceCode::range_from_offsets(u32 start_offset, u32 end_offset) const
  63. {
  64. // If the underlying code is an empty string, the range is 1,1 - 1,1 no matter what.
  65. if (m_code.is_empty())
  66. return { *this, { .line = 1, .column = 1, .offset = 0 }, { .line = 1, .column = 1, .offset = 0 } };
  67. if (m_cached_positions.is_empty())
  68. fill_position_cache();
  69. Position current { .line = 1, .column = 1, .offset = 0 };
  70. if (!m_cached_positions.is_empty()) {
  71. Position const dummy;
  72. size_t nearest_index = 0;
  73. binary_search(m_cached_positions, dummy, &nearest_index,
  74. [&](auto&, auto& starting_point) {
  75. return start_offset - starting_point.offset;
  76. });
  77. current = m_cached_positions[nearest_index];
  78. }
  79. Optional<Position> start;
  80. Optional<Position> end;
  81. u32 previous_code_point = 0;
  82. Utf8View const view(m_code);
  83. for (auto it = view.iterator_at_byte_offset_without_validation(current.offset); it != view.end(); ++it) {
  84. // If we're on or after the start offset, this is the start position.
  85. if (!start.has_value() && view.byte_offset_of(it) >= start_offset) {
  86. start = Position {
  87. .line = current.line,
  88. .column = current.column,
  89. .offset = start_offset,
  90. };
  91. }
  92. // If we're on or after the end offset, this is the end position.
  93. if (!end.has_value() && view.byte_offset_of(it) >= end_offset) {
  94. end = Position {
  95. .line = current.line,
  96. .column = current.column,
  97. .offset = end_offset,
  98. };
  99. break;
  100. }
  101. u32 code_point = *it;
  102. bool const is_line_terminator = code_point == '\r' || (code_point == '\n' && previous_code_point != '\r') || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
  103. previous_code_point = code_point;
  104. if (is_line_terminator) {
  105. current.line += 1;
  106. current.column = 1;
  107. continue;
  108. }
  109. current.column += 1;
  110. }
  111. // If we didn't find both a start and end position, just return 1,1-1,1.
  112. // FIXME: This is a hack. Find a way to return the nicest possible values here.
  113. if (!start.has_value() || !end.has_value())
  114. return SourceRange { *this, { .line = 1, .column = 1 }, { .line = 1, .column = 1 } };
  115. return SourceRange { *this, *start, *end };
  116. }
  117. }