SourceCode.cpp 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124
  1. /*
  2. * Copyright (c) 2022-2023, Andreas Kling <kling@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/BinarySearch.h>
  7. #include <AK/Utf8View.h>
  8. #include <LibJS/SourceCode.h>
  9. #include <LibJS/SourceRange.h>
  10. #include <LibJS/Token.h>
  11. namespace JS {
  12. NonnullRefPtr<SourceCode const> SourceCode::create(String filename, String code)
  13. {
  14. return adopt_ref(*new SourceCode(move(filename), move(code)));
  15. }
  16. SourceCode::SourceCode(String filename, String code)
  17. : m_filename(move(filename))
  18. , m_code(move(code))
  19. {
  20. }
  21. String const& SourceCode::filename() const
  22. {
  23. return m_filename;
  24. }
  25. String const& SourceCode::code() const
  26. {
  27. return m_code;
  28. }
  29. void SourceCode::compute_line_break_offsets() const
  30. {
  31. m_line_break_offsets = Vector<size_t> {};
  32. if (m_code.is_empty())
  33. return;
  34. bool previous_code_point_was_carriage_return = false;
  35. Utf8View view(m_code);
  36. for (auto it = view.begin(); it != view.end(); ++it) {
  37. u32 code_point = *it;
  38. bool is_line_terminator = code_point == '\r' || (code_point == '\n' && !previous_code_point_was_carriage_return) || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
  39. previous_code_point_was_carriage_return = code_point == '\r';
  40. if (is_line_terminator)
  41. m_line_break_offsets->append(view.byte_offset_of(it));
  42. }
  43. }
  44. SourceRange SourceCode::range_from_offsets(u32 start_offset, u32 end_offset) const
  45. {
  46. // If the underlying code is an empty string, the range is 1,1 - 1,1 no matter what.
  47. if (m_code.is_empty())
  48. return { *this, { .line = 1, .column = 1, .offset = 0 }, { .line = 1, .column = 1, .offset = 0 } };
  49. if (!m_line_break_offsets.has_value())
  50. compute_line_break_offsets();
  51. size_t line = 1;
  52. size_t nearest_line_break_index = 0;
  53. size_t nearest_preceding_line_break_offset = 0;
  54. if (!m_line_break_offsets->is_empty()) {
  55. binary_search(*m_line_break_offsets, start_offset, &nearest_line_break_index);
  56. line = 1 + nearest_line_break_index;
  57. nearest_preceding_line_break_offset = (*m_line_break_offsets)[nearest_line_break_index];
  58. }
  59. Optional<Position> start;
  60. Optional<Position> end;
  61. size_t column = 1;
  62. bool previous_code_point_was_carriage_return = false;
  63. Utf8View view(m_code);
  64. for (auto it = view.iterator_at_byte_offset_without_validation(nearest_preceding_line_break_offset); it != view.end(); ++it) {
  65. // If we're on or after the start offset, this is the start position.
  66. if (!start.has_value() && view.byte_offset_of(it) >= start_offset) {
  67. start = Position {
  68. .line = line,
  69. .column = column,
  70. .offset = start_offset,
  71. };
  72. }
  73. // If we're on or after the end offset, this is the end position.
  74. if (!end.has_value() && view.byte_offset_of(it) >= end_offset) {
  75. end = Position {
  76. .line = line,
  77. .column = column,
  78. .offset = end_offset,
  79. };
  80. break;
  81. }
  82. u32 code_point = *it;
  83. bool is_line_terminator = code_point == '\r' || (code_point == '\n' && !previous_code_point_was_carriage_return) || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
  84. previous_code_point_was_carriage_return = code_point == '\r';
  85. if (is_line_terminator) {
  86. ++line;
  87. column = 1;
  88. continue;
  89. }
  90. ++column;
  91. }
  92. // If we didn't find both a start and end position, just return 1,1-1,1.
  93. // FIXME: This is a hack. Find a way to return the nicest possible values here.
  94. if (!start.has_value() || !end.has_value())
  95. return SourceRange { *this, { .line = 1, .column = 1 }, { .line = 1, .column = 1 } };
  96. return SourceRange { *this, *start, *end };
  97. }
  98. }