SourceCode.cpp 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. /*
  2. * Copyright (c) 2022, Andreas Kling <kling@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/BinarySearch.h>
  7. #include <AK/Utf8View.h>
  8. #include <LibJS/SourceCode.h>
  9. #include <LibJS/SourceRange.h>
  10. #include <LibJS/Token.h>
  11. namespace JS {
  12. NonnullRefPtr<SourceCode> SourceCode::create(String filename, String code)
  13. {
  14. return adopt_ref(*new SourceCode(move(filename), move(code)));
  15. }
  16. SourceCode::SourceCode(String filename, String code)
  17. : m_filename(move(filename))
  18. , m_code(move(code))
  19. {
  20. }
  21. String const& SourceCode::filename() const
  22. {
  23. return m_filename;
  24. }
  25. String const& SourceCode::code() const
  26. {
  27. return m_code;
  28. }
  29. void SourceCode::compute_line_break_offsets() const
  30. {
  31. m_line_break_offsets = Vector<size_t> {};
  32. if (m_code.is_empty())
  33. return;
  34. bool previous_code_point_was_carriage_return = false;
  35. Utf8View view(m_code.view());
  36. for (auto it = view.begin(); it != view.end(); ++it) {
  37. u32 code_point = *it;
  38. bool is_line_terminator = code_point == '\r' || (code_point == '\n' && !previous_code_point_was_carriage_return) || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
  39. previous_code_point_was_carriage_return = code_point == '\r';
  40. if (is_line_terminator)
  41. m_line_break_offsets->append(view.byte_offset_of(it));
  42. }
  43. }
  44. SourceRange SourceCode::range_from_offsets(u32 start_offset, u32 end_offset) const
  45. {
  46. if (m_code.is_empty())
  47. return { *this, {}, {} };
  48. if (!m_line_break_offsets.has_value())
  49. compute_line_break_offsets();
  50. size_t line = 1;
  51. size_t nearest_line_break_index = 0;
  52. size_t nearest_preceding_line_break_offset = 0;
  53. if (!m_line_break_offsets->is_empty()) {
  54. binary_search(*m_line_break_offsets, start_offset, &nearest_line_break_index);
  55. line = 1 + nearest_line_break_index;
  56. nearest_preceding_line_break_offset = (*m_line_break_offsets)[nearest_line_break_index];
  57. }
  58. Position start;
  59. Position end;
  60. size_t column = 1;
  61. bool previous_code_point_was_carriage_return = false;
  62. Utf8View view(m_code.view());
  63. for (auto it = view.iterator_at_byte_offset_without_validation(nearest_preceding_line_break_offset); it != view.end(); ++it) {
  64. if (start_offset == view.byte_offset_of(it)) {
  65. start = Position {
  66. .line = line,
  67. .column = column,
  68. .offset = start_offset,
  69. };
  70. }
  71. if (end_offset == view.byte_offset_of(it)) {
  72. end = Position {
  73. .line = line,
  74. .column = column,
  75. .offset = end_offset,
  76. };
  77. break;
  78. }
  79. u32 code_point = *it;
  80. bool is_line_terminator = code_point == '\r' || (code_point == '\n' && !previous_code_point_was_carriage_return) || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
  81. previous_code_point_was_carriage_return = code_point == '\r';
  82. if (is_line_terminator) {
  83. ++line;
  84. column = 1;
  85. continue;
  86. }
  87. ++column;
  88. }
  89. return SourceRange { *this, start, end };
  90. }
  91. }