Hunks.cpp 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
  1. /*
  2. * Copyright (c) 2020, Itamar S. <itamar8910@gmail.com>
  3. * Copyright (c) 2023, Shannon Booth <shannon@serenityos.org>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include "Hunks.h"
  8. #include <AK/Debug.h>
  9. #include <AK/LexicalPath.h>
  10. namespace Diff {
  11. Optional<HunkLocation> Parser::consume_unified_location()
  12. {
  13. auto consume_range = [this](Range& range) {
  14. if (!consume_line_number(range.start_line))
  15. return false;
  16. if (consume_specific(',')) {
  17. if (!consume_line_number(range.number_of_lines))
  18. return false;
  19. } else {
  20. range.number_of_lines = 1;
  21. }
  22. return true;
  23. };
  24. if (!consume_specific("@@ -"sv))
  25. return {};
  26. HunkLocation location;
  27. if (!consume_range(location.old_range))
  28. return {};
  29. if (!consume_specific(" +"sv))
  30. return {};
  31. if (!consume_range(location.new_range))
  32. return {};
  33. if (!consume_specific(" @@"sv))
  34. return {};
  35. return location;
  36. }
  37. bool Parser::consume_line_number(size_t& number)
  38. {
  39. auto line = consume_while(is_ascii_digit);
  40. auto maybe_number = line.to_number<size_t>();
  41. if (!maybe_number.has_value())
  42. return false;
  43. number = maybe_number.value();
  44. return true;
  45. }
  46. ErrorOr<String> Parser::parse_file_line(Optional<size_t> const& strip_count)
  47. {
  48. // FIXME: handle parsing timestamps as well.
  49. auto path = consume_line();
  50. // No strip count given. Default to basename of file.
  51. if (!strip_count.has_value())
  52. return String::from_byte_string(LexicalPath::basename(path));
  53. // NOTE: We cannot use LexicalPath::parts as we want to strip the non-canonicalized path.
  54. auto const& parts = path.split_view('/');
  55. // More components to strip than the filename has. Just pretend it is missing.
  56. if (strip_count.value() >= parts.size())
  57. return String();
  58. // Remove given number of leading components from the path.
  59. size_t components = parts.size() - strip_count.value();
  60. StringBuilder stripped_path;
  61. for (size_t i = parts.size() - components; i < parts.size(); ++i) {
  62. TRY(stripped_path.try_append(parts[i]));
  63. if (i != parts.size() - 1)
  64. TRY(stripped_path.try_append("/"sv));
  65. }
  66. return stripped_path.to_string();
  67. }
  68. ErrorOr<Patch> Parser::parse_patch(Optional<size_t> const& strip_count)
  69. {
  70. Patch patch;
  71. patch.header = TRY(parse_header(strip_count));
  72. patch.hunks = TRY(parse_hunks());
  73. return patch;
  74. }
  75. ErrorOr<Header> Parser::parse_header(Optional<size_t> const& strip_count)
  76. {
  77. Header header;
  78. while (!is_eof()) {
  79. if (consume_specific("+++ "sv)) {
  80. header.new_file_path = TRY(parse_file_line(strip_count));
  81. continue;
  82. }
  83. if (consume_specific("--- "sv)) {
  84. header.old_file_path = TRY(parse_file_line(strip_count));
  85. continue;
  86. }
  87. if (next_is("@@ ")) {
  88. header.format = Format::Unified;
  89. return header;
  90. }
  91. consume_line();
  92. }
  93. return header;
  94. }
  95. ErrorOr<Vector<Hunk>> Parser::parse_hunks()
  96. {
  97. Vector<Hunk> hunks;
  98. while (next_is("@@ ")) {
  99. // Try an locate a hunk location in this hunk. It may be prefixed with information.
  100. auto maybe_location = consume_unified_location();
  101. consume_line();
  102. if (!maybe_location.has_value())
  103. break;
  104. Hunk hunk { *maybe_location, {} };
  105. auto old_lines_expected = hunk.location.old_range.number_of_lines;
  106. auto new_lines_expected = hunk.location.new_range.number_of_lines;
  107. // We've found a location. Now parse out all of the expected content lines.
  108. while (old_lines_expected != 0 || new_lines_expected != 0) {
  109. StringView line = consume_line();
  110. if (line.is_empty())
  111. return Error::from_string_literal("Malformed empty content line in patch");
  112. if (line[0] != ' ' && line[0] != '+' && line[0] != '-')
  113. return Error::from_string_literal("Invaid operation in patch");
  114. auto const operation = Line::operation_from_symbol(line[0]);
  115. if (operation != Line::Operation::Removal) {
  116. if (new_lines_expected == 0)
  117. return Error::from_string_literal("Found more removal and context lines in patch than expected");
  118. --new_lines_expected;
  119. }
  120. if (operation != Line::Operation::Addition) {
  121. if (old_lines_expected == 0)
  122. return Error::from_string_literal("Found more addition and context lines in patch than expected");
  123. --old_lines_expected;
  124. }
  125. auto const content = line.substring_view(1, line.length() - 1);
  126. TRY(hunk.lines.try_append(Line { operation, TRY(String::from_utf8(content)) }));
  127. }
  128. TRY(hunks.try_append(hunk));
  129. }
  130. if constexpr (HUNKS_DEBUG) {
  131. for (auto const& hunk : hunks) {
  132. dbgln("{}", hunk.location);
  133. for (auto const& line : hunk.lines)
  134. dbgln("{}", line);
  135. }
  136. }
  137. return hunks;
  138. }
  139. ErrorOr<Vector<Hunk>> parse_hunks(StringView diff)
  140. {
  141. Parser lexer(diff);
  142. while (!lexer.next_is("@@ ") && !lexer.is_eof())
  143. lexer.consume_line();
  144. return lexer.parse_hunks();
  145. }
  146. }