Applier.cpp 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. /*
  2. * Copyright (c) 2023-2024, Shannon Booth <shannon@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Stream.h>
  7. #include <LibDiff/Applier.h>
  8. #include <LibDiff/Hunks.h>
  9. namespace Diff {
  10. static size_t expected_line_number(HunkLocation const& location)
  11. {
  12. auto line = location.old_range.start_line;
  13. // NOTE: This is to handle the case we are adding a file, e.g for a range such as:
  14. // '@@ -0,0 +1,3 @@'
  15. if (location.old_range.start_line == 0)
  16. ++line;
  17. VERIFY(line != 0);
  18. return line;
  19. }
  20. struct Location {
  21. size_t line_number;
  22. size_t fuzz { 0 };
  23. ssize_t offset { 0 };
  24. };
  25. static Optional<Location> locate_hunk(Vector<StringView> const& content, Hunk const& hunk, ssize_t offset, size_t max_fuzz = 3)
  26. {
  27. // Make a first best guess at where the from-file range is telling us where the hunk should be.
  28. size_t offset_guess = expected_line_number(hunk.location) - 1 + offset;
  29. // If there's no lines surrounding this hunk - it will always succeed,
  30. // so there is no point in checking any further. Note that this check is
  31. // also what makes matching against an empty 'from file' work (with no lines),
  32. // as in that case there is no content for us to even match against in the
  33. // first place!
  34. //
  35. // However, we also should reject patches being added when the hunk is
  36. // claiming the file is completely empty - but there are actually lines in
  37. // that file.
  38. if (hunk.location.old_range.number_of_lines == 0) {
  39. if (hunk.location.old_range.start_line == 0 && !content.is_empty())
  40. return {};
  41. return Location { offset_guess, 0, 0 };
  42. }
  43. size_t patch_prefix_context = 0;
  44. for (auto const& line : hunk.lines) {
  45. if (line.operation != Line::Operation::Context)
  46. break;
  47. ++patch_prefix_context;
  48. }
  49. size_t patch_suffix_context = 0;
  50. for (auto const& line : hunk.lines.in_reverse()) {
  51. if (line.operation != Line::Operation::Context)
  52. break;
  53. ++patch_suffix_context;
  54. }
  55. size_t context = max(patch_prefix_context, patch_suffix_context);
  56. // Look through the file trying to match the hunk for it. If we can't find anything anywhere in the file, then try and
  57. // match the hunk by ignoring an increasing amount of context lines. The number of context lines that are ignored is
  58. // called the 'fuzz'.
  59. for (size_t fuzz = 0; fuzz <= max_fuzz; ++fuzz) {
  60. auto suffix_fuzz = max(fuzz + patch_suffix_context - context, 0);
  61. auto prefix_fuzz = max(fuzz + patch_prefix_context - context, 0);
  62. // If the fuzz is greater than the total number of lines for a hunk, then it may be possible for the hunk to match anything.
  63. if (suffix_fuzz + prefix_fuzz >= hunk.lines.size())
  64. return {};
  65. auto hunk_matches_starting_from_line = [&](size_t line) {
  66. line += prefix_fuzz;
  67. // Ensure that all of the lines in the hunk match starting from 'line', ignoring the specified number of context lines.
  68. return all_of(hunk.lines.begin() + prefix_fuzz, hunk.lines.end() - suffix_fuzz, [&](const Line& hunk_line) {
  69. // Ignore additions in our increment of line and comparison as they are not part of the 'original file'
  70. if (hunk_line.operation == Line::Operation::Addition)
  71. return true;
  72. if (line >= content.size())
  73. return false;
  74. if (content[line] != hunk_line.content)
  75. return false;
  76. ++line;
  77. return true;
  78. });
  79. };
  80. for (size_t line = offset_guess; line < content.size(); ++line) {
  81. if (hunk_matches_starting_from_line(line))
  82. return Location { line, fuzz, static_cast<ssize_t>(line - offset_guess) };
  83. }
  84. for (size_t line = offset_guess; line != 0; --line) {
  85. if (hunk_matches_starting_from_line(line - 1))
  86. return Location { line - 1, fuzz, static_cast<ssize_t>(line - offset_guess) };
  87. }
  88. }
  89. // No bueno.
  90. return {};
  91. }
  92. static ErrorOr<size_t> write_hunk(Stream& out, Hunk const& hunk, Location const& location, Vector<StringView> const& lines)
  93. {
  94. auto line_number = location.line_number;
  95. for (auto const& patch_line : hunk.lines) {
  96. if (patch_line.operation == Line::Operation::Context) {
  97. TRY(out.write_formatted("{}\n", lines.at(line_number)));
  98. ++line_number;
  99. } else if (patch_line.operation == Line::Operation::Addition) {
  100. TRY(out.write_formatted("{}\n", patch_line.content));
  101. } else if (patch_line.operation == Line::Operation::Removal) {
  102. ++line_number;
  103. }
  104. }
  105. return line_number;
  106. }
  107. ErrorOr<void> apply_patch(Stream& out, Vector<StringView> const& lines, Patch const& patch)
  108. {
  109. size_t line_number = 0; // NOTE: relative to 'old' file.
  110. ssize_t offset_error = 0;
  111. for (size_t hunk_num = 0; hunk_num < patch.hunks.size(); ++hunk_num) {
  112. auto const& hunk = patch.hunks[hunk_num];
  113. auto maybe_location = locate_hunk(lines, hunk, offset_error);
  114. if (!maybe_location.has_value())
  115. return Error::from_string_literal("Failed to locate where to apply patch");
  116. auto location = *maybe_location;
  117. offset_error += location.offset;
  118. // Write up until where we have found this latest hunk from the old file.
  119. for (; line_number < location.line_number; ++line_number)
  120. TRY(out.write_formatted("{}\n", lines.at(line_number)));
  121. // Then output the hunk to what we hope is the correct location in the file.
  122. line_number = TRY(write_hunk(out, hunk, location, lines));
  123. }
  124. // We've finished applying all hunks, write out anything from the old file we haven't already.
  125. for (; line_number < lines.size(); ++line_number)
  126. TRY(out.write_formatted("{}\n", lines[line_number]));
  127. return {};
  128. }
  129. }