Generator.cpp 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. /*
  2. * Copyright (c) 2021, Mustafa Quraish <mustafa@serenityos.org>
  3. * Copyright (c) 2023, Shannon Booth <shannon@serenityos.org>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include "Generator.h"
  8. namespace Diff {
  9. ErrorOr<Vector<Hunk>> from_text(StringView old_text, StringView new_text, size_t context)
  10. {
  11. auto old_lines = old_text.lines();
  12. auto new_lines = new_text.lines();
  13. /**
  14. * This is a simple implementation of the Longest Common Subsequence algorithm (over
  15. * the lines of the text as opposed to the characters). A Dynamic programming approach
  16. * is used here.
  17. */
  18. enum class Direction {
  19. Down, // Added a new line
  20. Right, // Removed a line
  21. Diagonal, // Line remained the same
  22. };
  23. // A single cell in the DP-matrix. Cell (i, j) represents the longest common
  24. // sub-sequence of lines between old_lines[0 : i] and new_lines[0 : j].
  25. struct Cell {
  26. size_t length;
  27. Direction direction;
  28. };
  29. auto dp_matrix = Vector<Cell>();
  30. TRY(dp_matrix.try_resize((old_lines.size() + 1) * (new_lines.size() + 1)));
  31. auto dp = [&dp_matrix, width = old_lines.size() + 1](size_t i, size_t j) -> Cell& {
  32. return dp_matrix[i + width * j];
  33. };
  34. // Initialize the first row and column
  35. for (size_t i = 0; i <= old_lines.size(); ++i)
  36. dp(i, new_lines.size()) = { 0, Direction::Right };
  37. for (size_t j = 0; j <= new_lines.size(); ++j)
  38. dp(old_lines.size(), 0) = { 0, Direction::Down };
  39. // Fill in the rest of the DP table
  40. for (int i = old_lines.size() - 1; i >= 0; --i) {
  41. for (int j = new_lines.size() - 1; j >= 0; --j) {
  42. if (old_lines[i] == new_lines[j]) {
  43. dp(i, j) = { dp(i + 1, j + 1).length + 1, Direction::Diagonal };
  44. } else {
  45. auto down = dp(i, j + 1).length;
  46. auto right = dp(i + 1, j).length;
  47. if (down > right)
  48. dp(i, j) = { down, Direction::Down };
  49. else
  50. dp(i, j) = { right, Direction::Right };
  51. }
  52. }
  53. }
  54. Vector<Hunk> hunks;
  55. Hunk cur_hunk;
  56. auto flush_hunk = [&]() -> ErrorOr<void> {
  57. // A file with no content has a zero indexed start line.
  58. if (cur_hunk.location.new_range.start_line != 0 || cur_hunk.location.new_range.number_of_lines != 0)
  59. cur_hunk.location.new_range.start_line++;
  60. if (cur_hunk.location.old_range.start_line != 0 || cur_hunk.location.old_range.number_of_lines != 0)
  61. cur_hunk.location.old_range.start_line++;
  62. TRY(hunks.try_append(cur_hunk));
  63. cur_hunk.lines.clear();
  64. return {};
  65. };
  66. size_t i = 0;
  67. size_t j = 0;
  68. auto set_up_hunk_prepended_with_context = [&](Hunk& hunk) -> ErrorOr<void> {
  69. // Prefix the hunk with requested number context lines, and set the hunk location to where that context begins.
  70. size_t available_context = min(i, context);
  71. hunk.location.old_range = { i - available_context, available_context };
  72. hunk.location.new_range = { j - available_context, available_context };
  73. for (size_t offset = 0; offset < available_context; ++offset) {
  74. size_t context_line = i + offset - available_context;
  75. TRY(hunk.lines.try_append(Line { Line::Operation::Context, TRY(String::from_utf8(old_lines[context_line])) }));
  76. }
  77. return {};
  78. };
  79. size_t current_context = 0;
  80. while (i < old_lines.size() && j < new_lines.size()) {
  81. auto& cell = dp(i, j);
  82. if (cell.direction == Direction::Down) {
  83. if (cur_hunk.lines.is_empty())
  84. TRY(set_up_hunk_prepended_with_context(cur_hunk));
  85. TRY(cur_hunk.lines.try_append(Line { Line::Operation::Addition, TRY(String::from_utf8(new_lines[j])) }));
  86. cur_hunk.location.new_range.number_of_lines++;
  87. ++j;
  88. current_context = 0;
  89. } else if (cell.direction == Direction::Right) {
  90. if (cur_hunk.lines.is_empty())
  91. TRY(set_up_hunk_prepended_with_context(cur_hunk));
  92. TRY(cur_hunk.lines.try_append(Line { Line::Operation::Removal, TRY(String::from_utf8(old_lines[i])) }));
  93. cur_hunk.location.old_range.number_of_lines++;
  94. ++i;
  95. current_context = 0;
  96. } else {
  97. if (!cur_hunk.lines.is_empty()) {
  98. // We're currently in the middle of generating a hunk and have found a context line. If we have already added
  99. // the number of context lines that were requested then we have already finished with this hunk. Otherwise we
  100. // need to continue looking through the hunk until we have located the requested number of context lines in a
  101. // row.
  102. if (current_context == context) {
  103. TRY(flush_hunk());
  104. } else {
  105. TRY(cur_hunk.lines.try_append(Line { Line::Operation::Context, TRY(String::from_utf8(old_lines[i])) }));
  106. cur_hunk.location.new_range.number_of_lines++;
  107. cur_hunk.location.old_range.number_of_lines++;
  108. }
  109. ++current_context;
  110. }
  111. ++i;
  112. ++j;
  113. }
  114. }
  115. while (i < old_lines.size()) {
  116. if (cur_hunk.lines.is_empty())
  117. TRY(set_up_hunk_prepended_with_context(cur_hunk));
  118. TRY(cur_hunk.lines.try_append(Line { Line::Operation::Removal, TRY(String::from_utf8(old_lines[i])) }));
  119. cur_hunk.location.old_range.number_of_lines++;
  120. ++i;
  121. }
  122. while (j < new_lines.size()) {
  123. if (cur_hunk.lines.is_empty())
  124. TRY(set_up_hunk_prepended_with_context(cur_hunk));
  125. TRY(cur_hunk.lines.try_append(Line { Line::Operation::Addition, TRY(String::from_utf8(new_lines[j])) }));
  126. cur_hunk.location.new_range.number_of_lines++;
  127. ++j;
  128. }
  129. if (!cur_hunk.lines.is_empty())
  130. TRY(flush_hunk());
  131. return hunks;
  132. }
  133. }