Generator.cpp 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124
  1. /*
  2. * Copyright (c) 2021, Mustafa Quraish <mustafa@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include "Generator.h"
  7. namespace Diff {
  8. Vector<Hunk> from_text(StringView old_text, StringView new_text)
  9. {
  10. auto old_lines = old_text.lines();
  11. auto new_lines = new_text.lines();
  12. /**
  13. * This is a simple implementation of the Longest Common Subsequence algorithm (over
  14. * the lines of the text as opposed to the characters). A Dynamic programming approach
  15. * is used here.
  16. */
  17. enum class Direction {
  18. Down, // Added a new line
  19. Right, // Removed a line
  20. Diagonal, // Line remained the same
  21. };
  22. // A single cell in the DP-matrix. Cell (i, j) represents the longest common
  23. // sub-sequence of lines between old_lines[0 : i] and new_lines[0 : j].
  24. struct Cell {
  25. size_t length;
  26. Direction direction;
  27. };
  28. auto dp_matrix = Vector<Cell>();
  29. dp_matrix.resize((old_lines.size() + 1) * (new_lines.size() + 1));
  30. auto dp = [&dp_matrix, width = old_lines.size() + 1](size_t i, size_t j) -> Cell& {
  31. return dp_matrix[i + width * j];
  32. };
  33. // Initialize the first row and column
  34. for (size_t i = 0; i <= old_lines.size(); ++i)
  35. dp(i, new_lines.size()) = { 0, Direction::Right };
  36. for (size_t j = 0; j <= new_lines.size(); ++j)
  37. dp(old_lines.size(), 0) = { 0, Direction::Down };
  38. // Fill in the rest of the DP table
  39. for (int i = old_lines.size() - 1; i >= 0; --i) {
  40. for (int j = new_lines.size() - 1; j >= 0; --j) {
  41. if (old_lines[i] == new_lines[j]) {
  42. dp(i, j) = { dp(i + 1, j + 1).length + 1, Direction::Diagonal };
  43. } else {
  44. auto down = dp(i, j + 1).length;
  45. auto right = dp(i + 1, j).length;
  46. if (down > right)
  47. dp(i, j) = { down, Direction::Down };
  48. else
  49. dp(i, j) = { right, Direction::Right };
  50. }
  51. }
  52. }
  53. Vector<Hunk> hunks;
  54. Hunk cur_hunk;
  55. bool in_hunk = false;
  56. auto update_hunk = [&](size_t i, size_t j, Direction direction) {
  57. if (!in_hunk) {
  58. in_hunk = true;
  59. cur_hunk = { i, j, {}, {} };
  60. }
  61. if (direction == Direction::Down) {
  62. cur_hunk.added_lines.append(new_lines[j]);
  63. } else if (direction == Direction::Right) {
  64. cur_hunk.removed_lines.append(old_lines[i]);
  65. }
  66. };
  67. auto flush_hunk = [&]() {
  68. if (in_hunk) {
  69. if (cur_hunk.added_lines.size() > 0)
  70. cur_hunk.target_start_line++;
  71. if (cur_hunk.removed_lines.size() > 0)
  72. cur_hunk.original_start_line++;
  73. hunks.append(cur_hunk);
  74. in_hunk = false;
  75. }
  76. };
  77. size_t i = 0;
  78. size_t j = 0;
  79. while (i < old_lines.size() && j < new_lines.size()) {
  80. auto& cell = dp(i, j);
  81. if (cell.direction == Direction::Down) {
  82. update_hunk(i, j, cell.direction);
  83. ++j;
  84. } else if (cell.direction == Direction::Right) {
  85. update_hunk(i, j, cell.direction);
  86. ++i;
  87. } else {
  88. ++i;
  89. ++j;
  90. flush_hunk();
  91. }
  92. }
  93. while (i < old_lines.size()) {
  94. update_hunk(i, new_lines.is_empty() ? 0 : new_lines.size() - 1, Direction::Right); // Remove a line
  95. ++i;
  96. }
  97. while (j < new_lines.size()) {
  98. update_hunk(old_lines.is_empty() ? 0 : old_lines.size() - 1, j, Direction::Down); // Add a line
  99. ++j;
  100. }
  101. flush_hunk();
  102. return hunks;
  103. }
  104. }