mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-25 09:00:22 +00:00
LibDiff: Add new API to generate hunks from two pieces of text
For now this is just a standard implementation of the longest common subsequence algorithm over the lines, except that it doesn't do any coalescing of the lines. This isn't really ideal since we get a single Hunk per changed line, and is definitely something to improve in the future.
This commit is contained in:
parent
27f28998b1
commit
5e28da1aa4
Notes:
sideshowbarker
2024-07-18 03:46:25 +09:00
Author: https://github.com/mustafaquraish Commit: https://github.com/SerenityOS/serenity/commit/5e28da1aa40 Pull-request: https://github.com/SerenityOS/serenity/pull/10052 Reviewed-by: https://github.com/BenWiederhake ✅ Reviewed-by: https://github.com/bgianfo ✅
3 changed files with 105 additions and 1 deletions
|
@ -1,7 +1,8 @@
|
|||
|
||||
set(SOURCES
|
||||
Hunks.cpp
|
||||
Format.cpp
|
||||
Generator.cpp
|
||||
Hunks.cpp
|
||||
)
|
||||
|
||||
serenity_lib(LibDiff diff)
|
||||
|
|
88
Userland/Libraries/LibDiff/Generator.cpp
Normal file
88
Userland/Libraries/LibDiff/Generator.cpp
Normal file
|
@ -0,0 +1,88 @@
|
|||
/*
|
||||
* Copyright (c) 2021, Mustafa Quraish <mustafa@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include "Generator.h"
|
||||
|
||||
namespace Diff {
|
||||
|
||||
// Computes a line-based diff between `old_text` and `new_text`.
//
// The texts are split into lines and a Longest Common Subsequence (LCS) table is
// built over those lines with dynamic programming. The table is then walked back
// from the bottom-right corner to the origin, emitting one Hunk for every line
// that was added or removed. Adjacent changes are not coalesced, so each Hunk
// describes exactly one line.
//
// Returns the hunks ordered from the start of the texts to the end.
//
// NOTE(review): the Hunk initializers below are assumed to be
// { old position, new position, removed lines, added lines } - confirm against Hunks.h.
// NOTE(review): the table needs (old line count + 1) * (new line count + 1) cells,
// so memory use grows quadratically with input size.
Vector<Hunk> from_text(StringView const& old_text, StringView const& new_text)
{
    auto old_lines = old_text.lines();
    auto new_lines = new_text.lines();

    // How a cell's LCS value was reached; followed in reverse during the traceback.
    enum class Direction {
        Up,       // Added a new line (came from (i, j - 1))
        Left,     // Removed a line (came from (i - 1, j))
        Diagonal, // Line remained the same (came from (i - 1, j - 1))
    };

    // A single cell in the DP-matrix. Cell (i, j) represents the longest common
    // sub-sequence of lines between old_lines[0 : i] and new_lines[0 : j].
    struct Cell {
        size_t length;
        Direction direction;
    };

    auto dp_matrix = Vector<Cell>();
    dp_matrix.resize((old_lines.size() + 1) * (new_lines.size() + 1));

    // The matrix is stored flat; `i` indexes old lines, `j` indexes new lines.
    auto dp = [&dp_matrix, width = old_lines.size() + 1](size_t i, size_t j) -> Cell& {
        return dp_matrix[i + width * j];
    };

    // Initialize the first row and column. The directions stored here matter: once
    // the traceback reaches a border they lead it straight back to the origin, so
    // every leftover old line is reported as removed and every leftover new line
    // as added.
    for (size_t i = 0; i <= old_lines.size(); ++i)
        dp(i, 0) = { 0, Direction::Left };

    for (size_t j = 0; j <= new_lines.size(); ++j)
        dp(0, j) = { 0, Direction::Up };

    // Fill in the rest of the DP table
    for (size_t i = 1; i <= old_lines.size(); ++i) {
        for (size_t j = 1; j <= new_lines.size(); ++j) {
            if (old_lines[i - 1] == new_lines[j - 1]) {
                dp(i, j) = { dp(i - 1, j - 1).length + 1, Direction::Diagonal };
            } else {
                auto up = dp(i, j - 1).length;
                auto left = dp(i - 1, j).length;
                if (up > left)
                    dp(i, j) = { up, Direction::Up };
                else
                    dp(i, j) = { left, Direction::Left };
            }
        }
    }

    Vector<Hunk> hunks;
    size_t i = old_lines.size();
    size_t j = new_lines.size();

    // FIXME: This creates a hunk per line, very inefficient.
    // Walk back until BOTH indices reach zero. Stopping as soon as either one hits
    // zero would silently drop the lines remaining at the start of the other text.
    // On the borders the direction is always Left (j == 0, i > 0) or Up (i == 0),
    // so neither index can be decremented below zero.
    while (i > 0 || j > 0) {
        auto& cell = dp(i, j);
        if (cell.direction == Direction::Up) {
            --j;
            hunks.append({ i, j, {}, { new_lines[j] } });
        } else if (cell.direction == Direction::Left) {
            --i;
            hunks.append({ i, j, { old_lines[i] }, {} });
        } else if (cell.direction == Direction::Diagonal) {
            --i;
            --j;
        }
    }

    // The traceback visits changes from the end to the start; flip to document order.
    hunks.reverse();
    return hunks;
}
|
||||
|
||||
}
|
15
Userland/Libraries/LibDiff/Generator.h
Normal file
15
Userland/Libraries/LibDiff/Generator.h
Normal file
|
@ -0,0 +1,15 @@
|
|||
/*
|
||||
* Copyright (c) 2021, Mustafa Quraish <mustafa@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Hunks.h"
|
||||
|
||||
namespace Diff {
|
||||
|
||||
// Generates a line-based diff between `old_text` and `new_text` using a
// longest-common-subsequence comparison of their lines. Each returned Hunk
// describes a single added or removed line; adjacent changes are not coalesced.
Vector<Hunk> from_text(StringView const& old_text, StringView const& new_text);
|
||||
|
||||
}
|
Loading…
Reference in a new issue