mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-22 07:30:19 +00:00
LibMarkdown: Handle CRLF line endings
Previously, MDDocument only split on Unix-style line endings. This adds a new function to StringView which handles LF, CR and CRLF.
This commit is contained in:
parent
035c4e15f4
commit
2eb5793d55
Notes:
sideshowbarker
2024-07-19 10:59:35 +09:00
Author: https://github.com/remyabel2 🔰 Commit: https://github.com/SerenityOS/serenity/commit/2eb5793d55b
4 changed files with 68 additions and 1 deletions
|
@ -40,6 +40,46 @@ Vector<StringView> StringView::split_view(const char separator, bool keep_empty)
|
|||
return v;
|
||||
}
|
||||
|
||||
// Splits this view into lines and returns one StringView per line, with the
// line terminator stripped.
//
// consider_cr == false: only '\n' terminates a line; the work is delegated to
//     split_view('\n', true).
// consider_cr == true:  "\n", "\r\n" and a lone "\r" each terminate exactly
//     one line.
//
// An empty view yields an empty Vector. Empty lines between terminators are
// kept (blank lines are significant to callers such as the Markdown parser,
// and the '\n'-only path keeps them as well). A terminator at the very end of
// the view does not produce an additional trailing empty entry.
Vector<StringView> StringView::lines(bool consider_cr) const
{
    if (is_empty())
        return {};

    if (!consider_cr)
        return split_view('\n', true);

    Vector<StringView> v;
    ssize_t substart = 0;
    bool last_ch_was_cr = false;
    for (ssize_t i = 0; i < length(); ++i) {
        char ch = characters_without_null_termination()[i];
        bool is_cr = ch == '\r';
        if (ch == '\n' && last_ch_was_cr) {
            // Second half of a CRLF pair: the line was already emitted when
            // the '\r' was seen, so only step over the '\n'.
            substart = i + 1;
        } else if (is_cr || ch == '\n') {
            v.append(substring_view(substart, i - substart));
            substart = i + 1;
        }
        // Track only the immediately preceding character. Leaving this flag
        // set across ordinary characters would make a later, unrelated '\n'
        // look like the tail of a CRLF and swallow the line before it.
        last_ch_was_cr = is_cr;
    }

    ssize_t taillen = length() - substart;
    if (taillen != 0)
        v.append(substring_view(substart, taillen));
    return v;
}
|
||||
|
||||
bool StringView::starts_with(const StringView& str) const
|
||||
{
|
||||
if (str.is_empty())
|
||||
|
|
|
@ -46,6 +46,12 @@ public:
|
|||
StringView substring_view(int start, int length) const;
|
||||
Vector<StringView> split_view(char, bool keep_empty = false) const;
|
||||
|
||||
// Create a Vector of StringViews split by line endings. As of CommonMark
|
||||
// 0.29, the spec defines a line ending as "a newline (U+000A), a carriage
|
||||
// return (U+000D) not followed by a newline, or a carriage return and a
|
||||
// following newline." When consider_cr is false, only a newline (U+000A) is
// treated as a line ending.
|
||||
Vector<StringView> lines(bool consider_cr = true) const;
|
||||
|
||||
// FIXME: These should be shared between String and StringView somehow!
|
||||
unsigned to_uint(bool& ok) const;
|
||||
int to_int(bool& ok) const;
|
||||
|
|
|
@ -42,4 +42,25 @@ TEST_CASE(starts_with)
|
|||
EXPECT(!test_string_view.starts_with("DEF"));
|
||||
}
|
||||
|
||||
// Exercises StringView::lines() with its default argument on inputs that mix
// LF, CRLF and lone-CR line terminators.
TEST_CASE(lines)
{
    // One of each terminator kind: every terminator ends exactly one line.
    {
        String mixed_endings = "a\nb\r\nc\rd";
        StringView mixed_view = mixed_endings.view();
        Vector<StringView> split = mixed_view.lines();
        EXPECT_EQ(split.size(), 4);
        EXPECT(split.at(0) == String("a"));
        EXPECT(split.at(1) == String("b"));
        EXPECT(split.at(2) == String("c"));
        EXPECT(split.at(3) == String("d"));
    }

    // A fenced code block whose inner lines use different terminators.
    {
        String fenced_block = "```\nHello there\r\nHello there\n```";
        StringView fenced_view = fenced_block.view();
        Vector<StringView> split = fenced_view.lines();
        EXPECT_EQ(split.size(), 4);
        EXPECT(split.at(0) == String("```"));
        EXPECT(split.at(1) == String("Hello there"));
        EXPECT(split.at(2) == String("Hello there"));
        EXPECT(split.at(3) == String("```"));
    }
}
|
||||
|
||||
TEST_MAIN(StringView)
|
||||
|
|
|
@ -49,7 +49,7 @@ static bool helper(Vector<StringView>::ConstIterator& lines, NonnullOwnPtrVector
|
|||
|
||||
bool MDDocument::parse(const StringView& str)
|
||||
{
|
||||
const Vector<StringView> lines_vec = str.split_view('\n', true);
|
||||
const Vector<StringView> lines_vec = str.lines();
|
||||
auto lines = lines_vec.begin();
|
||||
|
||||
while (true) {
|
||||
|
|
Loading…
Reference in a new issue