LibMarkdown: Handle CRLF line endings

Previously, MDDocument only split on Unix-style line endings. This adds
a new function to StringView which handles LF, CR and CRLF.
This commit is contained in:
Tommy Nguyen 2019-12-02 07:42:33 -05:00 committed by Andreas Kling
parent 035c4e15f4
commit 2eb5793d55
Notes: sideshowbarker 2024-07-19 10:59:35 +09:00
4 changed files with 68 additions and 1 deletion

View file

@ -40,6 +40,46 @@ Vector<StringView> StringView::split_view(const char separator, bool keep_empty)
return v; return v;
} }
// Splits this view into lines and returns one StringView per line.
//
// consider_cr == false: only '\n' is a separator; the work is delegated to
//     split_view('\n', true).
// consider_cr == true: "\n", "\r" and "\r\n" each count as ONE line ending.
//     Zero-length lines (and a zero-length trailing segment) are not
//     appended to the result in this mode.
//
// An empty view yields an empty Vector.
//
// NOTE(review): the consider_cr == false path asks split_view to keep empty
// pieces while the path below skips them -- confirm that difference is
// intended.
Vector<StringView> StringView::lines(bool consider_cr) const
{
    if (is_empty())
        return {};

    if (!consider_cr)
        return split_view('\n', true);

    Vector<StringView> v;
    ssize_t substart = 0;
    bool last_ch_was_cr = false;
    for (ssize_t i = 0; i < length(); ++i) {
        char ch = characters_without_null_termination()[i];

        // A '\n' that directly follows a '\r' is the second half of a CRLF
        // pair; the line was already emitted when the '\r' was seen.
        bool completes_crlf = (ch == '\n') && last_ch_was_cr;

        // The flag must describe only the immediately preceding character.
        // Leaving it set across ordinary characters would make a later,
        // unrelated '\n' look like part of a CRLF pair and drop the line in
        // between (e.g. "a\rb\nc" would lose "b").
        last_ch_was_cr = (ch == '\r');

        if (completes_crlf) {
            // Skip over the '\n' without producing a line.
            substart = i + 1;
            continue;
        }

        if (ch == '\n' || ch == '\r') {
            ssize_t sublen = i - substart;
            if (sublen != 0)
                v.append(substring_view(substart, sublen));
            substart = i + 1;
        }
    }

    // Whatever follows the last line ending forms the final line.
    ssize_t taillen = length() - substart;
    if (taillen != 0)
        v.append(substring_view(substart, taillen));
    return v;
}
bool StringView::starts_with(const StringView& str) const bool StringView::starts_with(const StringView& str) const
{ {
if (str.is_empty()) if (str.is_empty())

View file

@ -46,6 +46,12 @@ public:
StringView substring_view(int start, int length) const; StringView substring_view(int start, int length) const;
Vector<StringView> split_view(char, bool keep_empty = false) const; Vector<StringView> split_view(char, bool keep_empty = false) const;
// Create a Vector of StringViews split by line endings. As of CommonMark
// 0.29, the spec defines a line ending as "a newline (U+000A), a carriage
// return (U+000D) not followed by a newline, or a carriage return and a
// following newline.".
//
// consider_cr: when true (the default), '\n', '\r' and "\r\n" are all
// treated as line endings, and zero-length lines are not included in the
// result. When false, the view is split on '\n' only, via
// split_view('\n', true).
//
// An empty view produces an empty Vector.
Vector<StringView> lines(bool consider_cr = true) const;
// FIXME: These should be shared between String and StringView somehow! // FIXME: These should be shared between String and StringView somehow!
unsigned to_uint(bool& ok) const; unsigned to_uint(bool& ok) const;
int to_int(bool& ok) const; int to_int(bool& ok) const;

View file

@ -42,4 +42,25 @@ TEST_CASE(starts_with)
EXPECT(!test_string_view.starts_with("DEF")); EXPECT(!test_string_view.starts_with("DEF"));
} }
TEST_CASE(lines)
{
    // One input mixing all three line-ending styles: LF, CRLF and a lone CR.
    String mixed_endings = "a\nb\r\nc\rd";
    Vector<StringView> mixed_lines = mixed_endings.view().lines();
    EXPECT_EQ(mixed_lines.size(), 4);
    EXPECT(mixed_lines.at(0) == String("a"));
    EXPECT(mixed_lines.at(1) == String("b"));
    EXPECT(mixed_lines.at(2) == String("c"));
    EXPECT(mixed_lines.at(3) == String("d"));

    // A fenced code block, as a Markdown document would contain, with one
    // CRLF among otherwise LF-terminated lines.
    String fenced_block = "```\nHello there\r\nHello there\n```";
    Vector<StringView> fenced_lines = fenced_block.view().lines();
    EXPECT_EQ(fenced_lines.size(), 4);
    EXPECT(fenced_lines.at(0) == String("```"));
    EXPECT(fenced_lines.at(1) == String("Hello there"));
    EXPECT(fenced_lines.at(2) == String("Hello there"));
    EXPECT(fenced_lines.at(3) == String("```"));
}
TEST_MAIN(StringView) TEST_MAIN(StringView)

View file

@ -49,7 +49,7 @@ static bool helper(Vector<StringView>::ConstIterator& lines, NonnullOwnPtrVector
bool MDDocument::parse(const StringView& str) bool MDDocument::parse(const StringView& str)
{ {
const Vector<StringView> lines_vec = str.split_view('\n', true); const Vector<StringView> lines_vec = str.lines();
auto lines = lines_vec.begin(); auto lines = lines_vec.begin();
while (true) { while (true) {