Selaa lähdekoodia

LibWeb: Refactor TextNode::ChunkIterator

This commit refactors the text chunking algorithm used in
TextNode::ChunkIterator. The m_start_of_chunk member parameter has been
replaced with a local variable that's anchored to the current iterator
at the start of every next() call, and the algorithm is made a little
more clear by explicitly separating what can and cannot peek into the
next character during iteration.
sin-ack 3 vuotta sitten
vanhempi
commit
0342fe4e0c

+ 46 - 25
Userland/Libraries/LibWeb/Layout/TextNode.cpp

@@ -322,7 +322,6 @@ TextNode::ChunkIterator::ChunkIterator(StringView const& text, LayoutMode layout
     , m_wrap_lines(wrap_lines)
     , m_wrap_breaks(wrap_breaks)
     , m_utf8_view(text)
-    , m_start_of_chunk(m_utf8_view.begin())
     , m_iterator(m_utf8_view.begin())
 {
     m_last_was_space = !text.is_empty() && is_ascii_space(*m_utf8_view.begin());
@@ -330,66 +329,88 @@ TextNode::ChunkIterator::ChunkIterator(StringView const& text, LayoutMode layout
 
 Optional<TextNode::Chunk> TextNode::ChunkIterator::next()
 {
+    if (m_iterator == m_utf8_view.end())
+        return {};
+
+    auto start_of_chunk = m_iterator;
     while (m_iterator != m_utf8_view.end()) {
-        auto guard = ScopeGuard([&] { ++m_iterator; });
-        if (m_layout_mode == LayoutMode::AllPossibleLineBreaks) {
-            if (auto result = try_commit_chunk(m_iterator, false); result.has_value())
+        ++m_iterator;
+
+        if (m_last_was_newline) {
+            // NOTE: This expression looks out for the case where we have
+            //       multiple newlines in a row. Because every output next()
+            //       that's a newline newline must be prepared for in advance by
+            //       the previous next() call, we need to check whether the next
+            //       character is a newline here as well. Otherwise, the newline
+            //       becomes part of the next expression and causes rendering
+            //       issues.
+            m_last_was_newline = m_iterator != m_utf8_view.end() && *m_iterator == '\n';
+            if (auto result = try_commit_chunk(start_of_chunk, m_iterator, true); result.has_value())
                 return result.release_value();
         }
-        if (m_last_was_newline) {
-            m_last_was_newline = false;
-            if (auto result = try_commit_chunk(m_iterator, true); result.has_value())
+
+        if (m_layout_mode == LayoutMode::AllPossibleLineBreaks) {
+            if (auto result = try_commit_chunk(start_of_chunk, m_iterator, false); result.has_value()) {
                 return result.release_value();
+            }
         }
+
+        // NOTE: The checks after this need to look at the current iterator
+        //       position, which depends on not being at the end.
+        if (m_iterator == m_utf8_view.end())
+            break;
+
+        // NOTE: When we're supposed to stop on linebreaks, we're actually
+        //       supposed to output two chunks: "content" and "\n". Since we
+        //       can't output two chunks at once, we store this information as a
+        //       flag to output the newline immediately at the earliest
+        //       opportunity.
         if (m_wrap_breaks && *m_iterator == '\n') {
             m_last_was_newline = true;
-            if (auto result = try_commit_chunk(m_iterator, false); result.has_value())
+            if (auto result = try_commit_chunk(start_of_chunk, m_iterator, false); result.has_value()) {
                 return result.release_value();
+            }
         }
+
         if (m_wrap_lines) {
             bool is_space = is_ascii_space(*m_iterator);
             if (is_space != m_last_was_space) {
                 m_last_was_space = is_space;
-                if (auto result = try_commit_chunk(m_iterator, false); result.has_value())
+                if (auto result = try_commit_chunk(start_of_chunk, m_iterator, false); result.has_value()) {
                     return result.release_value();
+                }
             }
         }
     }
 
-    if (m_last_was_newline) {
-        m_last_was_newline = false;
-        if (auto result = try_commit_chunk(m_utf8_view.end(), true); result.has_value())
-            return result.release_value();
-    }
-    if (m_start_of_chunk != m_utf8_view.end()) {
-        if (auto result = try_commit_chunk(m_utf8_view.end(), false, true); result.has_value())
+    if (start_of_chunk != m_utf8_view.end()) {
+        // Try to output whatever's left at the end of the text node.
+        if (auto result = try_commit_chunk(start_of_chunk, m_utf8_view.end(), false, true); result.has_value())
             return result.release_value();
     }
 
     return {};
 }
 
-Optional<TextNode::Chunk> TextNode::ChunkIterator::try_commit_chunk(Utf8View::Iterator const& it, bool has_breaking_newline, bool must_commit)
+Optional<TextNode::Chunk> TextNode::ChunkIterator::try_commit_chunk(Utf8View::Iterator const& start, Utf8View::Iterator const& end, bool has_breaking_newline, bool must_commit)
 {
     if (m_layout_mode == LayoutMode::OnlyRequiredLineBreaks && !must_commit)
         return {};
 
-    auto start = m_utf8_view.byte_offset_of(m_start_of_chunk);
-    auto length = m_utf8_view.byte_offset_of(it) - m_utf8_view.byte_offset_of(m_start_of_chunk);
+    auto byte_offset = m_utf8_view.byte_offset_of(start);
+    auto byte_length = m_utf8_view.byte_offset_of(end) - byte_offset;
 
-    if (has_breaking_newline || length > 0) {
-        auto chunk_view = m_utf8_view.substring_view(start, length);
-        m_start_of_chunk = it;
+    if (byte_length > 0) {
+        auto chunk_view = m_utf8_view.substring_view(byte_offset, byte_length);
         return Chunk {
             .view = chunk_view,
-            .start = start,
-            .length = length,
+            .start = byte_offset,
+            .length = byte_length,
             .has_breaking_newline = has_breaking_newline,
             .is_all_whitespace = is_all_whitespace(chunk_view.as_string()),
         };
     }
 
-    m_start_of_chunk = it;
     return {};
 }
 

+ 1 - 2
Userland/Libraries/LibWeb/Layout/TextNode.h

@@ -41,7 +41,7 @@ public:
         Optional<Chunk> next();
 
     private:
-        Optional<Chunk> try_commit_chunk(Utf8View::Iterator const&, bool has_breaking_newline, bool must_commit = false);
+        Optional<Chunk> try_commit_chunk(Utf8View::Iterator const& start, Utf8View::Iterator const& end, bool has_breaking_newline, bool must_commit = false);
 
         const LayoutMode m_layout_mode;
         const bool m_wrap_lines;
@@ -49,7 +49,6 @@ public:
         bool m_last_was_space { false };
         bool m_last_was_newline { false };
         Utf8View m_utf8_view;
-        Utf8View::Iterator m_start_of_chunk;
         Utf8View::Iterator m_iterator;
     };