LibWeb: Make Node.normalize() ignore CDATASection nodes

We hadn't modeled the "exclusive text node" concept correctly.
Author: https://github.com/awesomekling Commit: https://github.com/LadybirdBrowser/ladybird/commit/ab0dc83d287 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/2458
2024-11-21 23:20:20 +00:00 · 2024-11-20 11:31:59 +01:00 · 2024-11-20 11:31:59 +01:00 · ab0dc83d28 · 2024-11-20 15:12:01 +00:00
commit ab0dc83d28
parent 6fc06f45c2
4 changed files with 101 additions and 9 deletions
--- a/Libraries/LibWeb/DOM/Node.cpp
+++ b/Libraries/LibWeb/DOM/Node.cpp
@ -235,10 +235,7 @@ WebIDL::ExceptionOr<void> Node::normalize()
        Vector<Text*> nodes;
        auto* current_node = node.previous_sibling();
-        while (current_node) {
+        while (current_node && current_node->is_exclusive_text()) {
-            if (!current_node->is_text())
-                break;
            nodes.append(static_cast<Text*>(current_node));
            current_node = current_node->previous_sibling();
        }
@ -247,10 +244,7 @@ WebIDL::ExceptionOr<void> Node::normalize()
        nodes.reverse();
        current_node = node.next_sibling();
-        while (current_node) {
+        while (current_node && current_node->is_exclusive_text()) {
-            if (!current_node->is_text())
-                break;
            nodes.append(static_cast<Text*>(current_node));
            current_node = current_node->next_sibling();
        }
@ -291,7 +285,7 @@ WebIDL::ExceptionOr<void> Node::normalize()
        auto* current_node = node.next_sibling();
        // 6. While currentNode is an exclusive Text node:
-        while (current_node && is<Text>(*current_node)) {
+        while (current_node && current_node->is_exclusive_text()) {
            // 1. For each live range whose start node is currentNode, add length to its start offset and set its start node to node.
            for (auto& range : Range::live_ranges()) {
                if (range->start_container() == current_node)
--- a/Libraries/LibWeb/DOM/Node.h
+++ b/Libraries/LibWeb/DOM/Node.h
@ -111,6 +111,7 @@ public:
    // Returns the stored node type (m_type); the predicates below compare against it.
    NodeType type() const { return m_type; }
    bool is_element() const { return type() == NodeType::ELEMENT_NODE; }
    // True for TEXT_NODE and also for CDATA_SECTION_NODE.
    bool is_text() const { return type() == NodeType::TEXT_NODE || type() == NodeType::CDATA_SECTION_NODE; }
    // True only for TEXT_NODE. Unlike is_text(), CDATA_SECTION_NODE is rejected here,
    // so callers that must skip CDATASection nodes should use this predicate.
    bool is_exclusive_text() const { return type() == NodeType::TEXT_NODE; }
    bool is_document() const { return type() == NodeType::DOCUMENT_NODE; }
    bool is_document_type() const { return type() == NodeType::DOCUMENT_TYPE_NODE; }
    bool is_comment() const { return type() == NodeType::COMMENT_NODE; }
--- a/Tests/LibWeb/Text/expected/wpt-import/dom/nodes/Node-normalize.txt
+++ b/Tests/LibWeb/Text/expected/wpt-import/dom/nodes/Node-normalize.txt
@ -0,0 +1,14 @@
 Summary
 Harness status: OK
 Rerun
 Found 4 tests
 4 Pass
 Details
 Result	Test Name	MessagePass	Node.normalize()	
 Pass	Empty text nodes separated by a non-empty text node	
 Pass	Empty text nodes	
 Pass	Non-text nodes with empty textContent values.	
--- a/Tests/LibWeb/Text/input/wpt-import/dom/nodes/Node-normalize.html
+++ b/Tests/LibWeb/Text/input/wpt-import/dom/nodes/Node-normalize.html
@ -0,0 +1,83 @@
 <!DOCTYPE html>
 <title>Node.normalize()</title>
 <script src="../../resources/testharness.js"></script>
 <script src="../../resources/testharnessreport.js"></script>
 <div id=log></div>
 <script>
 test(function() {
   // Build a fragment that is never inserted into the document: two adjacent
   // text nodes, followed by an element holding two more adjacent text nodes.
   const fragment = document.createDocumentFragment();
   const first = document.createTextNode("1");
   const second = document.createTextNode("2");
   const third = document.createTextNode("3");
   const fourth = document.createTextNode("4");

   fragment.appendChild(first);
   fragment.appendChild(second);
   assert_equals(fragment.childNodes.length, 2);
   assert_equals(fragment.textContent, "12");

   const holder = document.createElement('x');
   fragment.appendChild(holder);
   holder.appendChild(third);
   holder.appendChild(fourth);

   // Normalizing the document leaves the fragment's subtree as it was.
   document.normalize();
   assert_equals(holder.childNodes.length, 2);
   assert_equals(holder.textContent, "34");
   assert_equals(fragment.childNodes.length, 3);
   assert_equals(first.data, "1");

   // Normalizing the fragment merges each run of adjacent text nodes into the
   // first node of the run; the removed nodes keep their own data.
   fragment.normalize();
   assert_equals(fragment.childNodes.length, 2);
   assert_equals(fragment.firstChild, first);
   assert_equals(first.data, "12");
   assert_equals(second.data, "2");
   assert_equals(holder.firstChild, third);
   assert_equals(third.data, "34");
   assert_equals(fourth.data, "4");
 })
 // See https://www.w3.org/Bugs/Public/show_bug.cgi?id=19837
 test(function() {
   const container = document.createElement("div");
   const leadingEmpty = container.appendChild(document.createTextNode(""));
   const middle = container.appendChild(document.createTextNode("a"));
   const trailingEmpty = container.appendChild(document.createTextNode(""));
   assert_array_equals(container.childNodes, [leadingEmpty, middle, trailingEmpty]);

   // Only the non-empty text node is expected to survive.
   container.normalize();
   assert_array_equals(container.childNodes, [middle]);
 }, "Empty text nodes separated by a non-empty text node")
 test(function() {
   const container = document.createElement("div");
   const emptyA = container.appendChild(document.createTextNode(""));
   const emptyB = container.appendChild(document.createTextNode(""));
   assert_array_equals(container.childNodes, [emptyA, emptyB]);

   // With nothing but empty text nodes, normalize() should leave no children.
   container.normalize();
   assert_array_equals(container.childNodes, []);
 }, "Empty text nodes")
 // normalize() is specified to modify only "exclusive Text nodes".
 // CDATASection nodes derive from Text but are not exclusive Text nodes,
 // so they must be left alone. An implementation that forgets to skip
 // CDATASection nodes, or that inspects textContent/nodeValue without
 // first checking the node type, will fail this test.
 test(function() {
   // CDATASection nodes can only be created in an XML document, so parse one.
   // Apart from the CDATASection, the outcome should match an HTML document:
   // no non-Text node may be touched.
   const xmlDoc = new DOMParser().parseFromString("<div/>", "text/xml");
   const root = xmlDoc.documentElement;
   const attach = (child) => root.appendChild(child);

   // Every text node is separated from the next by a non-Text node (or a
   // CDATASection) with empty content, so nothing is eligible for merging.
   const expectedChildren = [
     attach(xmlDoc.createTextNode("a")),
     // createProcessingInstruction(target, data): "pi" is the target and
     // the empty string is the text content.
     attach(xmlDoc.createProcessingInstruction("pi", "")),
     attach(xmlDoc.createTextNode("b")),
     attach(xmlDoc.createCDATASection("")),
     attach(xmlDoc.createTextNode("c")),
     attach(xmlDoc.createComment("")),
     attach(xmlDoc.createTextNode("d")),
     attach(xmlDoc.createElement("el")),
     attach(xmlDoc.createTextNode("e")),
   ];
   assert_array_equals(root.childNodes, expectedChildren);

   root.normalize();
   assert_array_equals(root.childNodes, expectedChildren);
 }, "Non-text nodes with empty textContent values.")
 </script>