LibWeb: Make Node.normalize() ignore CDATASection nodes

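We hadn't modeled the "exclusive text node" concept correctly:
Node::normalize() decided which siblings to merge using is_text(), which
is also true for CDATASection nodes. Add Node::is_exclusive_text(), which
is true only for TEXT_NODE, and use it in normalize() instead.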
Author: https://github.com/awesomekling Commit: https://github.com/LadybirdBrowser/ladybird/commit/ab0dc83d287 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/2458
2024-11-21 15:10:19 +00:00 · 2024-11-20 11:31:59 +01:00 · 2024-11-20 11:31:59 +01:00 · ab0dc83d28 · 2024-11-20 15:12:01 +00:00
commit ab0dc83d28
parent 6fc06f45c2
4 changed files with 101 additions and 9 deletions
--- a/Libraries/LibWeb/DOM/Node.cpp
+++ b/Libraries/LibWeb/DOM/Node.cpp
@ -235,10 +235,7 @@ WebIDL::ExceptionOr<void> Node::normalize()
        Vector<Text*> nodes;

        auto* current_node = node.previous_sibling();
-        while (current_node) {
-            if (!current_node->is_text())
-                break;
-
+        while (current_node && current_node->is_exclusive_text()) {
            nodes.append(static_cast<Text*>(current_node));
            current_node = current_node->previous_sibling();
        }
@ -247,10 +244,7 @@ WebIDL::ExceptionOr<void> Node::normalize()
        nodes.reverse();

        current_node = node.next_sibling();
-        while (current_node) {
-            if (!current_node->is_text())
-                break;
-
+        while (current_node && current_node->is_exclusive_text()) {
            nodes.append(static_cast<Text*>(current_node));
            current_node = current_node->next_sibling();
        }
@ -291,7 +285,7 @@ WebIDL::ExceptionOr<void> Node::normalize()
        auto* current_node = node.next_sibling();

        // 6. While currentNode is an exclusive Text node:
-        while (current_node && is<Text>(*current_node)) {
+        while (current_node && current_node->is_exclusive_text()) {
            // 1. For each live range whose start node is currentNode, add length to its start offset and set its start node to node.
            for (auto& range : Range::live_ranges()) {
                if (range->start_container() == current_node)
--- a/Libraries/LibWeb/DOM/Node.h
+++ b/Libraries/LibWeb/DOM/Node.h
@ -111,6 +111,7 @@ public:
    NodeType type() const { return m_type; }
    bool is_element() const { return type() == NodeType::ELEMENT_NODE; }
    bool is_text() const { return type() == NodeType::TEXT_NODE || type() == NodeType::CDATA_SECTION_NODE; }
+    bool is_exclusive_text() const { return type() == NodeType::TEXT_NODE; } // True only for TEXT_NODE; unlike is_text(), CDATA_SECTION_NODE does not count.
    bool is_document() const { return type() == NodeType::DOCUMENT_NODE; }
    bool is_document_type() const { return type() == NodeType::DOCUMENT_TYPE_NODE; }
    bool is_comment() const { return type() == NodeType::COMMENT_NODE; }
--- a/Tests/LibWeb/Text/expected/wpt-import/dom/nodes/Node-normalize.txt
+++ b/Tests/LibWeb/Text/expected/wpt-import/dom/nodes/Node-normalize.txt
@ -0,0 +1,14 @@
+Summary
+
+Harness status: OK
+
+Rerun
+
+Found 4 tests
+
+4 Pass
+Details
+Result	Test Name	MessagePass	Node.normalize()	
+Pass	Empty text nodes separated by a non-empty text node	
+Pass	Empty text nodes	
+Pass	Non-text nodes with empty textContent values.	
--- a/Tests/LibWeb/Text/input/wpt-import/dom/nodes/Node-normalize.html
+++ b/Tests/LibWeb/Text/input/wpt-import/dom/nodes/Node-normalize.html
@ -0,0 +1,83 @@
+<!DOCTYPE html>
+<title>Node.normalize()</title>
+<script src="../../resources/testharness.js"></script>
+<script src="../../resources/testharnessreport.js"></script>
+<div id=log></div>
+<script>
+test(function() { // Basic merge behaviour: adjacent Text nodes under the normalized node and under its descendants.
+  var df = document.createDocumentFragment(),
+      t1 = document.createTextNode("1"),
+      t2 = document.createTextNode("2"),
+      t3 = document.createTextNode("3"),
+      t4 = document.createTextNode("4")
+  df.appendChild(t1)
+  df.appendChild(t2)
+  assert_equals(df.childNodes.length, 2)
+  assert_equals(df.textContent, "12")
+  var el = document.createElement('x')
+  df.appendChild(el)
+  el.appendChild(t3)
+  el.appendChild(t4)
+  document.normalize() // df is never inserted into document; the next four asserts expect df and el to be unchanged.
+  assert_equals(el.childNodes.length, 2)
+  assert_equals(el.textContent, "34")
+  assert_equals(df.childNodes.length, 3)
+  assert_equals(t1.data, "1")
+  df.normalize() // Expected to merge t2 into t1 (children of df) and t4 into t3 (children of el).
+  assert_equals(df.childNodes.length, 2)
+  assert_equals(df.firstChild, t1)
+  assert_equals(t1.data, "12")
+  assert_equals(t2.data, "2") // The merged-away node is expected to keep its own data.
+  assert_equals(el.firstChild, t3)
+  assert_equals(t3.data, "34")
+  assert_equals(t4.data, "4") // Same expectation for t4 after being merged into t3.
+})
+
+// https://www.w3.org/Bugs/Public/show_bug.cgi?id=19837
+test(function() {
+  var div = document.createElement("div")
+  var t1 = div.appendChild(document.createTextNode("")) // empty
+  var t2 = div.appendChild(document.createTextNode("a")) // non-empty
+  var t3 = div.appendChild(document.createTextNode("")) // empty
+  assert_array_equals(div.childNodes, [t1, t2, t3])
+  div.normalize();
+  assert_array_equals(div.childNodes, [t2]) // Both empty nodes are expected to be gone; t2 itself must survive.
+}, "Empty text nodes separated by a non-empty text node")
+test(function() {
+  var div = document.createElement("div")
+  var t1 = div.appendChild(document.createTextNode(""))
+  var t2 = div.appendChild(document.createTextNode(""))
+  assert_array_equals(div.childNodes, [t1, t2])
+  div.normalize();
+  assert_array_equals(div.childNodes, []) // With only empty Text children, normalize() is expected to leave no children at all.
+}, "Empty text nodes")
+
+// The specification for normalize is clear that only "exclusive Text
+// nodes" are to be modified. This excludes CDATASection nodes, which
+// derive from Text. Naïve implementations may fail to skip
+// CDATASection nodes, or even worse, try to test textContent or
+// nodeValue without taking care to check the node type. They will
+// fail this test.
+test(function() {
+  // We create an XML document so that we can create CDATASection.
+  // Except for the CDATASection the result should be the same for
+  // an HTML document. (No non-Text node should be touched.)
+  var doc = new DOMParser().parseFromString("<div/>", "text/xml")
+  var div = doc.documentElement
+  var t1 = div.appendChild(doc.createTextNode("a"))
+  // The first parameter is the "target" of the processing
+  // instruction, and the 2nd is the text content.
+  var t2 = div.appendChild(doc.createProcessingInstruction("pi", ""))
+  var t3 = div.appendChild(doc.createTextNode("b"))
+  var t4 = div.appendChild(doc.createCDATASection(""))
+  var t5 = div.appendChild(doc.createTextNode("c"))
+  var t6 = div.appendChild(doc.createComment(""))
+  var t7 = div.appendChild(doc.createTextNode("d"))
+  var t8 = div.appendChild(doc.createElement("el"))
+  var t9 = div.appendChild(doc.createTextNode("e"))
+  var expected = [t1, t2, t3, t4, t5, t6, t7, t8, t9] // Every non-empty Text node is separated from the next by a non-Text node (PI, CDATASection, Comment, Element).
+  assert_array_equals(div.childNodes, expected)
+  div.normalize();
+  assert_array_equals(div.childNodes, expected) // Nothing may be merged or removed, including the empty PI, CDATASection and Comment.
+}, "Non-text nodes with empty textContent values.")
+</script>