Pārlūkot izejas kodu

LibHTML: Add basic <!DOCTYPE> parsing and a DocumentType class

Plus, Document::fixup() will now make sure that the document always
starts with a doctype node, followed by an <html> element.
Andreas Kling 5 gadi atpakaļ
vecāks
revīzija
fc53867937

+ 7 - 3
Libraries/LibHTML/DOM/Document.cpp

@@ -2,6 +2,7 @@
 #include <AK/StringBuilder.h>
 #include <LibHTML/CSS/StyleResolver.h>
 #include <LibHTML/DOM/Document.h>
+#include <LibHTML/DOM/DocumentType.h>
 #include <LibHTML/DOM/Element.h>
 #include <LibHTML/DOM/HTMLBodyElement.h>
 #include <LibHTML/DOM/HTMLHeadElement.h>
@@ -29,11 +30,14 @@ StyleResolver& Document::style_resolver()
 
 void Document::fixup()
 {
-    if (is<HTMLHtmlElement>(first_child()))
+    if (!is<DocumentType>(first_child()))
+        prepend_child(adopt(*new DocumentType(*this)));
+
+    if (is<HTMLHtmlElement>(first_child()->next_sibling()))
         return;
 
-    NonnullRefPtr<Element> body = adopt(*new Element(*this, "body"));
-    NonnullRefPtr<Element> html = adopt(*new Element(*this, "html"));
+    auto body = adopt(*new HTMLBodyElement(*this, "body"));
+    auto html = adopt(*new HTMLHtmlElement(*this, "html"));
     html->append_child(body);
     this->donate_all_children_to(body);
     this->append_child(html);

+ 10 - 0
Libraries/LibHTML/DOM/DocumentType.cpp

@@ -0,0 +1,10 @@
+#include <LibHTML/DOM/DocumentType.h>
+
+// Builds a doctype node associated with `document`; the node reports
+// NodeType::DOCUMENT_TYPE_NODE as its type.
+DocumentType::DocumentType(Document& document)
+    : Node(document, NodeType::DOCUMENT_TYPE_NODE)
+{
+}
+
+// DocumentType adds no cleanup of its own on destruction.
+DocumentType::~DocumentType() = default;

+ 17 - 0
Libraries/LibHTML/DOM/DocumentType.h

@@ -0,0 +1,17 @@
+#pragma once
+
+#include <LibHTML/DOM/Node.h>
+
+// DOM node representing a <!DOCTYPE> declaration.
+// It declares no data members; it only identifies itself via its node type
+// and a fixed tag name.
+class DocumentType final : public Node {
+public:
+    explicit DocumentType(Document&);
+    virtual ~DocumentType() override;
+
+    // Every doctype node reports the same fixed name.
+    virtual String tag_name() const override
+    {
+        return "!DOCTYPE";
+    }
+};
+
+// is<T>() specialization: a node counts as a DocumentType exactly when its
+// node type is NodeType::DOCUMENT_TYPE_NODE.
+template<>
+inline bool is<DocumentType>(const Node& node)
+{
+    return node.is_document_type();
+}

+ 5 - 0
Libraries/LibHTML/DOM/Node.cpp

@@ -97,3 +97,8 @@ const Element* Node::previous_element_sibling() const
     }
     return nullptr;
 }
+
+// Base implementation: yields a null RefPtr, i.e. no layout node.
+// The method is virtual, so node types that need a layout node override it.
+RefPtr<LayoutNode> Node::create_layout_node(const StyleResolver&, const StyleProperties*) const
+{
+    return {};
+}

+ 3 - 1
Libraries/LibHTML/DOM/Node.h

@@ -11,6 +11,7 @@ enum class NodeType : unsigned {
     ELEMENT_NODE = 1,
     TEXT_NODE = 3,
     DOCUMENT_NODE = 9,
+    DOCUMENT_TYPE_NODE = 10,
 };
 
 class Document;
@@ -30,9 +31,10 @@ public:
     bool is_element() const { return type() == NodeType::ELEMENT_NODE; }
     bool is_text() const { return type() == NodeType::TEXT_NODE; }
     bool is_document() const { return type() == NodeType::DOCUMENT_NODE; }
+    bool is_document_type() const { return type() == NodeType::DOCUMENT_TYPE_NODE; }
     bool is_parent_node() const { return is_element() || is_document(); }
 
-    virtual RefPtr<LayoutNode> create_layout_node(const StyleResolver&, const StyleProperties* parent_style) const = 0;
+    virtual RefPtr<LayoutNode> create_layout_node(const StyleResolver&, const StyleProperties* parent_style) const;
     RefPtr<LayoutNode> create_layout_tree(const StyleResolver&, const StyleProperties* parent_style) const;
 
     virtual String tag_name() const = 0;

+ 3 - 0
Libraries/LibHTML/Dump.cpp

@@ -1,6 +1,7 @@
 #include <AK/Utf8View.h>
 #include <LibHTML/CSS/StyleSheet.h>
 #include <LibHTML/DOM/Document.h>
+#include <LibHTML/DOM/DocumentType.h>
 #include <LibHTML/DOM/Element.h>
 #include <LibHTML/DOM/Text.h>
 #include <LibHTML/Dump.h>
@@ -24,6 +25,8 @@ void dump_tree(const Node& node)
         dbgprintf(">\n");
     } else if (is<Text>(node)) {
         dbgprintf("\"%s\"\n", static_cast<const Text&>(node).data().characters());
+    } else if (is<DocumentType>(node)) {
+        dbgprintf("<!DOCTYPE>\n");
     }
     ++indent;
     if (is<ParentNode>(node)) {

+ 1 - 0
Libraries/LibHTML/Makefile.shared

@@ -16,6 +16,7 @@ LIBHTML_OBJS = \
     DOM/HTMLLinkElement.o \
     DOM/Document.o \
     DOM/Text.o \
+    DOM/DocumentType.o \
     CSS/Selector.o \
     CSS/StyleSheet.o \
     CSS/StyleRule.o \

+ 21 - 2
Libraries/LibHTML/Parser/HTMLParser.cpp

@@ -1,6 +1,7 @@
 #include <AK/Function.h>
 #include <AK/NonnullRefPtrVector.h>
 #include <AK/StringBuilder.h>
+#include <LibHTML/DOM/DocumentType.h>
 #include <LibHTML/DOM/Element.h>
 #include <LibHTML/DOM/HTMLAnchorElement.h>
 #include <LibHTML/DOM/HTMLBodyElement.h>
@@ -10,9 +11,9 @@
 #include <LibHTML/DOM/HTMLHeadingElement.h>
 #include <LibHTML/DOM/HTMLHtmlElement.h>
 #include <LibHTML/DOM/HTMLImageElement.h>
+#include <LibHTML/DOM/HTMLLinkElement.h>
 #include <LibHTML/DOM/HTMLStyleElement.h>
 #include <LibHTML/DOM/HTMLTitleElement.h>
-#include <LibHTML/DOM/HTMLLinkElement.h>
 #include <LibHTML/DOM/Text.h>
 #include <LibHTML/Parser/HTMLParser.h>
 #include <ctype.h>
@@ -106,10 +107,12 @@ NonnullRefPtr<Document> parse_html(const StringView& html, const URL& url)
     Vector<char, 256> attribute_value_buffer;
 
     bool is_slash_tag = false;
+    bool is_exclamation_tag = false;
 
     auto move_to_state = [&](State new_state) {
         if (new_state == State::BeforeTagName) {
             is_slash_tag = false;
+            is_exclamation_tag = false;
             tag_name_buffer.clear();
             attributes.clear();
         }
@@ -142,8 +145,19 @@ NonnullRefPtr<Document> parse_html(const StringView& html, const URL& url)
             close_tag();
     };
 
+    // Commits a "<!NAME ...>" tag. Only the exact name "DOCTYPE" is accepted;
+    // anything else trips the ASSERT below.
+    // NOTE(review): the comparison is case-sensitive, so input such as
+    // "<!doctype html>" would presumably assert -- confirm against the tokenizer.
+    auto handle_exclamation_tag = [&] {
+        auto declaration_name = String::copy(tag_name_buffer);
+        tag_name_buffer.clear();
+        ASSERT(declaration_name == "DOCTYPE");
+        if (node_stack.size() != 1) {
+            // Attach the doctype to the entry just below the top of the stack,
+            // without triggering the inserted_into() callback.
+            auto& attach_target = node_stack[node_stack.size() - 2];
+            attach_target.append_child(adopt(*new DocumentType(document)), false);
+        }
+        close_tag();
+    };
+
     auto commit_tag = [&] {
-        if (is_slash_tag)
+        if (is_exclamation_tag)
+            handle_exclamation_tag();
+        else if (is_slash_tag)
             close_tag();
         else
             open_tag();
@@ -159,6 +173,7 @@ NonnullRefPtr<Document> parse_html(const StringView& html, const URL& url)
         case State::Free:
             if (ch == '<') {
                 is_slash_tag = false;
+                is_exclamation_tag = false;
                 move_to_state(State::BeforeTagName);
                 break;
             }
@@ -193,6 +208,10 @@ NonnullRefPtr<Document> parse_html(const StringView& html, const URL& url)
                 is_slash_tag = true;
                 break;
             }
+            if (ch == '!') {
+                is_exclamation_tag = true;
+                break;
+            }
             if (ch == '>') {
                 move_to_state(State::Free);
                 break;

+ 17 - 0
Libraries/LibHTML/TreeNode.h

@@ -33,6 +33,7 @@ public:
     const T* first_child() const { return m_first_child; }
     const T* last_child() const { return m_last_child; }
 
+    void prepend_child(NonnullRefPtr<T> node, bool call_inserted_into = true);
     void append_child(NonnullRefPtr<T> node, bool call_inserted_into = true);
     void donate_all_children_to(T& node);
 
@@ -64,6 +65,22 @@ inline void TreeNode<T>::append_child(NonnullRefPtr<T> node, bool call_inserted_
     (void)node.leak_ref();
 }
 
+// Inserts `node` as the first child of this tree node.
+// `node` must not already have a parent (asserted). When `call_inserted_into`
+// is true, node->inserted_into(*this) is invoked after the links are updated.
+template<typename T>
+inline void TreeNode<T>::prepend_child(NonnullRefPtr<T> node, bool call_inserted_into)
+{
+    ASSERT(!node->m_parent);
+
+    T* new_head = node.ptr();
+    T* old_head = m_first_child;
+
+    // Splice the new node in front of the current first child, if any.
+    if (old_head)
+        old_head->m_previous_sibling = new_head;
+    new_head->m_next_sibling = old_head;
+    new_head->m_parent = static_cast<T*>(this);
+    m_first_child = new_head;
+
+    // An empty child list means the new node is also the last child.
+    if (!m_last_child)
+        m_last_child = new_head;
+
+    if (call_inserted_into)
+        new_head->inserted_into(static_cast<T&>(*this));
+
+    // Same hand-off as append_child(): the smart pointer gives up its
+    // reference here; presumably the tree's raw links now hold it -- confirm.
+    (void)node.leak_ref();
+}
+
 template<typename T>
 inline void TreeNode<T>::donate_all_children_to(T& node)
 {