瀏覽代碼

LibWeb: Implement quirks mode detection

This allows us to determine which mode to render the page in.

Exposes "doctype" and "compatMode" on Document.
Exposes "name", "publicId" and "systemId" on DocumentType.
Luke 5 年之前
父節點
當前提交
19d6884529

+ 3 - 0
Libraries/LibWeb/Bindings/NodeWrapperFactory.cpp

@@ -25,6 +25,7 @@
  */
 
 #include <LibWeb/Bindings/DocumentWrapper.h>
+#include <LibWeb/Bindings/DocumentTypeWrapper.h>
 #include <LibWeb/Bindings/HTMLCanvasElementWrapper.h>
 #include <LibWeb/Bindings/HTMLImageElementWrapper.h>
 #include <LibWeb/Bindings/HTMLElementWrapper.h>
@@ -41,6 +42,8 @@ NodeWrapper* wrap(JS::GlobalObject& global_object, Node& node)
 {
     if (is<Document>(node))
         return static_cast<NodeWrapper*>(wrap_impl(global_object, to<Document>(node)));
+    if (is<DocumentType>(node))
+        return static_cast<NodeWrapper*>(wrap_impl(global_object, to<DocumentType>(node)));
     if (is<HTMLCanvasElement>(node))
         return static_cast<NodeWrapper*>(wrap_impl(global_object, to<HTMLCanvasElement>(node)));
     if (is<HTMLImageElement>(node))

+ 1 - 0
Libraries/LibWeb/CMakeLists.txt

@@ -156,6 +156,7 @@ endfunction()
 libweb_js_wrapper(EventTarget)
 libweb_js_wrapper(Node)
 libweb_js_wrapper(Document)
+libweb_js_wrapper(DocumentType)
 libweb_js_wrapper(Element)
 libweb_js_wrapper(HTMLElement)
 libweb_js_wrapper(HTMLImageElement)

+ 1 - 0
Libraries/LibWeb/CodeGenerators/WrapperGenerator.cpp

@@ -423,6 +423,7 @@ void generate_implementation(const IDL::Interface& interface)
     out() << "#include <LibWeb/DOM/Element.h>";
     out() << "#include <LibWeb/DOM/HTMLElement.h>";
     out() << "#include <LibWeb/DOM/EventListener.h>";
+    out() << "#include <LibWeb/Bindings/DocumentTypeWrapper.h>";
     out() << "#include <LibWeb/Bindings/HTMLCanvasElementWrapper.h>";
     out() << "#include <LibWeb/Bindings/HTMLImageElementWrapper.h>";
     out() << "#include <LibWeb/Bindings/ImageDataWrapper.h>";

+ 16 - 0
Libraries/LibWeb/DOM/Document.cpp

@@ -477,4 +477,20 @@ void Document::adopt_node(Node& subtree_root)
     });
 }
 
+// Returns the first child of this document that is a DocumentType node, as
+// located by first_child_of_type(). The matching IDL attribute is declared
+// nullable, so callers should be prepared for a null result.
+const DocumentType* Document::doctype() const
+{
+    const DocumentType* doctype_node = first_child_of_type<DocumentType>();
+    return doctype_node;
+}
+
+// Returns the string exposed to script as document.compatMode:
+// "BackCompat" when the document is in full quirks mode, and "CSS1Compat"
+// otherwise. Note that limited-quirks mode also reports "CSS1Compat".
+const String& Document::compat_mode() const
+{
+    // Callers receive references to these statics, so they must never be
+    // modified after construction; declare them const to enforce that.
+    static const String back_compat = "BackCompat";
+    static const String css1_compat = "CSS1Compat";
+
+    if (m_quirks_mode == QuirksMode::Yes)
+        return back_compat;
+
+    return css1_compat;
+}
+
 }

+ 13 - 3
Libraries/LibWeb/DOM/Document.h

@@ -43,6 +43,12 @@
 
 namespace Web {
 
+// Rendering mode of a Document. The HTML parser picks it while handling the
+// DOCTYPE token (or the lack of one) in the initial insertion mode.
+enum class QuirksMode {
+    // No quirks; also the default state of a freshly created Document.
+    No,
+    // Limited quirks; reported to script as "CSS1Compat", like No.
+    Limited,
+    // Full quirks; reported to script as "BackCompat".
+    Yes,
+};
+
 class Document
     : public ParentNode
     , public NonElementParentNode<Document> {
@@ -142,11 +148,15 @@ public:
     void add_script_to_execute_as_soon_as_possible(Badge<HTMLScriptElement>, HTMLScriptElement&);
     NonnullRefPtrVector<HTMLScriptElement> take_scripts_to_execute_as_soon_as_possible(Badge<HTMLDocumentParser>);
 
-    bool in_quirks_mode() const { return m_quirks_mode; }
-    void set_quirks_mode(bool mode) { m_quirks_mode = mode; }
+    QuirksMode mode() const { return m_quirks_mode; }
+    bool in_quirks_mode() const { return m_quirks_mode == QuirksMode::Yes; }
+    void set_quirks_mode(QuirksMode mode) { m_quirks_mode = mode; }
 
     void adopt_node(Node&);
 
+    const DocumentType* doctype() const;
+    const String& compat_mode() const;
+
 private:
     virtual RefPtr<LayoutNode> create_layout_node(const StyleProperties* parent_style) override;
 
@@ -175,7 +185,7 @@ private:
     NonnullRefPtrVector<HTMLScriptElement> m_scripts_to_execute_when_parsing_has_finished;
     NonnullRefPtrVector<HTMLScriptElement> m_scripts_to_execute_as_soon_as_possible;
 
-    bool m_quirks_mode { false };
+    QuirksMode m_quirks_mode { QuirksMode::No };
 };
 
 template<>

+ 3 - 0
Libraries/LibWeb/DOM/Document.idl

@@ -6,6 +6,9 @@ interface Document : Node {
     ArrayFromVector querySelectorAll(DOMString selectors);
     Element createElement(DOMString tagName);
 
+    readonly attribute DOMString compatMode;
+    readonly attribute DocumentType? doctype;
+
     readonly attribute HTMLElement? body;
 
 }

+ 10 - 0
Libraries/LibWeb/DOM/DocumentType.h

@@ -33,6 +33,8 @@ namespace Web {
 
 class DocumentType final : public Node {
 public:
+    using WrapperType = Bindings::DocumentTypeWrapper;
+
     explicit DocumentType(Document&);
     virtual ~DocumentType() override;
 
@@ -41,8 +43,16 @@ public:
     const String& name() const { return m_name; }
     void set_name(const String& name) { m_name = name; }
 
+    const String& public_id() const { return m_public_id; }
+    void set_public_id(const String& public_id) { m_public_id = public_id; }
+
+    const String& system_id() const { return m_system_id; }
+    void set_system_id(const String& system_id) { m_system_id = system_id; }
+
 private:
     String m_name;
+    String m_public_id;
+    String m_system_id;
 };
 
 template<>

+ 7 - 0
Libraries/LibWeb/DOM/DocumentType.idl

@@ -0,0 +1,7 @@
+interface DocumentType : Node {
+
+    readonly attribute DOMString name;
+    readonly attribute DOMString publicId;
+    readonly attribute DOMString systemId;
+
+}

+ 3 - 0
Libraries/LibWeb/Forward.h

@@ -30,6 +30,7 @@ namespace Web {
 
 class CanvasRenderingContext2D;
 class Document;
+class DocumentType;
 class Element;
 class Event;
 class EventHandler;
@@ -72,11 +73,13 @@ class Text;
 class Timer;
 class Window;
 class XMLHttpRequest;
+enum class QuirksMode;
 
 namespace Bindings {
 
 class CanvasRenderingContext2DWrapper;
 class DocumentWrapper;
+class DocumentTypeWrapper;
 class ElementWrapper;
 class EventWrapper;
 class EventListenerWrapper;

+ 117 - 3
Libraries/LibWeb/Parser/HTMLDocumentParser.cpp

@@ -46,6 +46,64 @@
 
 namespace Web {
 
+// Public identifier prefixes that put a document into full quirks mode.
+// which_quirks_mode() compares each entry case-insensitively against the start
+// of the DOCTYPE token's public identifier. The table is only ever read, so it
+// is declared const.
+static const Vector<FlyString> s_quirks_public_ids = {
+    "+//Silmaril//dtd html Pro v0r11 19970101//",
+    "-//AS//DTD HTML 3.0 asWedit + extensions//",
+    "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//",
+    "-//IETF//DTD HTML 2.0 Level 1//",
+    "-//IETF//DTD HTML 2.0 Level 2//",
+    "-//IETF//DTD HTML 2.0 Strict Level 1//",
+    "-//IETF//DTD HTML 2.0 Strict Level 2//",
+    "-//IETF//DTD HTML 2.0 Strict//",
+    "-//IETF//DTD HTML 2.0//",
+    "-//IETF//DTD HTML 2.1E//",
+    "-//IETF//DTD HTML 3.0//",
+    "-//IETF//DTD HTML 3.2 Final//",
+    "-//IETF//DTD HTML 3.2//",
+    "-//IETF//DTD HTML 3//",
+    "-//IETF//DTD HTML Level 0//",
+    "-//IETF//DTD HTML Level 1//",
+    "-//IETF//DTD HTML Level 2//",
+    "-//IETF//DTD HTML Level 3//",
+    "-//IETF//DTD HTML Strict Level 0//",
+    "-//IETF//DTD HTML Strict Level 1//",
+    "-//IETF//DTD HTML Strict Level 2//",
+    "-//IETF//DTD HTML Strict Level 3//",
+    "-//IETF//DTD HTML Strict//",
+    "-//IETF//DTD HTML//",
+    "-//Metrius//DTD Metrius Presentational//",
+    "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//",
+    "-//Microsoft//DTD Internet Explorer 2.0 HTML//",
+    "-//Microsoft//DTD Internet Explorer 2.0 Tables//",
+    "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//",
+    "-//Microsoft//DTD Internet Explorer 3.0 HTML//",
+    "-//Microsoft//DTD Internet Explorer 3.0 Tables//",
+    "-//Netscape Comm. Corp.//DTD HTML//",
+    "-//Netscape Comm. Corp.//DTD Strict HTML//",
+    "-//O'Reilly and Associates//DTD HTML 2.0//",
+    "-//O'Reilly and Associates//DTD HTML Extended 1.0//",
+    "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//",
+    "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//",
+    "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//",
+    "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//",
+    "-//Spyglass//DTD HTML 2.0 Extended//",
+    "-//Sun Microsystems Corp.//DTD HotJava HTML//",
+    "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//",
+    "-//W3C//DTD HTML 3 1995-03-24//",
+    "-//W3C//DTD HTML 3.2 Draft//",
+    "-//W3C//DTD HTML 3.2 Final//",
+    "-//W3C//DTD HTML 3.2//",
+    "-//W3C//DTD HTML 3.2S Draft//",
+    "-//W3C//DTD HTML 4.0 Frameset//",
+    "-//W3C//DTD HTML 4.0 Transitional//",
+    "-//W3C//DTD HTML Experimental 19960712//",
+    "-//W3C//DTD HTML Experimental 970421//",
+    "-//W3C//DTD W3 HTML//",
+    "-//W3O//DTD W3 HTML 3.0//",
+    "-//WebTechs//DTD Mozilla HTML 2.0//",
+    "-//WebTechs//DTD Mozilla HTML//"
+};
+
 RefPtr<Document> parse_html_document(const StringView& data, const URL& url, const String& encoding)
 {
     HTMLDocumentParser parser(data, encoding);
@@ -181,6 +239,60 @@ void HTMLDocumentParser::process_using_the_rules_for(InsertionMode mode, HTMLTok
     }
 }
 
+// Decides which QuirksMode a DOCTYPE token selects for the document.
+//
+// Returns QuirksMode::Yes when the token forces quirks, is not named "html",
+// or carries one of the known legacy public/system identifiers;
+// QuirksMode::Limited for the XHTML 1.0 and (with a system identifier present)
+// HTML 4.01 Frameset/Transitional public identifiers; QuirksMode::No otherwise.
+// All identifier comparisons are case-insensitive.
+QuirksMode HTMLDocumentParser::which_quirks_mode(const HTMLToken& doctype_token) const
+{
+    auto& doctype = doctype_token.m_doctype;
+
+    // NOTE: The tokenizer puts the name into lower case for us.
+    if (doctype.force_quirks || doctype.name.to_string() != "html")
+        return QuirksMode::Yes;
+
+    auto public_id = doctype.public_identifier.to_string();
+    auto system_id = doctype.system_identifier.to_string();
+
+    // Identifiers that trigger quirks mode only on a full (not prefix) match.
+    bool matches_exact_quirks_id = public_id.equals_ignoring_case("-//W3O//DTD W3 HTML Strict 3.0//EN//")
+        || public_id.equals_ignoring_case("-/W3C/DTD HTML 4.0 Transitional/EN")
+        || public_id.equals_ignoring_case("HTML")
+        || system_id.equals_ignoring_case("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd");
+    if (matches_exact_quirks_id)
+        return QuirksMode::Yes;
+
+    for (auto& quirks_prefix : s_quirks_public_ids) {
+        if (public_id.starts_with(quirks_prefix, CaseSensitivity::CaseInsensitive))
+            return QuirksMode::Yes;
+    }
+
+    auto public_id_starts_with = [&](const char* prefix) {
+        return public_id.starts_with(prefix, CaseSensitivity::CaseInsensitive);
+    };
+
+    // The HTML 4.01 Frameset/Transitional identifiers mean full quirks when the
+    // system identifier is missing and limited quirks when it is present. They
+    // cannot also match the XHTML 1.0 prefixes below, so checking them first
+    // does not change the outcome.
+    bool is_html_4_01_frameset_or_transitional = public_id_starts_with("-//W3C//DTD HTML 4.01 Frameset//")
+        || public_id_starts_with("-//W3C//DTD HTML 4.01 Transitional//");
+    if (is_html_4_01_frameset_or_transitional)
+        return doctype.missing_system_identifier ? QuirksMode::Yes : QuirksMode::Limited;
+
+    bool is_xhtml_1_0_frameset_or_transitional = public_id_starts_with("-//W3C//DTD XHTML 1.0 Frameset//")
+        || public_id_starts_with("-//W3C//DTD XHTML 1.0 Transitional//");
+    if (is_xhtml_1_0_frameset_or_transitional)
+        return QuirksMode::Limited;
+
+    return QuirksMode::No;
+}
+
 void HTMLDocumentParser::handle_initial(HTMLToken& token)
 {
     if (token.is_character() && token.is_parser_whitespace()) {
@@ -196,14 +308,16 @@ void HTMLDocumentParser::handle_initial(HTMLToken& token)
     if (token.is_doctype()) {
         auto doctype = adopt(*new DocumentType(document()));
         doctype->set_name(token.m_doctype.name.to_string());
+        doctype->set_public_id(token.m_doctype.public_identifier.to_string());
+        doctype->set_system_id(token.m_doctype.system_identifier.to_string());
         document().append_child(move(doctype));
-        document().set_quirks_mode(token.m_doctype.force_quirks);
+        document().set_quirks_mode(which_quirks_mode(token));
         m_insertion_mode = InsertionMode::BeforeHTML;
         return;
     }
 
     PARSE_ERROR();
-    document().set_quirks_mode(true);
+    document().set_quirks_mode(QuirksMode::Yes);
     m_insertion_mode = InsertionMode::BeforeHTML;
     process_using_the_rules_for(InsertionMode::BeforeHTML, token);
 }
@@ -2612,7 +2726,7 @@ NonnullRefPtrVector<Node> HTMLDocumentParser::parse_html_fragment(Element& conte
 {
     HTMLDocumentParser parser(markup, "utf-8");
     parser.m_parsing_fragment = true;
-    parser.document().set_quirks_mode(context_element.document().in_quirks_mode());
+    parser.document().set_quirks_mode(context_element.document().mode());
 
     if (context_element.tag_name().is_one_of(HTML::TagNames::title, HTML::TagNames::textarea)) {
         parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::RCDATA);

+ 2 - 0
Libraries/LibWeb/Parser/HTMLDocumentParser.h

@@ -85,6 +85,8 @@ public:
 private:
     const char* insertion_mode_name() const;
 
+    QuirksMode which_quirks_mode(const HTMLToken&) const;
+
     void handle_initial(HTMLToken&);
     void handle_before_html(HTMLToken&);
     void handle_before_head(HTMLToken&);

+ 5 - 0
Libraries/LibWeb/Parser/HTMLToken.h

@@ -174,9 +174,14 @@ private:
 
     // Type::DOCTYPE
     struct {
+        // NOTE: "Missing" is a distinct state from the empty string.
+
         StringBuilder name;
+        bool missing_name { true };
         StringBuilder public_identifier;
+        bool missing_public_identifier { true };
         StringBuilder system_identifier;
+        bool missing_system_identifier { true };
         bool force_quirks { false };
     } m_doctype;
 

+ 15 - 0
Libraries/LibWeb/Parser/HTMLTokenizer.cpp

@@ -455,6 +455,7 @@ _StartOfFunction:
                 {
                     create_new_token(HTMLToken::Type::DOCTYPE);
                     m_current_token.m_doctype.name.append(tolower(current_input_character.value()));
+                    m_current_token.m_doctype.missing_name = false;
                     SWITCH_TO(DOCTYPEName);
                 }
                 ON(0)
@@ -462,6 +463,7 @@ _StartOfFunction:
                     PARSE_ERROR();
                     create_new_token(HTMLToken::Type::DOCTYPE);
                     m_current_token.m_doctype.name.append_codepoint(0xFFFD);
+                    m_current_token.m_doctype.missing_name = false;
                     SWITCH_TO(DOCTYPEName);
                 }
                 ON('>')
@@ -483,6 +485,7 @@ _StartOfFunction:
                 {
                     create_new_token(HTMLToken::Type::DOCTYPE);
                     m_current_token.m_doctype.name.append_codepoint(current_input_character.value());
+                    m_current_token.m_doctype.missing_name = false;
                     SWITCH_TO(DOCTYPEName);
                 }
             }
@@ -566,12 +569,14 @@ _StartOfFunction:
                 {
                     PARSE_ERROR();
                     m_current_token.m_doctype.public_identifier.clear();
+                    m_current_token.m_doctype.missing_public_identifier = false;
                     SWITCH_TO(DOCTYPEPublicIdentifierDoubleQuoted);
                 }
                 ON('\'')
                 {
                     PARSE_ERROR();
                     m_current_token.m_doctype.public_identifier.clear();
+                    m_current_token.m_doctype.missing_public_identifier = false;
                     SWITCH_TO(DOCTYPEPublicIdentifierSingleQuoted);
                 }
                 ON('>')
@@ -606,12 +611,14 @@ _StartOfFunction:
                 {
                     PARSE_ERROR();
                     m_current_token.m_doctype.system_identifier.clear();
+                    m_current_token.m_doctype.missing_system_identifier = false;
                     SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted);
                 }
                 ON('\'')
                 {
                     PARSE_ERROR();
                     m_current_token.m_doctype.system_identifier.clear();
+                    m_current_token.m_doctype.missing_system_identifier = false;
                     SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted);
                 }
                 ON('>')
@@ -645,11 +652,13 @@ _StartOfFunction:
                 ON('"')
                 {
                     m_current_token.m_doctype.public_identifier.clear();
+                    m_current_token.m_doctype.missing_public_identifier = false;
                     SWITCH_TO(DOCTYPEPublicIdentifierDoubleQuoted);
                 }
                 ON('\'')
                 {
                     m_current_token.m_doctype.public_identifier.clear();
+                    m_current_token.m_doctype.missing_public_identifier = false;
                     SWITCH_TO(DOCTYPEPublicIdentifierSingleQuoted);
                 }
                 ON('>')
@@ -683,11 +692,13 @@ _StartOfFunction:
                 ON('"')
                 {
                     m_current_token.m_doctype.system_identifier.clear();
+                    m_current_token.m_doctype.missing_system_identifier = false;
                     SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted);
                 }
                 ON('\'')
                 {
                     m_current_token.m_doctype.system_identifier.clear();
+                    m_current_token.m_doctype.missing_system_identifier = false;
                     SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted);
                 }
                 ON('>')
@@ -858,12 +869,14 @@ _StartOfFunction:
                 {
                     PARSE_ERROR();
                     m_current_token.m_doctype.system_identifier.clear();
+                    m_current_token.m_doctype.missing_system_identifier = false;
                     SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted);
                 }
                 ON('\'')
                 {
                     PARSE_ERROR();
                     m_current_token.m_doctype.system_identifier.clear();
+                    m_current_token.m_doctype.missing_system_identifier = false;
                     SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted);
                 }
                 ON_EOF
@@ -895,11 +908,13 @@ _StartOfFunction:
                 ON('"')
                 {
                     m_current_token.m_doctype.system_identifier.clear();
+                    m_current_token.m_doctype.missing_system_identifier = false;
                     SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted);
                 }
                 ON('\'')
                 {
                     m_current_token.m_doctype.system_identifier.clear();
+                    m_current_token.m_doctype.missing_system_identifier = false;
                     SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted);
                 }
                 ON_EOF