mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-26 01:20:25 +00:00
LibWeb: Implement quirks mode detection
This allows us to determine which mode to render the page in. Exposes "doctype" and "compatMode" on Document. Exposes "name", "publicId" and "systemId" on DocumentType.
This commit is contained in:
parent
a5ecb9bd6b
commit
19d6884529
Notes:
sideshowbarker
2024-07-19 04:42:11 +09:00
Author: https://github.com/Lubrsi Commit: https://github.com/SerenityOS/serenity/commit/19d6884529d Pull-request: https://github.com/SerenityOS/serenity/pull/2842
13 changed files with 196 additions and 6 deletions
|
@ -25,6 +25,7 @@
|
|||
*/
|
||||
|
||||
#include <LibWeb/Bindings/DocumentWrapper.h>
|
||||
#include <LibWeb/Bindings/DocumentTypeWrapper.h>
|
||||
#include <LibWeb/Bindings/HTMLCanvasElementWrapper.h>
|
||||
#include <LibWeb/Bindings/HTMLImageElementWrapper.h>
|
||||
#include <LibWeb/Bindings/HTMLElementWrapper.h>
|
||||
|
@ -41,6 +42,8 @@ NodeWrapper* wrap(JS::GlobalObject& global_object, Node& node)
|
|||
{
|
||||
if (is<Document>(node))
|
||||
return static_cast<NodeWrapper*>(wrap_impl(global_object, to<Document>(node)));
|
||||
if (is<DocumentType>(node))
|
||||
return static_cast<NodeWrapper*>(wrap_impl(global_object, to<DocumentType>(node)));
|
||||
if (is<HTMLCanvasElement>(node))
|
||||
return static_cast<NodeWrapper*>(wrap_impl(global_object, to<HTMLCanvasElement>(node)));
|
||||
if (is<HTMLImageElement>(node))
|
||||
|
|
|
@ -156,6 +156,7 @@ endfunction()
|
|||
libweb_js_wrapper(EventTarget)
|
||||
libweb_js_wrapper(Node)
|
||||
libweb_js_wrapper(Document)
|
||||
libweb_js_wrapper(DocumentType)
|
||||
libweb_js_wrapper(Element)
|
||||
libweb_js_wrapper(HTMLElement)
|
||||
libweb_js_wrapper(HTMLImageElement)
|
||||
|
|
|
@ -423,6 +423,7 @@ void generate_implementation(const IDL::Interface& interface)
|
|||
out() << "#include <LibWeb/DOM/Element.h>";
|
||||
out() << "#include <LibWeb/DOM/HTMLElement.h>";
|
||||
out() << "#include <LibWeb/DOM/EventListener.h>";
|
||||
out() << "#include <LibWeb/Bindings/DocumentTypeWrapper.h>";
|
||||
out() << "#include <LibWeb/Bindings/HTMLCanvasElementWrapper.h>";
|
||||
out() << "#include <LibWeb/Bindings/HTMLImageElementWrapper.h>";
|
||||
out() << "#include <LibWeb/Bindings/ImageDataWrapper.h>";
|
||||
|
|
|
@ -477,4 +477,20 @@ void Document::adopt_node(Node& subtree_root)
|
|||
});
|
||||
}
|
||||
|
||||
// Returns the document's doctype node: the first child of this document that
// is a DocumentType. The IDL attribute backed by this accessor is declared
// nullable (DocumentType?), so callers must be prepared for "no doctype".
const DocumentType* Document::doctype() const
{
    const DocumentType* doctype_node = first_child_of_type<DocumentType>();
    return doctype_node;
}
|
||||
|
||||
// Backs the "compatMode" IDL attribute: yields "BackCompat" only when the
// document is in full quirks mode; both no-quirks and limited-quirks documents
// report "CSS1Compat".
const String& Document::compat_mode() const
{
    // Function-local statics so that a reference can be handed back to the caller.
    static String quirks_mode_name = "BackCompat";
    static String standards_mode_name = "CSS1Compat";

    return m_quirks_mode == QuirksMode::Yes ? quirks_mode_name : standards_mode_name;
}
|
||||
|
||||
}
|
||||
|
|
|
@ -43,6 +43,12 @@
|
|||
|
||||
namespace Web {
|
||||
|
||||
// The rendering mode of a Document, as decided by the HTML parser from the
// DOCTYPE token (or its absence).
// NOTE: Forward.h forward-declares this enum without an underlying type, so
//       the declaration here must stay compatible with that.
enum class QuirksMode {
|
||||
// No-quirks mode: compat_mode() reports "CSS1Compat".
No,
|
||||
// Limited-quirks mode: still reports "CSS1Compat"; in_quirks_mode() is false.
Limited,
|
||||
// Full quirks mode: in_quirks_mode() is true and compat_mode() reports "BackCompat".
Yes
|
||||
};
|
||||
|
||||
class Document
|
||||
: public ParentNode
|
||||
, public NonElementParentNode<Document> {
|
||||
|
@ -142,11 +148,15 @@ public:
|
|||
void add_script_to_execute_as_soon_as_possible(Badge<HTMLScriptElement>, HTMLScriptElement&);
|
||||
NonnullRefPtrVector<HTMLScriptElement> take_scripts_to_execute_as_soon_as_possible(Badge<HTMLDocumentParser>);
|
||||
|
||||
bool in_quirks_mode() const { return m_quirks_mode; }
|
||||
void set_quirks_mode(bool mode) { m_quirks_mode = mode; }
|
||||
QuirksMode mode() const { return m_quirks_mode; }
|
||||
bool in_quirks_mode() const { return m_quirks_mode == QuirksMode::Yes; }
|
||||
void set_quirks_mode(QuirksMode mode) { m_quirks_mode = mode; }
|
||||
|
||||
void adopt_node(Node&);
|
||||
|
||||
const DocumentType* doctype() const;
|
||||
const String& compat_mode() const;
|
||||
|
||||
private:
|
||||
virtual RefPtr<LayoutNode> create_layout_node(const StyleProperties* parent_style) override;
|
||||
|
||||
|
@ -175,7 +185,7 @@ private:
|
|||
NonnullRefPtrVector<HTMLScriptElement> m_scripts_to_execute_when_parsing_has_finished;
|
||||
NonnullRefPtrVector<HTMLScriptElement> m_scripts_to_execute_as_soon_as_possible;
|
||||
|
||||
bool m_quirks_mode { false };
|
||||
QuirksMode m_quirks_mode { QuirksMode::No };
|
||||
};
|
||||
|
||||
template<>
|
||||
|
|
|
@ -6,6 +6,9 @@ interface Document : Node {
|
|||
ArrayFromVector querySelectorAll(DOMString selectors);
|
||||
Element createElement(DOMString tagName);
|
||||
|
||||
readonly attribute DOMString compatMode;
|
||||
readonly attribute DocumentType? doctype;
|
||||
|
||||
readonly attribute HTMLElement? body;
|
||||
|
||||
}
|
||||
|
|
|
@ -33,6 +33,8 @@ namespace Web {
|
|||
|
||||
class DocumentType final : public Node {
|
||||
public:
|
||||
using WrapperType = Bindings::DocumentTypeWrapper;
|
||||
|
||||
explicit DocumentType(Document&);
|
||||
virtual ~DocumentType() override;
|
||||
|
||||
|
@ -41,8 +43,16 @@ public:
|
|||
const String& name() const { return m_name; }
|
||||
void set_name(const String& name) { m_name = name; }
|
||||
|
||||
const String& public_id() const { return m_public_id; }
|
||||
void set_public_id(const String& public_id) { m_public_id = public_id; }
|
||||
|
||||
const String& system_id() const { return m_system_id; }
|
||||
void set_system_id(const String& system_id) { m_system_id = system_id; }
|
||||
|
||||
private:
|
||||
String m_name;
|
||||
String m_public_id;
|
||||
String m_system_id;
|
||||
};
|
||||
|
||||
template<>
|
||||
|
|
7
Libraries/LibWeb/DOM/DocumentType.idl
Normal file
7
Libraries/LibWeb/DOM/DocumentType.idl
Normal file
|
@ -0,0 +1,7 @@
|
|||
interface DocumentType : Node {
|
||||
|
||||
readonly attribute DOMString name;
|
||||
readonly attribute DOMString publicId;
|
||||
readonly attribute DOMString systemId;
|
||||
|
||||
}
|
|
@ -30,6 +30,7 @@ namespace Web {
|
|||
|
||||
class CanvasRenderingContext2D;
|
||||
class Document;
|
||||
class DocumentType;
|
||||
class Element;
|
||||
class Event;
|
||||
class EventHandler;
|
||||
|
@ -72,11 +73,13 @@ class Text;
|
|||
class Timer;
|
||||
class Window;
|
||||
class XMLHttpRequest;
|
||||
enum class QuirksMode;
|
||||
|
||||
namespace Bindings {
|
||||
|
||||
class CanvasRenderingContext2DWrapper;
|
||||
class DocumentWrapper;
|
||||
class DocumentTypeWrapper;
|
||||
class ElementWrapper;
|
||||
class EventWrapper;
|
||||
class EventListenerWrapper;
|
||||
|
|
|
@ -46,6 +46,64 @@
|
|||
|
||||
namespace Web {
|
||||
|
||||
// Public identifier prefixes that force a document into quirks mode.
// which_quirks_mode() compares each entry against the start of the DOCTYPE's
// public identifier, ignoring case. The entries mirror the list given by the
// HTML parsing algorithm for the "initial" insertion mode.
// The table is never modified after construction, hence const.
static const Vector<FlyString> s_quirks_public_ids = {
    "+//Silmaril//dtd html Pro v0r11 19970101//",
    "-//AS//DTD HTML 3.0 asWedit + extensions//",
    "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//",
    "-//IETF//DTD HTML 2.0 Level 1//",
    "-//IETF//DTD HTML 2.0 Level 2//",
    "-//IETF//DTD HTML 2.0 Strict Level 1//",
    "-//IETF//DTD HTML 2.0 Strict Level 2//",
    "-//IETF//DTD HTML 2.0 Strict//",
    "-//IETF//DTD HTML 2.0//",
    "-//IETF//DTD HTML 2.1E//",
    "-//IETF//DTD HTML 3.0//",
    "-//IETF//DTD HTML 3.2 Final//",
    "-//IETF//DTD HTML 3.2//",
    "-//IETF//DTD HTML 3//",
    "-//IETF//DTD HTML Level 0//",
    "-//IETF//DTD HTML Level 1//",
    "-//IETF//DTD HTML Level 2//",
    "-//IETF//DTD HTML Level 3//",
    "-//IETF//DTD HTML Strict Level 0//",
    "-//IETF//DTD HTML Strict Level 1//",
    "-//IETF//DTD HTML Strict Level 2//",
    "-//IETF//DTD HTML Strict Level 3//",
    "-//IETF//DTD HTML Strict//",
    "-//IETF//DTD HTML//",
    "-//Metrius//DTD Metrius Presentational//",
    "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//",
    "-//Microsoft//DTD Internet Explorer 2.0 HTML//",
    "-//Microsoft//DTD Internet Explorer 2.0 Tables//",
    "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//",
    "-//Microsoft//DTD Internet Explorer 3.0 HTML//",
    "-//Microsoft//DTD Internet Explorer 3.0 Tables//",
    "-//Netscape Comm. Corp.//DTD HTML//",
    "-//Netscape Comm. Corp.//DTD Strict HTML//",
    "-//O'Reilly and Associates//DTD HTML 2.0//",
    "-//O'Reilly and Associates//DTD HTML Extended 1.0//",
    "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//",
    "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//",
    "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//",
    "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//",
    "-//Spyglass//DTD HTML 2.0 Extended//",
    "-//Sun Microsystems Corp.//DTD HotJava HTML//",
    "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//",
    "-//W3C//DTD HTML 3 1995-03-24//",
    "-//W3C//DTD HTML 3.2 Draft//",
    "-//W3C//DTD HTML 3.2 Final//",
    "-//W3C//DTD HTML 3.2//",
    "-//W3C//DTD HTML 3.2S Draft//",
    "-//W3C//DTD HTML 4.0 Frameset//",
    "-//W3C//DTD HTML 4.0 Transitional//",
    "-//W3C//DTD HTML Experimental 19960712//",
    "-//W3C//DTD HTML Experimental 970421//",
    "-//W3C//DTD W3 HTML//",
    "-//W3O//DTD W3 HTML 3.0//",
    "-//WebTechs//DTD Mozilla HTML 2.0//",
    "-//WebTechs//DTD Mozilla HTML//"
};
|
||||
|
||||
RefPtr<Document> parse_html_document(const StringView& data, const URL& url, const String& encoding)
|
||||
{
|
||||
HTMLDocumentParser parser(data, encoding);
|
||||
|
@ -181,6 +239,60 @@ void HTMLDocumentParser::process_using_the_rules_for(InsertionMode mode, HTMLTok
|
|||
}
|
||||
}
|
||||
|
||||
// Classifies a DOCTYPE token as no-quirks, limited-quirks or quirks mode by
// inspecting its force-quirks flag, name, public identifier and system
// identifier. All identifier comparisons ignore case.
QuirksMode HTMLDocumentParser::which_quirks_mode(const HTMLToken& doctype_token) const
{
    const auto& doctype = doctype_token.m_doctype;

    if (doctype.force_quirks)
        return QuirksMode::Yes;

    // NOTE: The tokenizer puts the name into lower case for us.
    if (doctype.name.to_string() != "html")
        return QuirksMode::Yes;

    auto public_id = doctype.public_identifier.to_string();
    auto system_id = doctype.system_identifier.to_string();

    auto public_id_starts_with = [&](const char* prefix) {
        return public_id.starts_with(prefix, CaseSensitivity::CaseInsensitive);
    };

    // Identifiers that trigger quirks mode only on a full (case-insensitive) match.
    const bool has_quirky_exact_public_id = public_id.equals_ignoring_case("-//W3O//DTD W3 HTML Strict 3.0//EN//")
        || public_id.equals_ignoring_case("-/W3C/DTD HTML 4.0 Transitional/EN")
        || public_id.equals_ignoring_case("HTML");
    if (has_quirky_exact_public_id)
        return QuirksMode::Yes;

    if (system_id.equals_ignoring_case("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"))
        return QuirksMode::Yes;

    // Public identifiers that trigger quirks mode on a prefix match.
    for (auto& quirks_prefix : s_quirks_public_ids) {
        if (public_id.starts_with(quirks_prefix, CaseSensitivity::CaseInsensitive))
            return QuirksMode::Yes;
    }

    // HTML 4.01 Frameset/Transitional: quirks when the system identifier is
    // missing, limited-quirks when it is present. These prefixes cannot overlap
    // with the XHTML 1.0 prefixes below, so testing them first is equivalent.
    const bool is_html_401_frameset_or_transitional = public_id_starts_with("-//W3C//DTD HTML 4.01 Frameset//")
        || public_id_starts_with("-//W3C//DTD HTML 4.01 Transitional//");
    if (is_html_401_frameset_or_transitional)
        return doctype.missing_system_identifier ? QuirksMode::Yes : QuirksMode::Limited;

    // XHTML 1.0 Frameset/Transitional is always limited-quirks.
    const bool is_xhtml_10_frameset_or_transitional = public_id_starts_with("-//W3C//DTD XHTML 1.0 Frameset//")
        || public_id_starts_with("-//W3C//DTD XHTML 1.0 Transitional//");
    if (is_xhtml_10_frameset_or_transitional)
        return QuirksMode::Limited;

    return QuirksMode::No;
}
|
||||
|
||||
void HTMLDocumentParser::handle_initial(HTMLToken& token)
|
||||
{
|
||||
if (token.is_character() && token.is_parser_whitespace()) {
|
||||
|
@ -196,14 +308,16 @@ void HTMLDocumentParser::handle_initial(HTMLToken& token)
|
|||
if (token.is_doctype()) {
|
||||
auto doctype = adopt(*new DocumentType(document()));
|
||||
doctype->set_name(token.m_doctype.name.to_string());
|
||||
doctype->set_public_id(token.m_doctype.public_identifier.to_string());
|
||||
doctype->set_system_id(token.m_doctype.system_identifier.to_string());
|
||||
document().append_child(move(doctype));
|
||||
document().set_quirks_mode(token.m_doctype.force_quirks);
|
||||
document().set_quirks_mode(which_quirks_mode(token));
|
||||
m_insertion_mode = InsertionMode::BeforeHTML;
|
||||
return;
|
||||
}
|
||||
|
||||
PARSE_ERROR();
|
||||
document().set_quirks_mode(true);
|
||||
document().set_quirks_mode(QuirksMode::Yes);
|
||||
m_insertion_mode = InsertionMode::BeforeHTML;
|
||||
process_using_the_rules_for(InsertionMode::BeforeHTML, token);
|
||||
}
|
||||
|
@ -2612,7 +2726,7 @@ NonnullRefPtrVector<Node> HTMLDocumentParser::parse_html_fragment(Element& conte
|
|||
{
|
||||
HTMLDocumentParser parser(markup, "utf-8");
|
||||
parser.m_parsing_fragment = true;
|
||||
parser.document().set_quirks_mode(context_element.document().in_quirks_mode());
|
||||
parser.document().set_quirks_mode(context_element.document().mode());
|
||||
|
||||
if (context_element.tag_name().is_one_of(HTML::TagNames::title, HTML::TagNames::textarea)) {
|
||||
parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::RCDATA);
|
||||
|
|
|
@ -85,6 +85,8 @@ public:
|
|||
private:
|
||||
const char* insertion_mode_name() const;
|
||||
|
||||
QuirksMode which_quirks_mode(const HTMLToken&) const;
|
||||
|
||||
void handle_initial(HTMLToken&);
|
||||
void handle_before_html(HTMLToken&);
|
||||
void handle_before_head(HTMLToken&);
|
||||
|
|
|
@ -174,9 +174,14 @@ private:
|
|||
|
||||
// Type::DOCTYPE
|
||||
struct {
|
||||
// NOTE: "Missing" is a distinct state from the empty string.
|
||||
|
||||
StringBuilder name;
|
||||
bool missing_name { true };
|
||||
StringBuilder public_identifier;
|
||||
bool missing_public_identifier { true };
|
||||
StringBuilder system_identifier;
|
||||
bool missing_system_identifier { true };
|
||||
bool force_quirks { false };
|
||||
} m_doctype;
|
||||
|
||||
|
|
|
@ -455,6 +455,7 @@ _StartOfFunction:
|
|||
{
|
||||
create_new_token(HTMLToken::Type::DOCTYPE);
|
||||
m_current_token.m_doctype.name.append(tolower(current_input_character.value()));
|
||||
m_current_token.m_doctype.missing_name = false;
|
||||
SWITCH_TO(DOCTYPEName);
|
||||
}
|
||||
ON(0)
|
||||
|
@ -462,6 +463,7 @@ _StartOfFunction:
|
|||
PARSE_ERROR();
|
||||
create_new_token(HTMLToken::Type::DOCTYPE);
|
||||
m_current_token.m_doctype.name.append_codepoint(0xFFFD);
|
||||
m_current_token.m_doctype.missing_name = false;
|
||||
SWITCH_TO(DOCTYPEName);
|
||||
}
|
||||
ON('>')
|
||||
|
@ -483,6 +485,7 @@ _StartOfFunction:
|
|||
{
|
||||
create_new_token(HTMLToken::Type::DOCTYPE);
|
||||
m_current_token.m_doctype.name.append_codepoint(current_input_character.value());
|
||||
m_current_token.m_doctype.missing_name = false;
|
||||
SWITCH_TO(DOCTYPEName);
|
||||
}
|
||||
}
|
||||
|
@ -566,12 +569,14 @@ _StartOfFunction:
|
|||
{
|
||||
PARSE_ERROR();
|
||||
m_current_token.m_doctype.public_identifier.clear();
|
||||
m_current_token.m_doctype.missing_public_identifier = false;
|
||||
SWITCH_TO(DOCTYPEPublicIdentifierDoubleQuoted);
|
||||
}
|
||||
ON('\'')
|
||||
{
|
||||
PARSE_ERROR();
|
||||
m_current_token.m_doctype.public_identifier.clear();
|
||||
m_current_token.m_doctype.missing_public_identifier = false;
|
||||
SWITCH_TO(DOCTYPEPublicIdentifierSingleQuoted);
|
||||
}
|
||||
ON('>')
|
||||
|
@ -606,12 +611,14 @@ _StartOfFunction:
|
|||
{
|
||||
PARSE_ERROR();
|
||||
m_current_token.m_doctype.system_identifier.clear();
|
||||
m_current_token.m_doctype.missing_system_identifier = false;
|
||||
SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted);
|
||||
}
|
||||
ON('\'')
|
||||
{
|
||||
PARSE_ERROR();
|
||||
m_current_token.m_doctype.system_identifier.clear();
|
||||
m_current_token.m_doctype.missing_system_identifier = false;
|
||||
SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted);
|
||||
}
|
||||
ON('>')
|
||||
|
@ -645,11 +652,13 @@ _StartOfFunction:
|
|||
ON('"')
|
||||
{
|
||||
m_current_token.m_doctype.public_identifier.clear();
|
||||
m_current_token.m_doctype.missing_public_identifier = false;
|
||||
SWITCH_TO(DOCTYPEPublicIdentifierDoubleQuoted);
|
||||
}
|
||||
ON('\'')
|
||||
{
|
||||
m_current_token.m_doctype.public_identifier.clear();
|
||||
m_current_token.m_doctype.missing_public_identifier = false;
|
||||
SWITCH_TO(DOCTYPEPublicIdentifierSingleQuoted);
|
||||
}
|
||||
ON('>')
|
||||
|
@ -683,11 +692,13 @@ _StartOfFunction:
|
|||
ON('"')
|
||||
{
|
||||
m_current_token.m_doctype.system_identifier.clear();
|
||||
m_current_token.m_doctype.missing_system_identifier = false;
|
||||
SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted);
|
||||
}
|
||||
ON('\'')
|
||||
{
|
||||
m_current_token.m_doctype.system_identifier.clear();
|
||||
m_current_token.m_doctype.missing_system_identifier = false;
|
||||
SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted);
|
||||
}
|
||||
ON('>')
|
||||
|
@ -858,12 +869,14 @@ _StartOfFunction:
|
|||
{
|
||||
PARSE_ERROR();
|
||||
m_current_token.m_doctype.system_identifier.clear();
|
||||
m_current_token.m_doctype.missing_system_identifier = false;
|
||||
SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted);
|
||||
}
|
||||
ON('\'')
|
||||
{
|
||||
PARSE_ERROR();
|
||||
m_current_token.m_doctype.system_identifier.clear();
|
||||
m_current_token.m_doctype.missing_system_identifier = false;
|
||||
SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted);
|
||||
}
|
||||
ON_EOF
|
||||
|
@ -895,11 +908,13 @@ _StartOfFunction:
|
|||
ON('"')
|
||||
{
|
||||
m_current_token.m_doctype.system_identifier.clear();
|
||||
m_current_token.m_doctype.missing_system_identifier = false;
|
||||
SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted);
|
||||
}
|
||||
ON('\'')
|
||||
{
|
||||
m_current_token.m_doctype.system_identifier.clear();
|
||||
m_current_token.m_doctype.missing_system_identifier = false;
|
||||
SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted);
|
||||
}
|
||||
ON_EOF
|
||||
|
|
Loading…
Reference in a new issue