LibWeb: Implement quirks mode detection

This allows us to determine which mode to render the page in.

Exposes "doctype" and "compatMode" on Document.
Exposes "name", "publicId" and "systemId" on DocumentType.
This commit is contained in:
Luke 2020-07-18 21:17:17 +01:00 committed by Andreas Kling
parent a5ecb9bd6b
commit 19d6884529
Notes: sideshowbarker 2024-07-19 04:42:11 +09:00
13 changed files with 196 additions and 6 deletions

View file

@ -25,6 +25,7 @@
*/
#include <LibWeb/Bindings/DocumentWrapper.h>
#include <LibWeb/Bindings/DocumentTypeWrapper.h>
#include <LibWeb/Bindings/HTMLCanvasElementWrapper.h>
#include <LibWeb/Bindings/HTMLImageElementWrapper.h>
#include <LibWeb/Bindings/HTMLElementWrapper.h>
@ -41,6 +42,8 @@ NodeWrapper* wrap(JS::GlobalObject& global_object, Node& node)
{
if (is<Document>(node))
return static_cast<NodeWrapper*>(wrap_impl(global_object, to<Document>(node)));
if (is<DocumentType>(node))
return static_cast<NodeWrapper*>(wrap_impl(global_object, to<DocumentType>(node)));
if (is<HTMLCanvasElement>(node))
return static_cast<NodeWrapper*>(wrap_impl(global_object, to<HTMLCanvasElement>(node)));
if (is<HTMLImageElement>(node))

View file

@ -156,6 +156,7 @@ endfunction()
libweb_js_wrapper(EventTarget)
libweb_js_wrapper(Node)
libweb_js_wrapper(Document)
libweb_js_wrapper(DocumentType)
libweb_js_wrapper(Element)
libweb_js_wrapper(HTMLElement)
libweb_js_wrapper(HTMLImageElement)

View file

@ -423,6 +423,7 @@ void generate_implementation(const IDL::Interface& interface)
out() << "#include <LibWeb/DOM/Element.h>";
out() << "#include <LibWeb/DOM/HTMLElement.h>";
out() << "#include <LibWeb/DOM/EventListener.h>";
out() << "#include <LibWeb/Bindings/DocumentTypeWrapper.h>";
out() << "#include <LibWeb/Bindings/HTMLCanvasElementWrapper.h>";
out() << "#include <LibWeb/Bindings/HTMLImageElementWrapper.h>";
out() << "#include <LibWeb/Bindings/ImageDataWrapper.h>";

View file

@ -477,4 +477,20 @@ void Document::adopt_node(Node& subtree_root)
});
}
// Returns the document's DocumentType child node, as located by
// first_child_of_type<DocumentType>().
const DocumentType* Document::doctype() const
{
    const DocumentType* doctype_node = first_child_of_type<DocumentType>();
    return doctype_node;
}
// Returns the compatibility-mode name for this document: "BackCompat" when the
// document is in full quirks mode, "CSS1Compat" for every other mode
// (limited quirks included).
const String& Document::compat_mode() const
{
    // Function-local statics so a reference can be handed out safely.
    static String quirks_name = "BackCompat";
    static String standards_name = "CSS1Compat";

    return m_quirks_mode == QuirksMode::Yes ? quirks_name : standards_name;
}
}

View file

@ -43,6 +43,12 @@
namespace Web {
// Rendering mode of a document, as decided by the HTML parser from the DOCTYPE.
enum class QuirksMode {
    No = 0,      // Standards mode.
    Limited = 1, // Limited-quirks mode.
    Yes = 2,     // Full quirks mode.
};
class Document
: public ParentNode
, public NonElementParentNode<Document> {
@ -142,11 +148,15 @@ public:
void add_script_to_execute_as_soon_as_possible(Badge<HTMLScriptElement>, HTMLScriptElement&);
NonnullRefPtrVector<HTMLScriptElement> take_scripts_to_execute_as_soon_as_possible(Badge<HTMLDocumentParser>);
bool in_quirks_mode() const { return m_quirks_mode; }
void set_quirks_mode(bool mode) { m_quirks_mode = mode; }
QuirksMode mode() const { return m_quirks_mode; }
bool in_quirks_mode() const { return m_quirks_mode == QuirksMode::Yes; }
void set_quirks_mode(QuirksMode mode) { m_quirks_mode = mode; }
void adopt_node(Node&);
const DocumentType* doctype() const;
const String& compat_mode() const;
private:
virtual RefPtr<LayoutNode> create_layout_node(const StyleProperties* parent_style) override;
@ -175,7 +185,7 @@ private:
NonnullRefPtrVector<HTMLScriptElement> m_scripts_to_execute_when_parsing_has_finished;
NonnullRefPtrVector<HTMLScriptElement> m_scripts_to_execute_as_soon_as_possible;
bool m_quirks_mode { false };
QuirksMode m_quirks_mode { QuirksMode::No };
};
template<>

View file

@ -6,6 +6,9 @@ interface Document : Node {
ArrayFromVector querySelectorAll(DOMString selectors);
Element createElement(DOMString tagName);
readonly attribute DOMString compatMode;
readonly attribute DocumentType? doctype;
readonly attribute HTMLElement? body;
}

View file

@ -33,6 +33,8 @@ namespace Web {
class DocumentType final : public Node {
public:
using WrapperType = Bindings::DocumentTypeWrapper;
explicit DocumentType(Document&);
virtual ~DocumentType() override;
@ -41,8 +43,16 @@ public:
const String& name() const { return m_name; }
void set_name(const String& name) { m_name = name; }
const String& public_id() const { return m_public_id; }
void set_public_id(const String& public_id) { m_public_id = public_id; }
const String& system_id() const { return m_system_id; }
void set_system_id(const String& system_id) { m_system_id = system_id; }
private:
String m_name;
String m_public_id;
String m_system_id;
};
template<>

View file

@ -0,0 +1,7 @@
interface DocumentType : Node {
readonly attribute DOMString name;
readonly attribute DOMString publicId;
readonly attribute DOMString systemId;
}

View file

@ -30,6 +30,7 @@ namespace Web {
class CanvasRenderingContext2D;
class Document;
class DocumentType;
class Element;
class Event;
class EventHandler;
@ -72,11 +73,13 @@ class Text;
class Timer;
class Window;
class XMLHttpRequest;
enum class QuirksMode;
namespace Bindings {
class CanvasRenderingContext2DWrapper;
class DocumentWrapper;
class DocumentTypeWrapper;
class ElementWrapper;
class EventWrapper;
class EventListenerWrapper;

View file

@ -46,6 +46,64 @@
namespace Web {
// Public identifier prefixes that put a document into full quirks mode.
// HTMLDocumentParser::which_quirks_mode() compares each entry against the start
// of the DOCTYPE's public identifier, ignoring case, so the entries must be kept
// as prefixes (note the trailing "//").
// NOTE(review): the entries appear to mirror the list in the HTML Standard's
// "initial" insertion mode; keep the two in sync when editing.
static Vector<FlyString> s_quirks_public_ids = {
"+//Silmaril//dtd html Pro v0r11 19970101//",
"-//AS//DTD HTML 3.0 asWedit + extensions//",
"-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//",
"-//IETF//DTD HTML 2.0 Level 1//",
"-//IETF//DTD HTML 2.0 Level 2//",
"-//IETF//DTD HTML 2.0 Strict Level 1//",
"-//IETF//DTD HTML 2.0 Strict Level 2//",
"-//IETF//DTD HTML 2.0 Strict//",
"-//IETF//DTD HTML 2.0//",
"-//IETF//DTD HTML 2.1E//",
"-//IETF//DTD HTML 3.0//",
"-//IETF//DTD HTML 3.2 Final//",
"-//IETF//DTD HTML 3.2//",
"-//IETF//DTD HTML 3//",
"-//IETF//DTD HTML Level 0//",
"-//IETF//DTD HTML Level 1//",
"-//IETF//DTD HTML Level 2//",
"-//IETF//DTD HTML Level 3//",
"-//IETF//DTD HTML Strict Level 0//",
"-//IETF//DTD HTML Strict Level 1//",
"-//IETF//DTD HTML Strict Level 2//",
"-//IETF//DTD HTML Strict Level 3//",
"-//IETF//DTD HTML Strict//",
"-//IETF//DTD HTML//",
"-//Metrius//DTD Metrius Presentational//",
"-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//",
"-//Microsoft//DTD Internet Explorer 2.0 HTML//",
"-//Microsoft//DTD Internet Explorer 2.0 Tables//",
"-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//",
"-//Microsoft//DTD Internet Explorer 3.0 HTML//",
"-//Microsoft//DTD Internet Explorer 3.0 Tables//",
"-//Netscape Comm. Corp.//DTD HTML//",
"-//Netscape Comm. Corp.//DTD Strict HTML//",
"-//O'Reilly and Associates//DTD HTML 2.0//",
"-//O'Reilly and Associates//DTD HTML Extended 1.0//",
"-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//",
"-//SQ//DTD HTML 2.0 HoTMetaL + extensions//",
"-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//",
"-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//",
"-//Spyglass//DTD HTML 2.0 Extended//",
"-//Sun Microsystems Corp.//DTD HotJava HTML//",
"-//Sun Microsystems Corp.//DTD HotJava Strict HTML//",
"-//W3C//DTD HTML 3 1995-03-24//",
"-//W3C//DTD HTML 3.2 Draft//",
"-//W3C//DTD HTML 3.2 Final//",
"-//W3C//DTD HTML 3.2//",
"-//W3C//DTD HTML 3.2S Draft//",
"-//W3C//DTD HTML 4.0 Frameset//",
"-//W3C//DTD HTML 4.0 Transitional//",
"-//W3C//DTD HTML Experimental 19960712//",
"-//W3C//DTD HTML Experimental 970421//",
"-//W3C//DTD W3 HTML//",
"-//W3O//DTD W3 HTML 3.0//",
"-//WebTechs//DTD Mozilla HTML 2.0//",
"-//WebTechs//DTD Mozilla HTML//"
};
RefPtr<Document> parse_html_document(const StringView& data, const URL& url, const String& encoding)
{
HTMLDocumentParser parser(data, encoding);
@ -181,6 +239,60 @@ void HTMLDocumentParser::process_using_the_rules_for(InsertionMode mode, HTMLTok
}
}
// Classifies a DOCTYPE token into the quirks mode its document should use.
//
// Returns QuirksMode::Yes when the token forces quirks, is not named "html",
// or carries one of the known legacy public/system identifiers;
// QuirksMode::Limited for the XHTML 1.0 / HTML 4.01 Frameset and Transitional
// doctypes that only get limited quirks; QuirksMode::No otherwise.
// All identifier comparisons ignore case.
QuirksMode HTMLDocumentParser::which_quirks_mode(const HTMLToken& doctype_token) const
{
    const auto& doctype = doctype_token.m_doctype;

    // NOTE: The tokenizer puts the name into lower case for us.
    if (doctype.force_quirks || doctype.name.to_string() != "html")
        return QuirksMode::Yes;

    const auto public_id = doctype.public_identifier.to_string();
    const auto system_id = doctype.system_identifier.to_string();

    auto public_id_has_prefix = [&public_id](const StringView& prefix) {
        return public_id.starts_with(prefix, CaseSensitivity::CaseInsensitive);
    };

    // Identifiers that select full quirks mode only on a whole-string match.
    const bool exact_quirks_match = public_id.equals_ignoring_case("-//W3O//DTD W3 HTML Strict 3.0//EN//")
        || public_id.equals_ignoring_case("-/W3C/DTD HTML 4.0 Transitional/EN")
        || public_id.equals_ignoring_case("HTML")
        || system_id.equals_ignoring_case("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd");
    if (exact_quirks_match)
        return QuirksMode::Yes;

    // Legacy public identifiers that select full quirks mode on a prefix match.
    for (auto& quirky_prefix : s_quirks_public_ids) {
        if (public_id.starts_with(quirky_prefix, CaseSensitivity::CaseInsensitive))
            return QuirksMode::Yes;
    }

    // HTML 4.01 Frameset/Transitional: full quirks without a system identifier,
    // limited quirks with one. These prefixes cannot also match the XHTML 1.0
    // prefixes below, so deciding them first does not change the outcome.
    if (public_id_has_prefix("-//W3C//DTD HTML 4.01 Frameset//") || public_id_has_prefix("-//W3C//DTD HTML 4.01 Transitional//"))
        return doctype.missing_system_identifier ? QuirksMode::Yes : QuirksMode::Limited;

    if (public_id_has_prefix("-//W3C//DTD XHTML 1.0 Frameset//") || public_id_has_prefix("-//W3C//DTD XHTML 1.0 Transitional//"))
        return QuirksMode::Limited;

    return QuirksMode::No;
}
void HTMLDocumentParser::handle_initial(HTMLToken& token)
{
if (token.is_character() && token.is_parser_whitespace()) {
@ -196,14 +308,16 @@ void HTMLDocumentParser::handle_initial(HTMLToken& token)
if (token.is_doctype()) {
auto doctype = adopt(*new DocumentType(document()));
doctype->set_name(token.m_doctype.name.to_string());
doctype->set_public_id(token.m_doctype.public_identifier.to_string());
doctype->set_system_id(token.m_doctype.system_identifier.to_string());
document().append_child(move(doctype));
document().set_quirks_mode(token.m_doctype.force_quirks);
document().set_quirks_mode(which_quirks_mode(token));
m_insertion_mode = InsertionMode::BeforeHTML;
return;
}
PARSE_ERROR();
document().set_quirks_mode(true);
document().set_quirks_mode(QuirksMode::Yes);
m_insertion_mode = InsertionMode::BeforeHTML;
process_using_the_rules_for(InsertionMode::BeforeHTML, token);
}
@ -2612,7 +2726,7 @@ NonnullRefPtrVector<Node> HTMLDocumentParser::parse_html_fragment(Element& conte
{
HTMLDocumentParser parser(markup, "utf-8");
parser.m_parsing_fragment = true;
parser.document().set_quirks_mode(context_element.document().in_quirks_mode());
parser.document().set_quirks_mode(context_element.document().mode());
if (context_element.tag_name().is_one_of(HTML::TagNames::title, HTML::TagNames::textarea)) {
parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::RCDATA);

View file

@ -85,6 +85,8 @@ public:
private:
const char* insertion_mode_name() const;
QuirksMode which_quirks_mode(const HTMLToken&) const;
void handle_initial(HTMLToken&);
void handle_before_html(HTMLToken&);
void handle_before_head(HTMLToken&);

View file

@ -174,9 +174,14 @@ private:
// Type::DOCTYPE
// Payload of a DOCTYPE token: three text fields, each paired with a "missing"
// flag, plus the force-quirks flag.
struct {
// NOTE: "Missing" is a distinct state from the empty string.
// Each missing_* flag starts out true; the tokenizer sets it to false when it
// begins collecting the corresponding field, so an identifier written as ""
// ends up empty but not missing.
StringBuilder name;
bool missing_name { true };
StringBuilder public_identifier;
bool missing_public_identifier { true };
StringBuilder system_identifier;
bool missing_system_identifier { true };
// When set, the HTML document parser puts the document into full quirks mode
// without inspecting the name or identifiers.
bool force_quirks { false };
} m_doctype;

View file

@ -455,6 +455,7 @@ _StartOfFunction:
{
create_new_token(HTMLToken::Type::DOCTYPE);
m_current_token.m_doctype.name.append(tolower(current_input_character.value()));
m_current_token.m_doctype.missing_name = false;
SWITCH_TO(DOCTYPEName);
}
ON(0)
@ -462,6 +463,7 @@ _StartOfFunction:
PARSE_ERROR();
create_new_token(HTMLToken::Type::DOCTYPE);
m_current_token.m_doctype.name.append_codepoint(0xFFFD);
m_current_token.m_doctype.missing_name = false;
SWITCH_TO(DOCTYPEName);
}
ON('>')
@ -483,6 +485,7 @@ _StartOfFunction:
{
create_new_token(HTMLToken::Type::DOCTYPE);
m_current_token.m_doctype.name.append_codepoint(current_input_character.value());
m_current_token.m_doctype.missing_name = false;
SWITCH_TO(DOCTYPEName);
}
}
@ -566,12 +569,14 @@ _StartOfFunction:
{
PARSE_ERROR();
m_current_token.m_doctype.public_identifier.clear();
m_current_token.m_doctype.missing_public_identifier = false;
SWITCH_TO(DOCTYPEPublicIdentifierDoubleQuoted);
}
ON('\'')
{
PARSE_ERROR();
m_current_token.m_doctype.public_identifier.clear();
m_current_token.m_doctype.missing_public_identifier = false;
SWITCH_TO(DOCTYPEPublicIdentifierSingleQuoted);
}
ON('>')
@ -606,12 +611,14 @@ _StartOfFunction:
{
PARSE_ERROR();
m_current_token.m_doctype.system_identifier.clear();
m_current_token.m_doctype.missing_system_identifier = false;
SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted);
}
ON('\'')
{
PARSE_ERROR();
m_current_token.m_doctype.system_identifier.clear();
m_current_token.m_doctype.missing_system_identifier = false;
SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted);
}
ON('>')
@ -645,11 +652,13 @@ _StartOfFunction:
ON('"')
{
m_current_token.m_doctype.public_identifier.clear();
m_current_token.m_doctype.missing_public_identifier = false;
SWITCH_TO(DOCTYPEPublicIdentifierDoubleQuoted);
}
ON('\'')
{
m_current_token.m_doctype.public_identifier.clear();
m_current_token.m_doctype.missing_public_identifier = false;
SWITCH_TO(DOCTYPEPublicIdentifierSingleQuoted);
}
ON('>')
@ -683,11 +692,13 @@ _StartOfFunction:
ON('"')
{
m_current_token.m_doctype.system_identifier.clear();
m_current_token.m_doctype.missing_system_identifier = false;
SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted);
}
ON('\'')
{
m_current_token.m_doctype.system_identifier.clear();
m_current_token.m_doctype.missing_system_identifier = false;
SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted);
}
ON('>')
@ -858,12 +869,14 @@ _StartOfFunction:
{
PARSE_ERROR();
m_current_token.m_doctype.system_identifier.clear();
m_current_token.m_doctype.missing_system_identifier = false;
SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted);
}
ON('\'')
{
PARSE_ERROR();
m_current_token.m_doctype.system_identifier.clear();
m_current_token.m_doctype.missing_system_identifier = false;
SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted);
}
ON_EOF
@ -895,11 +908,13 @@ _StartOfFunction:
ON('"')
{
m_current_token.m_doctype.system_identifier.clear();
m_current_token.m_doctype.missing_system_identifier = false;
SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted);
}
ON('\'')
{
m_current_token.m_doctype.system_identifier.clear();
m_current_token.m_doctype.missing_system_identifier = false;
SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted);
}
ON_EOF