Browse Source

LibWeb: Use standardized encoding names, add encoding attribute to document

Luke 4 years ago
parent
commit
ed2689c00a

+ 3 - 1
Libraries/LibWeb/Bindings/WindowObject.cpp

@@ -283,7 +283,9 @@ JS_DEFINE_NATIVE_FUNCTION(WindowObject::atob)
     auto decoded = decode_base64(StringView(string));
     auto decoded = decode_base64(StringView(string));
 
 
     // decode_base64() returns a byte string. LibJS uses UTF-8 for strings. Use Latin1Decoder to convert bytes 128-255 to UTF-8.
     // decode_base64() returns a byte string. LibJS uses UTF-8 for strings. Use Latin1Decoder to convert bytes 128-255 to UTF-8.
-    return JS::js_string(vm, TextCodec::decoder_for("iso-8859-1")->to_utf8(decoded));
+    auto decoder = TextCodec::decoder_for("windows-1252");
+    ASSERT(decoder);
+    return JS::js_string(vm, decoder->to_utf8(decoded));
 }
 }
 
 
 JS_DEFINE_NATIVE_FUNCTION(WindowObject::btoa)
 JS_DEFINE_NATIVE_FUNCTION(WindowObject::btoa)

+ 9 - 0
Libraries/LibWeb/DOM/Document.h

@@ -199,6 +199,14 @@ public:
     const String& content_type() const { return m_content_type; }
     const String& content_type() const { return m_content_type; }
     void set_content_type(const String& content_type) { m_content_type = content_type; }
     void set_content_type(const String& content_type) { m_content_type = content_type; }
 
 
+    const String& encoding() const { return m_encoding; } // Returns the encoding name currently stored in m_encoding.
+    void set_encoding(const String& encoding) { m_encoding = encoding; } // Stores the name exactly as given; no validation or normalization happens here.
+
+    // NOTE: These are intended for the JS bindings; all three are plain aliases that forward to encoding().
+    const String& character_set() const { return encoding(); }
+    const String& charset() const { return encoding(); }
+    const String& input_encoding() const { return encoding(); }
+
     const NonnullRefPtr<DOMImplementation> implementation() { return m_implementation; }
     const NonnullRefPtr<DOMImplementation> implementation() { return m_implementation; }
 
 
 private:
 private:
@@ -262,6 +270,7 @@ private:
 
 
     String m_ready_state { "loading" };
     String m_ready_state { "loading" };
     String m_content_type { "application/xml" };
     String m_content_type { "application/xml" };
+    String m_encoding { "UTF-8" };
 
 
     NonnullRefPtr<DOMImplementation> m_implementation;
     NonnullRefPtr<DOMImplementation> m_implementation;
 };
 };

+ 3 - 0
Libraries/LibWeb/DOM/Document.idl

@@ -2,6 +2,9 @@ interface Document : Node {
 
 
     readonly attribute DOMImplementation implementation;
     readonly attribute DOMImplementation implementation;
 
 
+    readonly attribute DOMString characterSet;
+    readonly attribute DOMString charset;
+    readonly attribute DOMString inputEncoding;
     readonly attribute DOMString contentType;
     readonly attribute DOMString contentType;
 
 
     Element? getElementById(DOMString id);
     Element? getElementById(DOMString id);

+ 4 - 1
Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp

@@ -27,6 +27,7 @@
 //#define PARSER_DEBUG
 //#define PARSER_DEBUG
 
 
 #include <AK/Utf32View.h>
 #include <AK/Utf32View.h>
+#include <LibTextCodec/Decoder.h>
 #include <LibWeb/DOM/Comment.h>
 #include <LibWeb/DOM/Comment.h>
 #include <LibWeb/DOM/Document.h>
 #include <LibWeb/DOM/Document.h>
 #include <LibWeb/DOM/DocumentType.h>
 #include <LibWeb/DOM/DocumentType.h>
@@ -110,7 +111,7 @@ static Vector<FlyString> s_quirks_public_ids = {
 
 
 // Parses `data` as an HTML document for `url`: builds a temporary HTMLDocumentParser, runs it, and returns the parser's document.
 RefPtr<DOM::Document> parse_html_document(const StringView& data, const URL& url, const String& encoding)
 RefPtr<DOM::Document> parse_html_document(const StringView& data, const URL& url, const String& encoding)
 {
 {
-    HTMLDocumentParser parser(data, encoding);
+    HTMLDocumentParser parser(data, TextCodec::get_standardized_encoding(encoding)); // The parser receives the standardized form of the caller-supplied encoding name, not the raw string.
     parser.run(url);
     parser.run(url);
     return parser.document();
     return parser.document();
 }
 }
@@ -119,12 +120,14 @@ HTMLDocumentParser::HTMLDocumentParser(const StringView& input, const String& en
     : m_tokenizer(input, encoding)
     : m_tokenizer(input, encoding)
 {
 {
     m_document = DOM::Document::create();
     m_document = DOM::Document::create();
+    m_document->set_encoding(encoding);
 }
 }
 
 
 // Constructs a parser that tokenizes `input` using `encoding` and targets `existing_document` rather than creating a new document.
 HTMLDocumentParser::HTMLDocumentParser(const StringView& input, const String& encoding, DOM::Document& existing_document)
 HTMLDocumentParser::HTMLDocumentParser(const StringView& input, const String& encoding, DOM::Document& existing_document)
     : m_tokenizer(input, encoding)
     : m_tokenizer(input, encoding)
     , m_document(existing_document)
     , m_document(existing_document)
 {
 {
+    m_document->set_encoding(encoding); // Records the name on the document as given. NOTE(review): no standardization happens in this constructor; confirm callers pass a standardized name.
 }
 }
 
 
 HTMLDocumentParser::~HTMLDocumentParser()
 HTMLDocumentParser::~HTMLDocumentParser()

+ 1 - 3
Libraries/LibWeb/Loader/FrameLoader.cpp

@@ -135,9 +135,7 @@ RefPtr<DOM::Document> FrameLoader::create_document_from_mime_type(const ByteBuff
     RefPtr<DOM::Document> document;
     RefPtr<DOM::Document> document;
 
 
     if (mime_type == "text/html" || mime_type == "image/svg+xml") {
     if (mime_type == "text/html" || mime_type == "image/svg+xml") {
-        HTML::HTMLDocumentParser parser(data, encoding);
-        parser.run(url);
-        document = parser.document();
+        document = HTML::parse_html_document(data, url, encoding);
     } else if (mime_type.starts_with("image/")) {
     } else if (mime_type.starts_with("image/")) {
         document = create_image_document(data, url);
         document = create_image_document(data, url);
     } else if (mime_type == "text/plain") {
     } else if (mime_type == "text/plain") {