Browse Source

LibWeb: Let Resource figure out its own encoding and MIME type

Also, if the request URL is a data: URL, use the MIME type from the URL
itself if available. This makes it possible to load arbitrary MIME type
data: URLs in the browser :^)
Andreas Kling 5 years ago
parent
commit
f88146c7c9

+ 2 - 46
Libraries/LibWeb/Loader/FrameLoader.cpp

@@ -118,37 +118,6 @@ static RefPtr<Document> create_gemini_document(const ByteBuffer& data, const URL
     return parse_html_document(markdown_document->render_to_html(), url);
 }
 
-String encoding_from_content_type(const String& content_type)
-{
-    auto offset = content_type.index_of("charset=");
-    if (offset.has_value())
-        return content_type.substring(offset.value() + 8, content_type.length() - offset.value() - 8).to_lowercase();
-
-    return "utf-8";
-}
-
-String mime_type_from_content_type(const String& content_type)
-{
-    auto offset = content_type.index_of(";");
-    if (offset.has_value())
-        return content_type.substring(0, offset.value()).to_lowercase();
-
-    return content_type;
-}
-
-static String guess_mime_type_based_on_filename(const URL& url)
-{
-    if (url.path().ends_with(".png"))
-        return "image/png";
-    if (url.path().ends_with(".gif"))
-        return "image/gif";
-    if (url.path().ends_with(".md"))
-        return "text/markdown";
-    if (url.path().ends_with(".html") || url.path().ends_with(".htm"))
-        return "text/html";
-    return "text/plain";
-}
-
 RefPtr<Document> FrameLoader::create_document_from_mime_type(const ByteBuffer& data, const URL& url, const String& mime_type, const String& encoding)
 {
     if (mime_type.starts_with("image/"))
@@ -250,21 +219,8 @@ void FrameLoader::resource_did_load()
         return;
     }
 
-    String encoding = "utf-8";
-    String mime_type;
-
-    auto content_type = resource()->response_headers().get("Content-Type");
-    if (content_type.has_value()) {
-        dbg() << "Content-Type header: _" << content_type.value() << "_";
-        encoding = encoding_from_content_type(content_type.value());
-        mime_type = mime_type_from_content_type(content_type.value());
-    } else {
-        dbg() << "No Content-Type header to go on! Guessing based on filename...";
-        mime_type = guess_mime_type_based_on_filename(url);
-    }
-
-    dbg() << "I believe this content has MIME type '" << mime_type << "', encoding '" << encoding << "'";
-    auto document = create_document_from_mime_type(resource()->encoded_data(), url, mime_type, encoding);
+    dbg() << "I believe this content has MIME type '" << resource()->mime_type() << "', encoding '" << resource()->encoding() << "'";
+    auto document = create_document_from_mime_type(resource()->encoded_data(), url, resource()->mime_type(), resource()->encoding());
     ASSERT(document);
     frame().set_document(document);
 

+ 46 - 0
Libraries/LibWeb/Loader/Resource.cpp

@@ -59,6 +59,37 @@ void Resource::for_each_client(Function<void(ResourceClient&)> callback)
     }
 }
 
+String encoding_from_content_type(const String& content_type) // Extracts the lowercased charset parameter from a Content-Type header value, defaulting to "utf-8".
+{
+    auto offset = content_type.to_lowercase().index_of("charset="); // Parameter names are case-insensitive, so "Charset=" must match too; lowercasing is assumed to keep offsets valid.
+    if (offset.has_value())
+        return content_type.substring(offset.value() + 8, content_type.length() - offset.value() - 8).to_lowercase(); // FIXME: Also keeps any quotes or further parameters that follow the charset value.
+
+    return "utf-8"; // No charset parameter present.
+}
+
+String mime_type_from_content_type(const String& content_type) // Returns the lowercased MIME type portion (everything before the first ';') of a Content-Type header value.
+{
+    auto offset = content_type.index_of(";");
+    if (offset.has_value())
+        return content_type.substring(0, offset.value()).to_lowercase();
+
+    return content_type.to_lowercase(); // MIME types are case-insensitive; normalize here exactly like the parameterized branch does.
+}
+
+static String guess_mime_type_based_on_filename(const URL& url) // Picks a MIME type from the suffix of the URL's path.
+{
+    if (url.path().ends_with(".png"))
+        return "image/png";
+    if (url.path().ends_with(".gif"))
+        return "image/gif";
+    if (url.path().ends_with(".md"))
+        return "text/markdown";
+    if (url.path().ends_with(".html") || url.path().ends_with(".htm"))
+        return "text/html";
+    return "text/plain"; // Any path with an unrecognized suffix is treated as plain text.
+}
+
 void Resource::did_load(Badge<ResourceLoader>, const ByteBuffer& data, const HashMap<String, String, CaseInsensitiveStringTraits>& headers)
 {
     ASSERT(!m_loaded);
@@ -66,6 +97,21 @@ void Resource::did_load(Badge<ResourceLoader>, const ByteBuffer& data, const Has
     m_response_headers = headers;
     m_loaded = true;
 
+    auto content_type = headers.get("Content-Type");
+    if (content_type.has_value()) {
+        dbg() << "Content-Type header: _" << content_type.value() << "_";
+        m_encoding = encoding_from_content_type(content_type.value());
+        m_mime_type = mime_type_from_content_type(content_type.value());
+    } else if (url().protocol() == "data" && !url().data_mime_type().is_empty()) {
+        dbg() << "This is a data URL with mime-type _" << url().data_mime_type() << "_";
+        m_encoding = "utf-8"; // FIXME: This doesn't seem nice.
+        m_mime_type = url().data_mime_type();
+    } else {
+        dbg() << "No Content-Type header to go on! Guessing based on filename...";
+        m_encoding = "utf-8"; // FIXME: This doesn't seem nice.
+        m_mime_type = guess_mime_type_based_on_filename(url());
+    }
+
     for_each_client([](auto& client) {
         client.resource_did_load();
     });

+ 5 - 0
Libraries/LibWeb/Loader/Resource.h

@@ -72,6 +72,9 @@ public:
     void register_client(Badge<ResourceClient>, ResourceClient&);
     void unregister_client(Badge<ResourceClient>, ResourceClient&);
 
+    const String& encoding() const { return m_encoding; } // Text encoding chosen in did_load(): the Content-Type charset, or "utf-8" otherwise.
+    const String& mime_type() const { return m_mime_type; } // MIME type chosen in did_load(): from Content-Type, the data: URL, or a filename-based guess.
+
     void for_each_client(Function<void(ResourceClient&)>);
 
     void did_load(Badge<ResourceLoader>, const ByteBuffer& data, const HashMap<String, String, CaseInsensitiveStringTraits>& headers);
@@ -87,6 +90,8 @@ private:
     bool m_loaded { false };
     bool m_failed { false };
     String m_error;
+    String m_encoding;
+    String m_mime_type;
     HashMap<String, String, CaseInsensitiveStringTraits> m_response_headers;
     HashTable<ResourceClient*> m_clients;
 };