Browse code

LibWeb/MimeSniff: Port MimeType to new String

Linus Groh, 2 years ago
Parent
Current commit
2d7ce38ee2

+ 1 - 1
Userland/Libraries/LibWeb/Fetch/Body.cpp

@@ -108,7 +108,7 @@ WebIDL::ExceptionOr<JS::Value> package_data(JS::Realm& realm, ByteBuffer bytes,
     case PackageDataType::Blob: {
         // Return a Blob whose contents are bytes and type attribute is mimeType.
         // NOTE: If extracting the mime type returns failure, other browsers set it to an empty string - not sure if that's spec'd.
-        auto mime_type_string = mime_type.has_value() ? TRY_OR_THROW_OOM(vm, String::from_deprecated_string(mime_type->serialized())) : String {};
+        auto mime_type_string = mime_type.has_value() ? TRY_OR_THROW_OOM(vm, mime_type->serialized()) : String {};
         return TRY(FileAPI::Blob::create(realm, move(bytes), move(mime_type_string)));
     }
     case PackageDataType::FormData:

+ 5 - 5
Userland/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Headers.cpp

@@ -322,7 +322,7 @@ Optional<MimeSniff::MimeType> HeaderList::extract_mime_type() const
     // 6. For each value of values:
     for (auto const& value : *values) {
         // 1. Let temporaryMimeType be the result of parsing value.
-        auto temporary_mime_type = MimeSniff::MimeType::parse(value);
+        auto temporary_mime_type = MimeSniff::MimeType::parse(value).release_value_but_fixme_should_propagate_errors();
 
         // 2. If temporaryMimeType is failure or its essence is "*/*", then continue.
         if (!temporary_mime_type.has_value() || temporary_mime_type->essence() == "*/*"sv)
@@ -339,14 +339,14 @@ Optional<MimeSniff::MimeType> HeaderList::extract_mime_type() const
             // 2. If mimeType’s parameters["charset"] exists, then set charset to mimeType’s parameters["charset"].
             auto it = mime_type->parameters().find("charset"sv);
             if (it != mime_type->parameters().end())
-                charset = String::from_deprecated_string(it->value).release_value_but_fixme_should_propagate_errors();
+                charset = it->value;
 
             // 3. Set essence to mimeType’s essence.
-            essence = String::from_deprecated_string(mime_type->essence()).release_value_but_fixme_should_propagate_errors();
+            essence = mime_type->essence();
         }
         // 5. Otherwise, if mimeType’s parameters["charset"] does not exist, and charset is non-null, set mimeType’s parameters["charset"] to charset.
         else if (!mime_type->parameters().contains("charset"sv) && charset.has_value()) {
-            mime_type->set_parameter("charset"sv, charset->to_deprecated_string());
+            mime_type->set_parameter("charset"_string.release_value_but_fixme_should_propagate_errors(), charset.release_value()).release_value_but_fixme_should_propagate_errors();
         }
     }
 
@@ -457,7 +457,7 @@ bool is_cors_safelisted_request_header(Header const& header)
             return false;
 
         // 2. Let mimeType be the result of parsing the result of isomorphic decoding value.
-        auto mime_type = MimeSniff::MimeType::parse(StringView { value });
+        auto mime_type = MimeSniff::MimeType::parse(StringView { value }).release_value_but_fixme_should_propagate_errors();
 
         // 3. If mimeType is failure, then return false.
         if (!mime_type.has_value())

+ 1 - 1
Userland/Libraries/LibWeb/Fetch/Infrastructure/MimeTypeBlocking.cpp

@@ -24,7 +24,7 @@ RequestOrResponseBlocking should_response_to_request_be_blocked_due_to_its_mime_
     // 4. If destination is script-like and one of the following is true, then return blocked:
     if (request.destination_is_script_like() && (
             // - mimeType’s essence starts with "audio/", "image/", or "video/".
-            any_of(Array { "audio/"sv, "image/"sv, "video/"sv }, [&](auto prefix) { return mime_type->essence().starts_with(prefix); })
+            any_of(Array { "audio/"sv, "image/"sv, "video/"sv }, [&](auto prefix) { return mime_type->essence().starts_with_bytes(prefix); })
             // - mimeType’s essence is "text/csv".
             || mime_type->essence() == "text/csv"sv)) {
         return RequestOrResponseBlocking::Blocked;

+ 2 - 2
Userland/Libraries/LibWeb/HTML/HTMLObjectElement.cpp

@@ -218,12 +218,12 @@ void HTMLObjectElement::resource_did_load()
 
 static bool is_xml_mime_type(StringView resource_type)
 {
-    auto mime_type = MimeSniff::MimeType::parse(resource_type);
+    auto mime_type = MimeSniff::MimeType::parse(resource_type).release_value_but_fixme_should_propagate_errors();
     if (!mime_type.has_value())
         return false;
 
     // An XML MIME type is any MIME type whose subtype ends in "+xml" or whose essence is "text/xml" or "application/xml". [RFC7303]
-    if (mime_type->subtype().ends_with("+xml"sv))
+    if (mime_type->subtype().ends_with_bytes("+xml"sv))
         return true;
 
     return mime_type->essence().is_one_of("text/xml"sv, "application/xml"sv);

+ 50 - 42
Userland/Libraries/LibWeb/MimeSniff/MimeType.cpp

@@ -11,15 +11,16 @@
 #include <AK/String.h>
 #include <AK/StringBuilder.h>
 #include <LibWeb/Fetch/Infrastructure/HTTP.h>
+#include <LibWeb/Infra/Strings.h>
 #include <LibWeb/MimeSniff/MimeType.h>
 
 namespace Web::MimeSniff {
 
 // https://mimesniff.spec.whatwg.org/#javascript-mime-type-essence-match
-bool is_javascript_mime_type_essence_match(DeprecatedString const& string)
+bool is_javascript_mime_type_essence_match(StringView string)
 {
     // NOTE: The mime type parser automatically lowercases the essence.
-    auto type = MimeType::parse(string);
+    auto type = MimeType::parse(string).release_value_but_fixme_should_propagate_errors();
     if (!type.has_value())
         return false;
     return type->is_javascript();
@@ -38,7 +39,7 @@ static bool contains_only_http_quoted_string_token_code_points(StringView string
     return true;
 }
 
-MimeType::MimeType(DeprecatedString type, DeprecatedString subtype)
+MimeType::MimeType(String type, String subtype)
     : m_type(move(type))
     , m_subtype(move(subtype))
 {
@@ -63,8 +64,15 @@ static bool contains_only_http_token_code_points(StringView string)
     return true;
 }
 
+ErrorOr<MimeType> MimeType::create(String type, String value)
+{
+    auto mime_type = MimeType { move(type), move(value) };
+    mime_type.m_cached_essence = TRY(String::formatted("{}/{}", mime_type.m_type, mime_type.m_subtype));
+    return mime_type;
+}
+
 // https://mimesniff.spec.whatwg.org/#parse-a-mime-type
-Optional<MimeType> MimeType::parse(StringView string)
+ErrorOr<Optional<MimeType>> MimeType::parse(StringView string)
 {
     // 1. Remove any leading and trailing HTTP whitespace from input.
     auto trimmed_string = string.trim(Fetch::Infrastructure::HTTP_WHITESPACE, TrimMode::Both);
@@ -77,11 +85,11 @@ Optional<MimeType> MimeType::parse(StringView string)
 
     // 4. If type is the empty string or does not solely contain HTTP token code points, then return failure.
     if (type.is_empty() || !contains_only_http_token_code_points(type))
-        return {};
+        return OptionalNone {};
 
     // 5. If position is past the end of input, then return failure.
     if (lexer.is_eof())
-        return {};
+        return OptionalNone {};
 
     // 6. Advance position by 1. (This skips past U+002F (/).)
     lexer.ignore(1);
@@ -94,10 +102,10 @@ Optional<MimeType> MimeType::parse(StringView string)
 
     // 9. If subtype is the empty string or does not solely contain HTTP token code points, then return failure.
     if (subtype.is_empty() || !contains_only_http_token_code_points(subtype))
-        return {};
+        return OptionalNone {};
 
     // 10. Let mimeType be a new MIME type record whose type is type, in ASCII lowercase, and subtype is subtype, in ASCII lowercase.
-    auto mime_type = MimeType(type.to_lowercase_string(), subtype.to_lowercase_string());
+    auto mime_type = TRY(MimeType::create(TRY(Infra::to_ascii_lower_case(type)), TRY(Infra::to_ascii_lower_case(subtype))));
 
     // 11. While position is not past the end of input:
     while (!lexer.is_eof()) {
@@ -108,13 +116,12 @@ Optional<MimeType> MimeType::parse(StringView string)
         lexer.ignore_while(is_any_of(Fetch::Infrastructure::HTTP_WHITESPACE));
 
         // 3. Let parameterName be the result of collecting a sequence of code points that are not U+003B (;) or U+003D (=) from input, given position.
-        auto parameter_name = lexer.consume_until([](char ch) {
+        auto parameter_name_view = lexer.consume_until([](char ch) {
             return ch == ';' || ch == '=';
         });
 
         // 4. Set parameterName to parameterName, in ASCII lowercase.
-        // NOTE: Reassigning to parameter_name here causes a UAF when trying to use parameter_name down the road.
-        auto lowercase_parameter_name = parameter_name.to_lowercase_string();
+        auto parameter_name = TRY(Infra::to_ascii_lower_case(parameter_name_view));
 
         // 5. If position is not past the end of input, then:
         if (!lexer.is_eof()) {
@@ -132,12 +139,12 @@ Optional<MimeType> MimeType::parse(StringView string)
             break;
 
         // 7. Let parameterValue be null.
-        DeprecatedString parameter_value;
+        String parameter_value;
 
         // 8. If the code point at position within input is U+0022 ("), then:
         if (lexer.peek() == '"') {
             // 1. Set parameterValue to the result of collecting an HTTP quoted string from input, given position and the extract-value flag.
-            parameter_value = Fetch::Infrastructure::collect_an_http_quoted_string(lexer, Fetch::Infrastructure::HttpQuotedStringExtractValue::Yes).release_value_but_fixme_should_propagate_errors().to_deprecated_string();
+            parameter_value = TRY(Fetch::Infrastructure::collect_an_http_quoted_string(lexer, Fetch::Infrastructure::HttpQuotedStringExtractValue::Yes));
 
             // 2. Collect a sequence of code points that are not U+003B (;) from input, given position.
             lexer.ignore_until(';');
@@ -146,10 +153,10 @@ Optional<MimeType> MimeType::parse(StringView string)
         // 9. Otherwise:
         else {
             // 1. Set parameterValue to the result of collecting a sequence of code points that are not U+003B (;) from input, given position.
-            parameter_value = lexer.consume_until(';');
+            parameter_value = TRY(String::from_utf8(lexer.consume_until(';')));
 
             // 2. Remove any trailing HTTP whitespace from parameterValue.
-            parameter_value = parameter_value.trim(Fetch::Infrastructure::HTTP_WHITESPACE, TrimMode::Right);
+            parameter_value = TRY(parameter_value.trim(Fetch::Infrastructure::HTTP_WHITESPACE, TrimMode::Right));
 
             // 3. If parameterValue is the empty string, then continue.
             if (parameter_value.is_empty())
@@ -157,77 +164,78 @@ Optional<MimeType> MimeType::parse(StringView string)
         }
 
         // 10. If all of the following are true
-        //       - parameterName is not the empty string
-        //       - parameterName solely contains HTTP token code points
-        //       - parameterValue solely contains HTTP quoted-string token code points
-        //       - mimeType’s parameters[parameterName] does not exist
-        //     then set mimeType’s parameters[parameterName] to parameterValue.
-        if (!parameter_name.is_empty()
-            && contains_only_http_token_code_points(lowercase_parameter_name)
+        if (
+            // - parameterName is not the empty string
+            !parameter_name.is_empty()
+            // - parameterName solely contains HTTP token code points
+            && contains_only_http_token_code_points(parameter_name)
+            // - parameterValue solely contains HTTP quoted-string token code points
             && contains_only_http_quoted_string_token_code_points(parameter_value)
-            && !mime_type.m_parameters.contains(lowercase_parameter_name)) {
-            mime_type.m_parameters.set(lowercase_parameter_name, parameter_value);
+            // - mimeType’s parameters[parameterName] does not exist
+            && !mime_type.m_parameters.contains(parameter_name)) {
+            // then set mimeType’s parameters[parameterName] to parameterValue.
+            TRY(mime_type.m_parameters.try_set(move(parameter_name), move(parameter_value)));
         }
     }
 
     // 12. Return mimeType.
-    return Optional<MimeType> { move(mime_type) };
+    return mime_type;
 }
 
 // https://mimesniff.spec.whatwg.org/#mime-type-essence
-DeprecatedString MimeType::essence() const
+String const& MimeType::essence() const
 {
     // The essence of a MIME type mimeType is mimeType’s type, followed by U+002F (/), followed by mimeType’s subtype.
-    // FIXME: I believe this can easily be cached as I don't think anything directly changes the type and subtype.
-    return DeprecatedString::formatted("{}/{}", m_type, m_subtype);
+    return m_cached_essence;
 }
 
 // https://mimesniff.spec.whatwg.org/#serialize-a-mime-type
-DeprecatedString MimeType::serialized() const
+ErrorOr<String> MimeType::serialized() const
 {
     // 1. Let serialization be the concatenation of mimeType’s type, U+002F (/), and mimeType’s subtype.
     StringBuilder serialization;
-    serialization.append(m_type);
-    serialization.append('/');
-    serialization.append(m_subtype);
+    TRY(serialization.try_append(m_type));
+    TRY(serialization.try_append('/'));
+    TRY(serialization.try_append(m_subtype));
 
     // 2. For each name → value of mimeType’s parameters:
     for (auto [name, value] : m_parameters) {
         // 1. Append U+003B (;) to serialization.
-        serialization.append(';');
+        TRY(serialization.try_append(';'));
 
         // 2. Append name to serialization.
-        serialization.append(name);
+        TRY(serialization.try_append(name));
 
         // 3. Append U+003D (=) to serialization.
-        serialization.append('=');
+        TRY(serialization.try_append('='));
 
         // 4. If value does not solely contain HTTP token code points or value is the empty string, then:
         if (!contains_only_http_token_code_points(value) || value.is_empty()) {
             // 1. Precede each occurrence of U+0022 (") or U+005C (\) in value with U+005C (\).
-            value = value.replace("\\"sv, "\\\\"sv, ReplaceMode::All);
-            value = value.replace("\""sv, "\\\""sv, ReplaceMode::All);
+            value = TRY(value.replace("\\"sv, "\\\\"sv, ReplaceMode::All));
+            value = TRY(value.replace("\""sv, "\\\""sv, ReplaceMode::All));
 
             // 2. Prepend U+0022 (") to value.
             // 3. Append U+0022 (") to value.
-            value = DeprecatedString::formatted("\"{}\"", value);
+            value = TRY(String::formatted("\"{}\"", value));
         }
 
         // 5. Append value to serialization.
-        serialization.append(value);
+        TRY(serialization.try_append(value));
     }
 
     // 3. Return serialization.
-    return serialization.to_deprecated_string();
+    return serialization.to_string();
 }
 
-void MimeType::set_parameter(DeprecatedString const& name, DeprecatedString const& value)
+ErrorOr<void> MimeType::set_parameter(String name, String value)
 {
     // https://mimesniff.spec.whatwg.org/#parameters
     // A MIME type’s parameters is an ordered map whose keys are ASCII strings and values are strings limited to HTTP quoted-string token code points.
     VERIFY(contains_only_http_quoted_string_token_code_points(name));
     VERIFY(contains_only_http_quoted_string_token_code_points(value));
-    m_parameters.set(name, value);
+    TRY(m_parameters.try_set(move(name), move(value)));
+    return {};
 }
 
 // https://mimesniff.spec.whatwg.org/#javascript-mime-type

+ 18 - 13
Userland/Libraries/LibWeb/MimeSniff/MimeType.h

@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2022, Luke Wilde <lukew@serenityos.org>
- * Copyright (c) 2022, Linus Groh <linusg@serenityos.org>
+ * Copyright (c) 2022-2023, Linus Groh <linusg@serenityos.org>
  *
  * SPDX-License-Identifier: BSD-2-Clause
  */
@@ -12,39 +12,44 @@
 
 namespace Web::MimeSniff {
 
-bool is_javascript_mime_type_essence_match(DeprecatedString const&);
+bool is_javascript_mime_type_essence_match(StringView);
 
 // https://mimesniff.spec.whatwg.org/#mime-type
 class MimeType {
 public:
-    static Optional<MimeType> parse(StringView);
+    static ErrorOr<MimeType> create(String type, String subtype);
+    static ErrorOr<Optional<MimeType>> parse(StringView);
 
-    MimeType(DeprecatedString type, DeprecatedString subtype);
     ~MimeType();
 
-    DeprecatedString const& type() const { return m_type; }
-    DeprecatedString const& subtype() const { return m_subtype; }
-    OrderedHashMap<DeprecatedString, DeprecatedString> const& parameters() const { return m_parameters; }
+    String const& type() const { return m_type; }
+    String const& subtype() const { return m_subtype; }
+    OrderedHashMap<String, String> const& parameters() const { return m_parameters; }
 
     bool is_javascript() const;
 
-    void set_parameter(DeprecatedString const& name, DeprecatedString const& value);
+    ErrorOr<void> set_parameter(String name, String value);
 
-    DeprecatedString essence() const;
-    DeprecatedString serialized() const;
+    String const& essence() const;
+    ErrorOr<String> serialized() const;
 
 private:
+    MimeType(String type, String subtype);
+
     // https://mimesniff.spec.whatwg.org/#type
     // A MIME type’s type is a non-empty ASCII string.
-    DeprecatedString m_type;
+    String m_type;
 
     // https://mimesniff.spec.whatwg.org/#subtype
     // A MIME type’s subtype is a non-empty ASCII string.
-    DeprecatedString m_subtype;
+    String m_subtype;
 
     // https://mimesniff.spec.whatwg.org/#parameters
     // A MIME type’s parameters is an ordered map whose keys are ASCII strings and values are strings limited to HTTP quoted-string token code points. It is initially empty.
-    OrderedHashMap<DeprecatedString, DeprecatedString> m_parameters;
+    OrderedHashMap<String, String> m_parameters;
+
+    // Non-standard, but computed once upfront.
+    String m_cached_essence;
 };
 
 }

+ 9 - 7
Userland/Libraries/LibWeb/XHR/XMLHttpRequest.cpp

@@ -161,7 +161,7 @@ WebIDL::ExceptionOr<JS::Value> XMLHttpRequest::response()
     }
     // 6. Otherwise, if this’s response type is "blob", set this’s response object to a new Blob object representing this’s received bytes with type set to the result of get a final MIME type for this.
     else if (m_response_type == Bindings::XMLHttpRequestResponseType::Blob) {
-        auto mime_type_as_string = TRY_OR_THROW_OOM(vm, String::from_deprecated_string(get_final_mime_type().serialized()));
+        auto mime_type_as_string = TRY_OR_THROW_OOM(vm, get_final_mime_type().serialized());
         auto blob_part = TRY(FileAPI::Blob::create(realm(), m_received_bytes, move(mime_type_as_string)));
         auto blob = TRY(FileAPI::Blob::create(realm(), Vector<FileAPI::BlobPart> { JS::make_handle(*blob_part) }));
         m_response_object = JS::Value(blob.ptr());
@@ -207,7 +207,7 @@ DeprecatedString XMLHttpRequest::get_text_response() const
         if (mime_type.essence().is_one_of("text/xml"sv, "application/xml"sv))
             return true;
 
-        return mime_type.subtype().ends_with("+xml"sv);
+        return mime_type.subtype().ends_with_bytes("+xml"sv);
     };
 
     // 3. If xhr’s response type is the empty string, charset is null, and the result of get a final MIME type for xhr is an XML MIME type,
@@ -256,7 +256,7 @@ MimeSniff::MimeType XMLHttpRequest::get_response_mime_type() const
 
     // 2. If mimeType is failure, then set mimeType to text/xml.
     if (!mime_type.has_value())
-        return MimeSniff::MimeType("text"sv, "xml"sv);
+        return MimeSniff::MimeType::create("text"_string.release_value_but_fixme_should_propagate_errors(), "xml"_short_string).release_value_but_fixme_should_propagate_errors();
 
     // 3. Return mimeType.
     return mime_type.release_value();
@@ -274,13 +274,13 @@ Optional<StringView> XMLHttpRequest::get_final_encoding() const
     // 3. If responseMIME’s parameters["charset"] exists, then set label to it.
     auto response_mime_charset_it = response_mime.parameters().find("charset"sv);
     if (response_mime_charset_it != response_mime.parameters().end())
-        label = response_mime_charset_it->value;
+        label = response_mime_charset_it->value.to_deprecated_string();
 
     // 4. If xhr’s override MIME type’s parameters["charset"] exists, then set label to it.
     if (m_override_mime_type.has_value()) {
         auto override_mime_charset_it = m_override_mime_type->parameters().find("charset"sv);
         if (override_mime_charset_it != m_override_mime_type->parameters().end())
-            label = override_mime_charset_it->value;
+            label = override_mime_charset_it->value.to_deprecated_string();
     }
 
     // 5. If label is null, then return null.
@@ -605,16 +605,18 @@ DeprecatedString XMLHttpRequest::get_all_response_headers() const
 // https://xhr.spec.whatwg.org/#dom-xmlhttprequest-overridemimetype
 WebIDL::ExceptionOr<void> XMLHttpRequest::override_mime_type(DeprecatedString const& mime)
 {
+    auto& vm = this->vm();
+
     // 1. If this’s state is loading or done, then throw an "InvalidStateError" DOMException.
     if (m_state == State::Loading || m_state == State::Done)
         return WebIDL::InvalidStateError::create(realm(), "Cannot override MIME type when state is Loading or Done.");
 
     // 2. Set this’s override MIME type to the result of parsing mime.
-    m_override_mime_type = MimeSniff::MimeType::parse(mime);
+    m_override_mime_type = TRY_OR_THROW_OOM(vm, MimeSniff::MimeType::parse(mime));
 
     // 3. If this’s override MIME type is failure, then set this’s override MIME type to application/octet-stream.
     if (!m_override_mime_type.has_value())
-        m_override_mime_type = MimeSniff::MimeType("application"sv, "octet-stream"sv);
+        m_override_mime_type = TRY_OR_THROW_OOM(vm, MimeSniff::MimeType::create(TRY_OR_THROW_OOM(vm, "application"_string), TRY_OR_THROW_OOM(vm, "octet-stream"_string)));
 
     return {};
 }