소스 검색

LibWeb/MimeSniff: Implement `Resource`

See https://mimesniff.spec.whatwg.org/#resource
Kemal Zebari 1 년 전
부모
커밋
04e19df06a

+ 1 - 0
Tests/LibWeb/CMakeLists.txt

@@ -2,6 +2,7 @@ set(TEST_SOURCES
     TestCSSIDSpeed.cpp
     TestCSSPixels.cpp
     TestHTMLTokenizer.cpp
+    TestMimeSniff.cpp
     TestNumbers.cpp
 )
 

+ 32 - 0
Tests/LibWeb/TestMimeSniff.cpp

@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2023, Kemal Zebari <kemalzebra@gmail.com>.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <LibTest/TestCase.h>
+
+#include <LibWeb/MimeSniff/Resource.h>
+
+TEST_CASE(determine_computed_mime_type_given_no_sniff_is_set)
+{
+    auto mime_type = MUST(Web::MimeSniff::MimeType::create("text"_string, "html"_string));
+    auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff("\x00"sv.bytes(), Web::MimeSniff::SniffingConfiguration { .supplied_type = mime_type, .no_sniff = true }));
+
+    EXPECT_EQ("text/html"sv, MUST(computed_mime_type.serialized()));
+}
+
+TEST_CASE(determine_computed_mime_type_given_no_sniff_is_unset)
+{
+    auto supplied_type = MUST(Web::MimeSniff::MimeType::create("text"_string, "html"_string));
+    auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff("\x00"sv.bytes(), Web::MimeSniff::SniffingConfiguration { .supplied_type = supplied_type }));
+
+    EXPECT_EQ("application/octet-stream"sv, MUST(computed_mime_type.serialized()));
+
+    // Make sure we cover the XML code path in the mime type sniffing algorithm.
+    auto xml_mime_type = "application/rss+xml"sv;
+    supplied_type = MUST(Web::MimeSniff::MimeType::parse(xml_mime_type)).release_value();
+    computed_mime_type = MUST(Web::MimeSniff::Resource::sniff("\x00"sv.bytes(), Web::MimeSniff::SniffingConfiguration { .supplied_type = supplied_type }));
+
+    EXPECT_EQ(xml_mime_type, MUST(computed_mime_type.serialized()));
+}

+ 1 - 0
Userland/Libraries/LibWeb/CMakeLists.txt

@@ -472,6 +472,7 @@ set(SOURCES
     MathML/MathMLElement.cpp
     MathML/TagNames.cpp
     MimeSniff/MimeType.cpp
+    MimeSniff/Resource.cpp
     Namespace.cpp
     NavigationTiming/EntryNames.cpp
     NavigationTiming/PerformanceTiming.cpp

+ 172 - 0
Userland/Libraries/LibWeb/MimeSniff/Resource.cpp

@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2023, Kemal Zebari <kemalzebra@gmail.com>.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <LibWeb/Fetch/Infrastructure/URL.h>
+#include <LibWeb/MimeSniff/Resource.h>
+
+namespace Web::MimeSniff {
+
+ErrorOr<Resource> Resource::create(ReadonlyBytes data, SniffingConfiguration configuration)
+{
+    // NOTE: Non-standard but for cases where pattern matching fails, let's fall back to the safest MIME type.
+    auto default_computed_mime_type = TRY(MimeType::create("application"_string, "octet-stream"_string));
+    auto resource = Resource { data, configuration.no_sniff, move(default_computed_mime_type) };
+
+    TRY(resource.supplied_mime_type_detection_algorithm(configuration.scheme, move(configuration.supplied_type)));
+    TRY(resource.mime_type_sniffing_algorithm());
+
+    return resource;
+}
+
+ErrorOr<MimeType> Resource::sniff(ReadonlyBytes data, SniffingConfiguration configuration)
+{
+    auto resource = TRY(create(data, move(configuration)));
+    return move(resource.m_computed_mime_type);
+}
+
+Resource::Resource(ReadonlyBytes data, bool no_sniff, MimeType&& default_computed_mime_type)
+    : m_no_sniff(no_sniff)
+    , m_computed_mime_type(move(default_computed_mime_type))
+{
+    read_the_resource_header(data);
+}
+
+Resource::~Resource() = default;
+
+// https://mimesniff.spec.whatwg.org/#supplied-mime-type-detection-algorithm
+// NOTE: Parameters are non-standard.
+ErrorOr<void> Resource::supplied_mime_type_detection_algorithm(StringView scheme, Optional<MimeType> supplied_type)
+{
+    // 1. Let supplied-type be null.
+    // 2. If the resource is retrieved via HTTP, execute the following steps:
+    //        1. If one or more Content-Type headers are associated with the resource, execute the following steps:
+    //               1. Set supplied-type to the value of the last Content-Type header associated with the resource.
+    //               2. Set the check-for-apache-bug flag if supplied-type is exactly equal to one of the values in the following table:
+    // NOTE: Non-standard but this algorithm expects the caller to handle step 2.1.1.
+    if (supplied_type.has_value()) {
+        if (Fetch::Infrastructure::is_http_or_https_scheme(scheme)) {
+            static Array<StringView, 4> constexpr apache_bug_mime_types = {
+                "text/plain"sv,
+                "text/plain; charset=ISO-8859-1"sv,
+                "text/plain; charset=iso-8859-1"sv,
+                "text/plain; charset=UTF-8"sv
+            };
+
+            auto serialized_supplied_type = TRY(supplied_type->serialized());
+            for (auto apache_bug_mime_type : apache_bug_mime_types) {
+                if (serialized_supplied_type == apache_bug_mime_type) {
+                    m_check_for_apache_bug_flag = true;
+                    break;
+                }
+            }
+        }
+    }
+
+    // 3. If the resource is retrieved directly from the file system, set supplied-type
+    //    to the MIME type provided by the file system.
+    // 4. If the resource is retrieved via another protocol (such as FTP), set
+    //    supplied-type to the MIME type as determined by that protocol, if any.
+    // 5. If supplied-type is not a MIME type, the supplied MIME type is undefined.
+    //    Abort these steps.
+    // 6. The supplied MIME type is supplied-type.
+    // NOTE: The expectation is for the caller to handle these spec steps.
+    m_supplied_mime_type = supplied_type;
+
+    return {};
+}
+
+// https://mimesniff.spec.whatwg.org/#read-the-resource-header
+void Resource::read_the_resource_header(ReadonlyBytes data)
+{
+    // 1. Let buffer be a byte sequence.
+    ByteBuffer buffer;
+
+    // 2. Read bytes of the resource into buffer until one of the following conditions is met:
+    //      - the end of the resource is reached.
+    //      - the number of bytes in buffer is greater than or equal to 1445.
+    //      - a reasonable amount of time has elapsed, as determined by the user agent.
+    // FIXME: The spec expects us to be reading from a stream. Reimplement this spec step once
+    //        we have greater support for streaming in areas that calls on this API.
+    static size_t constexpr MAX_SNIFF_SIZE = 1445;
+    buffer.append(data.slice(0, min(data.size(), MAX_SNIFF_SIZE)));
+
+    // 3. The resource header is buffer.
+    m_resource_header = move(buffer);
+}
+
+// https://mimesniff.spec.whatwg.org/#mime-type-sniffing-algorithm
+ErrorOr<void> Resource::mime_type_sniffing_algorithm()
+{
+    // 1. If the supplied MIME type is undefined or if the supplied MIME type’s essence
+    //    is "unknown/unknown", "application/unknown", or "*/*", execute the rules for
+    //    identifying an unknown MIME type with the sniff-scriptable flag equal to the
+    //    inverse of the no-sniff flag and abort these steps.
+    if (!m_supplied_mime_type.has_value() || m_supplied_mime_type->essence().is_one_of("unknown/unknown", "application/unknown", "*/*")) {
+
+        // FIXME: Execute the rules for identifying an unknown MIME type with the
+        // sniff-scriptable flag equal to the inverse of the no-sniff flag and abort
+        // these steps.
+        return {};
+    }
+
+    // 2. If the no-sniff flag is set, the computed MIME type is the supplied MIME type.
+    //    Abort these steps.
+    if (m_no_sniff) {
+        m_computed_mime_type = m_supplied_mime_type.value();
+        return {};
+    }
+
+    // 3. If the check-for-apache-bug flag is set, execute the rules for distinguishing
+    //    if a resource is text or binary and abort these steps.
+    if (m_check_for_apache_bug_flag) {
+        // FIXME: Execute the rules for distinguishing if a resource is text or binary and abort these steps.
+        return {};
+    }
+
+    // 4. If the supplied MIME type is an XML MIME type, the computed MIME type is the supplied MIME type.
+    //    Abort these steps.
+    if (m_supplied_mime_type->is_xml()) {
+        m_computed_mime_type = m_supplied_mime_type.value();
+        return {};
+    }
+
+    // 5. If the supplied MIME type’s essence is "text/html", execute the rules for distinguishing if a
+    //    resource is a feed or HTML and abort these steps.
+    if (m_supplied_mime_type->essence() == "text/html") {
+        // FIXME: Execute the rules for distinguishing if a resource is a feed or HTML and abort these steps.
+        return {};
+    }
+
+    // FIXME: 6. If the supplied MIME type is an image MIME type supported by the user agent, let matched-type be
+    //    the result of executing the image type pattern matching algorithm with the resource header as
+    //    the byte sequence to be matched.
+    Optional<MimeType> matched_type;
+
+    // 7. If matched-type is not undefined, the computed MIME type is matched-type.
+    //    Abort these steps.
+    if (matched_type.has_value()) {
+        m_computed_mime_type = matched_type.release_value();
+        return {};
+    }
+
+    // FIXME: 8. If the supplied MIME type is an audio or video MIME type supported by the user agent, let matched-type be
+    //    the result of executing the audio or video type pattern matching algorithm with the resource header as
+    //    the byte sequence to be matched.
+
+    // 9. If matched-type is not undefined, the computed MIME type is matched-type.
+    //    Abort these steps.
+    if (matched_type.has_value()) {
+        m_computed_mime_type = matched_type.release_value();
+        return {};
+    }
+
+    // 10. The computed MIME type is the supplied MIME type.
+    m_computed_mime_type = m_supplied_mime_type.value();
+
+    return {};
+}
+
+}

+ 58 - 0
Userland/Libraries/LibWeb/MimeSniff/Resource.h

@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2023, Kemal Zebari <kemalzebra@gmail.com>.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <LibWeb/MimeSniff/MimeType.h>
+
+namespace Web::MimeSniff {
+
+struct SniffingConfiguration {
+    StringView scheme { ""sv };
+    Optional<MimeType> supplied_type = {};
+    bool no_sniff { false };
+};
+
+// https://mimesniff.spec.whatwg.org/#resource
+class Resource {
+public:
+    static ErrorOr<Resource> create(ReadonlyBytes data, SniffingConfiguration configuration = {});
+    static ErrorOr<MimeType> sniff(ReadonlyBytes data, SniffingConfiguration configuration = {});
+
+    ~Resource();
+
+    MimeType const& computed_mime_type() const { return m_computed_mime_type; }
+    ReadonlyBytes resource_header() const { return m_resource_header; }
+
+private:
+    Resource(ReadonlyBytes data, bool no_sniff, MimeType&& default_computed_mime_type);
+
+    void read_the_resource_header(ReadonlyBytes data);
+    ErrorOr<void> supplied_mime_type_detection_algorithm(StringView scheme, Optional<MimeType> supplied_type);
+    ErrorOr<void> mime_type_sniffing_algorithm();
+
+    // https://mimesniff.spec.whatwg.org/#supplied-mime-type
+    // A supplied MIME type, the MIME type determined by the supplied MIME type detection algorithm.
+    Optional<MimeType> m_supplied_mime_type;
+
+    // https://mimesniff.spec.whatwg.org/#check-for-apache-bug-flag
+    // A check-for-apache-bug flag, which defaults to unset.
+    bool m_check_for_apache_bug_flag { false };
+
+    // https://mimesniff.spec.whatwg.org/#no-sniff-flag
+    // A no-sniff flag, which defaults to set if the user agent does not wish to perform sniffing on the resource and unset otherwise.
+    bool m_no_sniff { false };
+
+    // https://mimesniff.spec.whatwg.org/#computed-mime-type
+    // A computed MIME type, the MIME type determined by the MIME type sniffing algorithm.
+    MimeType m_computed_mime_type;
+
+    // https://mimesniff.spec.whatwg.org/#resource-header
+    // A resource header is the byte sequence at the beginning of a resource, as determined by reading the resource header.
+    ByteBuffer m_resource_header;
+};
+
+}