LibWeb/MimeSniff: Update the MIME type sniffing algo to meet specs

The spec moved/added the XML and HTML MIME type checks to the beginning of
the algorithm and removed the step that sniffed for resource feeds.
This commit is contained in:
Kemal Zebari 2024-10-01 16:43:34 -07:00 committed by Tim Flynn
parent ea95e32539
commit b8a5e18a01
Notes: github-actions[bot] 2024-10-02 20:16:13 +00:00
2 changed files with 31 additions and 24 deletions

View file

@ -6,6 +6,7 @@
*/
#include <LibTest/TestCase.h>
#include <LibWeb/MimeSniff/MimeType.h>
#include <LibWeb/MimeSniff/Resource.h>
@ -28,10 +29,10 @@ TEST_CASE(determine_computed_mime_type_given_no_sniff_is_set)
TEST_CASE(determine_computed_mime_type_given_no_sniff_is_unset)
{
auto supplied_type = MUST(Web::MimeSniff::MimeType::create("text"_string, "html"_string));
auto supplied_type = MUST(Web::MimeSniff::MimeType::create("application"_string, "x-this-is-a-test"_string));
auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff("\x00"sv.bytes(), Web::MimeSniff::SniffingConfiguration { .supplied_type = supplied_type }));
EXPECT_EQ("application/octet-stream"sv, MUST(computed_mime_type.serialized()));
EXPECT_EQ("application/x-this-is-a-test"sv, MUST(computed_mime_type.serialized()));
}
TEST_CASE(determine_computed_mime_type_given_xml_mime_type_as_supplied_type)
@ -110,6 +111,19 @@ TEST_CASE(determine_computed_mime_type_given_supplied_type_that_is_an_apache_bug
}
}
TEST_CASE(determine_computed_mime_type_given_xml_or_html_supplied_type)
{
    // Both cases sniff an empty resource and expect the computed MIME type
    // to serialize to the same string as the supplied type.

    // Case 1: the supplied type is text/html.
    {
        auto html_type = MUST(Web::MimeSniff::MimeType::create("text"_string, "html"_string));
        auto html_config = Web::MimeSniff::SniffingConfiguration { .supplied_type = html_type };
        auto sniffed_type = MUST(Web::MimeSniff::Resource::sniff(""sv.bytes(), html_config));
        EXPECT_EQ("text/html"sv, MUST(sniffed_type.serialized()));
    }

    // Case 2: the supplied type is text/xml.
    {
        auto xml_type = MUST(Web::MimeSniff::MimeType::create("text"_string, "xml"_string));
        auto xml_config = Web::MimeSniff::SniffingConfiguration { .supplied_type = xml_type };
        auto sniffed_type = MUST(Web::MimeSniff::Resource::sniff(""sv.bytes(), xml_config));
        EXPECT_EQ("text/xml"sv, MUST(sniffed_type.serialized()));
    }
}
TEST_CASE(determine_computed_mime_type_in_both_none_and_browsing_sniffing_context)
{
HashMap<StringView, Vector<StringView>> mime_type_to_headers_map;

View file

@ -550,7 +550,14 @@ void Resource::read_the_resource_header(ReadonlyBytes data)
// https://mimesniff.spec.whatwg.org/#mime-type-sniffing-algorithm
ErrorOr<void> Resource::mime_type_sniffing_algorithm()
{
// 1. If the supplied MIME type is undefined or if the supplied MIME type's essence
// 1. If the supplied MIME type is an XML MIME type or HTML MIME type, the computed MIME type is the supplied MIME type.
// Abort these steps.
if (m_supplied_mime_type.has_value() && (m_supplied_mime_type->is_xml() || m_supplied_mime_type->is_html())) {
m_computed_mime_type = m_supplied_mime_type.value();
return {};
}
// 2. If the supplied MIME type is undefined or if the supplied MIME type's essence
// is "unknown/unknown", "application/unknown", or "*/*", execute the rules for
// identifying an unknown MIME type with the sniff-scriptable flag equal to the
// inverse of the no-sniff flag and abort these steps.
@ -559,58 +566,44 @@ ErrorOr<void> Resource::mime_type_sniffing_algorithm()
return {};
}
// 2. If the no-sniff flag is set, the computed MIME type is the supplied MIME type.
// 3. If the no-sniff flag is set, the computed MIME type is the supplied MIME type.
// Abort these steps.
if (m_no_sniff) {
m_computed_mime_type = m_supplied_mime_type.value();
return {};
}
// 3. If the check-for-apache-bug flag is set, execute the rules for distinguishing
// 4. If the check-for-apache-bug flag is set, execute the rules for distinguishing
// if a resource is text or binary and abort these steps.
if (m_check_for_apache_bug_flag) {
TRY(rules_for_distinguishing_if_a_resource_is_text_or_binary());
return {};
}
// 4. If the supplied MIME type is an XML MIME type, the computed MIME type is the supplied MIME type.
// Abort these steps.
if (m_supplied_mime_type->is_xml()) {
m_computed_mime_type = m_supplied_mime_type.value();
return {};
}
// 5. If the supplied MIME type's essence is "text/html", execute the rules for distinguishing if a
// resource is a feed or HTML and abort these steps.
if (m_supplied_mime_type->essence() == "text/html") {
// FIXME: Execute the rules for distinguishing if a resource is a feed or HTML and abort these steps.
return {};
}
// FIXME: 6. If the supplied MIME type is an image MIME type supported by the user agent, let matched-type be
// FIXME: 5. If the supplied MIME type is an image MIME type supported by the user agent, let matched-type be
// the result of executing the image type pattern matching algorithm with the resource header as
// the byte sequence to be matched.
Optional<MimeType> matched_type;
// 7. If matched-type is not undefined, the computed MIME type is matched-type.
// 6. If matched-type is not undefined, the computed MIME type is matched-type.
// Abort these steps.
if (matched_type.has_value()) {
m_computed_mime_type = matched_type.release_value();
return {};
}
// FIXME: 8. If the supplied MIME type is an audio or video MIME type supported by the user agent, let matched-type be
// FIXME: 7. If the supplied MIME type is an audio or video MIME type supported by the user agent, let matched-type be
// the result of executing the audio or video type pattern matching algorithm with the resource header as
// the byte sequence to be matched.
// 9. If matched-type is not undefined, the computed MIME type is matched-type.
// 8. If matched-type is not undefined, the computed MIME type is matched-type.
// Abort these steps.
if (matched_type.has_value()) {
m_computed_mime_type = matched_type.release_value();
return {};
}
// 10. The computed MIME type is the supplied MIME type.
// 9. The computed MIME type is the supplied MIME type.
m_computed_mime_type = m_supplied_mime_type.value();
return {};