From 3aaa1c1df7946edcd95b73a3e4557af68064d0d1 Mon Sep 17 00:00:00 2001
From: Kemal Zebari
Date: Mon, 8 Jan 2024 01:36:33 -0800
Subject: [PATCH] LibWeb/MimeSniff: Implement MP4 signature matching

---
 Tests/LibWeb/TestMimeSniff.cpp                | 35 +++++++++++-
 .../Libraries/LibWeb/MimeSniff/Resource.cpp   | 54 ++++++++++++++++++-
 2 files changed, 86 insertions(+), 3 deletions(-)

diff --git a/Tests/LibWeb/TestMimeSniff.cpp b/Tests/LibWeb/TestMimeSniff.cpp
index 5331e8f80e1..540cbacc654 100644
--- a/Tests/LibWeb/TestMimeSniff.cpp
+++ b/Tests/LibWeb/TestMimeSniff.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, Kemal Zebari .
+ * Copyright (c) 2023-2024, Kemal Zebari .
  *
  * SPDX-License-Identifier: BSD-2-Clause
  */
@@ -247,6 +247,39 @@ TEST_CASE(determine_computed_mime_type_in_audio_or_video_sniffing_context)
     EXPECT_EQ(mime_type, computed_mime_type.essence());
 }
 
+TEST_CASE(determine_computed_mime_type_when_trying_to_match_mp4_signature)
+{
+    HashMap<StringView, Vector<StringView>> mime_type_to_headers_map;
+
+    mime_type_to_headers_map.set("application/octet-stream"sv, {
+        // Payload length < 12.
+        "!= 12"sv,
+        // Payload length < box size.
+        "\x00\x00\x00\x1F\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A"sv,
+        // Box size % 4 != 0.
+        "\x00\x00\x00\x0D\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"sv,
+        // 4 bytes after box size header != "ftyp".
+        "\x00\x00\x00\x0C\x00\x00\x00\x00\x00\x00\x00\x00"sv,
+        // Sequence "mp4" couldn't be found in ftyp box.
+        "\x00\x00\x00\x18\x66\x74\x79\x70isom\x00\x00\x00\x00\x61\x76\x63\x31\x00\x00\x00\x00"sv,
+    });
+    mime_type_to_headers_map.set("video/mp4"sv, {
+        // 3 bytes after "ftyp" sequence == "mp4".
+        "\x00\x00\x00\x0C\x66\x74\x79\x70mp42"sv,
+        // "mp4" sequence found while executing while loop (this input covers entire loop)
+        "\x00\x00\x00\x18\x66\x74\x79\x70isom\x00\x00\x00\x00\x61\x76\x63\x31mp41"sv,
+    });
+
+    for (auto const& mime_type_to_headers : mime_type_to_headers_map) {
+        auto mime_type = mime_type_to_headers.key;
+
+        for (auto const& header : mime_type_to_headers.value) {
+            auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(header.bytes(), Web::MimeSniff::SniffingConfiguration { .sniffing_context = Web::MimeSniff::SniffingContext::AudioOrVideo }));
+            EXPECT_EQ(mime_type, MUST(computed_mime_type.serialized()));
+        }
+    }
+}
+
 TEST_CASE(determine_computed_mime_type_in_a_font_context)
 {
     // Cover case where supplied type is an XML MIME type.
diff --git a/Userland/Libraries/LibWeb/MimeSniff/Resource.cpp b/Userland/Libraries/LibWeb/MimeSniff/Resource.cpp
index 6351d7d7d6f..150e4f88070 100644
--- a/Userland/Libraries/LibWeb/MimeSniff/Resource.cpp
+++ b/Userland/Libraries/LibWeb/MimeSniff/Resource.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, Kemal Zebari .
+ * Copyright (c) 2023-2024, Kemal Zebari .
  *
  * SPDX-License-Identifier: BSD-2-Clause
  */
@@ -136,6 +136,53 @@ ErrorOr<Optional<MimeType>> match_an_image_type_pattern(ReadonlyBytes input)
     return OptionalNone {};
 }
 
+// https://mimesniff.spec.whatwg.org/#signature-for-mp4
+bool matches_mp4_signature(ReadonlyBytes sequence)
+{
+    // 1. Let sequence be the byte sequence to be matched, where sequence[s] is byte s in sequence and sequence[0] is the first byte in sequence.
+
+    // 2. Let length be the number of bytes in sequence.
+    auto length = sequence.size();
+
+    // 3. If length is less than 12, return false.
+    if (length < 12)
+        return false;
+
+    // 4. Let box-size be the four bytes from sequence[0] to sequence[3], interpreted as a 32-bit unsigned big-endian integer.
+    u32 box_size = 0; // NOTE: Each byte is widened to u32 before shifting so the shift never happens in (signed) int.
+    box_size |= static_cast<u32>(sequence[0]) << 24;
+    box_size |= static_cast<u32>(sequence[1]) << 16;
+    box_size |= static_cast<u32>(sequence[2]) << 8;
+    box_size |= static_cast<u32>(sequence[3]);
+
+    // 5. If length is less than box-size or if box-size modulo 4 is not equal to 0, return false.
+    if ((length < box_size) || (box_size % 4 != 0))
+        return false;
+
+    // 6. If the four bytes from sequence[4] to sequence[7] are not equal to 0x66 0x74 0x79 0x70 ("ftyp"), return false.
+    if (sequence.slice(4, 4) != "\x66\x74\x79\x70"sv.bytes())
+        return false;
+
+    // 7. If the three bytes from sequence[8] to sequence[10] are equal to 0x6D 0x70 0x34 ("mp4"), return true.
+    if (sequence.slice(8, 3) == "\x6D\x70\x34"sv.bytes())
+        return true;
+
+    // 8. Let bytes-read be 16.
+    u32 bytes_read = 16;
+
+    // 9. While bytes-read is less than box-size, continuously loop through these steps:
+    //      1. If the three bytes from sequence[bytes-read] to sequence[bytes-read + 2] are equal to 0x6D 0x70 0x34 ("mp4"), return true.
+    //      2. Increment bytes-read by 4.
+    while (bytes_read < box_size) {
+        if (sequence.slice(bytes_read, 3) == "\x6D\x70\x34"sv.bytes())
+            return true;
+        bytes_read += 4;
+    }
+
+    // 10. Return false.
+    return false;
+}
+
 // https://mimesniff.spec.whatwg.org/#matching-an-audio-or-video-type-pattern
 ErrorOr<Optional<MimeType>> match_an_audio_or_video_type_pattern(ReadonlyBytes input)
 {
@@ -174,7 +221,10 @@ ErrorOr<Optional<MimeType>> match_an_audio_or_video_type_pattern(ReadonlyBytes input)
             return MimeType::parse(row.mime_type);
     }
 
-    // FIXME: 2. If input matches the signature for MP4, return "video/mp4".
+    // 2. If input matches the signature for MP4, return "video/mp4".
+    if (matches_mp4_signature(input))
+        return MimeType::create("video"_string, "mp4"_string);
+
     // FIXME: 3. If input matches the signature for WebM, return "video/webm".
 
     // FIXME: 4. If input matches the signature for MP3 without ID3, return "audio/mpeg".