TestMimeSniff.cpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380
  1. /*
  2. * Copyright (c) 2023-2024, Kemal Zebari <kemalzebra@gmail.com>.
  3. * Copyright (c) 2024, Jamie Mansfield <jmansfield@cadixdev.org>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <LibTest/TestCase.h>
  8. #include <LibWeb/MimeSniff/MimeType.h>
  9. #include <LibWeb/MimeSniff/Resource.h>
  10. TEST_CASE(determine_computed_mime_type_given_no_sniff_is_set)
  11. {
  12. auto mime_type = Web::MimeSniff::MimeType::create("text"_string, "html"_string);
  13. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff("\x00"sv.bytes(), Web::MimeSniff::SniffingConfiguration { .supplied_type = mime_type, .no_sniff = true }));
  14. EXPECT_EQ("text/html"sv, MUST(computed_mime_type.serialized()));
  15. // Cover the edge case in the context-specific sniffing algorithm.
  16. computed_mime_type = MUST(Web::MimeSniff::Resource::sniff("\x00"sv.bytes(), Web::MimeSniff::SniffingConfiguration {
  17. .sniffing_context = Web::MimeSniff::SniffingContext::Image,
  18. .supplied_type = mime_type,
  19. .no_sniff = true,
  20. }));
  21. EXPECT_EQ("text/html"sv, MUST(computed_mime_type.serialized()));
  22. }
  23. TEST_CASE(determine_computed_mime_type_given_no_sniff_is_unset)
  24. {
  25. auto supplied_type = Web::MimeSniff::MimeType::create("application"_string, "x-this-is-a-test"_string);
  26. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff("\x00"sv.bytes(), Web::MimeSniff::SniffingConfiguration { .supplied_type = supplied_type }));
  27. EXPECT_EQ("application/x-this-is-a-test"sv, MUST(computed_mime_type.serialized()));
  28. }
  29. TEST_CASE(determine_computed_mime_type_given_xml_mime_type_as_supplied_type)
  30. {
  31. auto xml_mime_type = "application/rss+xml"sv;
  32. auto supplied_type = MUST(Web::MimeSniff::MimeType::parse(xml_mime_type)).release_value();
  33. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff("\x00"sv.bytes(), Web::MimeSniff::SniffingConfiguration { .supplied_type = supplied_type }));
  34. EXPECT_EQ(xml_mime_type, MUST(computed_mime_type.serialized()));
  35. }
  36. static void set_image_type_mappings(HashMap<StringView, Vector<StringView>>& mime_type_to_headers_map)
  37. {
  38. mime_type_to_headers_map.set("image/x-icon"sv, { "\x00\x00\x01\x00"sv, "\x00\x00\x02\x00"sv });
  39. mime_type_to_headers_map.set("image/bmp"sv, { "BM"sv });
  40. mime_type_to_headers_map.set("image/gif"sv, { "GIF87a"sv, "GIF89a"sv });
  41. mime_type_to_headers_map.set("image/webp"sv, { "RIFF\x00\x00\x00\x00WEBPVP"sv });
  42. mime_type_to_headers_map.set("image/png"sv, { "\x89PNG\x0D\x0A\x1A\x0A"sv });
  43. mime_type_to_headers_map.set("image/jpeg"sv, { "\xFF\xD8\xFF"sv });
  44. }
  45. static void set_audio_or_video_type_mappings(HashMap<StringView, Vector<StringView>>& mime_type_to_headers_map)
  46. {
  47. mime_type_to_headers_map.set("audio/aiff"sv, { "FORM\x00\x00\x00\x00\x41IFF"sv });
  48. mime_type_to_headers_map.set("audio/mpeg"sv, { "ID3"sv });
  49. mime_type_to_headers_map.set("application/ogg"sv, { "OggS\x00"sv });
  50. mime_type_to_headers_map.set("audio/midi"sv, { "MThd\x00\x00\x00\x06"sv });
  51. mime_type_to_headers_map.set("video/avi"sv, { "RIFF\x00\x00\x00\x00\x41\x56\x49\x20"sv });
  52. mime_type_to_headers_map.set("audio/wave"sv, { "RIFF\x00\x00\x00\x00WAVE"sv });
  53. }
  54. static void set_text_plain_type_mappings(HashMap<StringView, Vector<StringView>>& mime_type_to_headers_map)
  55. {
  56. mime_type_to_headers_map.set("text/plain"sv, {
  57. "\xFE\xFF\x00\x00"sv,
  58. "\xFF\xFE\x00\x00"sv,
  59. "\xEF\xBB\xBF\x00"sv,
  60. "Hello world!"sv,
  61. });
  62. }
  63. TEST_CASE(determine_computed_mime_type_given_supplied_type_that_is_an_apache_bug_mime_type)
  64. {
  65. Vector<StringView> apache_bug_mime_types = {
  66. "text/plain"sv,
  67. "text/plain; charset=ISO-8859-1"sv,
  68. "text/plain; charset=iso-8859-1"sv,
  69. "text/plain; charset=UTF-8"sv
  70. };
  71. // Cover all Apache bug MIME types.
  72. for (auto const& apache_bug_mime_type : apache_bug_mime_types) {
  73. auto supplied_type = MUST(Web::MimeSniff::MimeType::parse(apache_bug_mime_type)).release_value();
  74. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff("Hello world!"sv.bytes(),
  75. Web::MimeSniff::SniffingConfiguration { .scheme = "http"sv, .supplied_type = supplied_type }));
  76. EXPECT_EQ("text/plain"sv, MUST(computed_mime_type.serialized()));
  77. }
  78. // Cover all code paths in "rules for distinguishing if a resource is text or binary".
  79. HashMap<StringView, Vector<StringView>> mime_type_to_headers_map;
  80. mime_type_to_headers_map.set("application/octet-stream"sv, { "\x00"sv });
  81. set_text_plain_type_mappings(mime_type_to_headers_map);
  82. auto supplied_type = Web::MimeSniff::MimeType::create("text"_string, "plain"_string);
  83. for (auto const& mime_type_to_headers : mime_type_to_headers_map) {
  84. auto mime_type = mime_type_to_headers.key;
  85. for (auto const& header : mime_type_to_headers.value) {
  86. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(header.bytes(),
  87. Web::MimeSniff::SniffingConfiguration { .scheme = "http"sv, .supplied_type = supplied_type }));
  88. EXPECT_EQ(mime_type, MUST(computed_mime_type.serialized()));
  89. }
  90. }
  91. }
  92. TEST_CASE(determine_computed_mime_type_given_xml_or_html_supplied_type)
  93. {
  94. // With HTML supplied type.
  95. auto config = Web::MimeSniff::SniffingConfiguration { .supplied_type = Web::MimeSniff::MimeType::create("text"_string, "html"_string) };
  96. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(""sv.bytes(), config));
  97. EXPECT_EQ("text/html"sv, MUST(computed_mime_type.serialized()));
  98. // With XML supplied type.
  99. config = Web::MimeSniff::SniffingConfiguration { .supplied_type = Web::MimeSniff::MimeType::create("text"_string, "xml"_string) };
  100. computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(""sv.bytes(), config));
  101. EXPECT_EQ("text/xml"sv, MUST(computed_mime_type.serialized()));
  102. }
  103. TEST_CASE(determine_computed_mime_type_in_both_none_and_browsing_sniffing_context)
  104. {
  105. HashMap<StringView, Vector<StringView>> mime_type_to_headers_map;
  106. mime_type_to_headers_map.set("application/octet-stream"sv, { "\x00"sv });
  107. mime_type_to_headers_map.set("text/html"sv, {
  108. "\x09\x09<!DOCTYPE HTML\x20"sv,
  109. "\x0A<HTML\x3E"sv,
  110. "\x0C<HEAD\x20"sv,
  111. "\x0D<SCRIPT>"sv,
  112. "\x20<IFRAME>"sv,
  113. "<H1>"sv,
  114. "<DIV>"sv,
  115. "<FONT>"sv,
  116. "<TABLE>"sv,
  117. "<A>"sv,
  118. "<STYLE>"sv,
  119. "<TITLE>"sv,
  120. "<B>"sv,
  121. "<BODY>"sv,
  122. "<BR>"sv,
  123. "<P>"sv,
  124. "<!-->"sv,
  125. });
  126. mime_type_to_headers_map.set("text/xml"sv, { "<?xml"sv });
  127. mime_type_to_headers_map.set("application/pdf"sv, { "%PDF-"sv });
  128. mime_type_to_headers_map.set("application/postscript"sv, { "%!PS-Adobe-"sv });
  129. set_text_plain_type_mappings(mime_type_to_headers_map);
  130. set_image_type_mappings(mime_type_to_headers_map);
  131. set_audio_or_video_type_mappings(mime_type_to_headers_map);
  132. mime_type_to_headers_map.set("application/x-gzip"sv, { "\x1F\x8B\x08"sv });
  133. mime_type_to_headers_map.set("application/zip"sv, { "PK\x03\x04"sv });
  134. mime_type_to_headers_map.set("application/x-rar-compressed"sv, { "Rar\x20\x1A\x07\x00"sv });
  135. for (auto const& mime_type_to_headers : mime_type_to_headers_map) {
  136. auto mime_type = mime_type_to_headers.key;
  137. for (auto const& header : mime_type_to_headers.value) {
  138. // Test in a non-specific sniffing context.
  139. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(header.bytes()));
  140. EXPECT_EQ(mime_type, computed_mime_type.essence());
  141. // Test sniffing in a browsing context.
  142. computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(header.bytes(), Web::MimeSniff::SniffingConfiguration { .sniffing_context = Web::MimeSniff::SniffingContext::Browsing }));
  143. EXPECT_EQ(mime_type, computed_mime_type.essence());
  144. }
  145. }
  146. }
  147. TEST_CASE(compute_mime_type_given_unknown_supplied_type)
  148. {
  149. Array<Web::MimeSniff::MimeType, 3> unknown_supplied_types = {
  150. Web::MimeSniff::MimeType::create("unknown"_string, "unknown"_string),
  151. Web::MimeSniff::MimeType::create("application"_string, "unknown"_string),
  152. Web::MimeSniff::MimeType::create("*"_string, "*"_string)
  153. };
  154. auto header_bytes = "<HTML>"sv.bytes();
  155. for (auto const& unknown_supplied_type : unknown_supplied_types) {
  156. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(header_bytes, Web::MimeSniff::SniffingConfiguration { .supplied_type = unknown_supplied_type }));
  157. EXPECT_EQ("text/html"sv, computed_mime_type.essence());
  158. }
  159. }
  160. TEST_CASE(determine_computed_mime_type_in_image_sniffing_context)
  161. {
  162. // Cover case where supplied type is an XML MIME type.
  163. auto mime_type = "application/rss+xml"sv;
  164. auto supplied_type = MUST(Web::MimeSniff::MimeType::parse(mime_type)).release_value();
  165. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(""sv.bytes(), Web::MimeSniff::SniffingConfiguration { .sniffing_context = Web::MimeSniff::SniffingContext::Image, .supplied_type = supplied_type }));
  166. EXPECT_EQ(mime_type, MUST(computed_mime_type.serialized()));
  167. HashMap<StringView, Vector<StringView>> mime_type_to_headers_map;
  168. set_image_type_mappings(mime_type_to_headers_map);
  169. // Also consider a resource that is not an image.
  170. mime_type_to_headers_map.set("application/octet-stream"sv, { "\x00"sv });
  171. for (auto const& mime_type_to_headers : mime_type_to_headers_map) {
  172. mime_type = mime_type_to_headers.key;
  173. for (auto const& header : mime_type_to_headers.value) {
  174. computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(header.bytes(), Web::MimeSniff::SniffingConfiguration { .sniffing_context = Web::MimeSniff::SniffingContext::Image }));
  175. EXPECT_EQ(mime_type, computed_mime_type.essence());
  176. }
  177. }
  178. // Cover case where we aren't dealing with an image MIME type.
  179. mime_type = "text/html"sv;
  180. supplied_type = MUST(Web::MimeSniff::MimeType::parse("text/html"sv)).release_value();
  181. computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(""sv.bytes(), Web::MimeSniff::SniffingConfiguration { .sniffing_context = Web::MimeSniff::SniffingContext::Image, .supplied_type = supplied_type }));
  182. EXPECT_EQ(mime_type, computed_mime_type.essence());
  183. }
  184. TEST_CASE(determine_computed_mime_type_in_audio_or_video_sniffing_context)
  185. {
  186. // Cover case where supplied type is an XML MIME type.
  187. auto mime_type = "application/rss+xml"sv;
  188. auto supplied_type = MUST(Web::MimeSniff::MimeType::parse(mime_type)).release_value();
  189. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(""sv.bytes(), Web::MimeSniff::SniffingConfiguration {
  190. .sniffing_context = Web::MimeSniff::SniffingContext::AudioOrVideo,
  191. .supplied_type = supplied_type,
  192. }));
  193. EXPECT_EQ(mime_type, MUST(computed_mime_type.serialized()));
  194. HashMap<StringView, Vector<StringView>> mime_type_to_headers_map;
  195. set_audio_or_video_type_mappings(mime_type_to_headers_map);
  196. // Also consider a resource that is not an audio or video.
  197. mime_type_to_headers_map.set("application/octet-stream"sv, { "\x00"sv });
  198. for (auto const& mime_type_to_headers : mime_type_to_headers_map) {
  199. auto mime_type = mime_type_to_headers.key;
  200. for (auto const& header : mime_type_to_headers.value) {
  201. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(header.bytes(), Web::MimeSniff::SniffingConfiguration { .sniffing_context = Web::MimeSniff::SniffingContext::AudioOrVideo }));
  202. EXPECT_EQ(mime_type, computed_mime_type.essence());
  203. }
  204. }
  205. // Cover case where we aren't dealing with an audio or video MIME type.
  206. mime_type = "text/html"sv;
  207. supplied_type = MUST(Web::MimeSniff::MimeType::parse("text/html"sv)).release_value();
  208. computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(""sv.bytes(), Web::MimeSniff::SniffingConfiguration {
  209. .sniffing_context = Web::MimeSniff::SniffingContext::AudioOrVideo,
  210. .supplied_type = supplied_type,
  211. }));
  212. EXPECT_EQ(mime_type, computed_mime_type.essence());
  213. }
  214. TEST_CASE(determine_computed_mime_type_when_trying_to_match_mp4_signature)
  215. {
  216. HashMap<StringView, Vector<StringView>> mime_type_to_headers_map;
  217. mime_type_to_headers_map.set("application/octet-stream"sv, {
  218. // Payload length < 12.
  219. "!= 12"sv,
  220. // Payload length < box size.
  221. "\x00\x00\x00\x1F\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A"sv,
  222. // Box size % 4 != 0.
  223. "\x00\x00\x00\x0D\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"sv,
  224. // 4 bytes after box size header != "ftyp".
  225. "\x00\x00\x00\x0C\x00\x00\x00\x00\x00\x00\x00\x00"sv,
  226. // Sequence "mp4" couldn't be found in ftyp box.
  227. "\x00\x00\x00\x18\x66\x74\x79\x70isom\x00\x00\x00\x00\x61\x76\x63\x31\x00\x00\x00\x00"sv,
  228. });
  229. mime_type_to_headers_map.set("video/mp4"sv, {
  230. // 3 bytes after "ftyp" sequence == "mp4".
  231. "\x00\x00\x00\x0C\x66\x74\x79\x70mp42"sv,
  232. // "mp4" sequence found while executing while loop (this input covers entire loop)
  233. "\x00\x00\x00\x18\x66\x74\x79\x70isom\x00\x00\x00\x00\x61\x76\x63\x31mp41"sv,
  234. });
  235. for (auto const& mime_type_to_headers : mime_type_to_headers_map) {
  236. auto mime_type = mime_type_to_headers.key;
  237. for (auto const& header : mime_type_to_headers.value) {
  238. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(header.bytes(), Web::MimeSniff::SniffingConfiguration { .sniffing_context = Web::MimeSniff::SniffingContext::AudioOrVideo }));
  239. EXPECT_EQ(mime_type, MUST(computed_mime_type.serialized()));
  240. }
  241. }
  242. }
  243. TEST_CASE(determine_computed_mime_type_in_a_font_context)
  244. {
  245. // Cover case where supplied type is an XML MIME type.
  246. auto mime_type = "application/rss+xml"sv;
  247. auto supplied_type = MUST(Web::MimeSniff::MimeType::parse(mime_type)).release_value();
  248. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(""sv.bytes(), Web::MimeSniff::SniffingConfiguration {
  249. .sniffing_context = Web::MimeSniff::SniffingContext::Font,
  250. .supplied_type = supplied_type,
  251. }));
  252. EXPECT_EQ(mime_type, MUST(computed_mime_type.serialized()));
  253. HashMap<StringView, Vector<StringView>> mime_type_to_headers_map;
  254. mime_type_to_headers_map.set("application/octet-stream"sv, { "\x00"sv });
  255. mime_type_to_headers_map.set("application/vnd.ms-fontobject"sv, { "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00LP"sv });
  256. mime_type_to_headers_map.set("font/ttf"sv, { "\x00\x01\x00\x00"sv });
  257. mime_type_to_headers_map.set("font/otf"sv, { "OTTO"sv });
  258. mime_type_to_headers_map.set("font/collection"sv, { "ttcf"sv });
  259. mime_type_to_headers_map.set("font/woff"sv, { "wOFF"sv });
  260. mime_type_to_headers_map.set("font/woff2"sv, { "wOF2"sv });
  261. for (auto const& mime_type_to_headers : mime_type_to_headers_map) {
  262. auto mime_type = mime_type_to_headers.key;
  263. for (auto const& header : mime_type_to_headers.value) {
  264. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(header.bytes(), Web::MimeSniff::SniffingConfiguration { .sniffing_context = Web::MimeSniff::SniffingContext::Font }));
  265. EXPECT_EQ(mime_type, computed_mime_type.essence());
  266. }
  267. }
  268. // Cover case where we aren't dealing with a font MIME type.
  269. mime_type = "text/html"sv;
  270. supplied_type = MUST(Web::MimeSniff::MimeType::parse("text/html"sv)).release_value();
  271. computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(""sv.bytes(), Web::MimeSniff::SniffingConfiguration {
  272. .sniffing_context = Web::MimeSniff::SniffingContext::Font,
  273. .supplied_type = supplied_type,
  274. }));
  275. EXPECT_EQ(mime_type, computed_mime_type.essence());
  276. }
  277. TEST_CASE(determine_computed_mime_type_given_text_or_binary_context)
  278. {
  279. auto supplied_type = Web::MimeSniff::MimeType::create("text"_string, "plain"_string);
  280. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff("\x00"sv.bytes(), Web::MimeSniff::SniffingConfiguration {
  281. .sniffing_context = Web::MimeSniff::SniffingContext::TextOrBinary,
  282. .supplied_type = supplied_type,
  283. }));
  284. EXPECT_EQ("application/octet-stream"sv, MUST(computed_mime_type.serialized()));
  285. }
  286. TEST_CASE(determine_minimised_mime_type)
  287. {
  288. HashMap<StringView, StringView> mime_type_to_minimised_mime_type_map;
  289. // JavaScript MIME types should always be "text/javascript".
  290. mime_type_to_minimised_mime_type_map.set("text/javascript"sv, "text/javascript"sv);
  291. mime_type_to_minimised_mime_type_map.set("application/javascript"sv, "text/javascript"sv);
  292. mime_type_to_minimised_mime_type_map.set("text/javascript; charset=utf-8"sv, "text/javascript"sv);
  293. // JSON MIME types should always be "application/json".
  294. mime_type_to_minimised_mime_type_map.set("application/json"sv, "application/json"sv);
  295. mime_type_to_minimised_mime_type_map.set("text/json"sv, "application/json"sv);
  296. mime_type_to_minimised_mime_type_map.set("application/json; charset=utf-8"sv, "application/json"sv);
  297. // SVG MIME types should always be "image/svg+xml".
  298. mime_type_to_minimised_mime_type_map.set("image/svg+xml"sv, "image/svg+xml"sv);
  299. mime_type_to_minimised_mime_type_map.set("image/svg+xml; charset=utf-8"sv, "image/svg+xml"sv);
  300. // XML MIME types should always be "application/xml".
  301. mime_type_to_minimised_mime_type_map.set("application/xml"sv, "application/xml"sv);
  302. mime_type_to_minimised_mime_type_map.set("text/xml"sv, "application/xml"sv);
  303. mime_type_to_minimised_mime_type_map.set("application/xml; charset=utf-8"sv, "application/xml"sv);
  304. // MIME types not supported by the user-agent should return an empty string.
  305. mime_type_to_minimised_mime_type_map.set("application/java-archive"sv, ""sv);
  306. mime_type_to_minimised_mime_type_map.set("application/zip"sv, ""sv);
  307. for (auto const& mime_type_to_minimised_mime_type : mime_type_to_minimised_mime_type_map) {
  308. auto mime_type = MUST(Web::MimeSniff::MimeType::parse(mime_type_to_minimised_mime_type.key)).release_value();
  309. EXPECT_EQ(mime_type_to_minimised_mime_type.value, Web::MimeSniff::minimise_a_supported_mime_type(mime_type));
  310. }
  311. }