TestMimeSniff.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322
  1. /*
  2. * Copyright (c) 2023-2024, Kemal Zebari <kemalzebra@gmail.com>.
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <LibTest/TestCase.h>
  7. #include <LibWeb/MimeSniff/Resource.h>
  8. TEST_CASE(determine_computed_mime_type_given_no_sniff_is_set)
  9. {
  10. auto mime_type = MUST(Web::MimeSniff::MimeType::create("text"_string, "html"_string));
  11. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff("\x00"sv.bytes(), Web::MimeSniff::SniffingConfiguration { .supplied_type = mime_type, .no_sniff = true }));
  12. EXPECT_EQ("text/html"sv, MUST(computed_mime_type.serialized()));
  13. // Cover the edge case in the context-specific sniffing algorithm.
  14. computed_mime_type = MUST(Web::MimeSniff::Resource::sniff("\x00"sv.bytes(), Web::MimeSniff::SniffingConfiguration {
  15. .sniffing_context = Web::MimeSniff::SniffingContext::Image,
  16. .supplied_type = mime_type,
  17. .no_sniff = true,
  18. }));
  19. EXPECT_EQ("text/html"sv, MUST(computed_mime_type.serialized()));
  20. }
  21. TEST_CASE(determine_computed_mime_type_given_no_sniff_is_unset)
  22. {
  23. auto supplied_type = MUST(Web::MimeSniff::MimeType::create("text"_string, "html"_string));
  24. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff("\x00"sv.bytes(), Web::MimeSniff::SniffingConfiguration { .supplied_type = supplied_type }));
  25. EXPECT_EQ("application/octet-stream"sv, MUST(computed_mime_type.serialized()));
  26. }
  27. TEST_CASE(determine_computed_mime_type_given_xml_mime_type_as_supplied_type)
  28. {
  29. auto xml_mime_type = "application/rss+xml"sv;
  30. auto supplied_type = MUST(Web::MimeSniff::MimeType::parse(xml_mime_type)).release_value();
  31. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff("\x00"sv.bytes(), Web::MimeSniff::SniffingConfiguration { .supplied_type = supplied_type }));
  32. EXPECT_EQ(xml_mime_type, MUST(computed_mime_type.serialized()));
  33. }
  34. static void set_image_type_mappings(HashMap<StringView, Vector<StringView>>& mime_type_to_headers_map)
  35. {
  36. mime_type_to_headers_map.set("image/x-icon"sv, { "\x00\x00\x01\x00"sv, "\x00\x00\x02\x00"sv });
  37. mime_type_to_headers_map.set("image/bmp"sv, { "BM"sv });
  38. mime_type_to_headers_map.set("image/gif"sv, { "GIF87a"sv, "GIF89a"sv });
  39. mime_type_to_headers_map.set("image/webp"sv, { "RIFF\x00\x00\x00\x00WEBPVP"sv });
  40. mime_type_to_headers_map.set("image/png"sv, { "\x89PNG\x0D\x0A\x1A\x0A"sv });
  41. mime_type_to_headers_map.set("image/jpeg"sv, { "\xFF\xD8\xFF"sv });
  42. }
  43. static void set_audio_or_video_type_mappings(HashMap<StringView, Vector<StringView>>& mime_type_to_headers_map)
  44. {
  45. mime_type_to_headers_map.set("audio/aiff"sv, { "FORM\x00\x00\x00\x00\x41IFF"sv });
  46. mime_type_to_headers_map.set("audio/mpeg"sv, { "ID3"sv });
  47. mime_type_to_headers_map.set("application/ogg"sv, { "OggS\x00"sv });
  48. mime_type_to_headers_map.set("audio/midi"sv, { "MThd\x00\x00\x00\x06"sv });
  49. mime_type_to_headers_map.set("video/avi"sv, { "RIFF\x00\x00\x00\x00\x41\x56\x49\x20"sv });
  50. mime_type_to_headers_map.set("audio/wave"sv, { "RIFF\x00\x00\x00\x00WAVE"sv });
  51. }
  52. static void set_text_plain_type_mappings(HashMap<StringView, Vector<StringView>>& mime_type_to_headers_map)
  53. {
  54. mime_type_to_headers_map.set("text/plain"sv, {
  55. "\xFE\xFF\x00\x00"sv,
  56. "\xFF\xFE\x00\x00"sv,
  57. "\xEF\xBB\xBF\x00"sv,
  58. "Hello world!"sv,
  59. });
  60. }
  61. TEST_CASE(determine_computed_mime_type_given_supplied_type_that_is_an_apache_bug_mime_type)
  62. {
  63. Vector<StringView> apache_bug_mime_types = {
  64. "text/plain"sv,
  65. "text/plain; charset=ISO-8859-1"sv,
  66. "text/plain; charset=iso-8859-1"sv,
  67. "text/plain; charset=UTF-8"sv
  68. };
  69. // Cover all Apache bug MIME types.
  70. for (auto const& apache_bug_mime_type : apache_bug_mime_types) {
  71. auto supplied_type = MUST(Web::MimeSniff::MimeType::parse(apache_bug_mime_type)).release_value();
  72. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff("Hello world!"sv.bytes(),
  73. Web::MimeSniff::SniffingConfiguration { .scheme = "http"sv, .supplied_type = supplied_type }));
  74. EXPECT_EQ("text/plain"sv, MUST(computed_mime_type.serialized()));
  75. }
  76. // Cover all code paths in "rules for distinguishing if a resource is text or binary".
  77. HashMap<StringView, Vector<StringView>> mime_type_to_headers_map;
  78. mime_type_to_headers_map.set("application/octet-stream"sv, { "\x00"sv });
  79. set_text_plain_type_mappings(mime_type_to_headers_map);
  80. auto supplied_type = MUST(Web::MimeSniff::MimeType::create("text"_string, "plain"_string));
  81. for (auto const& mime_type_to_headers : mime_type_to_headers_map) {
  82. auto mime_type = mime_type_to_headers.key;
  83. for (auto const& header : mime_type_to_headers.value) {
  84. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(header.bytes(),
  85. Web::MimeSniff::SniffingConfiguration { .scheme = "http"sv, .supplied_type = supplied_type }));
  86. EXPECT_EQ(mime_type, MUST(computed_mime_type.serialized()));
  87. }
  88. }
  89. }
  90. TEST_CASE(determine_computed_mime_type_in_both_none_and_browsing_sniffing_context)
  91. {
  92. HashMap<StringView, Vector<StringView>> mime_type_to_headers_map;
  93. mime_type_to_headers_map.set("application/octet-stream"sv, { "\x00"sv });
  94. mime_type_to_headers_map.set("text/html"sv, {
  95. "\x09\x09<!DOCTYPE HTML\x20"sv,
  96. "\x0A<HTML\x3E"sv,
  97. "\x0C<HEAD\x20"sv,
  98. "\x0D<SCRIPT>"sv,
  99. "\x20<IFRAME>"sv,
  100. "<H1>"sv,
  101. "<DIV>"sv,
  102. "<FONT>"sv,
  103. "<TABLE>"sv,
  104. "<A>"sv,
  105. "<STYLE>"sv,
  106. "<TITLE>"sv,
  107. "<B>"sv,
  108. "<BODY>"sv,
  109. "<BR>"sv,
  110. "<P>"sv,
  111. "<!-->"sv,
  112. });
  113. mime_type_to_headers_map.set("text/xml"sv, { "<?xml"sv });
  114. mime_type_to_headers_map.set("application/pdf"sv, { "%PDF-"sv });
  115. mime_type_to_headers_map.set("application/postscript"sv, { "%!PS-Adobe-"sv });
  116. set_text_plain_type_mappings(mime_type_to_headers_map);
  117. set_image_type_mappings(mime_type_to_headers_map);
  118. set_audio_or_video_type_mappings(mime_type_to_headers_map);
  119. mime_type_to_headers_map.set("application/x-gzip"sv, { "\x1F\x8B\x08"sv });
  120. mime_type_to_headers_map.set("application/zip"sv, { "PK\x03\x04"sv });
  121. mime_type_to_headers_map.set("application/x-rar-compressed"sv, { "Rar\x20\x1A\x07\x00"sv });
  122. for (auto const& mime_type_to_headers : mime_type_to_headers_map) {
  123. auto mime_type = mime_type_to_headers.key;
  124. for (auto const& header : mime_type_to_headers.value) {
  125. // Test in a non-specific sniffing context.
  126. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(header.bytes()));
  127. EXPECT_EQ(mime_type, computed_mime_type.essence());
  128. // Test sniffing in a browsing context.
  129. computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(header.bytes(), Web::MimeSniff::SniffingConfiguration { .sniffing_context = Web::MimeSniff::SniffingContext::Browsing }));
  130. EXPECT_EQ(mime_type, computed_mime_type.essence());
  131. }
  132. }
  133. }
  134. TEST_CASE(compute_mime_type_given_unknown_supplied_type)
  135. {
  136. Array<Web::MimeSniff::MimeType, 3> unknown_supplied_types = {
  137. MUST(Web::MimeSniff::MimeType::create("unknown"_string, "unknown"_string)),
  138. MUST(Web::MimeSniff::MimeType::create("application"_string, "unknown"_string)),
  139. MUST(Web::MimeSniff::MimeType::create("*"_string, "*"_string))
  140. };
  141. auto header_bytes = "<HTML>"sv.bytes();
  142. for (auto const& unknown_supplied_type : unknown_supplied_types) {
  143. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(header_bytes, Web::MimeSniff::SniffingConfiguration { .supplied_type = unknown_supplied_type }));
  144. EXPECT_EQ("text/html"sv, computed_mime_type.essence());
  145. }
  146. }
  147. TEST_CASE(determine_computed_mime_type_in_image_sniffing_context)
  148. {
  149. // Cover case where supplied type is an XML MIME type.
  150. auto mime_type = "application/rss+xml"sv;
  151. auto supplied_type = MUST(Web::MimeSniff::MimeType::parse(mime_type)).release_value();
  152. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(""sv.bytes(), Web::MimeSniff::SniffingConfiguration { .sniffing_context = Web::MimeSniff::SniffingContext::Image, .supplied_type = supplied_type }));
  153. EXPECT_EQ(mime_type, MUST(computed_mime_type.serialized()));
  154. HashMap<StringView, Vector<StringView>> mime_type_to_headers_map;
  155. set_image_type_mappings(mime_type_to_headers_map);
  156. // Also consider a resource that is not an image.
  157. mime_type_to_headers_map.set("application/octet-stream"sv, { "\x00"sv });
  158. for (auto const& mime_type_to_headers : mime_type_to_headers_map) {
  159. mime_type = mime_type_to_headers.key;
  160. for (auto const& header : mime_type_to_headers.value) {
  161. computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(header.bytes(), Web::MimeSniff::SniffingConfiguration { .sniffing_context = Web::MimeSniff::SniffingContext::Image }));
  162. EXPECT_EQ(mime_type, computed_mime_type.essence());
  163. }
  164. }
  165. // Cover case where we aren't dealing with an image MIME type.
  166. mime_type = "text/html"sv;
  167. supplied_type = MUST(Web::MimeSniff::MimeType::parse("text/html"sv)).release_value();
  168. computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(""sv.bytes(), Web::MimeSniff::SniffingConfiguration { .sniffing_context = Web::MimeSniff::SniffingContext::Image, .supplied_type = supplied_type }));
  169. EXPECT_EQ(mime_type, computed_mime_type.essence());
  170. }
  171. TEST_CASE(determine_computed_mime_type_in_audio_or_video_sniffing_context)
  172. {
  173. // Cover case where supplied type is an XML MIME type.
  174. auto mime_type = "application/rss+xml"sv;
  175. auto supplied_type = MUST(Web::MimeSniff::MimeType::parse(mime_type)).release_value();
  176. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(""sv.bytes(), Web::MimeSniff::SniffingConfiguration {
  177. .sniffing_context = Web::MimeSniff::SniffingContext::AudioOrVideo,
  178. .supplied_type = supplied_type,
  179. }));
  180. EXPECT_EQ(mime_type, MUST(computed_mime_type.serialized()));
  181. HashMap<StringView, Vector<StringView>> mime_type_to_headers_map;
  182. set_audio_or_video_type_mappings(mime_type_to_headers_map);
  183. // Also consider a resource that is not an audio or video.
  184. mime_type_to_headers_map.set("application/octet-stream"sv, { "\x00"sv });
  185. for (auto const& mime_type_to_headers : mime_type_to_headers_map) {
  186. auto mime_type = mime_type_to_headers.key;
  187. for (auto const& header : mime_type_to_headers.value) {
  188. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(header.bytes(), Web::MimeSniff::SniffingConfiguration { .sniffing_context = Web::MimeSniff::SniffingContext::AudioOrVideo }));
  189. EXPECT_EQ(mime_type, computed_mime_type.essence());
  190. }
  191. }
  192. // Cover case where we aren't dealing with an audio or video MIME type.
  193. mime_type = "text/html"sv;
  194. supplied_type = MUST(Web::MimeSniff::MimeType::parse("text/html"sv)).release_value();
  195. computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(""sv.bytes(), Web::MimeSniff::SniffingConfiguration {
  196. .sniffing_context = Web::MimeSniff::SniffingContext::AudioOrVideo,
  197. .supplied_type = supplied_type,
  198. }));
  199. EXPECT_EQ(mime_type, computed_mime_type.essence());
  200. }
  201. TEST_CASE(determine_computed_mime_type_when_trying_to_match_mp4_signature)
  202. {
  203. HashMap<StringView, Vector<StringView>> mime_type_to_headers_map;
  204. mime_type_to_headers_map.set("application/octet-stream"sv, {
  205. // Payload length < 12.
  206. "!= 12"sv,
  207. // Payload length < box size.
  208. "\x00\x00\x00\x1F\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A"sv,
  209. // Box size % 4 != 0.
  210. "\x00\x00\x00\x0D\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"sv,
  211. // 4 bytes after box size header != "ftyp".
  212. "\x00\x00\x00\x0C\x00\x00\x00\x00\x00\x00\x00\x00"sv,
  213. // Sequence "mp4" couldn't be found in ftyp box.
  214. "\x00\x00\x00\x18\x66\x74\x79\x70isom\x00\x00\x00\x00\x61\x76\x63\x31\x00\x00\x00\x00"sv,
  215. });
  216. mime_type_to_headers_map.set("video/mp4"sv, {
  217. // 3 bytes after "ftyp" sequence == "mp4".
  218. "\x00\x00\x00\x0C\x66\x74\x79\x70mp42"sv,
  219. // "mp4" sequence found while executing while loop (this input covers entire loop)
  220. "\x00\x00\x00\x18\x66\x74\x79\x70isom\x00\x00\x00\x00\x61\x76\x63\x31mp41"sv,
  221. });
  222. for (auto const& mime_type_to_headers : mime_type_to_headers_map) {
  223. auto mime_type = mime_type_to_headers.key;
  224. for (auto const& header : mime_type_to_headers.value) {
  225. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(header.bytes(), Web::MimeSniff::SniffingConfiguration { .sniffing_context = Web::MimeSniff::SniffingContext::AudioOrVideo }));
  226. EXPECT_EQ(mime_type, MUST(computed_mime_type.serialized()));
  227. }
  228. }
  229. }
  230. TEST_CASE(determine_computed_mime_type_in_a_font_context)
  231. {
  232. // Cover case where supplied type is an XML MIME type.
  233. auto mime_type = "application/rss+xml"sv;
  234. auto supplied_type = MUST(Web::MimeSniff::MimeType::parse(mime_type)).release_value();
  235. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(""sv.bytes(), Web::MimeSniff::SniffingConfiguration {
  236. .sniffing_context = Web::MimeSniff::SniffingContext::Font,
  237. .supplied_type = supplied_type,
  238. }));
  239. EXPECT_EQ(mime_type, MUST(computed_mime_type.serialized()));
  240. HashMap<StringView, Vector<StringView>> mime_type_to_headers_map;
  241. mime_type_to_headers_map.set("application/octet-stream"sv, { "\x00"sv });
  242. mime_type_to_headers_map.set("application/vnd.ms-fontobject"sv, { "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00LP"sv });
  243. mime_type_to_headers_map.set("font/ttf"sv, { "\x00\x01\x00\x00"sv });
  244. mime_type_to_headers_map.set("font/otf"sv, { "OTTO"sv });
  245. mime_type_to_headers_map.set("font/collection"sv, { "ttcf"sv });
  246. mime_type_to_headers_map.set("font/woff"sv, { "wOFF"sv });
  247. mime_type_to_headers_map.set("font/woff2"sv, { "wOF2"sv });
  248. for (auto const& mime_type_to_headers : mime_type_to_headers_map) {
  249. auto mime_type = mime_type_to_headers.key;
  250. for (auto const& header : mime_type_to_headers.value) {
  251. auto computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(header.bytes(), Web::MimeSniff::SniffingConfiguration { .sniffing_context = Web::MimeSniff::SniffingContext::Font }));
  252. EXPECT_EQ(mime_type, computed_mime_type.essence());
  253. }
  254. }
  255. // Cover case where we aren't dealing with a font MIME type.
  256. mime_type = "text/html"sv;
  257. supplied_type = MUST(Web::MimeSniff::MimeType::parse("text/html"sv)).release_value();
  258. computed_mime_type = MUST(Web::MimeSniff::Resource::sniff(""sv.bytes(), Web::MimeSniff::SniffingConfiguration {
  259. .sniffing_context = Web::MimeSniff::SniffingContext::Font,
  260. .supplied_type = supplied_type,
  261. }));
  262. EXPECT_EQ(mime_type, computed_mime_type.essence());
  263. }