Resource.cpp 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. /*
  2. * Copyright (c) 2023, Kemal Zebari <kemalzebra@gmail.com>.
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <LibWeb/Fetch/Infrastructure/URL.h>
  7. #include <LibWeb/MimeSniff/Resource.h>
  8. namespace Web::MimeSniff {
  9. ErrorOr<Resource> Resource::create(ReadonlyBytes data, SniffingConfiguration configuration)
  10. {
  11. // NOTE: Non-standard but for cases where pattern matching fails, let's fall back to the safest MIME type.
  12. auto default_computed_mime_type = TRY(MimeType::create("application"_string, "octet-stream"_string));
  13. auto resource = Resource { data, configuration.no_sniff, move(default_computed_mime_type) };
  14. TRY(resource.supplied_mime_type_detection_algorithm(configuration.scheme, move(configuration.supplied_type)));
  15. TRY(resource.mime_type_sniffing_algorithm());
  16. return resource;
  17. }
  18. ErrorOr<MimeType> Resource::sniff(ReadonlyBytes data, SniffingConfiguration configuration)
  19. {
  20. auto resource = TRY(create(data, move(configuration)));
  21. return move(resource.m_computed_mime_type);
  22. }
  23. Resource::Resource(ReadonlyBytes data, bool no_sniff, MimeType&& default_computed_mime_type)
  24. : m_no_sniff(no_sniff)
  25. , m_computed_mime_type(move(default_computed_mime_type))
  26. {
  27. read_the_resource_header(data);
  28. }
  29. Resource::~Resource() = default;
  30. // https://mimesniff.spec.whatwg.org/#supplied-mime-type-detection-algorithm
  31. // NOTE: Parameters are non-standard.
  32. ErrorOr<void> Resource::supplied_mime_type_detection_algorithm(StringView scheme, Optional<MimeType> supplied_type)
  33. {
  34. // 1. Let supplied-type be null.
  35. // 2. If the resource is retrieved via HTTP, execute the following steps:
  36. // 1. If one or more Content-Type headers are associated with the resource, execute the following steps:
  37. // 1. Set supplied-type to the value of the last Content-Type header associated with the resource.
  38. // 2. Set the check-for-apache-bug flag if supplied-type is exactly equal to one of the values in the following table:
  39. // NOTE: Non-standard but this algorithm expects the caller to handle step 2.1.1.
  40. if (supplied_type.has_value()) {
  41. if (Fetch::Infrastructure::is_http_or_https_scheme(scheme)) {
  42. static Array<StringView, 4> constexpr apache_bug_mime_types = {
  43. "text/plain"sv,
  44. "text/plain; charset=ISO-8859-1"sv,
  45. "text/plain; charset=iso-8859-1"sv,
  46. "text/plain; charset=UTF-8"sv
  47. };
  48. auto serialized_supplied_type = TRY(supplied_type->serialized());
  49. for (auto apache_bug_mime_type : apache_bug_mime_types) {
  50. if (serialized_supplied_type == apache_bug_mime_type) {
  51. m_check_for_apache_bug_flag = true;
  52. break;
  53. }
  54. }
  55. }
  56. }
  57. // 3. If the resource is retrieved directly from the file system, set supplied-type
  58. // to the MIME type provided by the file system.
  59. // 4. If the resource is retrieved via another protocol (such as FTP), set
  60. // supplied-type to the MIME type as determined by that protocol, if any.
  61. // 5. If supplied-type is not a MIME type, the supplied MIME type is undefined.
  62. // Abort these steps.
  63. // 6. The supplied MIME type is supplied-type.
  64. // NOTE: The expectation is for the caller to handle these spec steps.
  65. m_supplied_mime_type = supplied_type;
  66. return {};
  67. }
  68. // https://mimesniff.spec.whatwg.org/#read-the-resource-header
  69. void Resource::read_the_resource_header(ReadonlyBytes data)
  70. {
  71. // 1. Let buffer be a byte sequence.
  72. ByteBuffer buffer;
  73. // 2. Read bytes of the resource into buffer until one of the following conditions is met:
  74. // - the end of the resource is reached.
  75. // - the number of bytes in buffer is greater than or equal to 1445.
  76. // - a reasonable amount of time has elapsed, as determined by the user agent.
  77. // FIXME: The spec expects us to be reading from a stream. Reimplement this spec step once
  78. // we have greater support for streaming in areas that calls on this API.
  79. static size_t constexpr MAX_SNIFF_SIZE = 1445;
  80. buffer.append(data.slice(0, min(data.size(), MAX_SNIFF_SIZE)));
  81. // 3. The resource header is buffer.
  82. m_resource_header = move(buffer);
  83. }
  84. // https://mimesniff.spec.whatwg.org/#mime-type-sniffing-algorithm
  85. ErrorOr<void> Resource::mime_type_sniffing_algorithm()
  86. {
  87. // 1. If the supplied MIME type is undefined or if the supplied MIME type’s essence
  88. // is "unknown/unknown", "application/unknown", or "*/*", execute the rules for
  89. // identifying an unknown MIME type with the sniff-scriptable flag equal to the
  90. // inverse of the no-sniff flag and abort these steps.
  91. if (!m_supplied_mime_type.has_value() || m_supplied_mime_type->essence().is_one_of("unknown/unknown", "application/unknown", "*/*")) {
  92. // FIXME: Execute the rules for identifying an unknown MIME type with the
  93. // sniff-scriptable flag equal to the inverse of the no-sniff flag and abort
  94. // these steps.
  95. return {};
  96. }
  97. // 2. If the no-sniff flag is set, the computed MIME type is the supplied MIME type.
  98. // Abort these steps.
  99. if (m_no_sniff) {
  100. m_computed_mime_type = m_supplied_mime_type.value();
  101. return {};
  102. }
  103. // 3. If the check-for-apache-bug flag is set, execute the rules for distinguishing
  104. // if a resource is text or binary and abort these steps.
  105. if (m_check_for_apache_bug_flag) {
  106. // FIXME: Execute the rules for distinguishing if a resource is text or binary and abort these steps.
  107. return {};
  108. }
  109. // 4. If the supplied MIME type is an XML MIME type, the computed MIME type is the supplied MIME type.
  110. // Abort these steps.
  111. if (m_supplied_mime_type->is_xml()) {
  112. m_computed_mime_type = m_supplied_mime_type.value();
  113. return {};
  114. }
  115. // 5. If the supplied MIME type’s essence is "text/html", execute the rules for distinguishing if a
  116. // resource is a feed or HTML and abort these steps.
  117. if (m_supplied_mime_type->essence() == "text/html") {
  118. // FIXME: Execute the rules for distinguishing if a resource is a feed or HTML and abort these steps.
  119. return {};
  120. }
  121. // FIXME: 6. If the supplied MIME type is an image MIME type supported by the user agent, let matched-type be
  122. // the result of executing the image type pattern matching algorithm with the resource header as
  123. // the byte sequence to be matched.
  124. Optional<MimeType> matched_type;
  125. // 7. If matched-type is not undefined, the computed MIME type is matched-type.
  126. // Abort these steps.
  127. if (matched_type.has_value()) {
  128. m_computed_mime_type = matched_type.release_value();
  129. return {};
  130. }
  131. // FIXME: 8. If the supplied MIME type is an audio or video MIME type supported by the user agent, let matched-type be
  132. // the result of executing the audio or video type pattern matching algorithm with the resource header as
  133. // the byte sequence to be matched.
  134. // 9. If matched-type is not undefined, the computed MIME type is matched-type.
  135. // Abort these steps.
  136. if (matched_type.has_value()) {
  137. m_computed_mime_type = matched_type.release_value();
  138. return {};
  139. }
  140. // 10. The computed MIME type is the supplied MIME type.
  141. m_computed_mime_type = m_supplied_mime_type.value();
  142. return {};
  143. }
  144. }