Body.cpp 23 KB


  1. /*
  2. * Copyright (c) 2022-2023, Linus Groh <linusg@serenityos.org>
  3. * Copyright (c) 2024, Jamie Mansfield <jmansfield@cadixdev.org>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/GenericLexer.h>
  8. #include <AK/TypeCasts.h>
  9. #include <LibJS/Runtime/ArrayBuffer.h>
  10. #include <LibJS/Runtime/Completion.h>
  11. #include <LibJS/Runtime/Error.h>
  12. #include <LibJS/Runtime/PromiseCapability.h>
  13. #include <LibJS/Runtime/TypedArray.h>
  14. #include <LibTextCodec/Decoder.h>
  15. #include <LibWeb/Bindings/ExceptionOrUtils.h>
  16. #include <LibWeb/Bindings/MainThreadVM.h>
  17. #include <LibWeb/DOMURL/URLSearchParams.h>
  18. #include <LibWeb/Fetch/Body.h>
  19. #include <LibWeb/Fetch/Infrastructure/HTTP.h>
  20. #include <LibWeb/Fetch/Infrastructure/HTTP/Bodies.h>
  21. #include <LibWeb/FileAPI/Blob.h>
  22. #include <LibWeb/FileAPI/File.h>
  23. #include <LibWeb/HTML/Scripting/TemporaryExecutionContext.h>
  24. #include <LibWeb/Infra/JSON.h>
  25. #include <LibWeb/Infra/Strings.h>
  26. #include <LibWeb/MimeSniff/MimeType.h>
  27. #include <LibWeb/Streams/ReadableStream.h>
  28. #include <LibWeb/WebIDL/Promise.h>
  29. #include <LibWeb/XHR/FormData.h>
  30. namespace Web::Fetch {
  // Out-of-line definition of the defaulted BodyMixin destructor.
  31. BodyMixin::~BodyMixin() = default;
  32. // https://fetch.spec.whatwg.org/#body-unusable
  33. bool BodyMixin::is_unusable() const
  34. {
  35. // An object including the Body interface mixin is said to be unusable if its body is non-null and its body’s stream is disturbed or locked.
  36. auto const& body = body_impl();
  37. return body && (body->stream()->is_disturbed() || body->stream()->is_locked());
  38. }
  39. // https://fetch.spec.whatwg.org/#dom-body-body
  40. GC::Ptr<Streams::ReadableStream> BodyMixin::body() const
  41. {
  42. // The body getter steps are to return null if this’s body is null; otherwise this’s body’s stream.
  43. auto const& body = body_impl();
  44. return body ? body->stream().ptr() : nullptr;
  45. }
  46. // https://fetch.spec.whatwg.org/#dom-body-bodyused
  47. bool BodyMixin::body_used() const
  48. {
  49. // The bodyUsed getter steps are to return true if this’s body is non-null and this’s body’s stream is disturbed; otherwise false.
  50. auto const& body = body_impl();
  51. return body && body->stream()->is_disturbed();
  52. }
  53. // https://fetch.spec.whatwg.org/#dom-body-arraybuffer
  54. WebIDL::ExceptionOr<GC::Ref<WebIDL::Promise>> BodyMixin::array_buffer() const
  55. {
  56. auto& vm = Bindings::main_thread_vm();
  57. auto& realm = *vm.current_realm();
  58. // The arrayBuffer() method steps are to return the result of running consume body with this and ArrayBuffer.
  59. return consume_body(realm, *this, PackageDataType::ArrayBuffer);
  60. }
  61. // https://fetch.spec.whatwg.org/#dom-body-blob
  62. WebIDL::ExceptionOr<GC::Ref<WebIDL::Promise>> BodyMixin::blob() const
  63. {
  64. auto& vm = Bindings::main_thread_vm();
  65. auto& realm = *vm.current_realm();
  66. // The blob() method steps are to return the result of running consume body with this and Blob.
  67. return consume_body(realm, *this, PackageDataType::Blob);
  68. }
  69. // https://fetch.spec.whatwg.org/#dom-body-bytes
  70. WebIDL::ExceptionOr<GC::Ref<WebIDL::Promise>> BodyMixin::bytes() const
  71. {
  72. auto& vm = Bindings::main_thread_vm();
  73. auto& realm = *vm.current_realm();
  74. // The bytes() method steps are to return the result of running consume body with this and Uint8Array.
  75. return consume_body(realm, *this, PackageDataType::Uint8Array);
  76. }
  77. // https://fetch.spec.whatwg.org/#dom-body-formdata
  78. WebIDL::ExceptionOr<GC::Ref<WebIDL::Promise>> BodyMixin::form_data() const
  79. {
  80. auto& vm = Bindings::main_thread_vm();
  81. auto& realm = *vm.current_realm();
  82. // The formData() method steps are to return the result of running consume body with this and FormData.
  83. return consume_body(realm, *this, PackageDataType::FormData);
  84. }
  85. // https://fetch.spec.whatwg.org/#dom-body-json
  86. WebIDL::ExceptionOr<GC::Ref<WebIDL::Promise>> BodyMixin::json() const
  87. {
  88. auto& vm = Bindings::main_thread_vm();
  89. auto& realm = *vm.current_realm();
  90. // The json() method steps are to return the result of running consume body with this and JSON.
  91. return consume_body(realm, *this, PackageDataType::JSON);
  92. }
  93. // https://fetch.spec.whatwg.org/#dom-body-text
  94. WebIDL::ExceptionOr<GC::Ref<WebIDL::Promise>> BodyMixin::text() const
  95. {
  96. auto& vm = Bindings::main_thread_vm();
  97. auto& realm = *vm.current_realm();
  98. // The text() method steps are to return the result of running consume body with this and text.
  99. return consume_body(realm, *this, PackageDataType::Text);
  100. }
  101. // https://fetch.spec.whatwg.org/#concept-body-package-data
  102. WebIDL::ExceptionOr<JS::Value> package_data(JS::Realm& realm, ByteBuffer bytes, PackageDataType type, Optional<MimeSniff::MimeType> const& mime_type)
  103. {
  104. auto& vm = realm.vm();
  105. switch (type) {
  106. case PackageDataType::ArrayBuffer:
  107. // Return a new ArrayBuffer whose contents are bytes.
  108. return JS::ArrayBuffer::create(realm, move(bytes));
  109. case PackageDataType::Blob: {
  110. // Return a Blob whose contents are bytes and type attribute is mimeType.
  111. // NOTE: If extracting the mime type returns failure, other browsers set it to an empty string - not sure if that's spec'd.
  112. auto mime_type_string = mime_type.has_value() ? mime_type->serialized() : String {};
  113. return FileAPI::Blob::create(realm, move(bytes), move(mime_type_string));
  114. }
  115. case PackageDataType::Uint8Array: {
  116. // Return the result of creating a Uint8Array from bytes in this’s relevant realm.
  117. auto bytes_length = bytes.size();
  118. auto array_buffer = JS::ArrayBuffer::create(realm, move(bytes));
  119. return JS::Uint8Array::create(realm, bytes_length, *array_buffer);
  120. }
  121. case PackageDataType::FormData:
  122. // If mimeType’s essence is "multipart/form-data", then:
  123. if (mime_type.has_value() && mime_type->essence() == "multipart/form-data"sv) {
  124. // 1. Parse bytes, using the value of the `boundary` parameter from mimeType, per the rules set forth in Returning Values from Forms: multipart/form-data. [RFC7578]
  125. auto error_or_entry_list = parse_multipart_form_data(realm, bytes, mime_type.value());
  126. // 2. If that fails for some reason, then throw a TypeError.
  127. if (error_or_entry_list.is_error())
  128. return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, MUST(String::formatted("Failed to parse multipart form data: {}", error_or_entry_list.release_error().message)) };
  129. // 3. Return a new FormData object, appending each entry, resulting from the parsing operation, to its entry list.
  130. return TRY(XHR::FormData::create(realm, error_or_entry_list.release_value()));
  131. }
  132. // Otherwise, if mimeType’s essence is "application/x-www-form-urlencoded", then:
  133. else if (mime_type.has_value() && mime_type->essence() == "application/x-www-form-urlencoded"sv) {
  134. // 1. Let entries be the result of parsing bytes.
  135. auto entries = DOMURL::url_decode(StringView { bytes });
  136. // 2. If entries is failure, then throw a TypeError.
  137. // FIXME: Spec bug? It doesn't seem possible to throw an error here.
  138. // 3. Return a new FormData object whose entry list is entries.
  139. return TRY(XHR::FormData::create(realm, entries));
  140. }
  141. // Otherwise, throw a TypeError.
  142. else {
  143. return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, "Mime type must be 'multipart/form-data' or 'application/x-www-form-urlencoded'"sv };
  144. }
  145. case PackageDataType::JSON:
  146. // Return the result of running parse JSON from bytes on bytes.
  147. return Infra::parse_json_bytes_to_javascript_value(realm, bytes);
  148. case PackageDataType::Text: {
  149. // Return the result of running UTF-8 decode on bytes.
  150. auto decoder = TextCodec::decoder_for("UTF-8"sv);
  151. VERIFY(decoder.has_value());
  152. auto utf8_text = MUST(TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(*decoder, bytes));
  153. return JS::PrimitiveString::create(vm, move(utf8_text));
  154. }
  155. default:
  156. VERIFY_NOT_REACHED();
  157. }
  158. }
  159. // https://fetch.spec.whatwg.org/#concept-body-consume-body
  160. WebIDL::ExceptionOr<GC::Ref<WebIDL::Promise>> consume_body(JS::Realm& realm, BodyMixin const& object, PackageDataType type)
  161. {
  162. // 1. If object is unusable, then return a promise rejected with a TypeError.
  163. if (object.is_unusable()) {
  164. WebIDL::SimpleException exception { WebIDL::SimpleExceptionType::TypeError, "Body is unusable"sv };
  165. return WebIDL::create_rejected_promise_from_exception(realm, move(exception));
  166. }
  167. // 2. Let promise be a new promise.
  168. auto promise = WebIDL::create_promise(realm);
  169. // 3. Let errorSteps given error be to reject promise with error.
  170. // NOTE: `promise` and `realm` is protected by GC::HeapFunction.
  171. auto error_steps = GC::create_function(realm.heap(), [promise, &realm](JS::Value error) {
  172. // AD-HOC: An execution context is required for Promise's reject function.
  173. HTML::TemporaryExecutionContext execution_context { realm };
  174. WebIDL::reject_promise(realm, promise, error);
  175. });
  176. // 4. Let successSteps given a byte sequence data be to resolve promise with the result of running convertBytesToJSValue
  177. // with data. If that threw an exception, then run errorSteps with that exception.
  178. // NOTE: `promise`, `realm` and `object` is protected by GC::HeapFunction.
  179. // FIXME: Refactor this to the new version of the spec introduced with https://github.com/whatwg/fetch/commit/464326e8eb6a602122c030cd40042480a3c0e265
  180. auto success_steps = GC::create_function(realm.heap(), [promise, &realm, &object, type](ByteBuffer data) {
  181. auto& vm = realm.vm();
  182. // AD-HOC: An execution context is required for Promise's reject function and JSON.parse.
  183. HTML::TemporaryExecutionContext execution_context { realm };
  184. auto value_or_error = Bindings::throw_dom_exception_if_needed(vm, [&]() -> WebIDL::ExceptionOr<JS::Value> {
  185. return package_data(realm, data, type, object.mime_type_impl());
  186. });
  187. if (value_or_error.is_error()) {
  188. // We can't call error_steps here without moving it into success_steps, causing a double move when we pause error_steps
  189. // to fully_read, so just reject the promise like error_steps does.
  190. WebIDL::reject_promise(realm, promise, value_or_error.release_error().value().value());
  191. return;
  192. }
  193. WebIDL::resolve_promise(realm, promise, value_or_error.release_value());
  194. });
  195. // 5. If object’s body is null, then run successSteps with an empty byte sequence.
  196. auto const& body = object.body_impl();
  197. if (!body) {
  198. success_steps->function()(ByteBuffer {});
  199. }
  200. // 6. Otherwise, fully read object’s body given successSteps, errorSteps, and object’s relevant global object.
  201. else {
  202. body->fully_read(realm, success_steps, error_steps, GC::Ref { HTML::relevant_global_object(object.as_platform_object()) });
  203. }
  204. // 7. Return promise.
  205. return promise;
  206. }
  207. // https://andreubotella.github.io/multipart-form-data/#parse-a-multipart-form-data-name
  208. static MultipartParsingErrorOr<String> parse_multipart_form_data_name(GenericLexer& lexer)
  209. {
  210. // 1. Assert: The byte at (position - 1) is 0x22 (").
  211. VERIFY(lexer.peek(-1) == '"');
  212. // 2. Let name be the result of collecting a sequence of bytes that are not 0x0A (LF), 0x0D (CR) or 0x22 ("), given position.
  213. auto name = lexer.consume_until(is_any_of("\n\r\""sv));
  214. // 3. If the byte at position is not 0x22 ("), return failure. Otherwise, advance position by 1.
  215. if (!lexer.consume_specific('"'))
  216. return MultipartParsingError { MUST(String::formatted("Expected \" at position {}", lexer.tell())) };
  217. // 4. Replace any occurrence of the following subsequences in name with the given byte:
  218. // - "%0A" with 0x0A (LF)
  219. // - "%0D" with 0x0D (CR)
  220. // - "%22" with 0x22 (")
  221. StringBuilder builder;
  222. for (size_t i = 0; i < name.length(); ++i) {
  223. // Check for subsequences starting with '%'
  224. if (name[i] == '%' && i + 2 < name.length()) {
  225. auto subsequence = name.substring_view(i, 3);
  226. if (subsequence == "%0A"sv) {
  227. builder.append(0x0A); // Append LF
  228. i += 2; // Skip the next two characters
  229. continue;
  230. }
  231. if (subsequence == "%0D"sv) {
  232. builder.append(0x0D); // Append CR
  233. i += 2; // Skip the next two characters
  234. continue;
  235. }
  236. if (subsequence == "%22"sv) {
  237. builder.append(0x22); // Append "
  238. i += 2; // Skip the next two characters
  239. continue;
  240. }
  241. }
  242. // Append the current character if no substitution was made
  243. builder.append(name[i]);
  244. }
  245. return builder.to_string_without_validation();
  246. }
  247. // https://andreubotella.github.io/multipart-form-data/#parse-multipart-form-data-headers
  248. static MultipartParsingErrorOr<MultiPartFormDataHeader> parse_multipart_form_data_header(GenericLexer& lexer)
  249. {
  250. // 1. Let name, filename and contentType be null.
  251. MultiPartFormDataHeader header;
  252. // 2. While true:
  253. while (true) {
  254. // 1. If position points to a sequence of bytes starting with 0x0D 0x0A (CR LF):
  255. if (lexer.next_is("\r\n"sv)) {
  256. // 1. If name is null, return failure.
  257. if (!header.name.has_value())
  258. return MultipartParsingError { "Missing name parameter in Content-Disposition header"_string };
  259. // 2. Return name, filename and contentType.
  260. return header;
  261. }
  262. // 2. Let header name be the result of collecting a sequence of bytes that are not 0x0A (LF), 0x0D (CR) or 0x3A (:), given position.
  263. auto header_name = lexer.consume_until(is_any_of("\n\r:"sv));
  264. // 3. Remove any HTTP tab or space bytes from the start or end of header name.
  265. header_name = header_name.trim(Infrastructure::HTTP_TAB_OR_SPACE, TrimMode::Both);
  266. // 4. If header name does not match the field-name token production, return failure.
  267. if (!Infrastructure::is_header_name(header_name.bytes()))
  268. return MultipartParsingError { MUST(String::formatted("Invalid header name {}", header_name)) };
  269. // 5. If the byte at position is not 0x3A (:), return failure.
  270. // 6. Advance position by 1.
  271. if (!lexer.consume_specific(':'))
  272. return MultipartParsingError { MUST(String::formatted("Expected : at position {}", lexer.tell())) };
  273. // 7. Collect a sequence of bytes that are HTTP tab or space bytes given position. (Do nothing with those bytes.)
  274. lexer.ignore_while(Infrastructure::is_http_tab_or_space);
  275. // 8. Byte-lowercase header name and switch on the result:
  276. // -> `content-disposition`
  277. if (header_name.equals_ignoring_ascii_case("content-disposition"sv)) {
  278. // 1. Set name and filename to null.
  279. header.name.clear();
  280. header.filename.clear();
  281. // 2. If position does not point to a sequence of bytes starting with `form-data; name="`, return failure.
  282. // 3. Advance position so it points at the byte after the next 0x22 (") byte (the one in the sequence of bytes matched above).
  283. if (!lexer.consume_specific("form-data; name=\""sv))
  284. return MultipartParsingError { MUST(String::formatted("Expected `form-data; name=\"` at position {}", lexer.tell())) };
  285. // 4. Set name to the result of parsing a multipart/form-data name given input and position, if the result is not failure. Otherwise, return failure.
  286. auto maybe_name = parse_multipart_form_data_name(lexer);
  287. if (maybe_name.is_error())
  288. return maybe_name.release_error();
  289. header.name = maybe_name.release_value();
  290. // 5. If position points to a sequence of bytes starting with `; filename="`:
  291. // 1. Advance position so it points at the byte after the next 0x22 (") byte (the one in the sequence of bytes matched above).
  292. if (lexer.consume_specific("; filename=\""sv)) {
  293. // 2. Set filename to the result of parsing a multipart/form-data name given input and position, if the result is not failure. Otherwise, return failure.
  294. auto maybe_filename = parse_multipart_form_data_name(lexer);
  295. if (maybe_filename.is_error())
  296. return maybe_filename.release_error();
  297. header.filename = maybe_filename.release_value();
  298. }
  299. }
  300. // -> `content-type`
  301. else if (header_name.equals_ignoring_ascii_case("content-type"sv)) {
  302. // 1. Let header value be the result of collecting a sequence of bytes that are not 0x0A (LF) or 0x0D (CR), given position.
  303. auto header_value = lexer.consume_until(Infrastructure::is_http_newline);
  304. // 2. Remove any HTTP tab or space bytes from the end of header value.
  305. header_value = header_value.trim(Infrastructure::HTTP_TAB_OR_SPACE, TrimMode::Right);
  306. // 3. Set contentType to the isomorphic decoding of header value.
  307. header.content_type = Infra::isomorphic_decode(header_value.bytes());
  308. }
  309. // -> Otherwise
  310. else {
  311. // 1. Collect a sequence of bytes that are not 0x0A (LF) or 0x0D (CR), given position. (Do nothing with those bytes.)
  312. lexer.ignore_until(Infrastructure::is_http_newline);
  313. }
  314. // 9. If position does not point to a sequence of bytes starting with 0x0D 0x0A (CR LF), return failure. Otherwise, advance position by 2 (past the newline).
  315. if (!lexer.consume_specific("\r\n"sv))
  316. return MultipartParsingError { MUST(String::formatted("Expected CRLF at position {}", lexer.tell())) };
  317. }
  318. return header;
  319. }
  320. // https://andreubotella.github.io/multipart-form-data/#multipart-form-data-parser
  321. MultipartParsingErrorOr<Vector<XHR::FormDataEntry>> parse_multipart_form_data(JS::Realm& realm, StringView input, MimeSniff::MimeType const& mime_type)
  322. {
  323. // 1. Assert: mimeType’s essence is "multipart/form-data".
  324. VERIFY(mime_type.essence() == "multipart/form-data"sv);
  325. // 2. If mimeType’s parameters["boundary"] does not exist, return failure. Otherwise, let boundary be the result of UTF-8 decoding mimeType’s parameters["boundary"].
  326. auto maybe_boundary = mime_type.parameters().get("boundary"sv);
  327. if (!maybe_boundary.has_value())
  328. return MultipartParsingError { "Missing boundary parameter in Content-Type header"_string };
  329. auto boundary = maybe_boundary.release_value();
  330. // 3. Let entry list be an empty entry list.
  331. Vector<XHR::FormDataEntry> entry_list;
  332. // 4. Let position be a pointer to a byte in input, initially pointing at the first byte.
  333. GenericLexer lexer(input);
  334. auto boundary_with_dashes = MUST(String::formatted("--{}", boundary));
  335. // 5. While true:
  336. while (true) {
  337. // 1. If position points to a sequence of bytes starting with 0x2D 0x2D (`--`) followed by boundary, advance position by 2 + the length of boundary. Otherwise, return failure.
  338. if (!lexer.consume_specific(boundary_with_dashes))
  339. return MultipartParsingError { MUST(String::formatted("Expected `--` followed by boundary at position {}", lexer.tell())) };
  340. // 2. If position points to the sequence of bytes 0x2D 0x2D 0x0D 0x0A (`--` followed by CR LF) followed by the end of input, return entry list.
  341. if (lexer.next_is("--\r\n"sv))
  342. return entry_list;
  343. // 3. If position does not point to a sequence of bytes starting with 0x0D 0x0A (CR LF), return failure.
  344. // 4. Advance position by 2. (This skips past the newline.)
  345. if (!lexer.consume_specific("\r\n"sv))
  346. return MultipartParsingError { MUST(String::formatted("Expected CRLF at position {}", lexer.tell())) };
  347. // 5. Let name, filename and contentType be the result of parsing multipart/form-data headers on input and position, if the result is not failure. Otherwise, return failure.
  348. auto header = TRY(parse_multipart_form_data_header(lexer));
  349. // 6. Advance position by 2. (This skips past the empty line that marks the end of the headers.)
  350. lexer.ignore(2);
  351. // 7. Let body be the empty byte sequence.
  352. // 8. Body loop: While position is not past the end of input:
  353. // 1. Append the code point at position to body.
  354. // 2. If body ends with boundary:
  355. // 1. Remove the last 4 + (length of boundary) bytes from body.
  356. // 2. Decrease position by 4 + (length of boundary).
  357. // 3. Break out of body loop.
  358. auto body = lexer.consume_until(boundary_with_dashes.bytes_as_string_view());
  359. if (lexer.next_is(boundary_with_dashes.bytes_as_string_view())) {
  360. constexpr size_t trailing_crlf_length = 2;
  361. if (body.length() >= trailing_crlf_length) {
  362. body = body.substring_view(0, body.length() - trailing_crlf_length);
  363. lexer.retreat(trailing_crlf_length);
  364. }
  365. }
  366. // 9. If position does not point to a sequence of bytes starting with 0x0D 0x0A (CR LF), return failure. Otherwise, advance position by 2.
  367. if (!lexer.consume_specific("\r\n"sv))
  368. return MultipartParsingError { MUST(String::formatted("Expected CRLF at position {}", lexer.tell())) };
  369. // 10. If filename is not null:
  370. Optional<XHR::FormDataEntryValue> value;
  371. if (header.filename.has_value()) {
  372. // 1. If contentType is null, set contentType to "text/plain".
  373. if (!header.content_type.has_value())
  374. header.content_type = "text/plain"_string;
  375. // 2. If contentType is not an ASCII string, set contentType to the empty string.
  376. if (!all_of(header.content_type->code_points(), is_ascii)) {
  377. header.content_type = ""_string;
  378. }
  379. // 3. Let value be a new File object with name filename, type contentType, and body body.
  380. auto blob = FileAPI::Blob::create(realm, MUST(ByteBuffer::copy(body.bytes())), header.content_type.release_value());
  381. FileAPI::FilePropertyBag options {};
  382. options.type = blob->type();
  383. auto file = MUST(FileAPI::File::create(realm, { GC::make_root(blob) }, header.filename.release_value(), move(options)));
  384. value = GC::make_root(file);
  385. }
  386. // 11. Otherwise:
  387. else {
  388. // 1. Let value be the UTF-8 decoding without BOM of body.
  389. value = String::from_utf8_with_replacement_character(body, String::WithBOMHandling::No);
  390. }
  391. // 12. Assert: name is a scalar value string and value is either a scalar value string or a File object.
  392. VERIFY(header.name.has_value() && value.has_value());
  393. // 13. Create an entry with name and value, and append it to entry list.
  394. entry_list.empend(header.name.release_value(), value.release_value());
  395. }
  396. }
  397. }