123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471 |
- /*
- * Copyright (c) 2022-2023, Linus Groh <linusg@serenityos.org>
- * Copyright (c) 2024, Jamie Mansfield <jmansfield@cadixdev.org>
- *
- * SPDX-License-Identifier: BSD-2-Clause
- */
- #include <AK/GenericLexer.h>
- #include <AK/TypeCasts.h>
- #include <LibJS/Runtime/ArrayBuffer.h>
- #include <LibJS/Runtime/Completion.h>
- #include <LibJS/Runtime/Error.h>
- #include <LibJS/Runtime/PromiseCapability.h>
- #include <LibJS/Runtime/TypedArray.h>
- #include <LibTextCodec/Decoder.h>
- #include <LibWeb/Bindings/ExceptionOrUtils.h>
- #include <LibWeb/Bindings/MainThreadVM.h>
- #include <LibWeb/DOMURL/URLSearchParams.h>
- #include <LibWeb/Fetch/Body.h>
- #include <LibWeb/Fetch/Infrastructure/HTTP.h>
- #include <LibWeb/Fetch/Infrastructure/HTTP/Bodies.h>
- #include <LibWeb/FileAPI/Blob.h>
- #include <LibWeb/FileAPI/File.h>
- #include <LibWeb/HTML/Scripting/TemporaryExecutionContext.h>
- #include <LibWeb/Infra/JSON.h>
- #include <LibWeb/Infra/Strings.h>
- #include <LibWeb/MimeSniff/MimeType.h>
- #include <LibWeb/Streams/ReadableStream.h>
- #include <LibWeb/WebIDL/Promise.h>
- #include <LibWeb/XHR/FormData.h>
- namespace Web::Fetch {
- BodyMixin::~BodyMixin() = default;
- // https://fetch.spec.whatwg.org/#body-unusable
- bool BodyMixin::is_unusable() const
- {
- // An object including the Body interface mixin is said to be unusable if its body is non-null and its body’s stream is disturbed or locked.
- auto const& body = body_impl();
- return body && (body->stream()->is_disturbed() || body->stream()->is_locked());
- }
- // https://fetch.spec.whatwg.org/#dom-body-body
- GC::Ptr<Streams::ReadableStream> BodyMixin::body() const
- {
- // The body getter steps are to return null if this’s body is null; otherwise this’s body’s stream.
- auto const& body = body_impl();
- return body ? body->stream().ptr() : nullptr;
- }
- // https://fetch.spec.whatwg.org/#dom-body-bodyused
- bool BodyMixin::body_used() const
- {
- // The bodyUsed getter steps are to return true if this’s body is non-null and this’s body’s stream is disturbed; otherwise false.
- auto const& body = body_impl();
- return body && body->stream()->is_disturbed();
- }
- // https://fetch.spec.whatwg.org/#dom-body-arraybuffer
- WebIDL::ExceptionOr<GC::Ref<WebIDL::Promise>> BodyMixin::array_buffer() const
- {
- auto& vm = Bindings::main_thread_vm();
- auto& realm = *vm.current_realm();
- // The arrayBuffer() method steps are to return the result of running consume body with this and ArrayBuffer.
- return consume_body(realm, *this, PackageDataType::ArrayBuffer);
- }
- // https://fetch.spec.whatwg.org/#dom-body-blob
- WebIDL::ExceptionOr<GC::Ref<WebIDL::Promise>> BodyMixin::blob() const
- {
- auto& vm = Bindings::main_thread_vm();
- auto& realm = *vm.current_realm();
- // The blob() method steps are to return the result of running consume body with this and Blob.
- return consume_body(realm, *this, PackageDataType::Blob);
- }
- // https://fetch.spec.whatwg.org/#dom-body-bytes
- WebIDL::ExceptionOr<GC::Ref<WebIDL::Promise>> BodyMixin::bytes() const
- {
- auto& vm = Bindings::main_thread_vm();
- auto& realm = *vm.current_realm();
- // The bytes() method steps are to return the result of running consume body with this and Uint8Array.
- return consume_body(realm, *this, PackageDataType::Uint8Array);
- }
- // https://fetch.spec.whatwg.org/#dom-body-formdata
- WebIDL::ExceptionOr<GC::Ref<WebIDL::Promise>> BodyMixin::form_data() const
- {
- auto& vm = Bindings::main_thread_vm();
- auto& realm = *vm.current_realm();
- // The formData() method steps are to return the result of running consume body with this and FormData.
- return consume_body(realm, *this, PackageDataType::FormData);
- }
- // https://fetch.spec.whatwg.org/#dom-body-json
- WebIDL::ExceptionOr<GC::Ref<WebIDL::Promise>> BodyMixin::json() const
- {
- auto& vm = Bindings::main_thread_vm();
- auto& realm = *vm.current_realm();
- // The json() method steps are to return the result of running consume body with this and JSON.
- return consume_body(realm, *this, PackageDataType::JSON);
- }
- // https://fetch.spec.whatwg.org/#dom-body-text
- WebIDL::ExceptionOr<GC::Ref<WebIDL::Promise>> BodyMixin::text() const
- {
- auto& vm = Bindings::main_thread_vm();
- auto& realm = *vm.current_realm();
- // The text() method steps are to return the result of running consume body with this and text.
- return consume_body(realm, *this, PackageDataType::Text);
- }
- // https://fetch.spec.whatwg.org/#concept-body-package-data
- WebIDL::ExceptionOr<JS::Value> package_data(JS::Realm& realm, ByteBuffer bytes, PackageDataType type, Optional<MimeSniff::MimeType> const& mime_type)
- {
- auto& vm = realm.vm();
- switch (type) {
- case PackageDataType::ArrayBuffer:
- // Return a new ArrayBuffer whose contents are bytes.
- return JS::ArrayBuffer::create(realm, move(bytes));
- case PackageDataType::Blob: {
- // Return a Blob whose contents are bytes and type attribute is mimeType.
- // NOTE: If extracting the mime type returns failure, other browsers set it to an empty string - not sure if that's spec'd.
- auto mime_type_string = mime_type.has_value() ? mime_type->serialized() : String {};
- return FileAPI::Blob::create(realm, move(bytes), move(mime_type_string));
- }
- case PackageDataType::Uint8Array: {
- // Return the result of creating a Uint8Array from bytes in this’s relevant realm.
- auto bytes_length = bytes.size();
- auto array_buffer = JS::ArrayBuffer::create(realm, move(bytes));
- return JS::Uint8Array::create(realm, bytes_length, *array_buffer);
- }
- case PackageDataType::FormData:
- // If mimeType’s essence is "multipart/form-data", then:
- if (mime_type.has_value() && mime_type->essence() == "multipart/form-data"sv) {
- // 1. Parse bytes, using the value of the `boundary` parameter from mimeType, per the rules set forth in Returning Values from Forms: multipart/form-data. [RFC7578]
- auto error_or_entry_list = parse_multipart_form_data(realm, bytes, mime_type.value());
- // 2. If that fails for some reason, then throw a TypeError.
- if (error_or_entry_list.is_error())
- return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, MUST(String::formatted("Failed to parse multipart form data: {}", error_or_entry_list.release_error().message)) };
- // 3. Return a new FormData object, appending each entry, resulting from the parsing operation, to its entry list.
- return TRY(XHR::FormData::create(realm, error_or_entry_list.release_value()));
- }
- // Otherwise, if mimeType’s essence is "application/x-www-form-urlencoded", then:
- else if (mime_type.has_value() && mime_type->essence() == "application/x-www-form-urlencoded"sv) {
- // 1. Let entries be the result of parsing bytes.
- auto entries = DOMURL::url_decode(StringView { bytes });
- // 2. If entries is failure, then throw a TypeError.
- // FIXME: Spec bug? It doesn't seem possible to throw an error here.
- // 3. Return a new FormData object whose entry list is entries.
- return TRY(XHR::FormData::create(realm, entries));
- }
- // Otherwise, throw a TypeError.
- else {
- return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, "Mime type must be 'multipart/form-data' or 'application/x-www-form-urlencoded'"sv };
- }
- case PackageDataType::JSON:
- // Return the result of running parse JSON from bytes on bytes.
- return Infra::parse_json_bytes_to_javascript_value(realm, bytes);
- case PackageDataType::Text: {
- // Return the result of running UTF-8 decode on bytes.
- auto decoder = TextCodec::decoder_for("UTF-8"sv);
- VERIFY(decoder.has_value());
- auto utf8_text = MUST(TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(*decoder, bytes));
- return JS::PrimitiveString::create(vm, move(utf8_text));
- }
- default:
- VERIFY_NOT_REACHED();
- }
- }
- // https://fetch.spec.whatwg.org/#concept-body-consume-body
- WebIDL::ExceptionOr<GC::Ref<WebIDL::Promise>> consume_body(JS::Realm& realm, BodyMixin const& object, PackageDataType type)
- {
- // 1. If object is unusable, then return a promise rejected with a TypeError.
- if (object.is_unusable()) {
- WebIDL::SimpleException exception { WebIDL::SimpleExceptionType::TypeError, "Body is unusable"sv };
- return WebIDL::create_rejected_promise_from_exception(realm, move(exception));
- }
- // 2. Let promise be a new promise.
- auto promise = WebIDL::create_promise(realm);
- // 3. Let errorSteps given error be to reject promise with error.
- // NOTE: `promise` and `realm` is protected by GC::HeapFunction.
- auto error_steps = GC::create_function(realm.heap(), [promise, &realm](JS::Value error) {
- // AD-HOC: An execution context is required for Promise's reject function.
- HTML::TemporaryExecutionContext execution_context { realm };
- WebIDL::reject_promise(realm, promise, error);
- });
- // 4. Let successSteps given a byte sequence data be to resolve promise with the result of running convertBytesToJSValue
- // with data. If that threw an exception, then run errorSteps with that exception.
- // NOTE: `promise`, `realm` and `object` is protected by GC::HeapFunction.
- // FIXME: Refactor this to the new version of the spec introduced with https://github.com/whatwg/fetch/commit/464326e8eb6a602122c030cd40042480a3c0e265
- auto success_steps = GC::create_function(realm.heap(), [promise, &realm, &object, type](ByteBuffer data) {
- auto& vm = realm.vm();
- // AD-HOC: An execution context is required for Promise's reject function and JSON.parse.
- HTML::TemporaryExecutionContext execution_context { realm };
- auto value_or_error = Bindings::throw_dom_exception_if_needed(vm, [&]() -> WebIDL::ExceptionOr<JS::Value> {
- return package_data(realm, data, type, object.mime_type_impl());
- });
- if (value_or_error.is_error()) {
- // We can't call error_steps here without moving it into success_steps, causing a double move when we pause error_steps
- // to fully_read, so just reject the promise like error_steps does.
- WebIDL::reject_promise(realm, promise, value_or_error.release_error().value().value());
- return;
- }
- WebIDL::resolve_promise(realm, promise, value_or_error.release_value());
- });
- // 5. If object’s body is null, then run successSteps with an empty byte sequence.
- auto const& body = object.body_impl();
- if (!body) {
- success_steps->function()(ByteBuffer {});
- }
- // 6. Otherwise, fully read object’s body given successSteps, errorSteps, and object’s relevant global object.
- else {
- body->fully_read(realm, success_steps, error_steps, GC::Ref { HTML::relevant_global_object(object.as_platform_object()) });
- }
- // 7. Return promise.
- return promise;
- }
- // https://andreubotella.github.io/multipart-form-data/#parse-a-multipart-form-data-name
- static MultipartParsingErrorOr<String> parse_multipart_form_data_name(GenericLexer& lexer)
- {
- // 1. Assert: The byte at (position - 1) is 0x22 (").
- VERIFY(lexer.peek(-1) == '"');
- // 2. Let name be the result of collecting a sequence of bytes that are not 0x0A (LF), 0x0D (CR) or 0x22 ("), given position.
- auto name = lexer.consume_until(is_any_of("\n\r\""sv));
- // 3. If the byte at position is not 0x22 ("), return failure. Otherwise, advance position by 1.
- if (!lexer.consume_specific('"'))
- return MultipartParsingError { MUST(String::formatted("Expected \" at position {}", lexer.tell())) };
- // 4. Replace any occurrence of the following subsequences in name with the given byte:
- // - "%0A" with 0x0A (LF)
- // - "%0D" with 0x0D (CR)
- // - "%22" with 0x22 (")
- StringBuilder builder;
- for (size_t i = 0; i < name.length(); ++i) {
- // Check for subsequences starting with '%'
- if (name[i] == '%' && i + 2 < name.length()) {
- auto subsequence = name.substring_view(i, 3);
- if (subsequence == "%0A"sv) {
- builder.append(0x0A); // Append LF
- i += 2; // Skip the next two characters
- continue;
- }
- if (subsequence == "%0D"sv) {
- builder.append(0x0D); // Append CR
- i += 2; // Skip the next two characters
- continue;
- }
- if (subsequence == "%22"sv) {
- builder.append(0x22); // Append "
- i += 2; // Skip the next two characters
- continue;
- }
- }
- // Append the current character if no substitution was made
- builder.append(name[i]);
- }
- return builder.to_string_without_validation();
- }
- // https://andreubotella.github.io/multipart-form-data/#parse-multipart-form-data-headers
- static MultipartParsingErrorOr<MultiPartFormDataHeader> parse_multipart_form_data_header(GenericLexer& lexer)
- {
- // 1. Let name, filename and contentType be null.
- MultiPartFormDataHeader header;
- // 2. While true:
- while (true) {
- // 1. If position points to a sequence of bytes starting with 0x0D 0x0A (CR LF):
- if (lexer.next_is("\r\n"sv)) {
- // 1. If name is null, return failure.
- if (!header.name.has_value())
- return MultipartParsingError { "Missing name parameter in Content-Disposition header"_string };
- // 2. Return name, filename and contentType.
- return header;
- }
- // 2. Let header name be the result of collecting a sequence of bytes that are not 0x0A (LF), 0x0D (CR) or 0x3A (:), given position.
- auto header_name = lexer.consume_until(is_any_of("\n\r:"sv));
- // 3. Remove any HTTP tab or space bytes from the start or end of header name.
- header_name = header_name.trim(Infrastructure::HTTP_TAB_OR_SPACE, TrimMode::Both);
- // 4. If header name does not match the field-name token production, return failure.
- if (!Infrastructure::is_header_name(header_name.bytes()))
- return MultipartParsingError { MUST(String::formatted("Invalid header name {}", header_name)) };
- // 5. If the byte at position is not 0x3A (:), return failure.
- // 6. Advance position by 1.
- if (!lexer.consume_specific(':'))
- return MultipartParsingError { MUST(String::formatted("Expected : at position {}", lexer.tell())) };
- // 7. Collect a sequence of bytes that are HTTP tab or space bytes given position. (Do nothing with those bytes.)
- lexer.ignore_while(Infrastructure::is_http_tab_or_space);
- // 8. Byte-lowercase header name and switch on the result:
- // -> `content-disposition`
- if (header_name.equals_ignoring_ascii_case("content-disposition"sv)) {
- // 1. Set name and filename to null.
- header.name.clear();
- header.filename.clear();
- // 2. If position does not point to a sequence of bytes starting with `form-data; name="`, return failure.
- // 3. Advance position so it points at the byte after the next 0x22 (") byte (the one in the sequence of bytes matched above).
- if (!lexer.consume_specific("form-data; name=\""sv))
- return MultipartParsingError { MUST(String::formatted("Expected `form-data; name=\"` at position {}", lexer.tell())) };
- // 4. Set name to the result of parsing a multipart/form-data name given input and position, if the result is not failure. Otherwise, return failure.
- auto maybe_name = parse_multipart_form_data_name(lexer);
- if (maybe_name.is_error())
- return maybe_name.release_error();
- header.name = maybe_name.release_value();
- // 5. If position points to a sequence of bytes starting with `; filename="`:
- // 1. Advance position so it points at the byte after the next 0x22 (") byte (the one in the sequence of bytes matched above).
- if (lexer.consume_specific("; filename=\""sv)) {
- // 2. Set filename to the result of parsing a multipart/form-data name given input and position, if the result is not failure. Otherwise, return failure.
- auto maybe_filename = parse_multipart_form_data_name(lexer);
- if (maybe_filename.is_error())
- return maybe_filename.release_error();
- header.filename = maybe_filename.release_value();
- }
- }
- // -> `content-type`
- else if (header_name.equals_ignoring_ascii_case("content-type"sv)) {
- // 1. Let header value be the result of collecting a sequence of bytes that are not 0x0A (LF) or 0x0D (CR), given position.
- auto header_value = lexer.consume_until(Infrastructure::is_http_newline);
- // 2. Remove any HTTP tab or space bytes from the end of header value.
- header_value = header_value.trim(Infrastructure::HTTP_TAB_OR_SPACE, TrimMode::Right);
- // 3. Set contentType to the isomorphic decoding of header value.
- header.content_type = Infra::isomorphic_decode(header_value.bytes());
- }
- // -> Otherwise
- else {
- // 1. Collect a sequence of bytes that are not 0x0A (LF) or 0x0D (CR), given position. (Do nothing with those bytes.)
- lexer.ignore_until(Infrastructure::is_http_newline);
- }
- // 9. If position does not point to a sequence of bytes starting with 0x0D 0x0A (CR LF), return failure. Otherwise, advance position by 2 (past the newline).
- if (!lexer.consume_specific("\r\n"sv))
- return MultipartParsingError { MUST(String::formatted("Expected CRLF at position {}", lexer.tell())) };
- }
- return header;
- }
- // https://andreubotella.github.io/multipart-form-data/#multipart-form-data-parser
- MultipartParsingErrorOr<Vector<XHR::FormDataEntry>> parse_multipart_form_data(JS::Realm& realm, StringView input, MimeSniff::MimeType const& mime_type)
- {
- // 1. Assert: mimeType’s essence is "multipart/form-data".
- VERIFY(mime_type.essence() == "multipart/form-data"sv);
- // 2. If mimeType’s parameters["boundary"] does not exist, return failure. Otherwise, let boundary be the result of UTF-8 decoding mimeType’s parameters["boundary"].
- auto maybe_boundary = mime_type.parameters().get("boundary"sv);
- if (!maybe_boundary.has_value())
- return MultipartParsingError { "Missing boundary parameter in Content-Type header"_string };
- auto boundary = maybe_boundary.release_value();
- // 3. Let entry list be an empty entry list.
- Vector<XHR::FormDataEntry> entry_list;
- // 4. Let position be a pointer to a byte in input, initially pointing at the first byte.
- GenericLexer lexer(input);
- auto boundary_with_dashes = MUST(String::formatted("--{}", boundary));
- // 5. While true:
- while (true) {
- // 1. If position points to a sequence of bytes starting with 0x2D 0x2D (`--`) followed by boundary, advance position by 2 + the length of boundary. Otherwise, return failure.
- if (!lexer.consume_specific(boundary_with_dashes))
- return MultipartParsingError { MUST(String::formatted("Expected `--` followed by boundary at position {}", lexer.tell())) };
- // 2. If position points to the sequence of bytes 0x2D 0x2D 0x0D 0x0A (`--` followed by CR LF) followed by the end of input, return entry list.
- if (lexer.next_is("--\r\n"sv))
- return entry_list;
- // 3. If position does not point to a sequence of bytes starting with 0x0D 0x0A (CR LF), return failure.
- // 4. Advance position by 2. (This skips past the newline.)
- if (!lexer.consume_specific("\r\n"sv))
- return MultipartParsingError { MUST(String::formatted("Expected CRLF at position {}", lexer.tell())) };
- // 5. Let name, filename and contentType be the result of parsing multipart/form-data headers on input and position, if the result is not failure. Otherwise, return failure.
- auto header = TRY(parse_multipart_form_data_header(lexer));
- // 6. Advance position by 2. (This skips past the empty line that marks the end of the headers.)
- lexer.ignore(2);
- // 7. Let body be the empty byte sequence.
- // 8. Body loop: While position is not past the end of input:
- // 1. Append the code point at position to body.
- // 2. If body ends with boundary:
- // 1. Remove the last 4 + (length of boundary) bytes from body.
- // 2. Decrease position by 4 + (length of boundary).
- // 3. Break out of body loop.
- auto body = lexer.consume_until(boundary_with_dashes.bytes_as_string_view());
- if (lexer.next_is(boundary_with_dashes.bytes_as_string_view())) {
- constexpr size_t trailing_crlf_length = 2;
- if (body.length() >= trailing_crlf_length) {
- body = body.substring_view(0, body.length() - trailing_crlf_length);
- lexer.retreat(trailing_crlf_length);
- }
- }
- // 9. If position does not point to a sequence of bytes starting with 0x0D 0x0A (CR LF), return failure. Otherwise, advance position by 2.
- if (!lexer.consume_specific("\r\n"sv))
- return MultipartParsingError { MUST(String::formatted("Expected CRLF at position {}", lexer.tell())) };
- // 10. If filename is not null:
- Optional<XHR::FormDataEntryValue> value;
- if (header.filename.has_value()) {
- // 1. If contentType is null, set contentType to "text/plain".
- if (!header.content_type.has_value())
- header.content_type = "text/plain"_string;
- // 2. If contentType is not an ASCII string, set contentType to the empty string.
- if (!all_of(header.content_type->code_points(), is_ascii)) {
- header.content_type = ""_string;
- }
- // 3. Let value be a new File object with name filename, type contentType, and body body.
- auto blob = FileAPI::Blob::create(realm, MUST(ByteBuffer::copy(body.bytes())), header.content_type.release_value());
- FileAPI::FilePropertyBag options {};
- options.type = blob->type();
- auto file = MUST(FileAPI::File::create(realm, { GC::make_root(blob) }, header.filename.release_value(), move(options)));
- value = GC::make_root(file);
- }
- // 11. Otherwise:
- else {
- // 1. Let value be the UTF-8 decoding without BOM of body.
- value = String::from_utf8_with_replacement_character(body, String::WithBOMHandling::No);
- }
- // 12. Assert: name is a scalar value string and value is either a scalar value string or a File object.
- VERIFY(header.name.has_value() && value.has_value());
- // 13. Create an entry with name and value, and append it to entry list.
- entry_list.empend(header.name.release_value(), value.release_value());
- }
- }
- }
|