Blob.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452
  1. /*
  2. * Copyright (c) 2022-2024, Kenneth Myhra <kennethmyhra@serenityos.org>
  3. * Copyright (c) 2023, Shannon Booth <shannon@serenityos.org>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/GenericLexer.h>
  8. #include <LibJS/Runtime/ArrayBuffer.h>
  9. #include <LibJS/Runtime/Completion.h>
  10. #include <LibJS/Runtime/TypedArray.h>
  11. #include <LibTextCodec/Decoder.h>
  12. #include <LibWeb/Bindings/BlobPrototype.h>
  13. #include <LibWeb/Bindings/ExceptionOrUtils.h>
  14. #include <LibWeb/Bindings/Intrinsics.h>
  15. #include <LibWeb/Bindings/PrincipalHostDefined.h>
  16. #include <LibWeb/FileAPI/Blob.h>
  17. #include <LibWeb/HTML/Scripting/TemporaryExecutionContext.h>
  18. #include <LibWeb/HTML/StructuredSerialize.h>
  19. #include <LibWeb/Infra/Strings.h>
  20. #include <LibWeb/MimeSniff/MimeType.h>
  21. #include <LibWeb/Streams/AbstractOperations.h>
  22. #include <LibWeb/Streams/ReadableStreamDefaultReader.h>
  23. #include <LibWeb/WebIDL/AbstractOperations.h>
  24. #include <LibWeb/WebIDL/Buffers.h>
  25. namespace Web::FileAPI {
// Expands the GC allocator definition macro for Blob.
// NOTE(review): presumably pairs with a matching allocator declaration in Blob.h — confirm.
GC_DEFINE_ALLOCATOR(Blob);
  27. GC::Ref<Blob> Blob::create(JS::Realm& realm, ByteBuffer byte_buffer, String type)
  28. {
  29. return realm.create<Blob>(realm, move(byte_buffer), move(type));
  30. }
  31. // https://w3c.github.io/FileAPI/#convert-line-endings-to-native
  32. ErrorOr<String> convert_line_endings_to_native(StringView string)
  33. {
  34. // 1. Let native line ending be be the code point U+000A LF.
  35. auto native_line_ending = "\n"sv;
  36. // 2. If the underlying platform’s conventions are to represent newlines as a carriage return and line feed sequence, set native line ending to the code point U+000D CR followed by the code point U+000A LF.
  37. // NOTE: this step is a no-op since LibWeb does not compile on Windows, which is the only platform we know of that that uses a carriage return and line feed sequence for line endings.
  38. // 3. Set result to the empty string.
  39. StringBuilder result;
  40. // 4. Let position be a position variable for s, initially pointing at the start of s.
  41. auto lexer = GenericLexer { string };
  42. // 5. Let token be the result of collecting a sequence of code points that are not equal to U+000A LF or U+000D CR from s given position.
  43. // 6. Append token to result.
  44. TRY(result.try_append(lexer.consume_until(is_any_of("\n\r"sv))));
  45. // 7. While position is not past the end of s:
  46. while (!lexer.is_eof()) {
  47. // 1. If the code point at position within s equals U+000D CR:
  48. if (lexer.peek() == '\r') {
  49. // 1. Append native line ending to result.
  50. TRY(result.try_append(native_line_ending));
  51. // 2. Advance position by 1.
  52. lexer.ignore(1);
  53. // 3. If position is not past the end of s and the code point at position within s equals U+000A LF advance position by 1.
  54. if (!lexer.is_eof() && lexer.peek() == '\n')
  55. lexer.ignore(1);
  56. }
  57. // 2. Otherwise if the code point at position within s equals U+000A LF, advance position by 1 and append native line ending to result.
  58. else if (lexer.peek() == '\n') {
  59. lexer.ignore(1);
  60. TRY(result.try_append(native_line_ending));
  61. }
  62. // 3. Let token be the result of collecting a sequence of code points that are not equal to U+000A LF or U+000D CR from s given position.
  63. // 4. Append token to result.
  64. TRY(result.try_append(lexer.consume_until(is_any_of("\n\r"sv))));
  65. }
  66. // 5. Return result.
  67. return result.to_string();
  68. }
  69. // https://w3c.github.io/FileAPI/#process-blob-parts
  70. ErrorOr<ByteBuffer> process_blob_parts(Vector<BlobPart> const& blob_parts, Optional<BlobPropertyBag> const& options)
  71. {
  72. // 1. Let bytes be an empty sequence of bytes.
  73. ByteBuffer bytes {};
  74. // 2. For each element in parts:
  75. for (auto const& blob_part : blob_parts) {
  76. TRY(blob_part.visit(
  77. // 1. If element is a USVString, run the following sub-steps:
  78. [&](String const& string) -> ErrorOr<void> {
  79. // 1. Let s be element.
  80. auto s = string;
  81. // 2. If the endings member of options is "native", set s to the result of converting line endings to native of element.
  82. if (options.has_value() && options->endings == Bindings::EndingType::Native)
  83. s = TRY(convert_line_endings_to_native(s));
  84. // NOTE: The AK::String is always UTF-8.
  85. // 3. Append the result of UTF-8 encoding s to bytes.
  86. return bytes.try_append(s.bytes());
  87. },
  88. // 2. If element is a BufferSource, get a copy of the bytes held by the buffer source, and append those bytes to bytes.
  89. [&](GC::Root<WebIDL::BufferSource> const& buffer_source) -> ErrorOr<void> {
  90. auto data_buffer = TRY(WebIDL::get_buffer_source_copy(*buffer_source->raw_object()));
  91. return bytes.try_append(data_buffer.bytes());
  92. },
  93. // 3. If element is a Blob, append the bytes it represents to bytes.
  94. [&](GC::Root<Blob> const& blob) -> ErrorOr<void> {
  95. return bytes.try_append(blob->raw_bytes());
  96. }));
  97. }
  98. // 3. Return bytes.
  99. return bytes;
  100. }
  101. bool is_basic_latin(StringView view)
  102. {
  103. for (auto code_point : view) {
  104. if (code_point < 0x0020 || code_point > 0x007E)
  105. return false;
  106. }
  107. return true;
  108. }
// Constructs a Blob without supplying bytes or a type; only the PlatformObject base is initialised here.
// Blob::create() uses this for the zero-argument case, which expects 0 bytes and an empty type.
// NOTE(review): that relies on the defaults of m_byte_buffer and m_type, declared outside this file — confirm.
Blob::Blob(JS::Realm& realm)
    : PlatformObject(realm)
{
}
// Constructs a Blob that takes ownership of `byte_buffer` as its byte sequence and `type` as its type string.
// `type` is stored as given; no normalisation happens in this constructor.
Blob::Blob(JS::Realm& realm, ByteBuffer byte_buffer, String type)
    : PlatformObject(realm)
    , m_byte_buffer(move(byte_buffer))
    , m_type(move(type))
{
}
// Constructs a Blob that takes ownership of `byte_buffer`; m_type is not set by this constructor.
Blob::Blob(JS::Realm& realm, ByteBuffer byte_buffer)
    : PlatformObject(realm)
    , m_byte_buffer(move(byte_buffer))
{
}
// Defaulted destructor, defined out of line in this translation unit.
Blob::~Blob() = default;
// Runs the base-class initialisation for `realm`, then sets up the prototype for the Blob interface
// through WEB_SET_PROTOTYPE_FOR_INTERFACE.
void Blob::initialize(JS::Realm& realm)
{
    Base::initialize(realm);
    WEB_SET_PROTOTYPE_FOR_INTERFACE(Blob);
}
  130. WebIDL::ExceptionOr<void> Blob::serialization_steps(HTML::SerializationRecord& record, bool, HTML::SerializationMemory&)
  131. {
  132. auto& vm = this->vm();
  133. // FIXME: 1. Set serialized.[[SnapshotState]] to value’s snapshot state.
  134. // NON-STANDARD: FileAPI spec doesn't specify that type should be serialized, although
  135. // to be conformant with other browsers this needs to be serialized.
  136. TRY(HTML::serialize_string(vm, record, m_type));
  137. // 2. Set serialized.[[ByteSequence]] to value’s underlying byte sequence.
  138. TRY(HTML::serialize_bytes(vm, record, m_byte_buffer.bytes()));
  139. return {};
  140. }
  141. WebIDL::ExceptionOr<void> Blob::deserialization_steps(ReadonlySpan<u32> const& record, size_t& position, HTML::DeserializationMemory&)
  142. {
  143. auto& vm = this->vm();
  144. // FIXME: 1. Set value’s snapshot state to serialized.[[SnapshotState]].
  145. // NON-STANDARD: FileAPI spec doesn't specify that type should be deserialized, although
  146. // to be conformant with other browsers this needs to be deserialized.
  147. m_type = TRY(HTML::deserialize_string(vm, record, position));
  148. // 2. Set value’s underlying byte sequence to serialized.[[ByteSequence]].
  149. m_byte_buffer = TRY(HTML::deserialize_bytes(vm, record, position));
  150. return {};
  151. }
  152. // https://w3c.github.io/FileAPI/#ref-for-dom-blob-blob
  153. GC::Ref<Blob> Blob::create(JS::Realm& realm, Optional<Vector<BlobPart>> const& blob_parts, Optional<BlobPropertyBag> const& options)
  154. {
  155. // 1. If invoked with zero parameters, return a new Blob object consisting of 0 bytes, with size set to 0, and with type set to the empty string.
  156. if (!blob_parts.has_value() && !options.has_value())
  157. return realm.create<Blob>(realm);
  158. ByteBuffer byte_buffer {};
  159. // 2. Let bytes be the result of processing blob parts given blobParts and options.
  160. if (blob_parts.has_value()) {
  161. byte_buffer = MUST(process_blob_parts(blob_parts.value(), options));
  162. }
  163. auto type = String {};
  164. // 3. If the type member of the options argument is not the empty string, run the following sub-steps:
  165. if (options.has_value() && !options->type.is_empty()) {
  166. // FIXME: 1. If the type member is provided and is not the empty string, let t be set to the type dictionary member.
  167. // If t contains any characters outside the range U+0020 to U+007E, then set t to the empty string and return from these substeps.
  168. // FIXME: 2. Convert every character in t to ASCII lowercase.
  169. // NOTE: The spec is out of date, and we are supposed to call into the MimeType parser here.
  170. if (!options->type.is_empty()) {
  171. auto maybe_parsed_type = Web::MimeSniff::MimeType::parse(options->type);
  172. if (maybe_parsed_type.has_value())
  173. type = maybe_parsed_type->serialized();
  174. }
  175. }
  176. // 4. Return a Blob object referring to bytes as its associated byte sequence, with its size set to the length of bytes, and its type set to the value of t from the substeps above.
  177. return realm.create<Blob>(realm, move(byte_buffer), move(type));
  178. }
// Constructor entry point: forwards all arguments to Blob::create() and wraps the result in an ExceptionOr.
// NOTE(review): presumably called by the generated bindings for `new Blob(...)` — confirm.
WebIDL::ExceptionOr<GC::Ref<Blob>> Blob::construct_impl(JS::Realm& realm, Optional<Vector<BlobPart>> const& blob_parts, Optional<BlobPropertyBag> const& options)
{
    return Blob::create(realm, blob_parts, options);
}
// https://w3c.github.io/FileAPI/#dfn-slice
// Forwards to slice_blob(). An absent Optional stands in for the spec's null, so steps 1-4 need no code.
WebIDL::ExceptionOr<GC::Ref<Blob>> Blob::slice(Optional<i64> start, Optional<i64> end, Optional<String> const& content_type)
{
    // 1. Let sliceStart, sliceEnd, and sliceContentType be null.
    // 2. If start is given, set sliceStart to start.
    // 3. If end is given, set sliceEnd to end.
    // 4. If contentType is given, set sliceContentType to contentType.
    // 5. Return the result of slice blob given this, sliceStart, sliceEnd, and sliceContentType.
    return slice_blob(start, end, content_type);
}
  193. // https://w3c.github.io/FileAPI/#slice-blob
  194. WebIDL::ExceptionOr<GC::Ref<Blob>> Blob::slice_blob(Optional<i64> start, Optional<i64> end, Optional<String> const& content_type)
  195. {
  196. auto& vm = realm().vm();
  197. // 1. Let originalSize be blob’s size.
  198. auto original_size = size();
  199. // 2. The start parameter, if non-null, is a value for the start point of a slice blob call, and must be treated as a byte-order position,
  200. // with the zeroth position representing the first byte. User agents must normalize start according to the following:
  201. i64 relative_start;
  202. if (!start.has_value()) {
  203. // a. If start is null, let relativeStart be 0.
  204. relative_start = 0;
  205. } else {
  206. auto start_value = start.value();
  207. // b. If start is negative, let relativeStart be max((originalSize + start), 0).
  208. if (start_value < 0) {
  209. relative_start = max((static_cast<i64>(original_size) + start_value), 0);
  210. }
  211. // c. Otherwise, let relativeStart be min(start, originalSize).
  212. else {
  213. relative_start = min(start_value, original_size);
  214. }
  215. }
  216. // 3. The end parameter, if non-null. is a value for the end point of a slice blob call. User agents must normalize end according to the following:
  217. i64 relative_end;
  218. if (!end.has_value()) {
  219. // a. If end is null, let relativeEnd be originalSize.
  220. relative_end = original_size;
  221. } else {
  222. auto end_value = end.value();
  223. // b. If end is negative, let relativeEnd be max((originalSize + end), 0).
  224. if (end_value < 0) {
  225. relative_end = max((static_cast<i64>(original_size) + end_value), 0);
  226. }
  227. // c. Otherwise, let relativeEnd be min(end, originalSize).
  228. else {
  229. relative_end = min(end_value, original_size);
  230. }
  231. }
  232. // 4. The contentType parameter, if non-null, is used to set the ASCII-encoded string in lower case representing the media type of the Blob.
  233. // User agents must normalize contentType according to the following:
  234. String relative_content_type;
  235. if (!content_type.has_value()) {
  236. // a. If contentType is null, let relativeContentType be set to the empty string.
  237. relative_content_type = {};
  238. } else {
  239. // b. Otherwise, let relativeContentType be set to contentType and run the substeps below:
  240. // 1. If relativeContentType contains any characters outside the range of U+0020 to U+007E, then set relativeContentType to the empty string
  241. // and return from these substeps:
  242. if (!is_basic_latin(content_type.value())) {
  243. relative_content_type = {};
  244. }
  245. // 2. Convert every character in relativeContentType to ASCII lowercase.
  246. else {
  247. relative_content_type = content_type.value().to_ascii_lowercase();
  248. }
  249. }
  250. // 5. Let span be max((relativeEnd - relativeStart), 0).
  251. auto span = max((relative_end - relative_start), 0);
  252. // 6. Return a new Blob object S with the following characteristics:
  253. // a. S refers to span consecutive bytes from blob’s associated byte sequence, beginning with the byte at byte-order position relativeStart.
  254. // b. S.size = span.
  255. // c. S.type = relativeContentType.
  256. auto byte_buffer = TRY_OR_THROW_OOM(vm, m_byte_buffer.slice(relative_start, span));
  257. return realm().create<Blob>(realm(), move(byte_buffer), move(relative_content_type));
  258. }
// https://w3c.github.io/FileAPI/#dom-blob-stream
// Thin wrapper around get_stream(); each call produces a fresh ReadableStream.
GC::Ref<Streams::ReadableStream> Blob::stream()
{
    // The stream() method, when invoked, must return the result of calling get stream on this.
    return get_stream();
}
// https://w3c.github.io/FileAPI/#blob-get-stream
// Creates a ReadableStream with byte reading support and queues one task on the file reading task source.
// That task enqueues a copy of this blob's bytes as a single Uint8Array chunk and then closes the stream.
// The stream is returned immediately; the chunk only becomes available once the queued task has run.
GC::Ref<Streams::ReadableStream> Blob::get_stream()
{
    auto& realm = this->realm();

    // 1. Let stream be a new ReadableStream created in blob’s relevant Realm.
    auto stream = realm.create<Streams::ReadableStream>(realm);

    // 2. Set up stream with byte reading support.
    set_up_readable_stream_controller_with_byte_reading_support(stream);

    // FIXME: 3. Run the following steps in parallel:
    {
        // 1. While not all bytes of blob have been read:
        // NOTE: for simplicity the chunk is the entire buffer for now.
        {
            // 1. Let bytes be the byte sequence that results from reading a chunk from blob, or failure if a chunk cannot be read.
            // The buffer is copied here and the copy is moved into the task below, so the task works on the
            // bytes as they were when get_stream() was called.
            auto bytes = m_byte_buffer;

            // 2. Queue a global task on the file reading task source given blob’s relevant global object to perform the following steps:
            HTML::queue_global_task(HTML::Task::Source::FileReading, realm.global_object(), GC::create_function(heap(), [stream, bytes = move(bytes)]() {
                // Deliberately shadows the outer `realm`: the task uses the stream's own realm.
                auto& realm = stream->realm();
                HTML::TemporaryExecutionContext const execution_context { realm, HTML::TemporaryExecutionContext::CallbacksEnabled::Yes };

                // 1. If bytes is failure, then error stream with a failure reason and abort these steps.
                // NOTE: `bytes` is a plain copy of the buffer, so no failure state is represented here.

                // 2. Let chunk be a new Uint8Array wrapping an ArrayBuffer containing bytes. If creating the ArrayBuffer throws an exception, then error stream with that exception and abort these steps.
                auto array_buffer = JS::ArrayBuffer::create(realm, bytes);
                auto chunk = JS::Uint8Array::create(realm, bytes.size(), *array_buffer);

                // 3. Enqueue chunk in stream.
                // A failed enqueue is turned into a JS exception value and used to error the stream.
                auto maybe_error = Bindings::throw_dom_exception_if_needed(realm.vm(), [&]() {
                    return readable_stream_enqueue(*stream->controller(), chunk);
                });
                if (maybe_error.is_error()) {
                    readable_stream_error(*stream, maybe_error.release_error().value().value());
                    return;
                }

                // FIXME: Close the stream now that we have finished enqueuing all chunks to the stream. Without this, ReadableStream.read will never resolve the second time around with 'done' set.
                //        Nowhere in the spec seems to mention this - but testing against other implementations the stream does appear to be closed after reading all data (closed callback is fired).
                //        Probably there is a better way of doing this.
                readable_stream_close(*stream);
            }));
        }
    }

    // 4. Return stream.
    return stream;
}
  306. // https://w3c.github.io/FileAPI/#dom-blob-text
  307. GC::Ref<WebIDL::Promise> Blob::text()
  308. {
  309. auto& realm = this->realm();
  310. auto& vm = realm.vm();
  311. // 1. Let stream be the result of calling get stream on this.
  312. auto stream = get_stream();
  313. // 2. Let reader be the result of getting a reader from stream. If that threw an exception, return a new promise rejected with that exception.
  314. auto reader_or_exception = acquire_readable_stream_default_reader(*stream);
  315. if (reader_or_exception.is_exception())
  316. return WebIDL::create_rejected_promise_from_exception(realm, reader_or_exception.release_error());
  317. auto reader = reader_or_exception.release_value();
  318. // 3. Let promise be the result of reading all bytes from stream with reader
  319. auto promise = reader->read_all_bytes_deprecated();
  320. // 4. Return the result of transforming promise by a fulfillment handler that returns the result of running UTF-8 decode on its first argument.
  321. return WebIDL::upon_fulfillment(*promise, GC::create_function(heap(), [&vm](JS::Value first_argument) -> WebIDL::ExceptionOr<JS::Value> {
  322. auto const& object = first_argument.as_object();
  323. VERIFY(is<JS::ArrayBuffer>(object));
  324. auto const& buffer = static_cast<const JS::ArrayBuffer&>(object).buffer();
  325. auto decoder = TextCodec::decoder_for("UTF-8"sv);
  326. auto utf8_text = TRY_OR_THROW_OOM(vm, TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(*decoder, buffer));
  327. return JS::PrimitiveString::create(vm, move(utf8_text));
  328. }));
  329. }
  330. // https://w3c.github.io/FileAPI/#dom-blob-arraybuffer
  331. GC::Ref<WebIDL::Promise> Blob::array_buffer()
  332. {
  333. auto& realm = this->realm();
  334. // 1. Let stream be the result of calling get stream on this.
  335. auto stream = get_stream();
  336. // 2. Let reader be the result of getting a reader from stream. If that threw an exception, return a new promise rejected with that exception.
  337. auto reader_or_exception = acquire_readable_stream_default_reader(*stream);
  338. if (reader_or_exception.is_exception())
  339. return WebIDL::create_rejected_promise_from_exception(realm, reader_or_exception.release_error());
  340. auto reader = reader_or_exception.release_value();
  341. // 3. Let promise be the result of reading all bytes from stream with reader.
  342. auto promise = reader->read_all_bytes_deprecated();
  343. // 4. Return the result of transforming promise by a fulfillment handler that returns a new ArrayBuffer whose contents are its first argument.
  344. return WebIDL::upon_fulfillment(*promise, GC::create_function(heap(), [&realm](JS::Value first_argument) -> WebIDL::ExceptionOr<JS::Value> {
  345. auto const& object = first_argument.as_object();
  346. VERIFY(is<JS::ArrayBuffer>(object));
  347. auto const& buffer = static_cast<const JS::ArrayBuffer&>(object).buffer();
  348. return JS::ArrayBuffer::create(realm, buffer);
  349. }));
  350. }
  351. // https://w3c.github.io/FileAPI/#dom-blob-bytes
  352. GC::Ref<WebIDL::Promise> Blob::bytes()
  353. {
  354. auto& realm = this->realm();
  355. // 1. Let stream be the result of calling get stream on this.
  356. auto stream = get_stream();
  357. // 2. Let reader be the result of getting a reader from stream. If that threw an exception, return a new promise rejected with that exception.
  358. auto reader_or_exception = acquire_readable_stream_default_reader(*stream);
  359. if (reader_or_exception.is_exception())
  360. return WebIDL::create_rejected_promise_from_exception(realm, reader_or_exception.release_error());
  361. auto reader = reader_or_exception.release_value();
  362. // 3. Let promise be the result of reading all bytes from stream with reader.
  363. auto promise = reader->read_all_bytes_deprecated();
  364. // 4. Return the result of transforming promise by a fulfillment handler that returns a new Uint8Array wrapping an ArrayBuffer containing its first argument.
  365. return WebIDL::upon_fulfillment(*promise, GC::create_function(heap(), [&realm](JS::Value first_argument) -> WebIDL::ExceptionOr<JS::Value> {
  366. auto& object = first_argument.as_object();
  367. VERIFY(is<JS::ArrayBuffer>(object));
  368. auto& array_buffer = static_cast<JS::ArrayBuffer&>(object);
  369. return JS::Uint8Array::create(realm, array_buffer.byte_length(), array_buffer);
  370. }));
  371. }
  372. }