DocumentLoading.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444
  1. /*
  2. * Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
  3. * Copyright (c) 2023, Aliaksandr Kalenik <kalenik.aliaksandr@gmail.com>
  4. * Copyright (c) 2023, Sam Atkins <atkinssj@serenityos.org>
  5. *
  6. * SPDX-License-Identifier: BSD-2-Clause
  7. */
  8. #include <AK/Debug.h>
  9. #include <AK/LexicalPath.h>
  10. #include <LibGemini/Document.h>
  11. #include <LibGfx/ImageFormats/ImageDecoder.h>
  12. #include <LibMarkdown/Document.h>
  13. #include <LibTextCodec/Decoder.h>
  14. #include <LibWeb/DOM/Document.h>
  15. #include <LibWeb/DOM/DocumentLoading.h>
  16. #include <LibWeb/HTML/Navigable.h>
  17. #include <LibWeb/HTML/NavigationParams.h>
  18. #include <LibWeb/HTML/Parser/HTMLEncodingDetection.h>
  19. #include <LibWeb/HTML/Parser/HTMLParser.h>
  20. #include <LibWeb/Loader/GeneratedPagesLoader.h>
  21. #include <LibWeb/Namespace.h>
  22. #include <LibWeb/Platform/ImageCodecPlugin.h>
  23. #include <LibWeb/XML/XMLDocumentBuilder.h>
  24. namespace Web {
  25. static bool build_markdown_document(DOM::Document& document, ByteBuffer const& data)
  26. {
  27. auto markdown_document = Markdown::Document::parse(data);
  28. if (!markdown_document)
  29. return false;
  30. auto extra_head_contents = R"~~~(
  31. <style>
  32. .zoomable {
  33. cursor: zoom-in;
  34. max-width: 100%;
  35. }
  36. .zoomable.zoomed-in {
  37. cursor: zoom-out;
  38. max-width: none;
  39. }
  40. </style>
  41. <script>
  42. function imageClickEventListener(event) {
  43. let image = event.target;
  44. if (image.classList.contains("zoomable")) {
  45. image.classList.toggle("zoomed-in");
  46. }
  47. }
  48. function processImages() {
  49. let images = document.querySelectorAll("img");
  50. let windowWidth = window.innerWidth;
  51. images.forEach((image) => {
  52. if (image.naturalWidth > windowWidth) {
  53. image.classList.add("zoomable");
  54. } else {
  55. image.classList.remove("zoomable");
  56. image.classList.remove("zoomed-in");
  57. }
  58. image.addEventListener("click", imageClickEventListener);
  59. });
  60. }
  61. document.addEventListener("load", () => {
  62. processImages();
  63. });
  64. window.addEventListener("resize", () => {
  65. processImages();
  66. });
  67. </script>
  68. )~~~"sv;
  69. auto parser = HTML::HTMLParser::create(document, markdown_document->render_to_html(extra_head_contents), "utf-8");
  70. parser->run(document.url());
  71. return true;
  72. }
  73. static bool build_text_document(DOM::Document& document, ByteBuffer const& data)
  74. {
  75. auto html_element = DOM::create_element(document, HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  76. MUST(document.append_child(html_element));
  77. auto head_element = DOM::create_element(document, HTML::TagNames::head, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  78. MUST(html_element->append_child(head_element));
  79. auto title_element = DOM::create_element(document, HTML::TagNames::title, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  80. MUST(head_element->append_child(title_element));
  81. auto title_text = document.create_text_node(MUST(String::from_byte_string(document.url().basename())));
  82. MUST(title_element->append_child(title_text));
  83. auto body_element = DOM::create_element(document, HTML::TagNames::body, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  84. MUST(html_element->append_child(body_element));
  85. auto pre_element = DOM::create_element(document, HTML::TagNames::pre, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  86. MUST(body_element->append_child(pre_element));
  87. MUST(pre_element->append_child(document.create_text_node(String::from_utf8(StringView { data }).release_value_but_fixme_should_propagate_errors())));
  88. return true;
  89. }
  90. static bool build_image_document(DOM::Document& document, ByteBuffer const& data)
  91. {
  92. auto image = Platform::ImageCodecPlugin::the().decode_image(data);
  93. if (!image.has_value() || image->frames.is_empty())
  94. return false;
  95. auto const& frame = image->frames[0];
  96. auto const& bitmap = frame.bitmap;
  97. if (!bitmap)
  98. return false;
  99. auto html_element = DOM::create_element(document, HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  100. MUST(document.append_child(html_element));
  101. auto head_element = DOM::create_element(document, HTML::TagNames::head, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  102. MUST(html_element->append_child(head_element));
  103. auto title_element = DOM::create_element(document, HTML::TagNames::title, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  104. MUST(head_element->append_child(title_element));
  105. auto basename = LexicalPath::basename(document.url().serialize_path());
  106. auto title_text = document.heap().allocate<DOM::Text>(document.realm(), document, MUST(String::formatted("{} [{}x{}]", basename, bitmap->width(), bitmap->height())));
  107. MUST(title_element->append_child(*title_text));
  108. auto body_element = DOM::create_element(document, HTML::TagNames::body, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  109. MUST(html_element->append_child(body_element));
  110. auto image_element = DOM::create_element(document, HTML::TagNames::img, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  111. MUST(image_element->set_attribute(HTML::AttributeNames::src, MUST(document.url().to_string())));
  112. MUST(body_element->append_child(image_element));
  113. return true;
  114. }
  115. static bool build_gemini_document(DOM::Document& document, ByteBuffer const& data)
  116. {
  117. StringView gemini_data { data };
  118. auto gemini_document = Gemini::Document::parse(gemini_data, document.url());
  119. ByteString html_data = gemini_document->render_to_html();
  120. dbgln_if(GEMINI_DEBUG, "Gemini data:\n\"\"\"{}\"\"\"", gemini_data);
  121. dbgln_if(GEMINI_DEBUG, "Converted to HTML:\n\"\"\"{}\"\"\"", html_data);
  122. auto parser = HTML::HTMLParser::create(document, html_data, "utf-8");
  123. parser->run(document.url());
  124. return true;
  125. }
  126. bool build_xml_document(DOM::Document& document, ByteBuffer const& data, Optional<String> content_encoding)
  127. {
  128. Optional<TextCodec::Decoder&> decoder;
  129. // The actual HTTP headers and other metadata, not the headers as mutated or implied by the algorithms given in this specification,
  130. // are the ones that must be used when determining the character encoding according to the rules given in the above specifications.
  131. if (content_encoding.has_value())
  132. decoder = TextCodec::decoder_for(*content_encoding);
  133. if (!decoder.has_value()) {
  134. auto encoding = HTML::run_encoding_sniffing_algorithm(document, data);
  135. decoder = TextCodec::decoder_for(encoding);
  136. }
  137. VERIFY(decoder.has_value());
  138. // Well-formed XML documents contain only properly encoded characters
  139. if (!decoder->validate(data))
  140. return false;
  141. auto source = decoder->to_utf8(data).release_value_but_fixme_should_propagate_errors();
  142. XML::Parser parser(source, { .resolve_external_resource = resolve_xml_resource });
  143. XMLDocumentBuilder builder { document };
  144. auto result = parser.parse_with_listener(builder);
  145. return !result.is_error() && !builder.has_error();
  146. }
  147. static bool build_video_document(DOM::Document& document)
  148. {
  149. auto html_element = DOM::create_element(document, HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  150. MUST(document.append_child(html_element));
  151. auto head_element = DOM::create_element(document, HTML::TagNames::head, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  152. MUST(html_element->append_child(head_element));
  153. auto body_element = DOM::create_element(document, HTML::TagNames::body, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  154. MUST(html_element->append_child(body_element));
  155. auto video_element = DOM::create_element(document, HTML::TagNames::video, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  156. MUST(video_element->set_attribute(HTML::AttributeNames::src, MUST(document.url().to_string())));
  157. MUST(video_element->set_attribute(HTML::AttributeNames::autoplay, String {}));
  158. MUST(video_element->set_attribute(HTML::AttributeNames::controls, String {}));
  159. MUST(body_element->append_child(video_element));
  160. return true;
  161. }
  162. static bool build_audio_document(DOM::Document& document)
  163. {
  164. auto html_element = DOM::create_element(document, HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  165. MUST(document.append_child(html_element));
  166. auto head_element = DOM::create_element(document, HTML::TagNames::head, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  167. MUST(html_element->append_child(head_element));
  168. auto body_element = DOM::create_element(document, HTML::TagNames::body, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  169. MUST(html_element->append_child(body_element));
  170. auto video_element = DOM::create_element(document, HTML::TagNames::audio, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  171. MUST(video_element->set_attribute(HTML::AttributeNames::src, MUST(document.url().to_string())));
  172. MUST(video_element->set_attribute(HTML::AttributeNames::autoplay, String {}));
  173. MUST(video_element->set_attribute(HTML::AttributeNames::controls, String {}));
  174. MUST(body_element->append_child(video_element));
  175. return true;
  176. }
  177. bool parse_document(DOM::Document& document, ByteBuffer const& data, Optional<String> content_encoding)
  178. {
  179. auto& mime_type = document.content_type();
  180. if (mime_type == "text/html") {
  181. auto parser = HTML::HTMLParser::create_with_uncertain_encoding(document, data);
  182. parser->run(document.url());
  183. return true;
  184. }
  185. if (mime_type.ends_with_bytes("+xml"sv) || mime_type.is_one_of("text/xml", "application/xml"))
  186. return build_xml_document(document, data, move(content_encoding));
  187. if (mime_type.starts_with_bytes("image/"sv))
  188. return build_image_document(document, data);
  189. if (mime_type.starts_with_bytes("video/"sv))
  190. return build_video_document(document);
  191. if (mime_type.starts_with_bytes("audio/"sv))
  192. return build_audio_document(document);
  193. if (mime_type == "text/plain" || mime_type == "application/json")
  194. return build_text_document(document, data);
  195. if (mime_type == "text/markdown")
  196. return build_markdown_document(document, data);
  197. if (mime_type == "text/gemini")
  198. return build_gemini_document(document, data);
  199. return false;
  200. }
  201. static bool is_supported_document_mime_type(StringView mime_type)
  202. {
  203. if (mime_type == "text/html")
  204. return true;
  205. if (mime_type.ends_with("+xml"sv) || mime_type.is_one_of("text/xml", "application/xml"))
  206. return true;
  207. if (mime_type.starts_with("image/"sv))
  208. return true;
  209. if (mime_type.starts_with("video/"sv))
  210. return true;
  211. if (mime_type.starts_with("audio/"sv))
  212. return true;
  213. if (mime_type == "text/plain" || mime_type == "application/json")
  214. return true;
  215. if (mime_type == "text/markdown")
  216. return true;
  217. if (mime_type == "text/gemini")
  218. return true;
  219. return false;
  220. }
  221. // https://html.spec.whatwg.org/multipage/browsing-the-web.html#loading-a-document
  222. JS::GCPtr<DOM::Document> load_document(HTML::NavigationParams navigation_params)
  223. {
  224. // To load a document given navigation params navigationParams, source snapshot params sourceSnapshotParams,
  225. // and origin initiatorOrigin, perform the following steps. They return a Document or null.
  226. // 1. Let type be the computed type of navigationParams's response.
  227. auto extracted_mime_type = navigation_params.response->header_list()->extract_mime_type().release_value_but_fixme_should_propagate_errors();
  228. if (!extracted_mime_type.has_value())
  229. return nullptr;
  230. auto type = extracted_mime_type.release_value();
  231. VERIFY(navigation_params.response->body());
  232. // 2. If the user agent has been configured to process resources of the given type using some mechanism other than
  233. // rendering the content in a navigable, then skip this step.
  234. // Otherwise, if the type is one of the following types:
  235. // -> an HTML MIME type
  236. if (type.is_html()) {
  237. // FIXME: Return the result of loading an HTML document, given navigationParams.
  238. }
  239. // -> an XML MIME type that is not an explicitly supported XML MIME type
  240. // FIXME: that is not an explicitly supported XML MIME type
  241. if (type.is_xml()) {
  242. // FIXME: Return the result of loading an XML document given navigationParams and type.
  243. }
  244. // -> a JavaScript MIME type
  245. // -> a JSON MIME type that is not an explicitly supported JSON MIME type
  246. // -> "text/css"
  247. // -> "text/plain"
  248. // -> "text/vtt"
  249. if (type.is_javascript()
  250. || type.is_json()
  251. || type.essence() == "text/css"_string
  252. || type.essence() == "text/plain"_string
  253. || type.essence() == "text/vtt"_string) {
  254. // FIXME: Return the result of loading a text document given navigationParams and type.
  255. }
  256. // -> "multipart/x-mixed-replace"
  257. if (type.essence() == "multipart/x-mixed-replace"_string) {
  258. // FIXME: Return the result of loading a multipart/x-mixed-replace document, given navigationParams,
  259. // sourceSnapshotParams, and initiatorOrigin.
  260. }
  261. // -> A supported image, video, or audio type
  262. if (type.is_image()
  263. || type.is_audio_or_video()) {
  264. // FIXME: Return the result of loading a media document given navigationParams and type.
  265. }
  266. // -> "application/pdf"
  267. // -> "text/pdf"
  268. if (type.essence() == "application/pdf"_string
  269. || type.essence() == "text/pdf"_string) {
  270. // FIXME: If the user agent's PDF viewer supported is true, return the result of creating a document for inline
  271. // content that doesn't have a DOM given navigationParams's navigable.
  272. }
  273. // Otherwise, proceed onward.
  274. // FIXME: 3. If, given type, the new resource is to be handled by displaying some sort of inline content, e.g., a
  275. // native rendering of the content or an error message because the specified type is not supported, then
  276. // return the result of creating a document for inline content that doesn't have a DOM given navigationParams's
  277. // navigable, navigationParams's id, and navigationParams's navigation timing type.
  278. // FIXME: 4. Otherwise, the document's type is such that the resource will not affect navigationParams's navigable,
  279. // e.g., because the resource is to be handed to an external application or because it is an unknown type
  280. // that will be processed as a download. Hand-off to external software given navigationParams's response,
  281. // navigationParams's navigable, navigationParams's final sandboxing flag set, sourceSnapshotParams's has
  282. // transient activation, and initiatorOrigin.
  283. // FIXME: Start of old, ad-hoc code
  284. if (!is_supported_document_mime_type(type.essence()))
  285. return nullptr;
  286. auto document = DOM::Document::create_and_initialize(DOM::Document::Type::HTML, "text/html"_string, navigation_params).release_value_but_fixme_should_propagate_errors();
  287. document->set_content_type(type.essence());
  288. auto& realm = document->realm();
  289. if (navigation_params.response->body()) {
  290. Optional<String> content_encoding = type.parameters().get("charset"sv);
  291. auto process_body = [document, url = navigation_params.response->url().value(), encoding = move(content_encoding)](ByteBuffer bytes) {
  292. if (parse_document(*document, bytes, move(encoding)))
  293. return;
  294. document->remove_all_children(true);
  295. auto error_html = load_error_page(url).release_value_but_fixme_should_propagate_errors();
  296. auto parser = HTML::HTMLParser::create(document, error_html, "utf-8");
  297. document->set_url(AK::URL("about:error"));
  298. parser->run();
  299. };
  300. auto process_body_error = [](auto) {
  301. dbgln("FIXME: Load html page with an error if read of body failed.");
  302. };
  303. navigation_params.response->body()->fully_read(
  304. realm,
  305. move(process_body),
  306. move(process_body_error),
  307. JS::NonnullGCPtr { realm.global_object() })
  308. .release_value_but_fixme_should_propagate_errors();
  309. }
  310. return document;
  311. // FIXME: End of old, ad-hoc code
  312. // 5. Return null.
  313. return nullptr;
  314. }
  315. // https://html.spec.whatwg.org/multipage/document-lifecycle.html#read-ua-inline
  316. JS::GCPtr<DOM::Document> create_document_for_inline_content(JS::GCPtr<HTML::Navigable> navigable, Optional<String> navigation_id, StringView content_html)
  317. {
  318. auto& vm = navigable->vm();
  319. // 1. Let origin be a new opaque origin.
  320. HTML::Origin origin {};
  321. // 2. Let coop be a new cross-origin opener policy.
  322. auto coop = HTML::CrossOriginOpenerPolicy {};
  323. // 3. Let coopEnforcementResult be a new cross-origin opener policy enforcement result with
  324. // url: response's URL
  325. // origin: origin
  326. // cross-origin opener policy: coop
  327. HTML::CrossOriginOpenerPolicyEnforcementResult coop_enforcement_result {
  328. .url = AK::URL("about:error"), // AD-HOC
  329. .origin = origin,
  330. .cross_origin_opener_policy = coop
  331. };
  332. // 4. Let navigationParams be a new navigation params with
  333. // id: navigationId
  334. // navigable: navigable
  335. // request: null
  336. // response: a new response
  337. // origin: origin
  338. // fetch controller: null
  339. // commit early hints: null
  340. // COOP enforcement result: coopEnforcementResult
  341. // reserved environment: null
  342. // policy container: a new policy container
  343. // final sandboxing flag set: an empty set
  344. // cross-origin opener policy: coop
  345. // FIXME: navigation timing type: navTimingType
  346. // about base URL: null
  347. auto response = Fetch::Infrastructure::Response::create(vm);
  348. response->url_list().append(AK::URL("about:error")); // AD-HOC: https://github.com/whatwg/html/issues/9122
  349. HTML::NavigationParams navigation_params {
  350. .id = navigation_id,
  351. .navigable = navigable,
  352. .request = {},
  353. .response = *response,
  354. .fetch_controller = nullptr,
  355. .commit_early_hints = nullptr,
  356. .coop_enforcement_result = move(coop_enforcement_result),
  357. .reserved_environment = {},
  358. .origin = move(origin),
  359. .policy_container = HTML::PolicyContainer {},
  360. .final_sandboxing_flag_set = HTML::SandboxingFlagSet {},
  361. .cross_origin_opener_policy = move(coop),
  362. .about_base_url = {},
  363. };
  364. // 5. Let document be the result of creating and initializing a Document object given "html", "text/html", and navigationParams.
  365. auto document = DOM::Document::create_and_initialize(DOM::Document::Type::HTML, "text/html"_string, navigation_params).release_value_but_fixme_should_propagate_errors();
  366. // 6. Either associate document with a custom rendering that is not rendered using the normal Document rendering rules, or mutate document until it represents the content the
  367. // user agent wants to render.
  368. auto parser = HTML::HTMLParser::create(document, content_html, "utf-8");
  369. document->set_url(AK::URL("about:error"));
  370. parser->run();
  371. // 7. Return document.
  372. return document;
  373. }
  374. }