DocumentLoading.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371
  1. /*
  2. * Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
  3. * Copyright (c) 2023, Aliaksandr Kalenik <kalenik.aliaksandr@gmail.com>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/Debug.h>
  8. #include <AK/LexicalPath.h>
  9. #include <LibGemini/Document.h>
  10. #include <LibGfx/ImageFormats/ImageDecoder.h>
  11. #include <LibMarkdown/Document.h>
  12. #include <LibTextCodec/Decoder.h>
  13. #include <LibWeb/DOM/Document.h>
  14. #include <LibWeb/DOM/DocumentLoading.h>
  15. #include <LibWeb/HTML/Navigable.h>
  16. #include <LibWeb/HTML/NavigationParams.h>
  17. #include <LibWeb/HTML/Parser/HTMLEncodingDetection.h>
  18. #include <LibWeb/HTML/Parser/HTMLParser.h>
  19. #include <LibWeb/Loader/GeneratedPagesLoader.h>
  20. #include <LibWeb/Namespace.h>
  21. #include <LibWeb/Platform/ImageCodecPlugin.h>
  22. #include <LibWeb/XML/XMLDocumentBuilder.h>
  23. namespace Web {
  24. static bool build_markdown_document(DOM::Document& document, ByteBuffer const& data)
  25. {
  26. auto markdown_document = Markdown::Document::parse(data);
  27. if (!markdown_document)
  28. return false;
  29. auto extra_head_contents = R"~~~(
  30. <style>
  31. .zoomable {
  32. cursor: zoom-in;
  33. max-width: 100%;
  34. }
  35. .zoomable.zoomed-in {
  36. cursor: zoom-out;
  37. max-width: none;
  38. }
  39. </style>
  40. <script>
  41. function imageClickEventListener(event) {
  42. let image = event.target;
  43. if (image.classList.contains("zoomable")) {
  44. image.classList.toggle("zoomed-in");
  45. }
  46. }
  47. function processImages() {
  48. let images = document.querySelectorAll("img");
  49. let windowWidth = window.innerWidth;
  50. images.forEach((image) => {
  51. if (image.naturalWidth > windowWidth) {
  52. image.classList.add("zoomable");
  53. } else {
  54. image.classList.remove("zoomable");
  55. image.classList.remove("zoomed-in");
  56. }
  57. image.addEventListener("click", imageClickEventListener);
  58. });
  59. }
  60. document.addEventListener("load", () => {
  61. processImages();
  62. });
  63. window.addEventListener("resize", () => {
  64. processImages();
  65. });
  66. </script>
  67. )~~~"sv;
  68. auto parser = HTML::HTMLParser::create(document, markdown_document->render_to_html(extra_head_contents), "utf-8");
  69. parser->run(document.url());
  70. return true;
  71. }
  72. static bool build_text_document(DOM::Document& document, ByteBuffer const& data)
  73. {
  74. auto html_element = DOM::create_element(document, HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  75. MUST(document.append_child(html_element));
  76. auto head_element = DOM::create_element(document, HTML::TagNames::head, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  77. MUST(html_element->append_child(head_element));
  78. auto title_element = DOM::create_element(document, HTML::TagNames::title, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  79. MUST(head_element->append_child(title_element));
  80. auto title_text = document.create_text_node(MUST(String::from_deprecated_string(document.url().basename())));
  81. MUST(title_element->append_child(title_text));
  82. auto body_element = DOM::create_element(document, HTML::TagNames::body, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  83. MUST(html_element->append_child(body_element));
  84. auto pre_element = DOM::create_element(document, HTML::TagNames::pre, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  85. MUST(body_element->append_child(pre_element));
  86. MUST(pre_element->append_child(document.create_text_node(String::from_utf8(StringView { data }).release_value_but_fixme_should_propagate_errors())));
  87. return true;
  88. }
  89. static bool build_image_document(DOM::Document& document, ByteBuffer const& data)
  90. {
  91. auto image = Platform::ImageCodecPlugin::the().decode_image(data);
  92. if (!image.has_value() || image->frames.is_empty())
  93. return false;
  94. auto const& frame = image->frames[0];
  95. auto const& bitmap = frame.bitmap;
  96. if (!bitmap)
  97. return false;
  98. auto html_element = DOM::create_element(document, HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  99. MUST(document.append_child(html_element));
  100. auto head_element = DOM::create_element(document, HTML::TagNames::head, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  101. MUST(html_element->append_child(head_element));
  102. auto title_element = DOM::create_element(document, HTML::TagNames::title, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  103. MUST(head_element->append_child(title_element));
  104. auto basename = LexicalPath::basename(document.url().serialize_path());
  105. auto title_text = document.heap().allocate<DOM::Text>(document.realm(), document, MUST(String::formatted("{} [{}x{}]", basename, bitmap->width(), bitmap->height())));
  106. MUST(title_element->append_child(*title_text));
  107. auto body_element = DOM::create_element(document, HTML::TagNames::body, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  108. MUST(html_element->append_child(body_element));
  109. auto image_element = DOM::create_element(document, HTML::TagNames::img, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  110. MUST(image_element->set_attribute(HTML::AttributeNames::src, MUST(document.url().to_string())));
  111. MUST(body_element->append_child(image_element));
  112. return true;
  113. }
  114. static bool build_gemini_document(DOM::Document& document, ByteBuffer const& data)
  115. {
  116. StringView gemini_data { data };
  117. auto gemini_document = Gemini::Document::parse(gemini_data, document.url());
  118. DeprecatedString html_data = gemini_document->render_to_html();
  119. dbgln_if(GEMINI_DEBUG, "Gemini data:\n\"\"\"{}\"\"\"", gemini_data);
  120. dbgln_if(GEMINI_DEBUG, "Converted to HTML:\n\"\"\"{}\"\"\"", html_data);
  121. auto parser = HTML::HTMLParser::create(document, html_data, "utf-8");
  122. parser->run(document.url());
  123. return true;
  124. }
  125. bool build_xml_document(DOM::Document& document, ByteBuffer const& data, Optional<String> content_encoding)
  126. {
  127. Optional<TextCodec::Decoder&> decoder;
  128. // The actual HTTP headers and other metadata, not the headers as mutated or implied by the algorithms given in this specification,
  129. // are the ones that must be used when determining the character encoding according to the rules given in the above specifications.
  130. if (content_encoding.has_value())
  131. decoder = TextCodec::decoder_for(*content_encoding);
  132. if (!decoder.has_value()) {
  133. auto encoding = HTML::run_encoding_sniffing_algorithm(document, data);
  134. decoder = TextCodec::decoder_for(encoding);
  135. }
  136. VERIFY(decoder.has_value());
  137. // Well-formed XML documents contain only properly encoded characters
  138. if (!decoder->validate(data))
  139. return false;
  140. auto source = decoder->to_utf8(data).release_value_but_fixme_should_propagate_errors();
  141. XML::Parser parser(source, { .resolve_external_resource = resolve_xml_resource });
  142. XMLDocumentBuilder builder { document };
  143. auto result = parser.parse_with_listener(builder);
  144. return !result.is_error() && !builder.has_error();
  145. }
  146. static bool build_video_document(DOM::Document& document)
  147. {
  148. auto html_element = DOM::create_element(document, HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  149. MUST(document.append_child(html_element));
  150. auto head_element = DOM::create_element(document, HTML::TagNames::head, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  151. MUST(html_element->append_child(head_element));
  152. auto body_element = DOM::create_element(document, HTML::TagNames::body, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  153. MUST(html_element->append_child(body_element));
  154. auto video_element = DOM::create_element(document, HTML::TagNames::video, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  155. MUST(video_element->set_attribute(HTML::AttributeNames::src, MUST(document.url().to_string())));
  156. MUST(video_element->set_attribute(HTML::AttributeNames::autoplay, String {}));
  157. MUST(video_element->set_attribute(HTML::AttributeNames::controls, String {}));
  158. MUST(body_element->append_child(video_element));
  159. return true;
  160. }
  161. static bool build_audio_document(DOM::Document& document)
  162. {
  163. auto html_element = DOM::create_element(document, HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  164. MUST(document.append_child(html_element));
  165. auto head_element = DOM::create_element(document, HTML::TagNames::head, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  166. MUST(html_element->append_child(head_element));
  167. auto body_element = DOM::create_element(document, HTML::TagNames::body, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  168. MUST(html_element->append_child(body_element));
  169. auto video_element = DOM::create_element(document, HTML::TagNames::audio, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  170. MUST(video_element->set_attribute(HTML::AttributeNames::src, MUST(document.url().to_string())));
  171. MUST(video_element->set_attribute(HTML::AttributeNames::autoplay, String {}));
  172. MUST(video_element->set_attribute(HTML::AttributeNames::controls, String {}));
  173. MUST(body_element->append_child(video_element));
  174. return true;
  175. }
  176. bool parse_document(DOM::Document& document, ByteBuffer const& data, Optional<String> content_encoding)
  177. {
  178. auto& mime_type = document.content_type();
  179. if (mime_type == "text/html") {
  180. auto parser = HTML::HTMLParser::create_with_uncertain_encoding(document, data);
  181. parser->run(document.url());
  182. return true;
  183. }
  184. if (mime_type.ends_with_bytes("+xml"sv) || mime_type.is_one_of("text/xml", "application/xml"))
  185. return build_xml_document(document, data, move(content_encoding));
  186. if (mime_type.starts_with_bytes("image/"sv))
  187. return build_image_document(document, data);
  188. if (mime_type.starts_with_bytes("video/"sv))
  189. return build_video_document(document);
  190. if (mime_type.starts_with_bytes("audio/"sv))
  191. return build_audio_document(document);
  192. if (mime_type == "text/plain" || mime_type == "application/json")
  193. return build_text_document(document, data);
  194. if (mime_type == "text/markdown")
  195. return build_markdown_document(document, data);
  196. if (mime_type == "text/gemini")
  197. return build_gemini_document(document, data);
  198. return false;
  199. }
  200. static bool is_supported_document_mime_type(StringView mime_type)
  201. {
  202. if (mime_type == "text/html")
  203. return true;
  204. if (mime_type.ends_with("+xml"sv) || mime_type.is_one_of("text/xml", "application/xml"))
  205. return true;
  206. if (mime_type.starts_with("image/"sv))
  207. return true;
  208. if (mime_type.starts_with("video/"sv))
  209. return true;
  210. if (mime_type.starts_with("audio/"sv))
  211. return true;
  212. if (mime_type == "text/plain" || mime_type == "application/json")
  213. return true;
  214. if (mime_type == "text/markdown")
  215. return true;
  216. if (mime_type == "text/gemini")
  217. return true;
  218. return false;
  219. }
  220. // https://html.spec.whatwg.org/multipage/browsing-the-web.html#loading-a-document
  221. JS::GCPtr<DOM::Document> load_document(Optional<HTML::NavigationParams> navigation_params)
  222. {
  223. VERIFY(navigation_params.has_value());
  224. auto extracted_mime_type = navigation_params->response->header_list()->extract_mime_type().release_value_but_fixme_should_propagate_errors();
  225. if (!extracted_mime_type.has_value())
  226. return nullptr;
  227. auto mime_type = extracted_mime_type.release_value();
  228. if (!is_supported_document_mime_type(mime_type.essence()))
  229. return nullptr;
  230. auto document = DOM::Document::create_and_initialize(DOM::Document::Type::HTML, "text/html"_string, *navigation_params).release_value_but_fixme_should_propagate_errors();
  231. document->set_content_type(mime_type.essence());
  232. auto& realm = document->realm();
  233. if (navigation_params->response->body()) {
  234. Optional<String> content_encoding = mime_type.parameters().get("charset"sv);
  235. auto process_body = [document, url = navigation_params->response->url().value(), encoding = move(content_encoding)](ByteBuffer bytes) {
  236. if (parse_document(*document, bytes, move(encoding)))
  237. return;
  238. document->remove_all_children(true);
  239. auto error_html = load_error_page(url).release_value_but_fixme_should_propagate_errors();
  240. auto parser = HTML::HTMLParser::create(document, error_html, "utf-8");
  241. document->set_url(AK::URL("about:error"));
  242. parser->run();
  243. };
  244. auto process_body_error = [](auto) {
  245. dbgln("FIXME: Load html page with an error if read of body failed.");
  246. };
  247. navigation_params->response->body()->fully_read(
  248. realm,
  249. move(process_body),
  250. move(process_body_error),
  251. JS::NonnullGCPtr { realm.global_object() })
  252. .release_value_but_fixme_should_propagate_errors();
  253. }
  254. return document;
  255. }
  256. // https://html.spec.whatwg.org/multipage/document-lifecycle.html#read-ua-inline
  257. JS::GCPtr<DOM::Document> create_document_for_inline_content(JS::GCPtr<HTML::Navigable> navigable, Optional<String> navigation_id, StringView content_html)
  258. {
  259. auto& vm = navigable->vm();
  260. // 1. Let origin be a new opaque origin.
  261. HTML::Origin origin {};
  262. // 2. Let coop be a new cross-origin opener policy.
  263. auto coop = HTML::CrossOriginOpenerPolicy {};
  264. // 3. Let coopEnforcementResult be a new cross-origin opener policy enforcement result with
  265. // url: response's URL
  266. // origin: origin
  267. // cross-origin opener policy: coop
  268. HTML::CrossOriginOpenerPolicyEnforcementResult coop_enforcement_result {
  269. .url = AK::URL("about:error"), // AD-HOC
  270. .origin = origin,
  271. .cross_origin_opener_policy = coop
  272. };
  273. // 4. Let navigationParams be a new navigation params with
  274. // id: navigationId
  275. // navigable: navigable
  276. // request: null
  277. // response: a new response
  278. // origin: origin
  279. // fetch controller: null
  280. // commit early hints: null
  281. // COOP enforcement result: coopEnforcementResult
  282. // reserved environment: null
  283. // policy container: a new policy container
  284. // final sandboxing flag set: an empty set
  285. // cross-origin opener policy: coop
  286. // FIXME: navigation timing type: navTimingType
  287. // about base URL: null
  288. auto response = Fetch::Infrastructure::Response::create(vm);
  289. response->url_list().append(AK::URL("about:error")); // AD-HOC: https://github.com/whatwg/html/issues/9122
  290. HTML::NavigationParams navigation_params {
  291. .id = navigation_id,
  292. .navigable = navigable,
  293. .request = {},
  294. .response = *response,
  295. .fetch_controller = nullptr,
  296. .commit_early_hints = nullptr,
  297. .coop_enforcement_result = move(coop_enforcement_result),
  298. .reserved_environment = {},
  299. .origin = move(origin),
  300. .policy_container = HTML::PolicyContainer {},
  301. .final_sandboxing_flag_set = HTML::SandboxingFlagSet {},
  302. .cross_origin_opener_policy = move(coop),
  303. .about_base_url = {},
  304. };
  305. // 5. Let document be the result of creating and initializing a Document object given "html", "text/html", and navigationParams.
  306. auto document = DOM::Document::create_and_initialize(DOM::Document::Type::HTML, "text/html"_string, navigation_params).release_value_but_fixme_should_propagate_errors();
  307. // 6. Either associate document with a custom rendering that is not rendered using the normal Document rendering rules, or mutate document until it represents the content the
  308. // user agent wants to render.
  309. auto parser = HTML::HTMLParser::create(document, content_html, "utf-8");
  310. document->set_url(AK::URL("about:error"));
  311. parser->run();
  312. // 7. Return document.
  313. return document;
  314. }
  315. }