FrameLoader.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503
  1. /*
  2. * Copyright (c) 2020-2022, Andreas Kling <kling@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Debug.h>
  7. #include <AK/JsonArray.h>
  8. #include <AK/LexicalPath.h>
  9. #include <AK/SourceGenerator.h>
  10. #include <LibGemini/Document.h>
  11. #include <LibGfx/ImageFormats/ImageDecoder.h>
  12. #include <LibMarkdown/Document.h>
  13. #include <LibTextCodec/Decoder.h>
  14. #include <LibWeb/Bindings/MainThreadVM.h>
  15. #include <LibWeb/DOM/Document.h>
  16. #include <LibWeb/DOM/ElementFactory.h>
  17. #include <LibWeb/DOM/Text.h>
  18. #include <LibWeb/HTML/BrowsingContext.h>
  19. #include <LibWeb/HTML/HTMLIFrameElement.h>
  20. #include <LibWeb/HTML/NavigationParams.h>
  21. #include <LibWeb/HTML/Parser/HTMLEncodingDetection.h>
  22. #include <LibWeb/HTML/Parser/HTMLParser.h>
  23. #include <LibWeb/Loader/FrameLoader.h>
  24. #include <LibWeb/Loader/ResourceLoader.h>
  25. #include <LibWeb/Namespace.h>
  26. #include <LibWeb/Page/Page.h>
  27. #include <LibWeb/Platform/ImageCodecPlugin.h>
  28. #include <LibWeb/XML/XMLDocumentBuilder.h>
  29. namespace Web {
  30. static DeprecatedString s_default_favicon_path = "/res/icons/16x16/app-browser.png";
  31. static RefPtr<Gfx::Bitmap> s_default_favicon_bitmap;
  32. void FrameLoader::set_default_favicon_path(DeprecatedString path)
  33. {
  34. s_default_favicon_path = move(path);
  35. }
  36. FrameLoader::FrameLoader(HTML::BrowsingContext& browsing_context)
  37. : m_browsing_context(browsing_context)
  38. {
  39. if (!s_default_favicon_bitmap) {
  40. s_default_favicon_bitmap = Gfx::Bitmap::load_from_file(s_default_favicon_path).release_value_but_fixme_should_propagate_errors();
  41. VERIFY(s_default_favicon_bitmap);
  42. }
  43. }
  44. FrameLoader::~FrameLoader() = default;
  45. static bool build_markdown_document(DOM::Document& document, ByteBuffer const& data)
  46. {
  47. auto markdown_document = Markdown::Document::parse(data);
  48. if (!markdown_document)
  49. return false;
  50. auto extra_head_contents = R"~~~(
  51. <style>
  52. .zoomable {
  53. cursor: zoom-in;
  54. max-width: 100%;
  55. }
  56. .zoomable.zoomed-in {
  57. cursor: zoom-out;
  58. max-width: none;
  59. }
  60. </style>
  61. <script>
  62. function imageClickEventListener(event) {
  63. let image = event.target;
  64. if (image.classList.contains("zoomable")) {
  65. image.classList.toggle("zoomed-in");
  66. }
  67. }
  68. function processImages() {
  69. let images = document.querySelectorAll("img");
  70. let windowWidth = window.innerWidth;
  71. images.forEach((image) => {
  72. if (image.naturalWidth > windowWidth) {
  73. image.classList.add("zoomable");
  74. } else {
  75. image.classList.remove("zoomable");
  76. image.classList.remove("zoomed-in");
  77. }
  78. image.addEventListener("click", imageClickEventListener);
  79. });
  80. }
  81. document.addEventListener("load", () => {
  82. processImages();
  83. });
  84. window.addEventListener("resize", () => {
  85. processImages();
  86. });
  87. </script>
  88. )~~~"sv;
  89. auto parser = HTML::HTMLParser::create(document, markdown_document->render_to_html(extra_head_contents), "utf-8");
  90. parser->run(document.url());
  91. return true;
  92. }
  93. static bool build_text_document(DOM::Document& document, ByteBuffer const& data)
  94. {
  95. auto html_element = DOM::create_element(document, HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  96. MUST(document.append_child(html_element));
  97. auto head_element = DOM::create_element(document, HTML::TagNames::head, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  98. MUST(html_element->append_child(head_element));
  99. auto title_element = DOM::create_element(document, HTML::TagNames::title, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  100. MUST(head_element->append_child(title_element));
  101. auto title_text = document.create_text_node(document.url().basename());
  102. MUST(title_element->append_child(title_text));
  103. auto body_element = DOM::create_element(document, HTML::TagNames::body, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  104. MUST(html_element->append_child(body_element));
  105. auto pre_element = DOM::create_element(document, HTML::TagNames::pre, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  106. MUST(body_element->append_child(pre_element));
  107. MUST(pre_element->append_child(document.create_text_node(DeprecatedString::copy(data))));
  108. return true;
  109. }
  110. static bool build_image_document(DOM::Document& document, ByteBuffer const& data)
  111. {
  112. auto image = Platform::ImageCodecPlugin::the().decode_image(data);
  113. if (!image.has_value() || image->frames.is_empty())
  114. return false;
  115. auto const& frame = image->frames[0];
  116. auto const& bitmap = frame.bitmap;
  117. if (!bitmap)
  118. return false;
  119. auto html_element = DOM::create_element(document, HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  120. MUST(document.append_child(html_element));
  121. auto head_element = DOM::create_element(document, HTML::TagNames::head, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  122. MUST(html_element->append_child(head_element));
  123. auto title_element = DOM::create_element(document, HTML::TagNames::title, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  124. MUST(head_element->append_child(title_element));
  125. auto basename = LexicalPath::basename(document.url().serialize_path());
  126. auto title_text = document.heap().allocate<DOM::Text>(document.realm(), document, DeprecatedString::formatted("{} [{}x{}]", basename, bitmap->width(), bitmap->height())).release_allocated_value_but_fixme_should_propagate_errors();
  127. MUST(title_element->append_child(*title_text));
  128. auto body_element = DOM::create_element(document, HTML::TagNames::body, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  129. MUST(html_element->append_child(body_element));
  130. auto image_element = DOM::create_element(document, HTML::TagNames::img, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  131. MUST(image_element->set_attribute(HTML::AttributeNames::src, document.url().to_deprecated_string()));
  132. MUST(body_element->append_child(image_element));
  133. return true;
  134. }
  135. static bool build_gemini_document(DOM::Document& document, ByteBuffer const& data)
  136. {
  137. StringView gemini_data { data };
  138. auto gemini_document = Gemini::Document::parse(gemini_data, document.url());
  139. DeprecatedString html_data = gemini_document->render_to_html();
  140. dbgln_if(GEMINI_DEBUG, "Gemini data:\n\"\"\"{}\"\"\"", gemini_data);
  141. dbgln_if(GEMINI_DEBUG, "Converted to HTML:\n\"\"\"{}\"\"\"", html_data);
  142. auto parser = HTML::HTMLParser::create(document, html_data, "utf-8");
  143. parser->run(document.url());
  144. return true;
  145. }
  146. static bool build_xml_document(DOM::Document& document, ByteBuffer const& data)
  147. {
  148. auto encoding = HTML::run_encoding_sniffing_algorithm(document, data);
  149. auto decoder = TextCodec::decoder_for(encoding);
  150. VERIFY(decoder.has_value());
  151. auto source = decoder->to_utf8(data).release_value_but_fixme_should_propagate_errors();
  152. XML::Parser parser(source, { .resolve_external_resource = resolve_xml_resource });
  153. XMLDocumentBuilder builder { document };
  154. auto result = parser.parse_with_listener(builder);
  155. return !result.is_error() && !builder.has_error();
  156. }
  157. static bool build_video_document(DOM::Document& document)
  158. {
  159. auto html_element = DOM::create_element(document, HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  160. MUST(document.append_child(html_element));
  161. auto head_element = DOM::create_element(document, HTML::TagNames::head, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  162. MUST(html_element->append_child(head_element));
  163. auto body_element = DOM::create_element(document, HTML::TagNames::body, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  164. MUST(html_element->append_child(body_element));
  165. auto video_element = DOM::create_element(document, HTML::TagNames::video, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
  166. MUST(video_element->set_attribute(HTML::AttributeNames::src, document.url().to_deprecated_string()));
  167. MUST(video_element->set_attribute(HTML::AttributeNames::autoplay, DeprecatedString::empty()));
  168. MUST(video_element->set_attribute(HTML::AttributeNames::controls, DeprecatedString::empty()));
  169. MUST(body_element->append_child(video_element));
  170. return true;
  171. }
  172. bool FrameLoader::parse_document(DOM::Document& document, ByteBuffer const& data)
  173. {
  174. auto& mime_type = document.content_type();
  175. if (mime_type == "text/html" || mime_type == "image/svg+xml") {
  176. auto parser = HTML::HTMLParser::create_with_uncertain_encoding(document, data);
  177. parser->run(document.url());
  178. return true;
  179. }
  180. if (mime_type.ends_with("+xml"sv) || mime_type.is_one_of("text/xml", "application/xml"))
  181. return build_xml_document(document, data);
  182. if (mime_type.starts_with("image/"sv))
  183. return build_image_document(document, data);
  184. if (mime_type.starts_with("video/"sv))
  185. return build_video_document(document);
  186. if (mime_type == "text/plain" || mime_type == "application/json")
  187. return build_text_document(document, data);
  188. if (mime_type == "text/markdown")
  189. return build_markdown_document(document, data);
  190. if (mime_type == "text/gemini")
  191. return build_gemini_document(document, data);
  192. return false;
  193. }
  194. bool FrameLoader::load(LoadRequest& request, Type type)
  195. {
  196. if (!request.is_valid()) {
  197. load_error_page(request.url(), "Invalid request");
  198. return false;
  199. }
  200. if (!m_browsing_context->is_frame_nesting_allowed(request.url())) {
  201. dbgln("No further recursion is allowed for the frame, abort load!");
  202. return false;
  203. }
  204. request.set_main_resource(true);
  205. auto& url = request.url();
  206. if (type == Type::Navigation || type == Type::Reload || type == Type::Redirect) {
  207. if (auto* page = browsing_context().page()) {
  208. if (&page->top_level_browsing_context() == m_browsing_context)
  209. page->client().page_did_start_loading(url, type == Type::Redirect);
  210. }
  211. }
  212. // https://fetch.spec.whatwg.org/#concept-fetch
  213. // Step 12: If request’s header list does not contain `Accept`, then:
  214. // 1. Let value be `*/*`. (NOTE: Not necessary as we're about to override it)
  215. // 2. A user agent should set value to the first matching statement, if any, switching on request’s destination:
  216. // -> "document"
  217. // -> "frame"
  218. // -> "iframe"
  219. // `text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8`
  220. if (!request.headers().contains("Accept"))
  221. request.set_header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
  222. set_resource(ResourceLoader::the().load_resource(Resource::Type::Generic, request));
  223. if (type == Type::IFrame)
  224. return true;
  225. auto* document = browsing_context().active_document();
  226. if (document && document->has_active_favicon())
  227. return true;
  228. if (url.scheme() == "http" || url.scheme() == "https") {
  229. AK::URL favicon_url;
  230. favicon_url.set_scheme(url.scheme());
  231. favicon_url.set_host(url.host());
  232. favicon_url.set_port(url.port_or_default());
  233. favicon_url.set_paths({ "favicon.ico" });
  234. ResourceLoader::the().load(
  235. favicon_url,
  236. [this, favicon_url](auto data, auto&, auto) {
  237. // Always fetch the current document
  238. auto* document = this->browsing_context().active_document();
  239. if (document && document->has_active_favicon())
  240. return;
  241. dbgln_if(SPAM_DEBUG, "Favicon downloaded, {} bytes from {}", data.size(), favicon_url);
  242. if (data.is_empty())
  243. return;
  244. RefPtr<Gfx::Bitmap> favicon_bitmap;
  245. auto decoded_image = Platform::ImageCodecPlugin::the().decode_image(data);
  246. if (!decoded_image.has_value() || decoded_image->frames.is_empty()) {
  247. dbgln("Could not decode favicon {}", favicon_url);
  248. } else {
  249. favicon_bitmap = decoded_image->frames[0].bitmap;
  250. dbgln_if(IMAGE_DECODER_DEBUG, "Decoded favicon, {}", favicon_bitmap->size());
  251. }
  252. load_favicon(favicon_bitmap);
  253. },
  254. [this](auto&, auto) {
  255. // Always fetch the current document
  256. auto* document = this->browsing_context().active_document();
  257. if (document && document->has_active_favicon())
  258. return;
  259. load_favicon();
  260. });
  261. } else {
  262. load_favicon();
  263. }
  264. return true;
  265. }
  266. bool FrameLoader::load(const AK::URL& url, Type type)
  267. {
  268. dbgln_if(SPAM_DEBUG, "FrameLoader::load: {}", url);
  269. if (!url.is_valid()) {
  270. load_error_page(url, "Invalid URL");
  271. return false;
  272. }
  273. auto request = LoadRequest::create_for_url_on_page(url, browsing_context().page());
  274. return load(request, type);
  275. }
  276. void FrameLoader::load_html(StringView html, const AK::URL& url)
  277. {
  278. auto& vm = Bindings::main_thread_vm();
  279. auto response = Fetch::Infrastructure::Response::create(vm);
  280. response->url_list().append(url);
  281. HTML::NavigationParams navigation_params {
  282. .id = {},
  283. .request = nullptr,
  284. .response = response,
  285. .origin = HTML::Origin {},
  286. .policy_container = HTML::PolicyContainer {},
  287. .final_sandboxing_flag_set = HTML::SandboxingFlagSet {},
  288. .cross_origin_opener_policy = HTML::CrossOriginOpenerPolicy {},
  289. .coop_enforcement_result = HTML::CrossOriginOpenerPolicyEnforcementResult {},
  290. .reserved_environment = {},
  291. .browsing_context = browsing_context(),
  292. };
  293. auto document = DOM::Document::create_and_initialize(DOM::Document::Type::HTML, "text/html", move(navigation_params)).release_value_but_fixme_should_propagate_errors();
  294. browsing_context().set_active_document(document);
  295. auto parser = HTML::HTMLParser::create(document, html, "utf-8");
  296. parser->run(url);
  297. }
  298. static DeprecatedString s_error_page_url = "file:///res/html/error.html";
  299. void FrameLoader::set_error_page_url(DeprecatedString error_page_url)
  300. {
  301. s_error_page_url = error_page_url;
  302. }
  303. // FIXME: Use an actual templating engine (our own one when it's built, preferably
  304. // with a way to check these usages at compile time)
  305. void FrameLoader::load_error_page(const AK::URL& failed_url, DeprecatedString const& error)
  306. {
  307. LoadRequest request = LoadRequest::create_for_url_on_page(s_error_page_url, browsing_context().page());
  308. ResourceLoader::the().load(
  309. request,
  310. [this, failed_url, error](auto data, auto&, auto) {
  311. VERIFY(!data.is_null());
  312. StringBuilder builder;
  313. SourceGenerator generator { builder };
  314. generator.set("failed_url", escape_html_entities(failed_url.to_deprecated_string()));
  315. generator.set("error", escape_html_entities(error));
  316. generator.append(data);
  317. load_html(generator.as_string_view(), s_error_page_url);
  318. },
  319. [](auto& error, auto) {
  320. dbgln("Failed to load error page: {}", error);
  321. VERIFY_NOT_REACHED();
  322. });
  323. }
  324. void FrameLoader::load_favicon(RefPtr<Gfx::Bitmap> bitmap)
  325. {
  326. if (auto* page = browsing_context().page()) {
  327. if (bitmap)
  328. page->client().page_did_change_favicon(*bitmap);
  329. else if (s_default_favicon_bitmap)
  330. page->client().page_did_change_favicon(*s_default_favicon_bitmap);
  331. }
  332. }
  333. void FrameLoader::resource_did_load()
  334. {
  335. // This prevents us setting up the document of a removed browsing context container (BCC, e.g. <iframe>), which will cause a crash
  336. // if the document contains a script that inserts another BCC as this will use the stale browsing context it previously set up,
  337. // even if it's reinserted.
  338. // Example:
  339. // index.html:
  340. // ```
  341. // <body><script>
  342. // var i = document.createElement("iframe");
  343. // i.src = "b.html";
  344. // document.body.append(i);
  345. // i.remove();
  346. // </script>
  347. // ```
  348. // b.html:
  349. // ```
  350. // <body><script>
  351. // var i = document.createElement("iframe");
  352. // document.body.append(i);
  353. // </script>
  354. // ```
  355. // Required by Prebid.js, which does this by inserting an <iframe> into a <div> in the active document via innerHTML,
  356. // then transfers it to the <html> element:
  357. // https://github.com/prebid/Prebid.js/blob/7b7389c5abdd05626f71c3df606a93713d1b9f85/src/utils.js#L597
  358. // This is done in the spec by removing all tasks and aborting all fetches when a document is destroyed:
  359. // https://html.spec.whatwg.org/multipage/document-lifecycle.html#destroy-a-document
  360. if (browsing_context().has_been_discarded())
  361. return;
  362. auto url = resource()->url();
  363. // For 3xx (Redirection) responses, the Location value refers to the preferred target resource for automatically redirecting the request.
  364. auto status_code = resource()->status_code();
  365. if (status_code.has_value() && *status_code >= 300 && *status_code <= 399) {
  366. auto location = resource()->response_headers().get("Location");
  367. if (location.has_value()) {
  368. if (m_redirects_count > maximum_redirects_allowed) {
  369. m_redirects_count = 0;
  370. load_error_page(url, "Too many redirects");
  371. return;
  372. }
  373. m_redirects_count++;
  374. load(url.complete_url(location.value()), Type::Redirect);
  375. return;
  376. }
  377. }
  378. m_redirects_count = 0;
  379. if (resource()->has_encoding()) {
  380. dbgln_if(RESOURCE_DEBUG, "This content has MIME type '{}', encoding '{}'", resource()->mime_type(), resource()->encoding().value());
  381. } else {
  382. dbgln_if(RESOURCE_DEBUG, "This content has MIME type '{}', encoding unknown", resource()->mime_type());
  383. }
  384. auto final_sandboxing_flag_set = HTML::SandboxingFlagSet {};
  385. // (Part of https://html.spec.whatwg.org/#navigating-across-documents)
  386. // 3. Let responseOrigin be the result of determining the origin given browsingContext, resource's url, finalSandboxFlags, and incumbentNavigationOrigin.
  387. // FIXME: Pass incumbentNavigationOrigin
  388. auto response_origin = HTML::determine_the_origin(browsing_context(), url, final_sandboxing_flag_set, {});
  389. auto& vm = Bindings::main_thread_vm();
  390. auto response = Fetch::Infrastructure::Response::create(vm);
  391. response->url_list().append(url);
  392. HTML::NavigationParams navigation_params {
  393. .id = {},
  394. .request = nullptr,
  395. .response = response,
  396. .origin = move(response_origin),
  397. .policy_container = HTML::PolicyContainer {},
  398. .final_sandboxing_flag_set = final_sandboxing_flag_set,
  399. .cross_origin_opener_policy = HTML::CrossOriginOpenerPolicy {},
  400. .coop_enforcement_result = HTML::CrossOriginOpenerPolicyEnforcementResult {},
  401. .reserved_environment = {},
  402. .browsing_context = browsing_context(),
  403. };
  404. auto document = DOM::Document::create_and_initialize(DOM::Document::Type::HTML, "text/html", move(navigation_params)).release_value_but_fixme_should_propagate_errors();
  405. document->set_url(url);
  406. document->set_encoding(resource()->encoding());
  407. document->set_content_type(resource()->mime_type());
  408. browsing_context().set_active_document(document);
  409. if (auto* page = browsing_context().page())
  410. page->client().page_did_create_main_document();
  411. if (!parse_document(*document, resource()->encoded_data())) {
  412. load_error_page(url, "Failed to parse content.");
  413. return;
  414. }
  415. if (!url.fragment().is_empty())
  416. browsing_context().scroll_to_anchor(url.fragment());
  417. else
  418. browsing_context().scroll_to({ 0, 0 });
  419. if (auto* page = browsing_context().page())
  420. page->client().page_did_finish_loading(url);
  421. }
  422. void FrameLoader::resource_did_fail()
  423. {
  424. // See comment in resource_did_load() about why this is done.
  425. if (browsing_context().has_been_discarded())
  426. return;
  427. load_error_page(resource()->url(), resource()->error());
  428. }
  429. }