FrameLoader.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364
  1. /*
  2. * Copyright (c) 2020-2022, Andreas Kling <kling@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Debug.h>
  7. #include <AK/JsonArray.h>
  8. #include <AK/LexicalPath.h>
  9. #include <AK/SourceGenerator.h>
  10. #include <LibWeb/Bindings/MainThreadVM.h>
  11. #include <LibWeb/DOM/Document.h>
  12. #include <LibWeb/DOM/DocumentLoading.h>
  13. #include <LibWeb/DOM/ElementFactory.h>
  14. #include <LibWeb/DOM/Text.h>
  15. #include <LibWeb/HTML/NavigationParams.h>
  16. #include <LibWeb/HTML/Parser/HTMLParser.h>
  17. #include <LibWeb/Loader/FrameLoader.h>
  18. #include <LibWeb/Loader/ResourceLoader.h>
  19. #include <LibWeb/Namespace.h>
  20. #include <LibWeb/Page/Page.h>
  21. #include <LibWeb/Platform/ImageCodecPlugin.h>
  22. #include <LibWeb/ReferrerPolicy/AbstractOperations.h>
  23. #include <LibWeb/XML/XMLDocumentBuilder.h>
  24. namespace Web {
  25. static DeprecatedString s_default_favicon_path = "/res/icons/16x16/app-browser.png";
  26. static RefPtr<Gfx::Bitmap> s_default_favicon_bitmap;
  27. void FrameLoader::set_default_favicon_path(DeprecatedString path)
  28. {
  29. s_default_favicon_path = move(path);
  30. }
  31. FrameLoader::FrameLoader(HTML::BrowsingContext& browsing_context)
  32. : m_browsing_context(browsing_context)
  33. {
  34. if (!s_default_favicon_bitmap) {
  35. s_default_favicon_bitmap = Gfx::Bitmap::load_from_file(s_default_favicon_path).release_value_but_fixme_should_propagate_errors();
  36. VERIFY(s_default_favicon_bitmap);
  37. }
  38. }
  39. FrameLoader::~FrameLoader() = default;
  40. bool FrameLoader::load(LoadRequest& request, Type type)
  41. {
  42. if (!request.is_valid()) {
  43. load_error_page(request.url(), "Invalid request");
  44. return false;
  45. }
  46. if (!m_browsing_context->is_frame_nesting_allowed(request.url())) {
  47. dbgln("No further recursion is allowed for the frame, abort load!");
  48. return false;
  49. }
  50. request.set_main_resource(true);
  51. auto& url = request.url();
  52. if (type == Type::Navigation || type == Type::Reload || type == Type::Redirect) {
  53. if (auto* page = browsing_context().page()) {
  54. if (&page->top_level_browsing_context() == m_browsing_context)
  55. page->client().page_did_start_loading(url, type == Type::Redirect);
  56. }
  57. }
  58. // https://fetch.spec.whatwg.org/#concept-fetch
  59. // Step 12: If request’s header list does not contain `Accept`, then:
  60. // 1. Let value be `*/*`. (NOTE: Not necessary as we're about to override it)
  61. // 2. A user agent should set value to the first matching statement, if any, switching on request’s destination:
  62. // -> "document"
  63. // -> "frame"
  64. // -> "iframe"
  65. // `text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8`
  66. if (!request.headers().contains("Accept"))
  67. request.set_header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
  68. // HACK: We're crudely computing the referer value and shoving it into the
  69. // request until fetch infrastructure is used here.
  70. auto referrer_url = ReferrerPolicy::strip_url_for_use_as_referrer(url);
  71. if (referrer_url.has_value() && !request.headers().contains("Referer"))
  72. request.set_header("Referer", referrer_url->serialize());
  73. set_resource(ResourceLoader::the().load_resource(Resource::Type::Generic, request));
  74. if (type == Type::IFrame)
  75. return true;
  76. if (url.scheme() == "http" || url.scheme() == "https") {
  77. AK::URL favicon_url;
  78. favicon_url.set_scheme(url.scheme());
  79. favicon_url.set_host(url.host());
  80. favicon_url.set_port(url.port_or_default());
  81. favicon_url.set_paths({ "favicon.ico" });
  82. ResourceLoader::the().load(
  83. favicon_url,
  84. [this, favicon_url](auto data, auto&, auto) {
  85. // Always fetch the current document
  86. auto* document = this->browsing_context().active_document();
  87. if (document && document->has_active_favicon())
  88. return;
  89. dbgln_if(SPAM_DEBUG, "Favicon downloaded, {} bytes from {}", data.size(), favicon_url);
  90. if (data.is_empty())
  91. return;
  92. RefPtr<Gfx::Bitmap> favicon_bitmap;
  93. auto decoded_image = Platform::ImageCodecPlugin::the().decode_image(data);
  94. if (!decoded_image.has_value() || decoded_image->frames.is_empty()) {
  95. dbgln("Could not decode favicon {}", favicon_url);
  96. } else {
  97. favicon_bitmap = decoded_image->frames[0].bitmap;
  98. dbgln_if(IMAGE_DECODER_DEBUG, "Decoded favicon, {}", favicon_bitmap->size());
  99. }
  100. load_favicon(favicon_bitmap);
  101. },
  102. [this](auto&, auto) {
  103. // Always fetch the current document
  104. auto* document = this->browsing_context().active_document();
  105. if (document && document->has_active_favicon())
  106. return;
  107. load_favicon();
  108. });
  109. } else {
  110. load_favicon();
  111. }
  112. return true;
  113. }
  114. bool FrameLoader::load(const AK::URL& url, Type type)
  115. {
  116. dbgln_if(SPAM_DEBUG, "FrameLoader::load: {}", url);
  117. if (!url.is_valid()) {
  118. load_error_page(url, "Invalid URL");
  119. return false;
  120. }
  121. auto request = LoadRequest::create_for_url_on_page(url, browsing_context().page());
  122. return load(request, type);
  123. }
  124. void FrameLoader::load_html(StringView html, const AK::URL& url)
  125. {
  126. if (auto* page = browsing_context().page())
  127. page->client().page_did_start_loading(url, false);
  128. auto& vm = Bindings::main_thread_vm();
  129. auto response = Fetch::Infrastructure::Response::create(vm);
  130. response->url_list().append(url);
  131. HTML::NavigationParams navigation_params {
  132. .id = {},
  133. .request = nullptr,
  134. .response = response,
  135. .origin = HTML::Origin {},
  136. .policy_container = HTML::PolicyContainer {},
  137. .final_sandboxing_flag_set = HTML::SandboxingFlagSet {},
  138. .cross_origin_opener_policy = HTML::CrossOriginOpenerPolicy {},
  139. .coop_enforcement_result = HTML::CrossOriginOpenerPolicyEnforcementResult {},
  140. .reserved_environment = {},
  141. .browsing_context = browsing_context(),
  142. .navigable = nullptr,
  143. };
  144. auto document = DOM::Document::create_and_initialize(DOM::Document::Type::HTML, "text/html", navigation_params).release_value_but_fixme_should_propagate_errors();
  145. // browsing_context().set_active_document(document);
  146. auto parser = HTML::HTMLParser::create(document, html, "utf-8");
  147. parser->run(url);
  148. if (auto* page = browsing_context().page())
  149. page->client().page_did_finish_loading(url);
  150. }
  151. static DeprecatedString s_resource_directory_url = "file:///res";
  152. DeprecatedString FrameLoader::resource_directory_url()
  153. {
  154. return s_resource_directory_url;
  155. }
  156. void FrameLoader::set_resource_directory_url(DeprecatedString resource_directory_url)
  157. {
  158. s_resource_directory_url = resource_directory_url;
  159. }
  160. static DeprecatedString s_error_page_url = "file:///res/html/error.html";
  161. DeprecatedString FrameLoader::error_page_url()
  162. {
  163. return s_error_page_url;
  164. }
  165. void FrameLoader::set_error_page_url(DeprecatedString error_page_url)
  166. {
  167. s_error_page_url = error_page_url;
  168. }
  169. static DeprecatedString s_directory_page_url = "file:///res/html/directory.html";
  170. DeprecatedString FrameLoader::directory_page_url()
  171. {
  172. return s_directory_page_url;
  173. }
  174. void FrameLoader::set_directory_page_url(DeprecatedString directory_page_url)
  175. {
  176. s_directory_page_url = directory_page_url;
  177. }
  178. // FIXME: Use an actual templating engine (our own one when it's built, preferably
  179. // with a way to check these usages at compile time)
  180. void FrameLoader::load_error_page(const AK::URL& failed_url, DeprecatedString const& error)
  181. {
  182. LoadRequest request = LoadRequest::create_for_url_on_page(s_error_page_url, browsing_context().page());
  183. ResourceLoader::the().load(
  184. request,
  185. [this, failed_url, error](auto data, auto&, auto) {
  186. VERIFY(!data.is_null());
  187. StringBuilder builder;
  188. SourceGenerator generator { builder };
  189. generator.set("resource_directory_url", resource_directory_url());
  190. generator.set("failed_url", escape_html_entities(failed_url.to_deprecated_string()));
  191. generator.set("error", escape_html_entities(error));
  192. generator.append(data);
  193. load_html(generator.as_string_view(), s_error_page_url);
  194. },
  195. [](auto& error, auto) {
  196. dbgln("Failed to load error page: {}", error);
  197. VERIFY_NOT_REACHED();
  198. });
  199. }
  200. void FrameLoader::load_favicon(RefPtr<Gfx::Bitmap> bitmap)
  201. {
  202. if (auto* page = browsing_context().page()) {
  203. if (bitmap)
  204. page->client().page_did_change_favicon(*bitmap);
  205. else if (s_default_favicon_bitmap)
  206. page->client().page_did_change_favicon(*s_default_favicon_bitmap);
  207. }
  208. }
  209. void FrameLoader::resource_did_load()
  210. {
  211. // This prevents us setting up the document of a removed browsing context container (BCC, e.g. <iframe>), which will cause a crash
  212. // if the document contains a script that inserts another BCC as this will use the stale browsing context it previously set up,
  213. // even if it's reinserted.
  214. // Example:
  215. // index.html:
  216. // ```
  217. // <body><script>
  218. // var i = document.createElement("iframe");
  219. // i.src = "b.html";
  220. // document.body.append(i);
  221. // i.remove();
  222. // </script>
  223. // ```
  224. // b.html:
  225. // ```
  226. // <body><script>
  227. // var i = document.createElement("iframe");
  228. // document.body.append(i);
  229. // </script>
  230. // ```
  231. // Required by Prebid.js, which does this by inserting an <iframe> into a <div> in the active document via innerHTML,
  232. // then transfers it to the <html> element:
  233. // https://github.com/prebid/Prebid.js/blob/7b7389c5abdd05626f71c3df606a93713d1b9f85/src/utils.js#L597
  234. // This is done in the spec by removing all tasks and aborting all fetches when a document is destroyed:
  235. // https://html.spec.whatwg.org/multipage/document-lifecycle.html#destroy-a-document
  236. if (browsing_context().has_been_discarded())
  237. return;
  238. auto url = resource()->url();
  239. // For 3xx (Redirection) responses, the Location value refers to the preferred target resource for automatically redirecting the request.
  240. auto status_code = resource()->status_code();
  241. if (status_code.has_value() && *status_code >= 300 && *status_code <= 399) {
  242. auto location = resource()->response_headers().get("Location");
  243. if (location.has_value()) {
  244. if (m_redirects_count > maximum_redirects_allowed) {
  245. m_redirects_count = 0;
  246. load_error_page(url, "Too many redirects");
  247. return;
  248. }
  249. m_redirects_count++;
  250. load(url.complete_url(location.value()), Type::Redirect);
  251. return;
  252. }
  253. }
  254. m_redirects_count = 0;
  255. if (resource()->has_encoding()) {
  256. dbgln_if(RESOURCE_DEBUG, "This content has MIME type '{}', encoding '{}'", resource()->mime_type(), resource()->encoding().value());
  257. } else {
  258. dbgln_if(RESOURCE_DEBUG, "This content has MIME type '{}', encoding unknown", resource()->mime_type());
  259. }
  260. auto final_sandboxing_flag_set = HTML::SandboxingFlagSet {};
  261. // (Part of https://html.spec.whatwg.org/#navigating-across-documents)
  262. // 3. Let responseOrigin be the result of determining the origin given browsingContext, resource's url, finalSandboxFlags, and incumbentNavigationOrigin.
  263. // FIXME: Pass incumbentNavigationOrigin
  264. auto response_origin = HTML::determine_the_origin(browsing_context(), url, final_sandboxing_flag_set, {});
  265. auto& vm = Bindings::main_thread_vm();
  266. auto response = Fetch::Infrastructure::Response::create(vm);
  267. response->url_list().append(url);
  268. HTML::NavigationParams navigation_params {
  269. .id = {},
  270. .request = nullptr,
  271. .response = response,
  272. .origin = move(response_origin),
  273. .policy_container = HTML::PolicyContainer {},
  274. .final_sandboxing_flag_set = final_sandboxing_flag_set,
  275. .cross_origin_opener_policy = HTML::CrossOriginOpenerPolicy {},
  276. .coop_enforcement_result = HTML::CrossOriginOpenerPolicyEnforcementResult {},
  277. .reserved_environment = {},
  278. .browsing_context = browsing_context(),
  279. .navigable = nullptr,
  280. };
  281. auto document = DOM::Document::create_and_initialize(DOM::Document::Type::HTML, "text/html", navigation_params).release_value_but_fixme_should_propagate_errors();
  282. document->set_url(url);
  283. if (resource()->encoding().has_value())
  284. document->set_encoding(MUST(String::from_deprecated_string(resource()->encoding().value())));
  285. else
  286. document->set_encoding({});
  287. document->set_content_type(MUST(String::from_deprecated_string(resource()->mime_type())));
  288. // browsing_context().set_active_document(document);
  289. if (auto* page = browsing_context().page())
  290. page->client().page_did_create_new_document(*document);
  291. if (!parse_document(*document, resource()->encoded_data())) {
  292. load_error_page(url, "Failed to parse content.");
  293. return;
  294. }
  295. if (url.fragment().has_value() && !url.fragment()->is_empty())
  296. browsing_context().scroll_to_anchor(url.fragment()->to_deprecated_string());
  297. else
  298. browsing_context().scroll_to({ 0, 0 });
  299. if (auto* page = browsing_context().page())
  300. page->client().page_did_finish_loading(url);
  301. }
  302. void FrameLoader::resource_did_fail()
  303. {
  304. // See comment in resource_did_load() about why this is done.
  305. if (browsing_context().has_been_discarded())
  306. return;
  307. load_error_page(resource()->url(), resource()->error());
  308. }
  309. }