FrameLoader.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470
  1. /*
  2. * Copyright (c) 2020-2022, Andreas Kling <kling@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Debug.h>
  7. #include <AK/JsonArray.h>
  8. #include <AK/LexicalPath.h>
  9. #include <AK/SourceGenerator.h>
  10. #include <LibGemini/Document.h>
  11. #include <LibGfx/ImageDecoder.h>
  12. #include <LibMarkdown/Document.h>
  13. #include <LibWeb/Bindings/MainThreadVM.h>
  14. #include <LibWeb/Cookie/ParsedCookie.h>
  15. #include <LibWeb/DOM/Document.h>
  16. #include <LibWeb/DOM/ElementFactory.h>
  17. #include <LibWeb/DOM/Text.h>
  18. #include <LibWeb/HTML/BrowsingContext.h>
  19. #include <LibWeb/HTML/HTMLIFrameElement.h>
  20. #include <LibWeb/HTML/NavigationParams.h>
  21. #include <LibWeb/HTML/Parser/HTMLParser.h>
  22. #include <LibWeb/Loader/FrameLoader.h>
  23. #include <LibWeb/Loader/ResourceLoader.h>
  24. #include <LibWeb/Page/Page.h>
  25. #include <LibWeb/Platform/ImageCodecPlugin.h>
  26. #include <LibWeb/XML/XMLDocumentBuilder.h>
  27. namespace Web {
  28. static String s_default_favicon_path = "/res/icons/16x16/app-browser.png";
  29. static RefPtr<Gfx::Bitmap> s_default_favicon_bitmap;
  30. void FrameLoader::set_default_favicon_path(String path)
  31. {
  32. s_default_favicon_path = move(path);
  33. }
  34. FrameLoader::FrameLoader(HTML::BrowsingContext& browsing_context)
  35. : m_browsing_context(browsing_context)
  36. {
  37. if (!s_default_favicon_bitmap) {
  38. s_default_favicon_bitmap = Gfx::Bitmap::try_load_from_file(s_default_favicon_path).release_value_but_fixme_should_propagate_errors();
  39. VERIFY(s_default_favicon_bitmap);
  40. }
  41. }
  42. FrameLoader::~FrameLoader() = default;
  43. static bool build_markdown_document(DOM::Document& document, ByteBuffer const& data)
  44. {
  45. auto markdown_document = Markdown::Document::parse(data);
  46. if (!markdown_document)
  47. return false;
  48. auto extra_head_contents = R"~~~(
  49. <style>
  50. .zoomable {
  51. cursor: zoom-in;
  52. max-width: 100%;
  53. }
  54. .zoomable.zoomed-in {
  55. cursor: zoom-out;
  56. max-width: none;
  57. }
  58. </style>
  59. <script>
  60. function imageClickEventListener(event) {
  61. let image = event.target;
  62. if (image.classList.contains("zoomable")) {
  63. image.classList.toggle("zoomed-in");
  64. }
  65. }
  66. function processImages() {
  67. let images = document.querySelectorAll("img");
  68. let windowWidth = window.innerWidth;
  69. images.forEach((image) => {
  70. if (image.naturalWidth > windowWidth) {
  71. image.classList.add("zoomable");
  72. } else {
  73. image.classList.remove("zoomable");
  74. image.classList.remove("zoomed-in");
  75. }
  76. image.addEventListener("click", imageClickEventListener);
  77. });
  78. }
  79. document.addEventListener("load", () => {
  80. processImages();
  81. });
  82. window.addEventListener("resize", () => {
  83. processImages();
  84. });
  85. </script>
  86. )~~~"sv;
  87. auto parser = HTML::HTMLParser::create(document, markdown_document->render_to_html(extra_head_contents), "utf-8");
  88. parser->run(document.url());
  89. return true;
  90. }
  91. static bool build_text_document(DOM::Document& document, ByteBuffer const& data)
  92. {
  93. auto html_element = document.create_element("html").release_value();
  94. MUST(document.append_child(html_element));
  95. auto head_element = document.create_element("head").release_value();
  96. MUST(html_element->append_child(head_element));
  97. auto title_element = document.create_element("title").release_value();
  98. MUST(head_element->append_child(title_element));
  99. auto title_text = document.create_text_node(document.url().basename());
  100. MUST(title_element->append_child(title_text));
  101. auto body_element = document.create_element("body").release_value();
  102. MUST(html_element->append_child(body_element));
  103. auto pre_element = document.create_element("pre").release_value();
  104. MUST(body_element->append_child(pre_element));
  105. MUST(pre_element->append_child(document.create_text_node(String::copy(data))));
  106. return true;
  107. }
  108. static bool build_image_document(DOM::Document& document, ByteBuffer const& data)
  109. {
  110. auto image = Platform::ImageCodecPlugin::the().decode_image(data);
  111. if (!image.has_value() || image->frames.is_empty())
  112. return false;
  113. auto const& frame = image->frames[0];
  114. auto const& bitmap = frame.bitmap;
  115. if (!bitmap)
  116. return false;
  117. auto html_element = document.create_element("html").release_value();
  118. MUST(document.append_child(html_element));
  119. auto head_element = document.create_element("head").release_value();
  120. MUST(html_element->append_child(head_element));
  121. auto title_element = document.create_element("title").release_value();
  122. MUST(head_element->append_child(title_element));
  123. auto basename = LexicalPath::basename(document.url().path());
  124. auto title_text = document.heap().allocate<DOM::Text>(document.realm(), document, String::formatted("{} [{}x{}]", basename, bitmap->width(), bitmap->height()));
  125. MUST(title_element->append_child(*title_text));
  126. auto body_element = document.create_element("body").release_value();
  127. MUST(html_element->append_child(body_element));
  128. auto image_element = document.create_element("img").release_value();
  129. MUST(image_element->set_attribute(HTML::AttributeNames::src, document.url().to_string()));
  130. MUST(body_element->append_child(image_element));
  131. return true;
  132. }
  133. static bool build_gemini_document(DOM::Document& document, ByteBuffer const& data)
  134. {
  135. StringView gemini_data { data };
  136. auto gemini_document = Gemini::Document::parse(gemini_data, document.url());
  137. String html_data = gemini_document->render_to_html();
  138. dbgln_if(GEMINI_DEBUG, "Gemini data:\n\"\"\"{}\"\"\"", gemini_data);
  139. dbgln_if(GEMINI_DEBUG, "Converted to HTML:\n\"\"\"{}\"\"\"", html_data);
  140. auto parser = HTML::HTMLParser::create(document, html_data, "utf-8");
  141. parser->run(document.url());
  142. return true;
  143. }
  144. static bool build_xml_document(DOM::Document& document, ByteBuffer const& data)
  145. {
  146. XML::Parser parser(data, { .resolve_external_resource = resolve_xml_resource });
  147. XMLDocumentBuilder builder { document };
  148. auto result = parser.parse_with_listener(builder);
  149. return !result.is_error() && !builder.has_error();
  150. }
  151. bool FrameLoader::parse_document(DOM::Document& document, ByteBuffer const& data)
  152. {
  153. auto& mime_type = document.content_type();
  154. if (mime_type == "text/html" || mime_type == "image/svg+xml") {
  155. auto parser = HTML::HTMLParser::create_with_uncertain_encoding(document, data);
  156. parser->run(document.url());
  157. return true;
  158. }
  159. if (mime_type.ends_with("+xml"sv) || mime_type.is_one_of("text/xml", "application/xml"))
  160. return build_xml_document(document, data);
  161. if (mime_type.starts_with("image/"sv))
  162. return build_image_document(document, data);
  163. if (mime_type == "text/plain" || mime_type == "application/json")
  164. return build_text_document(document, data);
  165. if (mime_type == "text/markdown")
  166. return build_markdown_document(document, data);
  167. if (mime_type == "text/gemini")
  168. return build_gemini_document(document, data);
  169. return false;
  170. }
  171. bool FrameLoader::load(LoadRequest& request, Type type)
  172. {
  173. if (!request.is_valid()) {
  174. load_error_page(request.url(), "Invalid request");
  175. return false;
  176. }
  177. if (!m_browsing_context.is_frame_nesting_allowed(request.url())) {
  178. dbgln("No further recursion is allowed for the frame, abort load!");
  179. return false;
  180. }
  181. auto& url = request.url();
  182. if (type == Type::Navigation || type == Type::Reload) {
  183. if (auto* page = browsing_context().page()) {
  184. if (&page->top_level_browsing_context() == &m_browsing_context)
  185. page->client().page_did_start_loading(url);
  186. }
  187. }
  188. // https://fetch.spec.whatwg.org/#concept-fetch
  189. // Step 12: If request’s header list does not contain `Accept`, then:
  190. // 1. Let value be `*/*`. (NOTE: Not necessary as we're about to override it)
  191. // 2. A user agent should set value to the first matching statement, if any, switching on request’s destination:
  192. // -> "document"
  193. // -> "frame"
  194. // -> "iframe"
  195. // `text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8`
  196. if (!request.headers().contains("Accept"))
  197. request.set_header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
  198. set_resource(ResourceLoader::the().load_resource(Resource::Type::Generic, request));
  199. if (type == Type::IFrame)
  200. return true;
  201. auto* document = browsing_context().active_document();
  202. if (document && document->has_active_favicon())
  203. return true;
  204. if (url.scheme() == "http" || url.scheme() == "https") {
  205. AK::URL favicon_url;
  206. favicon_url.set_scheme(url.scheme());
  207. favicon_url.set_host(url.host());
  208. favicon_url.set_port(url.port_or_default());
  209. favicon_url.set_paths({ "favicon.ico" });
  210. ResourceLoader::the().load(
  211. favicon_url,
  212. [this, favicon_url](auto data, auto&, auto) {
  213. // Always fetch the current document
  214. auto* document = this->browsing_context().active_document();
  215. if (document && document->has_active_favicon())
  216. return;
  217. dbgln_if(SPAM_DEBUG, "Favicon downloaded, {} bytes from {}", data.size(), favicon_url);
  218. if (data.is_empty())
  219. return;
  220. RefPtr<Gfx::Bitmap> favicon_bitmap;
  221. auto decoded_image = Platform::ImageCodecPlugin::the().decode_image(data);
  222. if (!decoded_image.has_value() || decoded_image->frames.is_empty()) {
  223. dbgln("Could not decode favicon {}", favicon_url);
  224. } else {
  225. favicon_bitmap = decoded_image->frames[0].bitmap;
  226. dbgln_if(IMAGE_DECODER_DEBUG, "Decoded favicon, {}", favicon_bitmap->size());
  227. }
  228. load_favicon(favicon_bitmap);
  229. },
  230. [this](auto&, auto) {
  231. // Always fetch the current document
  232. auto* document = this->browsing_context().active_document();
  233. if (document && document->has_active_favicon())
  234. return;
  235. load_favicon();
  236. });
  237. } else {
  238. load_favicon();
  239. }
  240. return true;
  241. }
  242. bool FrameLoader::load(const AK::URL& url, Type type)
  243. {
  244. dbgln_if(SPAM_DEBUG, "FrameLoader::load: {}", url);
  245. if (!url.is_valid()) {
  246. load_error_page(url, "Invalid URL");
  247. return false;
  248. }
  249. auto request = LoadRequest::create_for_url_on_page(url, browsing_context().page());
  250. return load(request, type);
  251. }
  252. void FrameLoader::load_html(StringView html, const AK::URL& url)
  253. {
  254. auto& vm = Bindings::main_thread_vm();
  255. auto response = Fetch::Infrastructure::Response::create(vm);
  256. response->url_list().append(url);
  257. HTML::NavigationParams navigation_params {
  258. .id = {},
  259. .request = nullptr,
  260. .response = response,
  261. .origin = HTML::Origin {},
  262. .policy_container = HTML::PolicyContainer {},
  263. .final_sandboxing_flag_set = HTML::SandboxingFlagSet {},
  264. .cross_origin_opener_policy = HTML::CrossOriginOpenerPolicy {},
  265. .coop_enforcement_result = HTML::CrossOriginOpenerPolicyEnforcementResult {},
  266. .reserved_environment = {},
  267. .browsing_context = browsing_context(),
  268. };
  269. auto document = DOM::Document::create_and_initialize(
  270. DOM::Document::Type::HTML,
  271. "text/html",
  272. move(navigation_params));
  273. browsing_context().set_active_document(document);
  274. auto parser = HTML::HTMLParser::create(document, html, "utf-8");
  275. parser->run(url);
  276. }
  277. static String s_error_page_url = "file:///res/html/error.html";
  278. void FrameLoader::set_error_page_url(String error_page_url)
  279. {
  280. s_error_page_url = error_page_url;
  281. }
  282. // FIXME: Use an actual templating engine (our own one when it's built, preferably
  283. // with a way to check these usages at compile time)
  284. void FrameLoader::load_error_page(const AK::URL& failed_url, String const& error)
  285. {
  286. ResourceLoader::the().load(
  287. s_error_page_url,
  288. [this, failed_url, error](auto data, auto&, auto) {
  289. VERIFY(!data.is_null());
  290. StringBuilder builder;
  291. SourceGenerator generator { builder };
  292. generator.set("failed_url", escape_html_entities(failed_url.to_string()));
  293. generator.set("error", escape_html_entities(error));
  294. generator.append(data);
  295. load_html(generator.as_string_view(), failed_url);
  296. },
  297. [](auto& error, auto) {
  298. dbgln("Failed to load error page: {}", error);
  299. VERIFY_NOT_REACHED();
  300. });
  301. }
  302. void FrameLoader::load_favicon(RefPtr<Gfx::Bitmap> bitmap)
  303. {
  304. if (auto* page = browsing_context().page()) {
  305. if (bitmap)
  306. page->client().page_did_change_favicon(*bitmap);
  307. else if (s_default_favicon_bitmap)
  308. page->client().page_did_change_favicon(*s_default_favicon_bitmap);
  309. }
  310. }
  311. void FrameLoader::store_response_cookies(AK::URL const& url, String const& cookies)
  312. {
  313. auto* page = browsing_context().page();
  314. if (!page)
  315. return;
  316. auto set_cookie_json_value = MUST(JsonValue::from_string(cookies));
  317. VERIFY(set_cookie_json_value.type() == JsonValue::Type::Array);
  318. for (auto const& set_cookie_entry : set_cookie_json_value.as_array().values()) {
  319. VERIFY(set_cookie_entry.type() == JsonValue::Type::String);
  320. auto cookie = Cookie::parse_cookie(set_cookie_entry.as_string());
  321. if (!cookie.has_value())
  322. continue;
  323. page->client().page_did_set_cookie(url, cookie.value(), Cookie::Source::Http); // FIXME: Determine cookie source correctly
  324. }
  325. }
  326. void FrameLoader::resource_did_load()
  327. {
  328. auto url = resource()->url();
  329. if (auto set_cookie = resource()->response_headers().get("Set-Cookie"); set_cookie.has_value())
  330. store_response_cookies(url, *set_cookie);
  331. // For 3xx (Redirection) responses, the Location value refers to the preferred target resource for automatically redirecting the request.
  332. auto status_code = resource()->status_code();
  333. if (status_code.has_value() && *status_code >= 300 && *status_code <= 399) {
  334. auto location = resource()->response_headers().get("Location");
  335. if (location.has_value()) {
  336. if (m_redirects_count > maximum_redirects_allowed) {
  337. m_redirects_count = 0;
  338. load_error_page(url, "Too many redirects");
  339. return;
  340. }
  341. m_redirects_count++;
  342. load(url.complete_url(location.value()), FrameLoader::Type::Navigation);
  343. return;
  344. }
  345. }
  346. m_redirects_count = 0;
  347. if (resource()->has_encoding()) {
  348. dbgln_if(RESOURCE_DEBUG, "This content has MIME type '{}', encoding '{}'", resource()->mime_type(), resource()->encoding().value());
  349. } else {
  350. dbgln_if(RESOURCE_DEBUG, "This content has MIME type '{}', encoding unknown", resource()->mime_type());
  351. }
  352. auto final_sandboxing_flag_set = HTML::SandboxingFlagSet {};
  353. // (Part of https://html.spec.whatwg.org/#navigating-across-documents)
  354. // 3. Let responseOrigin be the result of determining the origin given browsingContext, resource's url, finalSandboxFlags, and incumbentNavigationOrigin.
  355. // FIXME: Pass incumbentNavigationOrigin
  356. auto response_origin = HTML::determine_the_origin(browsing_context(), url, final_sandboxing_flag_set, {});
  357. auto& vm = Bindings::main_thread_vm();
  358. auto response = Fetch::Infrastructure::Response::create(vm);
  359. response->url_list().append(url);
  360. HTML::NavigationParams navigation_params {
  361. .id = {},
  362. .request = nullptr,
  363. .response = response,
  364. .origin = move(response_origin),
  365. .policy_container = HTML::PolicyContainer {},
  366. .final_sandboxing_flag_set = final_sandboxing_flag_set,
  367. .cross_origin_opener_policy = HTML::CrossOriginOpenerPolicy {},
  368. .coop_enforcement_result = HTML::CrossOriginOpenerPolicyEnforcementResult {},
  369. .reserved_environment = {},
  370. .browsing_context = browsing_context(),
  371. };
  372. auto document = DOM::Document::create_and_initialize(
  373. DOM::Document::Type::HTML,
  374. "text/html",
  375. move(navigation_params));
  376. document->set_url(url);
  377. document->set_encoding(resource()->encoding());
  378. document->set_content_type(resource()->mime_type());
  379. browsing_context().set_active_document(document);
  380. if (auto* page = browsing_context().page())
  381. page->client().page_did_create_main_document();
  382. if (!parse_document(*document, resource()->encoded_data())) {
  383. load_error_page(url, "Failed to parse content.");
  384. return;
  385. }
  386. if (!url.fragment().is_empty())
  387. browsing_context().scroll_to_anchor(url.fragment());
  388. else
  389. browsing_context().scroll_to({ 0, 0 });
  390. if (auto* page = browsing_context().page())
  391. page->client().page_did_finish_loading(url);
  392. }
  393. void FrameLoader::resource_did_fail()
  394. {
  395. load_error_page(resource()->url(), resource()->error());
  396. }
  397. }