ladybird/Userland/Libraries/LibWeb/Loader/FrameLoader.cpp
MacDue 35612c6a7f AK+Everywhere: Change URL::path() to serialize_path()
This now defaults to serializing the path with percent decoded segments
(which is what all callers expect), but has an option not to. This fixes
`file://` URLs with spaces in their paths.

The name has been changed to serialize_path() to make it clearer
that this method will generate a new string each call (except for the
cannot_be_a_base_url() case). A few callers have then been updated to
avoid repeatedly calling this function.
2023-04-15 06:37:04 +02:00

503 lines
20 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Copyright (c) 2020-2022, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Debug.h>
#include <AK/JsonArray.h>
#include <AK/LexicalPath.h>
#include <AK/SourceGenerator.h>
#include <LibGemini/Document.h>
#include <LibGfx/ImageFormats/ImageDecoder.h>
#include <LibMarkdown/Document.h>
#include <LibTextCodec/Decoder.h>
#include <LibWeb/Bindings/MainThreadVM.h>
#include <LibWeb/DOM/Document.h>
#include <LibWeb/DOM/ElementFactory.h>
#include <LibWeb/DOM/Text.h>
#include <LibWeb/HTML/BrowsingContext.h>
#include <LibWeb/HTML/HTMLIFrameElement.h>
#include <LibWeb/HTML/NavigationParams.h>
#include <LibWeb/HTML/Parser/HTMLEncodingDetection.h>
#include <LibWeb/HTML/Parser/HTMLParser.h>
#include <LibWeb/Loader/FrameLoader.h>
#include <LibWeb/Loader/ResourceLoader.h>
#include <LibWeb/Namespace.h>
#include <LibWeb/Page/Page.h>
#include <LibWeb/Platform/ImageCodecPlugin.h>
#include <LibWeb/XML/XMLDocumentBuilder.h>
namespace Web {
// Path that the default favicon bitmap is loaded from. Can be replaced through
// FrameLoader::set_default_favicon_path().
static DeprecatedString s_default_favicon_path = "/res/icons/16x16/app-browser.png";
// The decoded default favicon. It starts out null and is filled in by the FrameLoader constructor
// the first time it finds it unset; load_favicon() hands it to the page client when no other bitmap is given.
static RefPtr<Gfx::Bitmap> s_default_favicon_bitmap;
// Replaces the path the default favicon bitmap is loaded from.
// NOTE(review): the bitmap is only loaded while s_default_favicon_bitmap is still null, so this presumably
// has to be called before the first FrameLoader is constructed to have any effect -- confirm with callers.
void FrameLoader::set_default_favicon_path(DeprecatedString path)
{
s_default_favicon_path = move(path);
}
// Creates a loader bound to `browsing_context`.
// As a side effect, the first construction that finds the shared default favicon bitmap unset loads it
// from s_default_favicon_path; failing to load it is fatal.
FrameLoader::FrameLoader(HTML::BrowsingContext& browsing_context)
    : m_browsing_context(browsing_context)
{
    // The default favicon is shared by all loaders, so there is nothing to do once it exists.
    if (s_default_favicon_bitmap)
        return;

    s_default_favicon_bitmap = Gfx::Bitmap::load_from_file(s_default_favicon_path).release_value_but_fixme_should_propagate_errors();
    VERIFY(s_default_favicon_bitmap);
}

FrameLoader::~FrameLoader() = default;
// Parses `data` as Markdown, renders it to HTML and feeds that HTML through the HTML parser into `document`.
// Returns false if the Markdown parser does not produce a document, true otherwise.
static bool build_markdown_document(DOM::Document& document, ByteBuffer const& data)
{
    auto parsed_markdown = Markdown::Document::parse(data);
    if (!parsed_markdown)
        return false;

    // Extra <head> content passed to the Markdown renderer: a stylesheet plus a script that marks images
    // wider than the window as "zoomable" and toggles a "zoomed-in" class on them when they are clicked.
    auto const head_additions = R"~~~(
<style>
.zoomable {
cursor: zoom-in;
max-width: 100%;
}
.zoomable.zoomed-in {
cursor: zoom-out;
max-width: none;
}
</style>
<script>
function imageClickEventListener(event) {
let image = event.target;
if (image.classList.contains("zoomable")) {
image.classList.toggle("zoomed-in");
}
}
function processImages() {
let images = document.querySelectorAll("img");
let windowWidth = window.innerWidth;
images.forEach((image) => {
if (image.naturalWidth > windowWidth) {
image.classList.add("zoomable");
} else {
image.classList.remove("zoomable");
image.classList.remove("zoomed-in");
}
image.addEventListener("click", imageClickEventListener);
});
}
document.addEventListener("load", () => {
processImages();
});
window.addEventListener("resize", () => {
processImages();
});
</script>
)~~~"sv;

    auto rendered_html = parsed_markdown->render_to_html(head_additions);
    auto html_parser = HTML::HTMLParser::create(document, rendered_html, "utf-8");
    html_parser->run(document.url());
    return true;
}
// Builds a minimal DOM for a plain-text resource directly (no HTML parser involved):
//   <html><head><title>{url basename}</title></head><body><pre>{data}</pre></body></html>
// Always returns true; failures while creating or appending nodes are fatal.
static bool build_text_document(DOM::Document& document, ByteBuffer const& data)
{
    // Every element in this tree is an HTML-namespace element owned by `document`.
    auto make_html_element = [&document](auto const& tag_name) {
        return DOM::create_element(document, tag_name, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
    };

    auto root = make_html_element(HTML::TagNames::html);
    MUST(document.append_child(root));

    auto head = make_html_element(HTML::TagNames::head);
    MUST(root->append_child(head));

    // The title is the basename of the document URL.
    auto title = make_html_element(HTML::TagNames::title);
    MUST(head->append_child(title));
    MUST(title->append_child(document.create_text_node(document.url().basename())));

    auto body = make_html_element(HTML::TagNames::body);
    MUST(root->append_child(body));

    // The raw bytes are copied into a single text node inside a <pre>.
    auto pre = make_html_element(HTML::TagNames::pre);
    MUST(body->append_child(pre));

    auto file_text = document.create_text_node(DeprecatedString::copy(data));
    MUST(pre->append_child(file_text));
    return true;
}
// Builds a DOM that shows an image resource: a <title> of the form "{basename} [{width}x{height}]" and a
// body containing one <img> whose src is the document URL.
// `data` is decoded only to validate it and to obtain the dimensions for the title.
// Returns false if decoding fails, yields no frames, or the first frame has no bitmap.
static bool build_image_document(DOM::Document& document, ByteBuffer const& data)
{
    auto decoded = Platform::ImageCodecPlugin::the().decode_image(data);
    if (!decoded.has_value() || decoded->frames.is_empty())
        return false;

    // Only the first frame is looked at.
    auto const& first_bitmap = decoded->frames[0].bitmap;
    if (!first_bitmap)
        return false;

    auto make_html_element = [&document](auto const& tag_name) {
        return DOM::create_element(document, tag_name, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
    };

    auto root = make_html_element(HTML::TagNames::html);
    MUST(document.append_child(root));

    auto head = make_html_element(HTML::TagNames::head);
    MUST(root->append_child(head));

    auto title = make_html_element(HTML::TagNames::title);
    MUST(head->append_child(title));

    auto file_name = LexicalPath::basename(document.url().serialize_path());
    auto title_string = DeprecatedString::formatted("{} [{}x{}]", file_name, first_bitmap->width(), first_bitmap->height());
    auto title_text = document.heap().allocate<DOM::Text>(document.realm(), document, title_string).release_allocated_value_but_fixme_should_propagate_errors();
    MUST(title->append_child(*title_text));

    auto body = make_html_element(HTML::TagNames::body);
    MUST(root->append_child(body));

    // The <img> points back at the document URL; src is set before the element is inserted.
    auto img = make_html_element(HTML::TagNames::img);
    MUST(img->set_attribute(HTML::AttributeNames::src, document.url().to_deprecated_string()));
    MUST(body->append_child(img));
    return true;
}
// Parses `data` as a Gemini document, renders it to HTML and runs the HTML parser over the result.
// Always returns true.
static bool build_gemini_document(DOM::Document& document, ByteBuffer const& data)
{
    StringView source_text { data };
    auto parsed_document = Gemini::Document::parse(source_text, document.url());
    DeprecatedString rendered_html = parsed_document->render_to_html();

    // Both the input and the generated HTML are dumped when GEMINI_DEBUG is enabled.
    dbgln_if(GEMINI_DEBUG, "Gemini data:\n\"\"\"{}\"\"\"", source_text);
    dbgln_if(GEMINI_DEBUG, "Converted to HTML:\n\"\"\"{}\"\"\"", rendered_html);

    auto html_parser = HTML::HTMLParser::create(document, rendered_html, "utf-8");
    html_parser->run(document.url());
    return true;
}
// Decodes `data` to UTF-8 using the encoding picked by the HTML encoding sniffing algorithm, then parses it
// as XML while an XMLDocumentBuilder populates `document`.
// Returns true only if neither the XML parser nor the builder reported an error.
static bool build_xml_document(DOM::Document& document, ByteBuffer const& data)
{
    auto sniffed_encoding = HTML::run_encoding_sniffing_algorithm(document, data);
    auto text_decoder = TextCodec::decoder_for(sniffed_encoding);
    // A missing decoder for the sniffed encoding is treated as a fatal error.
    VERIFY(text_decoder.has_value());
    auto utf8_source = text_decoder->to_utf8(data).release_value_but_fixme_should_propagate_errors();

    XML::Parser xml_parser(utf8_source, { .resolve_external_resource = resolve_xml_resource });
    XMLDocumentBuilder dom_builder { document };
    auto parse_result = xml_parser.parse_with_listener(dom_builder);

    if (parse_result.is_error())
        return false;
    return !dom_builder.has_error();
}
// Builds a DOM that plays a video resource: an empty <head> and a <body> containing one <video> element
// whose src is the document URL, with the autoplay and controls attributes set (to empty strings).
// Always returns true; failures while creating or appending nodes are fatal.
static bool build_video_document(DOM::Document& document)
{
    auto make_html_element = [&document](auto const& tag_name) {
        return DOM::create_element(document, tag_name, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
    };

    auto root = make_html_element(HTML::TagNames::html);
    MUST(document.append_child(root));

    auto head = make_html_element(HTML::TagNames::head);
    MUST(root->append_child(head));

    auto body = make_html_element(HTML::TagNames::body);
    MUST(root->append_child(body));

    // All attributes are set before the element is inserted into the tree.
    auto video = make_html_element(HTML::TagNames::video);
    MUST(video->set_attribute(HTML::AttributeNames::src, document.url().to_deprecated_string()));
    MUST(video->set_attribute(HTML::AttributeNames::autoplay, DeprecatedString::empty()));
    MUST(video->set_attribute(HTML::AttributeNames::controls, DeprecatedString::empty()));
    MUST(body->append_child(video));
    return true;
}
// Populates `document` from `data`, choosing a builder based on the document's content type.
// The checks run in order, so earlier matches win; notably "image/svg+xml" is handed to the HTML parser
// before the generic "+xml" and "image/" rules get a chance to match it.
// Returns the chosen builder's result, or false if no builder handles the content type.
bool FrameLoader::parse_document(DOM::Document& document, ByteBuffer const& data)
{
    auto& content_type = document.content_type();

    if (content_type.is_one_of("text/html", "image/svg+xml")) {
        auto html_parser = HTML::HTMLParser::create_with_uncertain_encoding(document, data);
        html_parser->run(document.url());
        return true;
    }

    if (content_type.ends_with("+xml"sv) || content_type == "text/xml" || content_type == "application/xml")
        return build_xml_document(document, data);

    if (content_type.starts_with("image/"sv))
        return build_image_document(document, data);

    if (content_type.starts_with("video/"sv))
        return build_video_document(document);

    if (content_type.is_one_of("text/plain", "application/json"))
        return build_text_document(document, data);

    if (content_type == "text/markdown")
        return build_markdown_document(document, data);

    if (content_type == "text/gemini")
        return build_gemini_document(document, data);

    return false;
}
// Starts loading `request` as the main resource for this loader's browsing context.
//
// Returns false without starting the load if the request is invalid (an error page load is started instead)
// or if the browsing context does not allow further frame nesting for the request URL. Returns true otherwise.
//
// For every load type except Type::IFrame this also takes care of the favicon, unless the active document
// already has one: http(s) URLs get "/favicon.ico" fetched from the same scheme/host/port, everything else
// gets the default favicon right away.
bool FrameLoader::load(LoadRequest& request, Type type)
{
    if (!request.is_valid()) {
        load_error_page(request.url(), "Invalid request");
        return false;
    }

    if (!m_browsing_context->is_frame_nesting_allowed(request.url())) {
        dbgln("No further recursion is allowed for the frame, abort load!");
        return false;
    }

    request.set_main_resource(true);

    auto& url = request.url();

    // Only the top-level browsing context tells the page client that loading has started.
    if (type == Type::Navigation || type == Type::Reload || type == Type::Redirect) {
        if (auto* page = browsing_context().page()) {
            if (&page->top_level_browsing_context() == m_browsing_context)
                page->client().page_did_start_loading(url, type == Type::Redirect);
        }
    }

    // https://fetch.spec.whatwg.org/#concept-fetch
    // Step 12: If request's header list does not contain `Accept`, then:
    // 1. Let value be `*/*`. (NOTE: Not necessary as we're about to override it)
    // 2. A user agent should set value to the first matching statement, if any, switching on request's destination:
    // -> "document"
    // -> "frame"
    // -> "iframe"
    // `text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8`
    if (!request.headers().contains("Accept"))
        request.set_header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");

    set_resource(ResourceLoader::the().load_resource(Resource::Type::Generic, request));

    // No favicon handling for iframe loads.
    if (type == Type::IFrame)
        return true;

    auto* document = browsing_context().active_document();
    if (document && document->has_active_favicon())
        return true;

    if (url.scheme() == "http" || url.scheme() == "https") {
        AK::URL favicon_url;
        favicon_url.set_scheme(url.scheme());
        favicon_url.set_host(url.host());
        favicon_url.set_port(url.port_or_default());
        favicon_url.set_paths({ "favicon.ico" });

        ResourceLoader::the().load(
            favicon_url,
            [this, favicon_url](auto data, auto&, auto) {
                // Always fetch the current document
                auto* document = this->browsing_context().active_document();
                if (document && document->has_active_favicon())
                    return;

                dbgln_if(SPAM_DEBUG, "Favicon downloaded, {} bytes from {}", data.size(), favicon_url);
                if (data.is_empty())
                    return;

                RefPtr<Gfx::Bitmap> favicon_bitmap;
                auto decoded_image = Platform::ImageCodecPlugin::the().decode_image(data);
                if (decoded_image.has_value() && !decoded_image->frames.is_empty())
                    favicon_bitmap = decoded_image->frames[0].bitmap;

                // A decoded frame may carry a null bitmap (build_image_document() checks for this as well),
                // so only dereference the bitmap for logging once it is known to be non-null.
                if (favicon_bitmap) {
                    dbgln_if(IMAGE_DECODER_DEBUG, "Decoded favicon, {}", favicon_bitmap->size());
                } else {
                    dbgln("Could not decode favicon {}", favicon_url);
                }

                // A null bitmap makes load_favicon() fall back to the default favicon.
                load_favicon(favicon_bitmap);
            },
            [this](auto&, auto) {
                // Always fetch the current document
                auto* document = this->browsing_context().active_document();
                if (document && document->has_active_favicon())
                    return;

                load_favicon();
            });
    } else {
        load_favicon();
    }

    return true;
}
// Convenience overload: wraps `url` in a LoadRequest for this browsing context's page and loads it.
// An invalid URL results in an error page load and a return value of false.
bool FrameLoader::load(const AK::URL& url, Type type)
{
    dbgln_if(SPAM_DEBUG, "FrameLoader::load: {}", url);

    if (url.is_valid()) {
        auto load_request = LoadRequest::create_for_url_on_page(url, browsing_context().page());
        return load(load_request, type);
    }

    load_error_page(url, "Invalid URL");
    return false;
}
// Replaces the browsing context's active document with a fresh "text/html" document whose content is parsed
// from the in-memory string `html` (treated as UTF-8). `url` is recorded in the synthetic response's URL list
// and passed to the parser run.
void FrameLoader::load_html(StringView html, const AK::URL& url)
{
    // There is no real fetch behind this document, so a bare response carrying only the URL stands in for one.
    auto& main_vm = Bindings::main_thread_vm();
    auto synthetic_response = Fetch::Infrastructure::Response::create(main_vm);
    synthetic_response->url_list().append(url);

    // Everything besides the response and the browsing context is left default-constructed.
    HTML::NavigationParams params {
        .id = {},
        .request = nullptr,
        .response = synthetic_response,
        .origin = HTML::Origin {},
        .policy_container = HTML::PolicyContainer {},
        .final_sandboxing_flag_set = HTML::SandboxingFlagSet {},
        .cross_origin_opener_policy = HTML::CrossOriginOpenerPolicy {},
        .coop_enforcement_result = HTML::CrossOriginOpenerPolicyEnforcementResult {},
        .reserved_environment = {},
        .browsing_context = browsing_context(),
    };

    auto new_document = DOM::Document::create_and_initialize(DOM::Document::Type::HTML, "text/html", move(params)).release_value_but_fixme_should_propagate_errors();
    // The document becomes active before parsing starts.
    browsing_context().set_active_document(new_document);

    auto html_parser = HTML::HTMLParser::create(new_document, html, "utf-8");
    html_parser->run(url);
}
// URL of the HTML template that load_error_page() fetches and fills in.
static DeprecatedString s_error_page_url = "file:///res/html/error.html";

// Replaces the URL the error page template is loaded from.
void FrameLoader::set_error_page_url(DeprecatedString error_page_url)
{
    // The argument is already our own copy (taken by value), so move it into place rather than copying it
    // a second time; this matches how set_default_favicon_path() stores its argument.
    s_error_page_url = move(error_page_url);
}
// Shows an error page describing why `failed_url` could not be loaded.
// The template at s_error_page_url is fetched, its "failed_url" and "error" placeholders are filled with
// HTML-escaped values, and the result is loaded through load_html() with s_error_page_url as the URL.
// Failing to fetch the template itself is treated as unreachable (fatal).
// FIXME: Use an actual templating engine (our own one when it's built, preferably
// with a way to check these usages at compile time)
void FrameLoader::load_error_page(const AK::URL& failed_url, DeprecatedString const& error)
{
    auto template_request = LoadRequest::create_for_url_on_page(s_error_page_url, browsing_context().page());

    ResourceLoader::the().load(
        template_request,
        [this, failed_url, error](auto data, auto&, auto) {
            VERIFY(!data.is_null());

            // Both values end up inside HTML markup, so they are escaped before substitution.
            auto escaped_url = escape_html_entities(failed_url.to_deprecated_string());
            auto escaped_message = escape_html_entities(error);

            StringBuilder page_source;
            SourceGenerator page_template { page_source };
            page_template.set("failed_url", escaped_url);
            page_template.set("error", escaped_message);
            page_template.append(data);

            load_html(page_template.as_string_view(), s_error_page_url);
        },
        [](auto& error, auto) {
            dbgln("Failed to load error page: {}", error);
            VERIFY_NOT_REACHED();
        });
}
// Reports a favicon change to the page client: `bitmap` if it is non-null, otherwise the shared default
// favicon if that one is available. Does nothing when the browsing context has no page.
void FrameLoader::load_favicon(RefPtr<Gfx::Bitmap> bitmap)
{
    auto* page = browsing_context().page();
    if (!page)
        return;

    if (bitmap) {
        page->client().page_did_change_favicon(*bitmap);
        return;
    }

    if (s_default_favicon_bitmap)
        page->client().page_did_change_favicon(*s_default_favicon_bitmap);
}
// Called when the main resource has finished loading.
// In order, this: bails out for discarded browsing contexts, follows 3xx redirects that carry a Location
// header (up to a limit), creates a new document for the response, makes it the active document, parses the
// resource data into it, scrolls to the URL fragment (or to the top), and finally notifies the page client.
// A parse failure results in an error page load instead of the scroll/finish steps.
void FrameLoader::resource_did_load()
{
// This prevents us setting up the document of a removed browsing context container (BCC, e.g. <iframe>), which will cause a crash
// if the document contains a script that inserts another BCC as this will use the stale browsing context it previously set up,
// even if it's reinserted.
// Example:
// index.html:
// ```
// <body><script>
// var i = document.createElement("iframe");
// i.src = "b.html";
// document.body.append(i);
// i.remove();
// </script>
// ```
// b.html:
// ```
// <body><script>
// var i = document.createElement("iframe");
// document.body.append(i);
// </script>
// ```
// Required by Prebid.js, which does this by inserting an <iframe> into a <div> in the active document via innerHTML,
// then transfers it to the <html> element:
// https://github.com/prebid/Prebid.js/blob/7b7389c5abdd05626f71c3df606a93713d1b9f85/src/utils.js#L597
// This is done in the spec by removing all tasks and aborting all fetches when a document is destroyed:
// https://html.spec.whatwg.org/multipage/document-lifecycle.html#destroy-a-document
if (browsing_context().has_been_discarded())
return;
auto url = resource()->url();
// For 3xx (Redirection) responses, the Location value refers to the preferred target resource for automatically redirecting the request.
auto status_code = resource()->status_code();
if (status_code.has_value() && *status_code >= 300 && *status_code <= 399) {
auto location = resource()->response_headers().get("Location");
// A 3xx response without a Location header is not followed; it falls through and is handled like any other response.
if (location.has_value()) {
// NOTE(review): the counter is checked with '>' before it is incremented, so one more redirect than
// maximum_redirects_allowed is followed before the error page is shown -- confirm whether '>=' was intended.
if (m_redirects_count > maximum_redirects_allowed) {
m_redirects_count = 0;
load_error_page(url, "Too many redirects");
return;
}
m_redirects_count++;
// The Location value is resolved against the URL of the resource that issued the redirect.
load(url.complete_url(location.value()), Type::Redirect);
return;
}
}
// Not following a redirect: the redirect chain (if any) has ended, so reset the counter.
m_redirects_count = 0;
if (resource()->has_encoding()) {
dbgln_if(RESOURCE_DEBUG, "This content has MIME type '{}', encoding '{}'", resource()->mime_type(), resource()->encoding().value());
} else {
dbgln_if(RESOURCE_DEBUG, "This content has MIME type '{}', encoding unknown", resource()->mime_type());
}
auto final_sandboxing_flag_set = HTML::SandboxingFlagSet {};
// (Part of https://html.spec.whatwg.org/#navigating-across-documents)
// 3. Let responseOrigin be the result of determining the origin given browsingContext, resource's url, finalSandboxFlags, and incumbentNavigationOrigin.
// FIXME: Pass incumbentNavigationOrigin
auto response_origin = HTML::determine_the_origin(browsing_context(), url, final_sandboxing_flag_set, {});
// A freshly created response that only carries the resource URL is handed to the navigation params.
auto& vm = Bindings::main_thread_vm();
auto response = Fetch::Infrastructure::Response::create(vm);
response->url_list().append(url);
HTML::NavigationParams navigation_params {
.id = {},
.request = nullptr,
.response = response,
.origin = move(response_origin),
.policy_container = HTML::PolicyContainer {},
.final_sandboxing_flag_set = final_sandboxing_flag_set,
.cross_origin_opener_policy = HTML::CrossOriginOpenerPolicy {},
.coop_enforcement_result = HTML::CrossOriginOpenerPolicyEnforcementResult {},
.reserved_environment = {},
.browsing_context = browsing_context(),
};
// The document is created as an HTML "text/html" document; its URL, encoding and actual content type are
// then set from the resource.
auto document = DOM::Document::create_and_initialize(DOM::Document::Type::HTML, "text/html", move(navigation_params)).release_value_but_fixme_should_propagate_errors();
document->set_url(url);
document->set_encoding(resource()->encoding());
document->set_content_type(resource()->mime_type());
// The document is made active (and the page client is told about it) before any content is parsed into it.
browsing_context().set_active_document(document);
if (auto* page = browsing_context().page())
page->client().page_did_create_main_document();
if (!parse_document(*document, resource()->encoded_data())) {
load_error_page(url, "Failed to parse content.");
return;
}
// Scroll to the fragment target if the URL has one, otherwise to the top-left corner.
if (!url.fragment().is_empty())
browsing_context().scroll_to_anchor(url.fragment());
else
browsing_context().scroll_to({ 0, 0 });
if (auto* page = browsing_context().page())
page->client().page_did_finish_loading(url);
}
// Called when loading the main resource failed: shows an error page for the resource's URL with the
// resource's error text, unless the browsing context has been discarded in the meantime.
void FrameLoader::resource_did_fail()
{
    // See comment in resource_did_load() about why this is done.
    bool const context_was_discarded = browsing_context().has_been_discarded();
    if (!context_was_discarded)
        load_error_page(resource()->url(), resource()->error());
}
}