ladybird/Userland/Libraries/LibWeb/Loader/ResourceLoader.cpp
Shannon Booth e800605ad3 AK+LibURL: Move AK::URL into a new URL library
This URL library ends up being a relatively fundamental base library of
the system, as LibCore depends on LibURL.

This change has two main benefits:
 * Moving AK back more towards being an agnostic library that can
   be used between the kernel and userspace. URL has never really fit
   that description - and is not used in the kernel.
 * URL _should_ depend on LibUnicode, as it needs punnycode support.
   However, it's not really possible to do this inside of AK as it can't
   depend on any external library. This change brings us a little closer
   to being able to do that, but unfortunately we aren't there quite
   yet, as the code generators depend on LibCore.
2024-03-18 14:06:28 -04:00

471 lines
18 KiB
C++

/*
* Copyright (c) 2018-2023, Andreas Kling <kling@serenityos.org>
* Copyright (c) 2022, Dex♪ <dexes.ttp@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Debug.h>
#include <AK/JsonObject.h>
#include <LibCore/Directory.h>
#include <LibCore/ElapsedTimer.h>
#include <LibCore/MimeData.h>
#include <LibCore/Resource.h>
#include <LibWeb/Cookie/Cookie.h>
#include <LibWeb/Cookie/ParsedCookie.h>
#include <LibWeb/Loader/ContentFilter.h>
#include <LibWeb/Loader/GeneratedPagesLoader.h>
#include <LibWeb/Loader/LoadRequest.h>
#include <LibWeb/Loader/ProxyMappings.h>
#include <LibWeb/Loader/Resource.h>
#include <LibWeb/Loader/ResourceLoader.h>
#include <LibWeb/Platform/EventLoopPlugin.h>
#include <LibWeb/Platform/Timer.h>
#ifdef AK_OS_SERENITY
# include <serenity.h>
#endif
namespace Web {
ResourceLoaderConnectorRequest::ResourceLoaderConnectorRequest() = default;
ResourceLoaderConnectorRequest::~ResourceLoaderConnectorRequest() = default;
ResourceLoaderConnector::ResourceLoaderConnector() = default;
ResourceLoaderConnector::~ResourceLoaderConnector() = default;
static RefPtr<ResourceLoader> s_resource_loader;
void ResourceLoader::initialize(RefPtr<ResourceLoaderConnector> connector)
{
if (connector)
s_resource_loader = ResourceLoader::try_create(connector.release_nonnull()).release_value_but_fixme_should_propagate_errors();
}
ResourceLoader& ResourceLoader::the()
{
if (!s_resource_loader) {
dbgln("Web::ResourceLoader was not initialized");
VERIFY_NOT_REACHED();
}
return *s_resource_loader;
}
ErrorOr<NonnullRefPtr<ResourceLoader>> ResourceLoader::try_create(NonnullRefPtr<ResourceLoaderConnector> connector)
{
return adopt_nonnull_ref_or_enomem(new (nothrow) ResourceLoader(move(connector)));
}
ResourceLoader::ResourceLoader(NonnullRefPtr<ResourceLoaderConnector> connector)
: m_connector(move(connector))
, m_user_agent(MUST(String::from_utf8(default_user_agent)))
, m_platform(MUST(String::from_utf8(default_platform)))
{
}
void ResourceLoader::prefetch_dns(URL::URL const& url)
{
if (ContentFilter::the().is_filtered(url)) {
dbgln("ResourceLoader: Refusing to prefetch DNS for '{}': \033[31;1mURL was filtered\033[0m", url);
return;
}
m_connector->prefetch_dns(url);
}
void ResourceLoader::preconnect(URL::URL const& url)
{
if (url.scheme().is_one_of("file"sv, "data"sv))
return;
if (ContentFilter::the().is_filtered(url)) {
dbgln("ResourceLoader: Refusing to pre-connect to '{}': \033[31;1mURL was filtered\033[0m", url);
return;
}
m_connector->preconnect(url);
}
static HashMap<LoadRequest, NonnullRefPtr<Resource>> s_resource_cache;
RefPtr<Resource> ResourceLoader::load_resource(Resource::Type type, LoadRequest& request)
{
if (!request.is_valid())
return nullptr;
bool use_cache = request.url().scheme() != "file";
if (use_cache) {
auto it = s_resource_cache.find(request);
if (it != s_resource_cache.end()) {
if (it->value->type() != type) {
dbgln("FIXME: Not using cached resource for {} since there's a type mismatch.", request.url());
} else {
dbgln_if(CACHE_DEBUG, "Reusing cached resource for: {}", request.url());
return it->value;
}
}
}
auto resource = Resource::create({}, type, request);
if (use_cache)
s_resource_cache.set(request, resource);
load(
request,
[=](auto data, auto& headers, auto status_code) {
const_cast<Resource&>(*resource).did_load({}, data, headers, status_code);
},
[=](auto& error, auto status_code, auto, auto) {
const_cast<Resource&>(*resource).did_fail({}, error, status_code);
});
return resource;
}
static ByteString sanitized_url_for_logging(URL::URL const& url)
{
if (url.scheme() == "data"sv)
return "[data URL]"sv;
return url.to_byte_string();
}
static void emit_signpost(ByteString const& message, int id)
{
#ifdef AK_OS_SERENITY
auto string_id = perf_register_string(message.characters(), message.length());
perf_event(PERF_EVENT_SIGNPOST, string_id, id);
#else
(void)message;
(void)id;
#endif
}
static void store_response_cookies(Page& page, URL::URL const& url, ByteString const& cookies)
{
auto set_cookie_json_value = MUST(JsonValue::from_string(cookies));
VERIFY(set_cookie_json_value.type() == JsonValue::Type::Array);
for (auto const& set_cookie_entry : set_cookie_json_value.as_array().values()) {
VERIFY(set_cookie_entry.type() == JsonValue::Type::String);
auto cookie = Cookie::parse_cookie(set_cookie_entry.as_string());
if (!cookie.has_value())
continue;
page.client().page_did_set_cookie(url, cookie.value(), Cookie::Source::Http); // FIXME: Determine cookie source correctly
}
}
static size_t resource_id = 0;
static HashMap<ByteString, ByteString, CaseInsensitiveStringTraits> response_headers_for_file(StringView path)
{
// For file:// and resource:// URLs, we have to guess the MIME type, since there's no HTTP header to tell us what
// it is. We insert a fake Content-Type header here, so that clients can use it to learn the MIME type.
auto mime_type = Core::guess_mime_type_based_on_filename(path);
HashMap<ByteString, ByteString, CaseInsensitiveStringTraits> response_headers;
response_headers.set("Content-Type"sv, mime_type);
return response_headers;
}
void ResourceLoader::load(LoadRequest& request, SuccessCallback success_callback, ErrorCallback error_callback, Optional<u32> timeout, TimeoutCallback timeout_callback)
{
auto& url = request.url();
request.start_timer();
auto id = resource_id++;
auto url_for_logging = sanitized_url_for_logging(url);
emit_signpost(ByteString::formatted("Starting load: {}", url_for_logging), id);
dbgln_if(SPAM_DEBUG, "ResourceLoader: Starting load of: \"{}\"", url_for_logging);
auto const log_success = [url_for_logging, id](auto const& request) {
auto load_time_ms = request.load_time().to_milliseconds();
emit_signpost(ByteString::formatted("Finished load: {}", url_for_logging), id);
dbgln_if(SPAM_DEBUG, "ResourceLoader: Finished load of: \"{}\", Duration: {}ms", url_for_logging, load_time_ms);
};
auto const log_failure = [url_for_logging, id](auto const& request, auto const& error_message) {
auto load_time_ms = request.load_time().to_milliseconds();
emit_signpost(ByteString::formatted("Failed load: {}", url_for_logging), id);
dbgln("ResourceLoader: Failed load of: \"{}\", \033[31;1mError: {}\033[0m, Duration: {}ms", url_for_logging, error_message, load_time_ms);
};
auto respond_directory_page = [log_success, log_failure](LoadRequest const& request, URL::URL const& url, SuccessCallback const& success_callback, ErrorCallback const& error_callback) {
auto maybe_response = load_file_directory_page(url);
if (maybe_response.is_error()) {
log_failure(request, maybe_response.error());
if (error_callback)
error_callback(ByteString::formatted("{}", maybe_response.error()), 500u, {}, {});
return;
}
log_success(request);
HashMap<ByteString, ByteString, CaseInsensitiveStringTraits> response_headers;
response_headers.set("Content-Type"sv, "text/html"sv);
success_callback(maybe_response.release_value().bytes(), response_headers, {});
};
if (is_port_blocked(url.port_or_default())) {
log_failure(request, ByteString::formatted("The port #{} is blocked", url.port_or_default()));
return;
}
if (ContentFilter::the().is_filtered(url)) {
auto filter_message = "URL was filtered"sv;
log_failure(request, filter_message);
error_callback(filter_message, {}, {}, {});
return;
}
if (url.scheme() == "about") {
dbgln_if(SPAM_DEBUG, "Loading about: URL {}", url);
log_success(request);
HashMap<ByteString, ByteString, CaseInsensitiveStringTraits> response_headers;
response_headers.set("Content-Type", "text/html; charset=UTF-8");
// About version page
if (url.path_segment_at_index(0) == "version") {
success_callback(MUST(load_about_version_page()).bytes(), response_headers, {});
return;
}
// Other about static HTML pages
auto resource = Core::Resource::load_from_uri(MUST(String::formatted("resource://ladybird/{}.html", url.path_segment_at_index(0))));
if (!resource.is_error()) {
auto data = resource.value()->data();
success_callback(data, response_headers, {});
return;
}
Platform::EventLoopPlugin::the().deferred_invoke([success_callback = move(success_callback), response_headers = move(response_headers)] {
success_callback(ByteString::empty().to_byte_buffer(), response_headers, {});
});
return;
}
if (url.scheme() == "data") {
auto data_url_or_error = url.process_data_url();
if (data_url_or_error.is_error()) {
auto error_message = data_url_or_error.error().string_literal();
log_failure(request, error_message);
error_callback(error_message, {}, {}, {});
return;
}
auto data_url = data_url_or_error.release_value();
dbgln_if(SPAM_DEBUG, "ResourceLoader loading a data URL with mime-type: '{}', payload='{}'",
data_url.mime_type,
StringView(data_url.body.bytes()));
HashMap<ByteString, ByteString, CaseInsensitiveStringTraits> response_headers;
response_headers.set("Content-Type", data_url.mime_type.to_byte_string());
log_success(request);
Platform::EventLoopPlugin::the().deferred_invoke([data = move(data_url.body), response_headers = move(response_headers), success_callback = move(success_callback)] {
success_callback(data, response_headers, {});
});
return;
}
if (url.scheme() == "resource") {
auto resource = Core::Resource::load_from_uri(url.serialize());
if (resource.is_error()) {
log_failure(request, resource.error());
return;
}
// When resource URI is a directory use file directory loader to generate response
if (resource.value()->is_directory()) {
respond_directory_page(request, resource.value()->file_url(), success_callback, error_callback);
return;
}
auto data = resource.value()->data();
auto response_headers = response_headers_for_file(url.serialize_path());
log_success(request);
success_callback(data, response_headers, {});
return;
}
if (url.scheme() == "file") {
if (request.page())
m_page = request.page();
if (!m_page.has_value()) {
log_failure(request, "INTERNAL ERROR: No Page for request");
return;
}
FileRequest file_request(url.serialize_path(), [this, success_callback = move(success_callback), error_callback = move(error_callback), log_success, log_failure, request, respond_directory_page](ErrorOr<i32> file_or_error) {
--m_pending_loads;
if (on_load_counter_change)
on_load_counter_change();
if (file_or_error.is_error()) {
log_failure(request, file_or_error.error());
if (error_callback) {
auto status = file_or_error.error().code() == ENOENT ? 404u : 500u;
error_callback(ByteString::formatted("{}", file_or_error.error()), status, {}, {});
}
return;
}
auto const fd = file_or_error.value();
// When local file is a directory use file directory loader to generate response
auto maybe_is_valid_directory = Core::Directory::is_valid_directory(fd);
if (!maybe_is_valid_directory.is_error() && maybe_is_valid_directory.value()) {
respond_directory_page(request, request.url(), success_callback, error_callback);
return;
}
// Try to read file normally
auto maybe_file = Core::File::adopt_fd(fd, Core::File::OpenMode::Read);
if (maybe_file.is_error()) {
log_failure(request, maybe_file.error());
if (error_callback)
error_callback(ByteString::formatted("{}", maybe_file.error()), 500u, {}, {});
return;
}
auto file = maybe_file.release_value();
auto maybe_data = file->read_until_eof();
if (maybe_data.is_error()) {
log_failure(request, maybe_data.error());
if (error_callback)
error_callback(ByteString::formatted("{}", maybe_data.error()), 500u, {}, {});
return;
}
auto data = maybe_data.release_value();
auto response_headers = response_headers_for_file(request.url().serialize_path());
log_success(request);
success_callback(data, response_headers, {});
});
(*m_page)->client().request_file(move(file_request));
++m_pending_loads;
if (on_load_counter_change)
on_load_counter_change();
return;
}
if (url.scheme() == "http" || url.scheme() == "https" || url.scheme() == "gemini") {
auto proxy = ProxyMappings::the().proxy_for_url(url);
HashMap<ByteString, ByteString> headers;
headers.set("User-Agent", m_user_agent.to_byte_string());
headers.set("Accept-Encoding", "gzip, deflate, br");
for (auto& it : request.headers()) {
headers.set(it.key, it.value);
}
auto protocol_request = m_connector->start_request(request.method(), url, headers, request.body(), proxy);
if (!protocol_request) {
auto start_request_failure_msg = "Failed to initiate load"sv;
log_failure(request, start_request_failure_msg);
if (error_callback)
error_callback(start_request_failure_msg, {}, {}, {});
return;
}
if (timeout.has_value() && timeout.value() > 0) {
auto timer = Platform::Timer::create_single_shot(timeout.value(), nullptr);
timer->on_timeout = [timer, protocol_request, timeout_callback = move(timeout_callback)] {
protocol_request->stop();
if (timeout_callback)
timeout_callback();
};
timer->start();
}
m_active_requests.set(*protocol_request);
protocol_request->on_buffered_request_finish = [this, success_callback = move(success_callback), error_callback = move(error_callback), log_success, log_failure, request, &protocol_request = *protocol_request](bool success, auto, auto& response_headers, auto status_code, ReadonlyBytes payload) mutable {
--m_pending_loads;
if (on_load_counter_change)
on_load_counter_change();
if (request.page()) {
if (auto set_cookie = response_headers.get("Set-Cookie"); set_cookie.has_value())
store_response_cookies(*request.page(), request.url(), *set_cookie);
if (auto cache_control = response_headers.get("cache-control"); cache_control.has_value()) {
if (cache_control.value().contains("no-store"sv)) {
s_resource_cache.remove(request);
}
}
}
if (!success || (status_code.has_value() && *status_code >= 400 && *status_code <= 599 && (payload.is_empty() || !request.is_main_resource()))) {
StringBuilder error_builder;
if (status_code.has_value())
error_builder.appendff("Load failed: {}", *status_code);
else
error_builder.append("Load failed"sv);
log_failure(request, error_builder.string_view());
if (error_callback)
error_callback(error_builder.to_byte_string(), status_code, payload, response_headers);
return;
}
log_success(request);
success_callback(payload, response_headers, status_code);
Platform::EventLoopPlugin::the().deferred_invoke([this, &protocol_request] {
m_active_requests.remove(protocol_request);
});
};
protocol_request->set_should_buffer_all_input(true);
protocol_request->on_certificate_requested = []() -> ResourceLoaderConnectorRequest::CertificateAndKey {
return {};
};
++m_pending_loads;
if (on_load_counter_change)
on_load_counter_change();
return;
}
auto not_implemented_error = ByteString::formatted("Protocol not implemented: {}", url.scheme());
log_failure(request, not_implemented_error);
if (error_callback)
error_callback(not_implemented_error, {}, {}, {});
}
bool ResourceLoader::is_port_blocked(int port)
{
int ports[] { 1, 7, 9, 11, 13, 15, 17, 19, 20, 21, 22, 23, 25, 37, 42,
43, 53, 77, 79, 87, 95, 101, 102, 103, 104, 109, 110, 111, 113,
115, 117, 119, 123, 135, 139, 143, 179, 389, 465, 512, 513, 514,
515, 526, 530, 531, 532, 540, 556, 563, 587, 601, 636, 993, 995,
2049, 3659, 4045, 6000, 6379, 6665, 6666, 6667, 6668, 6669 };
for (auto blocked_port : ports)
if (port == blocked_port)
return true;
return false;
}
void ResourceLoader::clear_cache()
{
dbgln_if(CACHE_DEBUG, "Clearing {} items from ResourceLoader cache", s_resource_cache.size());
s_resource_cache.clear();
}
void ResourceLoader::evict_from_cache(LoadRequest const& request)
{
dbgln_if(CACHE_DEBUG, "Removing resource {} from cache", request.url());
s_resource_cache.remove(request);
}
}