ladybird/Userland/Libraries/LibWeb/Fetch/Infrastructure/URL.cpp
Timothy Flynn bfc9dc447f AK+LibWeb: Replace our home-grown base64 encoder/decoders with simdutf
We currently have 2 base64 coders: one in AK, another in LibWeb for a
"forgiving" implementation. ECMA-262 has an upcoming proposal which will
require a third implementation.

Instead, let's use the base64 implementation that is used by Node.js and
recommended by the upcoming proposal. It handles forgiving decoding as
well.

Our users of AK's implementation should be fine with the forgiving
implementation. The AK impl originally had naive forgiving behavior, but
that was removed solely for performance reasons.

Using http://mattmahoney.net/dc/enwik8.zip (100MB unzipped) as a test,
performance of our old home-grown implementations vs. the simdutf
implementation (on Linux x64):

                Encode    Decode
AK base64       0.226s    0.169s
LibWeb base64   N/A       1.244s
simdutf         0.161s    0.047s
2024-07-16 10:27:39 +02:00

112 lines
4.4 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Copyright (c) 2022, Linus Groh <linusg@serenityos.org>
* Copyright (c) 2022, Andreas Kling <kling@serenityos.org>
* Copyright (c) 2023, Karol Kosek <krkk@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Base64.h>
#include <AK/StdLibExtras.h>
#include <LibURL/URL.h>
#include <LibWeb/Fetch/Infrastructure/URL.h>
#include <LibWeb/MimeSniff/MimeType.h>
namespace Web::Fetch::Infrastructure {
// https://fetch.spec.whatwg.org/#is-local
// Reports whether the scheme of `url` equals one of the entries in LOCAL_SCHEMES.
bool is_local_url(URL::URL const& url)
{
    // A URL is local if its scheme is a local scheme.
    auto const is_scheme_of_url = [&](auto local_scheme) {
        return url.scheme() == local_scheme;
    };
    return any_of(LOCAL_SCHEMES, is_scheme_of_url);
}
// https://fetch.spec.whatwg.org/#fetch-scheme
// Reports whether `scheme` equals one of the entries in FETCH_SCHEMES.
bool is_fetch_scheme(StringView scheme)
{
    // A fetch scheme is "about", "blob", "data", "file", or an HTTP(S) scheme.
    auto const is_given_scheme = [&](auto candidate) {
        return scheme == candidate;
    };
    return any_of(FETCH_SCHEMES, is_given_scheme);
}
// https://fetch.spec.whatwg.org/#http-scheme
// Reports whether `scheme` equals one of the entries in HTTP_SCHEMES.
bool is_http_or_https_scheme(StringView scheme)
{
    // An HTTP(S) scheme is "http" or "https".
    auto const is_given_scheme = [&](auto candidate) {
        return scheme == candidate;
    };
    return any_of(HTTP_SCHEMES, is_given_scheme);
}
// https://fetch.spec.whatwg.org/#data-url-processor
// Splits a data: URL into a MIME type record and a decoded body.
//
// The caller must pass a URL whose scheme is "data"; this is enforced with VERIFY.
// Returns an error when the serialized URL contains no comma, when a ";base64" body fails to
// decode, or when one of the MIME type helpers invoked through TRY reports an error.
// A MIME type that parses to "no value" is replaced by text/plain;charset=US-ASCII.
ErrorOr<DataURL> process_data_url(URL::URL const& data_url)
{
    // 1. Assert: dataURL's scheme is "data".
    VERIFY(data_url.scheme() == "data");

    // 2. Let input be the result of running the URL serializer on dataURL with exclude fragment set to true.
    // NOTE: The serialized string is kept in its own variable; every view below is derived from it.
    auto input_serialized = data_url.serialize(URL::ExcludeFragment::Yes);
    StringView input = input_serialized;

    // 3. Remove the leading "data:" from input.
    input = input.substring_view("data:"sv.length());

    // 4. Let position point at the start of input.

    // 5. Let mimeType be the result of collecting a sequence of code points that are not equal to U+002C (,), given position.
    // NOTE: When there is no comma, the whole input is taken as the MIME type; step 7 then rejects it.
    auto position = input.find(',');
    auto mime_type = input.substring_view(0, position.value_or(input.length()));

    // 6. Strip leading and trailing ASCII whitespace from mimeType.
    mime_type = mime_type.trim_whitespace(TrimMode::Both);

    // 7. If position is past the end of input, then return failure.
    if (!position.has_value())
        return Error::from_string_literal("Missing a comma character");

    // 8. Advance position by 1.
    position = position.value() + 1;

    // 9. Let encodedBody be the remainder of input.
    auto encoded_body = input.substring_view(position.value());

    // 10. Let body be the percent-decoding of encodedBody.
    auto body = URL::percent_decode(encoded_body).to_byte_buffer();

    // 11. If mimeType ends with U+003B (;), followed by zero or more U+0020 SPACE, followed by an ASCII case-insensitive match for "base64", then:
    // NOTE: The condition is checked back to front: first the "base64" suffix, then the optional
    //       spaces before it, and finally the ';' that must precede those.
    if (mime_type.ends_with("base64"sv, CaseSensitivity::CaseInsensitive)) {
        auto trimmed_substring_view = mime_type.substring_view(0, mime_type.length() - 6);
        trimmed_substring_view = trimmed_substring_view.trim(" "sv, TrimMode::Right);
        if (trimmed_substring_view.ends_with(';')) {
            // 1. Let stringBody be the isomorphic decode of body.
            auto string_body = StringView(body);

            // 2. Set body to the forgiving-base64 decode of stringBody.
            // 3. If body is failure, then return failure.
            // NOTE: string_body is built from body; body is only overwritten once the decode call has returned.
            body = TRY(decode_base64(string_body));

            // 4. Remove the last 6 code points from mimeType.
            // 5. Remove trailing U+0020 SPACE code points from mimeType, if any.
            // 6. Remove the last U+003B (;) from mimeType.
            // NOTE: Steps 4 and 5 were already applied to trimmed_substring_view above; only the ';' is left to drop.
            mime_type = trimmed_substring_view.substring_view(0, trimmed_substring_view.length() - 1);
        }
    }

    // 12. If mimeType starts with ";", then prepend "text/plain" to mimeType.
    // NOTE: builder is declared outside the branch because mime_type is re-pointed at its contents
    //       and is still read by the parse call below.
    StringBuilder builder;
    if (mime_type.starts_with(';')) {
        builder.append("text/plain"sv);
        builder.append(mime_type);
        mime_type = builder.string_view();
    }

    // 13. Let mimeTypeRecord be the result of parsing mimeType.
    auto mime_type_record = TRY(MimeSniff::MimeType::parse(mime_type));

    // 14. If mimeTypeRecord is failure, then set mimeTypeRecord to text/plain;charset=US-ASCII.
    if (!mime_type_record.has_value()) {
        mime_type_record = TRY(MimeSniff::MimeType::create("text"_string, "plain"_string));
        TRY(mime_type_record->set_parameter("charset"_string, "US-ASCII"_string));
    }

    // 15. Return a new data: URL struct whose MIME type is mimeTypeRecord and body is body.
    // NOTE: body is not used after this point, so it is moved into the result instead of being copied.
    return DataURL { mime_type_record.release_value(), move(body) };
}
}