LibURL: Make URL a copy-on-write type

This patch moves the data members of URL to an internal URL::Data struct
that is also reference-counted. URL then uses a CopyOnWrite<T> template
to give itself copy-on-write behavior.

This means that URL itself is now 8 bytes per instance, and copying is
cheap as long as you don't mutate.

This shrinks many data structures over in LibWeb land. As an example,
CSS::ComputedValues goes from 3024 bytes to 2288 bytes per instance.
This commit is contained in:
Andreas Kling 2024-08-02 15:23:49 +02:00 committed by Andreas Kling
parent c282138fd0
commit 936b76f36e
Notes: github-actions[bot] 2024-08-02 18:38:32 +00:00
3 changed files with 236 additions and 183 deletions

View file

@ -730,7 +730,7 @@ void Parser::shorten_urls_path(URL& url)
VERIFY(!url.cannot_be_a_base_url());
// 2. Let path be url's path.
auto& path = url.m_paths;
auto& path = url.m_data->paths;
// 3. If url's scheme is "file", path's size is 1, and path[0] is a normalized Windows drive letter, then return.
if (url.scheme() == "file" && path.size() == 1 && is_normalized_windows_drive_letter(path[0]))
@ -929,13 +929,13 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
}
// 2. Set url's scheme to buffer.
url->m_scheme = buffer.to_string_without_validation();
url->m_data->scheme = buffer.to_string_without_validation();
// 3. If state override is given, then:
if (state_override.has_value()) {
// 1. If url's port is url's scheme's default port, then set url's port to null.
if (url->port() == default_port_for_scheme(url->scheme()))
url->m_port = {};
url->m_data->port = {};
// 2. Return.
return *url;
@ -954,7 +954,7 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
state = State::File;
}
// 6. Otherwise, if url is special, base is non-null, and base's scheme is url's scheme:
else if (url->is_special() && base_url.has_value() && base_url->scheme() == url->m_scheme) {
else if (url->is_special() && base_url.has_value() && base_url->scheme() == url->m_data->scheme) {
// 1. Assert: base is special (and therefore does not have an opaque path).
VERIFY(base_url->is_special());
@ -972,7 +972,7 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
}
// 9. Otherwise, set url's path to the empty string and set state to opaque path state.
else {
url->m_cannot_be_a_base_url = true;
url->m_data->cannot_be_a_base_url = true;
url->append_slash();
state = State::CannotBeABaseUrlPath;
}
@ -992,22 +992,22 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
// -> no scheme state, https://url.spec.whatwg.org/#no-scheme-state
case State::NoScheme:
// 1. If base is null, or base has an opaque path and c is not U+0023 (#), missing-scheme-non-relative-URL validation error, return failure.
if (!base_url.has_value() || (base_url->m_cannot_be_a_base_url && code_point != '#')) {
if (!base_url.has_value() || (base_url->m_data->cannot_be_a_base_url && code_point != '#')) {
report_validation_error();
return {};
}
// 2. Otherwise, if base has an opaque path and c is U+0023 (#), set url's scheme to base's scheme, url's path to base's path, url's query
// to base's query, url's fragment to the empty string, and set state to fragment state.
else if (base_url->m_cannot_be_a_base_url && code_point == '#') {
url->m_scheme = base_url->m_scheme;
url->m_paths = base_url->m_paths;
url->m_query = base_url->m_query;
url->m_fragment = String {};
url->m_cannot_be_a_base_url = true;
else if (base_url->m_data->cannot_be_a_base_url && code_point == '#') {
url->m_data->scheme = base_url->m_data->scheme;
url->m_data->paths = base_url->m_data->paths;
url->m_data->query = base_url->m_data->query;
url->m_data->fragment = String {};
url->m_data->cannot_be_a_base_url = true;
state = State::Fragment;
}
// 3. Otherwise, if base's scheme is not "file", set state to relative state and decrease pointer by 1.
else if (base_url->m_scheme != "file") {
else if (base_url->m_data->scheme != "file") {
state = State::Relative;
continue;
}
@ -1049,7 +1049,7 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
VERIFY(base_url->scheme() != "file");
// 2. Set url's scheme to base's scheme.
url->m_scheme = base_url->m_scheme;
url->m_data->scheme = base_url->m_data->scheme;
// 3. If c is U+002F (/), then set state to relative slash state.
if (code_point == '/') {
@ -1063,27 +1063,27 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
// 5. Otherwise:
else {
// 1. Set url's username to base's username, url's password to base's password, url's host to base's host, url's port to base's port, url's path to a clone of base's path, and url's query to base's query.
url->m_username = base_url->m_username;
url->m_password = base_url->m_password;
url->m_host = base_url->m_host;
url->m_port = base_url->m_port;
url->m_paths = base_url->m_paths;
url->m_query = base_url->m_query;
url->m_data->username = base_url->m_data->username;
url->m_data->password = base_url->m_data->password;
url->m_data->host = base_url->m_data->host;
url->m_data->port = base_url->m_data->port;
url->m_data->paths = base_url->m_data->paths;
url->m_data->query = base_url->m_data->query;
// 2. If c is U+003F (?), then set url's query to the empty string, and state to query state.
if (code_point == '?') {
url->m_query = String {};
url->m_data->query = String {};
state = State::Query;
}
// 3. Otherwise, if c is U+0023 (#), set url's fragment to the empty string and state to fragment state.
else if (code_point == '#') {
url->m_fragment = String {};
url->m_data->fragment = String {};
state = State::Fragment;
}
// 4. Otherwise, if c is not the EOF code point:
else if (code_point != end_of_file) {
// 1. Set url's query to null.
url->m_query = {};
url->m_data->query = {};
// 2. Shorten url's path.
shorten_urls_path(*url);
@ -1111,10 +1111,10 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
}
// 3. Otherwise, set url's username to base's username, url's password to base's password, url's host to base's host, url's port to base's port, state to path state, and then, decrease pointer by 1.
else {
url->m_username = base_url->m_username;
url->m_password = base_url->m_password;
url->m_host = base_url->m_host;
url->m_port = base_url->m_port;
url->m_data->username = base_url->m_data->username;
url->m_data->password = base_url->m_data->password;
url->m_data->host = base_url->m_data->host;
url->m_data->port = base_url->m_data->port;
state = State::Path;
continue;
}
@ -1180,23 +1180,23 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
// 3. If passwordTokenSeen is true, then append encodedCodePoints to url's password.
if (password_token_seen) {
if (password_builder.is_empty())
password_builder.append(url->m_password);
password_builder.append(url->m_data->password);
append_percent_encoded_if_necessary(password_builder, c, PercentEncodeSet::Userinfo);
}
// 4. Otherwise, append encodedCodePoints to url's username.
else {
if (username_builder.is_empty())
username_builder.append(url->m_username);
username_builder.append(url->m_data->username);
append_percent_encoded_if_necessary(username_builder, c, PercentEncodeSet::Userinfo);
}
}
if (username_builder.string_view().length() > url->m_username.bytes().size())
url->m_username = username_builder.to_string().release_value_but_fixme_should_propagate_errors();
if (password_builder.string_view().length() > url->m_password.bytes().size())
url->m_password = password_builder.to_string().release_value_but_fixme_should_propagate_errors();
if (username_builder.string_view().length() > url->m_data->username.bytes().size())
url->m_data->username = username_builder.to_string().release_value_but_fixme_should_propagate_errors();
if (password_builder.string_view().length() > url->m_data->password.bytes().size())
url->m_data->password = password_builder.to_string().release_value_but_fixme_should_propagate_errors();
// 5. Set buffer to the empty string.
buffer.clear();
@ -1255,7 +1255,7 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
return {};
// 5. Set url's host to host, buffer to the empty string, and state to port state.
url->m_host = host.release_value();
url->m_data->host = host.release_value();
buffer.clear();
state = State::Port;
}
@ -1285,7 +1285,7 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
return {};
// 5. Set url's host to host, buffer to the empty string, and state to path start state.
url->m_host = host.value();
url->m_data->host = host.value();
buffer.clear();
state = State::Port;
@ -1341,9 +1341,9 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
// 3. Set url's port to null, if port is url's scheme's default port; otherwise to port.
if (port.value() == default_port_for_scheme(url->scheme()))
url->m_port = {};
url->m_data->port = {};
else
url->m_port = port.value();
url->m_data->port = port.value();
// 4. Set buffer to the empty string.
buffer.clear();
@ -1366,10 +1366,10 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
// -> file state, https://url.spec.whatwg.org/#file-state
case State::File:
// 1. Set url's scheme to "file".
url->m_scheme = "file"_string;
url->m_data->scheme = "file"_string;
// 2. Set url's host to the empty string.
url->m_host = String {};
url->m_data->host = String {};
// 3. If c is U+002F (/) or U+005C (\), then:
if (code_point == '/' || code_point == '\\') {
@ -1381,26 +1381,26 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
state = State::FileSlash;
}
// 4. Otherwise, if base is non-null and base's scheme is "file":
else if (base_url.has_value() && base_url->m_scheme == "file") {
else if (base_url.has_value() && base_url->m_data->scheme == "file") {
// 1. Set url's host to base's host, url's path to a clone of base's path, and url's query to base's query.
url->m_host = base_url->m_host;
url->m_paths = base_url->m_paths;
url->m_query = base_url->m_query;
url->m_data->host = base_url->m_data->host;
url->m_data->paths = base_url->m_data->paths;
url->m_data->query = base_url->m_data->query;
// 2. If c is U+003F (?), then set url's query to the empty string and state to query state.
if (code_point == '?') {
url->m_query = String {};
url->m_data->query = String {};
state = State::Query;
}
// 3. Otherwise, if c is U+0023 (#), set url's fragment to the empty string and state to fragment state.
else if (code_point == '#') {
url->m_fragment = String {};
url->m_data->fragment = String {};
state = State::Fragment;
}
// 4. Otherwise, if c is not the EOF code point:
else if (code_point != end_of_file) {
// 1. Set url's query to null.
url->m_query = {};
url->m_data->query = {};
// 2. If the code point substring from pointer to the end of input does not start with a Windows drive letter, then shorten url's path.
auto substring_from_pointer = input.substring_view(iterator - input.begin()).as_string();
@ -1413,7 +1413,7 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
report_validation_error();
// 2. Set url's path to « ».
url->m_paths.clear();
url->m_data->paths.clear();
}
// 4. Set state to path state and decrease pointer by 1.
@ -1442,18 +1442,18 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
// 2. Otherwise:
else {
// 1. If base is non-null and base's scheme is "file", then:
if (base_url.has_value() && base_url->m_scheme == "file") {
if (base_url.has_value() && base_url->m_data->scheme == "file") {
// 1. Set url's host to base's host.
url->m_host = base_url->m_host;
url->m_data->host = base_url->m_data->host;
// FIXME: The spec does not seem to mention these steps.
url->m_paths = base_url->m_paths;
url->m_paths.remove(url->m_paths.size() - 1);
url->m_data->paths = base_url->m_data->paths;
url->m_data->paths.remove(url->m_data->paths.size() - 1);
// 2. If the code point substring from pointer to the end of input does not start with a Windows drive letter and base's path[0] is a normalized Windows drive letter, then append base's path[0] to url's path.
auto substring_from_pointer = input.substring_view(iterator - input.begin()).as_string();
if (!starts_with_windows_drive_letter(substring_from_pointer) && is_normalized_windows_drive_letter(base_url->m_paths[0]))
url->m_paths.append(base_url->m_paths[0]);
if (!starts_with_windows_drive_letter(substring_from_pointer) && is_normalized_windows_drive_letter(base_url->m_data->paths[0]))
url->m_data->paths.append(base_url->m_data->paths[0]);
}
// 2. Set state to path state, and decrease pointer by 1.
@ -1474,7 +1474,7 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
// 2. Otherwise, if buffer is the empty string, then:
else if (buffer.is_empty()) {
// 1. Set url's host to the empty string.
url->m_host = String {};
url->m_data->host = String {};
// 2. If state override is given, then return.
if (state_override.has_value())
@ -1498,7 +1498,7 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
host = String {};
// 4. Set url's host to host.
url->m_host = host.release_value();
url->m_data->host = host.release_value();
// 5. If state override is given, then return.
if (state_override.has_value())
@ -1532,12 +1532,12 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
}
// 2. Otherwise, if state override is not given and c is U+003F (?), set url's query to the empty string and state to query state.
else if (!state_override.has_value() && code_point == '?') {
url->m_query = String {};
url->m_data->query = String {};
state = State::Query;
}
// 3. Otherwise, if state override is not given and c is U+0023 (#), set url's fragment to the empty string and state to fragment state.
else if (!state_override.has_value() && code_point == '#') {
url->m_fragment = String {};
url->m_data->fragment = String {};
state = State::Fragment;
}
// 4. Otherwise, if c is not the EOF code point:
@ -1585,14 +1585,14 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
// 4. Otherwise, if buffer is not a single-dot URL path segment, then:
else if (!is_single_dot_path_segment(buffer.string_view())) {
// 1. If url's scheme is "file", url's path is empty, and buffer is a Windows drive letter, then replace the second code point in buffer with U+003A (:).
if (url->m_scheme == "file" && url->m_paths.is_empty() && is_windows_drive_letter(buffer.string_view())) {
if (url->m_data->scheme == "file" && url->m_data->paths.is_empty() && is_windows_drive_letter(buffer.string_view())) {
auto drive_letter = buffer.string_view()[0];
buffer.clear();
buffer.append(drive_letter);
buffer.append(':');
}
// 2. Append buffer to url's path.
url->m_paths.append(buffer.to_string_without_validation());
url->m_data->paths.append(buffer.to_string_without_validation());
}
// 5. Set buffer to the empty string.
@ -1600,12 +1600,12 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
// 6. If c is U+003F (?), then set url's query to the empty string and state to query state.
if (code_point == '?') {
url->m_query = String {};
url->m_data->query = String {};
state = State::Query;
}
// 7. If c is U+0023 (#), then set url's fragment to the empty string and state to fragment state.
else if (code_point == '#') {
url->m_fragment = String {};
url->m_data->fragment = String {};
state = State::Fragment;
}
}
@ -1626,20 +1626,20 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
// -> opaque path state, https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state
case State::CannotBeABaseUrlPath:
// NOTE: This does not follow the spec exactly but rather uses the buffer and only sets the path on EOF.
VERIFY(url->m_paths.size() == 1 && url->m_paths[0].is_empty());
VERIFY(url->m_data->paths.size() == 1 && url->m_data->paths[0].is_empty());
// 1. If c is U+003F (?), then set url's query to the empty string and state to query state.
if (code_point == '?') {
url->m_paths[0] = buffer.to_string_without_validation();
url->m_query = String {};
url->m_data->paths[0] = buffer.to_string_without_validation();
url->m_data->query = String {};
buffer.clear();
state = State::Query;
}
// 2. Otherwise, if c is U+0023 (#), then set url's fragment to the empty string and state to fragment state.
else if (code_point == '#') {
// NOTE: This needs to be percent decoded since the member variables contain decoded data.
url->m_paths[0] = buffer.to_string_without_validation();
url->m_fragment = String {};
url->m_data->paths[0] = buffer.to_string_without_validation();
url->m_data->fragment = String {};
buffer.clear();
state = State::Fragment;
}
@ -1657,7 +1657,7 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
if (code_point != end_of_file) {
append_percent_encoded_if_necessary(buffer, code_point, PercentEncodeSet::C0Control);
} else {
url->m_paths[0] = buffer.to_string_without_validation();
url->m_data->paths[0] = buffer.to_string_without_validation();
buffer.clear();
}
}
@ -1680,14 +1680,14 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
auto query_percent_encode_set = url->is_special() ? PercentEncodeSet::SpecialQuery : PercentEncodeSet::Query;
// 2. Percent-encode after encoding, with encoding, buffer, and queryPercentEncodeSet, and append the result to url's query.
url->m_query = percent_encode_after_encoding(buffer.string_view(), query_percent_encode_set).release_value_but_fixme_should_propagate_errors();
url->m_data->query = percent_encode_after_encoding(buffer.string_view(), query_percent_encode_set).release_value_but_fixme_should_propagate_errors();
// 3. Set buffer to the empty string.
buffer.clear();
// 4. If c is U+0023 (#), then set url's fragment to the empty string and state to fragment state.
if (code_point == '#') {
url->m_fragment = String {};
url->m_data->fragment = String {};
state = State::Fragment;
}
}
@ -1722,7 +1722,7 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
// NOTE: The percent-encode is done on EOF on the entire buffer.
buffer.append_code_point(code_point);
} else {
url->m_fragment = percent_encode_after_encoding(buffer.string_view(), PercentEncodeSet::Fragment).release_value_but_fixme_should_propagate_errors();
url->m_data->fragment = percent_encode_after_encoding(buffer.string_view(), PercentEncodeSet::Fragment).release_value_but_fixme_should_propagate_errors();
buffer.clear();
}
break;
@ -1735,7 +1735,7 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
++iterator;
}
url->m_valid = true;
url->m_data->valid = true;
dbgln_if(URL_PARSER_DEBUG, "URL::Parser::parse: Parsed URL to be '{}'.", url->serialize());
// 10. Return url.

View file

@ -21,7 +21,7 @@ URL::URL(StringView string)
: URL(Parser::basic_parse(string))
{
if constexpr (URL_PARSER_DEBUG) {
if (m_valid)
if (m_data->valid)
dbgln("URL constructor: Parsed URL to be '{}'.", serialize());
else
dbgln("URL constructor: Parsed URL to be invalid.");
@ -38,42 +38,42 @@ URL URL::complete_url(StringView relative_url) const
ErrorOr<String> URL::username() const
{
return String::from_byte_string(percent_decode(m_username));
return String::from_byte_string(percent_decode(m_data->username));
}
ErrorOr<String> URL::password() const
{
return String::from_byte_string(percent_decode(m_password));
return String::from_byte_string(percent_decode(m_data->password));
}
ByteString URL::path_segment_at_index(size_t index) const
{
VERIFY(index < path_segment_count());
return percent_decode(m_paths[index]);
return percent_decode(m_data->paths[index]);
}
ByteString URL::basename() const
{
if (!m_valid)
if (!m_data->valid)
return {};
if (m_paths.is_empty())
if (m_data->paths.is_empty())
return {};
auto& last_segment = m_paths.last();
auto& last_segment = m_data->paths.last();
return percent_decode(last_segment);
}
void URL::set_scheme(String scheme)
{
m_scheme = move(scheme);
m_valid = compute_validity();
m_data->scheme = move(scheme);
m_data->valid = compute_validity();
}
// https://url.spec.whatwg.org/#set-the-username
ErrorOr<void> URL::set_username(StringView username)
{
// To set the username given a url and username, set url's username to the result of running UTF-8 percent-encode on username using the userinfo percent-encode set.
m_username = TRY(String::from_byte_string(percent_encode(username, PercentEncodeSet::Userinfo)));
m_valid = compute_validity();
m_data->username = TRY(String::from_byte_string(percent_encode(username, PercentEncodeSet::Userinfo)));
m_data->valid = compute_validity();
return {};
}
@ -81,76 +81,76 @@ ErrorOr<void> URL::set_username(StringView username)
ErrorOr<void> URL::set_password(StringView password)
{
// To set the password given a url and password, set url's password to the result of running UTF-8 percent-encode on password using the userinfo percent-encode set.
m_password = TRY(String::from_byte_string(percent_encode(password, PercentEncodeSet::Userinfo)));
m_valid = compute_validity();
m_data->password = TRY(String::from_byte_string(percent_encode(password, PercentEncodeSet::Userinfo)));
m_data->valid = compute_validity();
return {};
}
void URL::set_host(Host host)
{
m_host = move(host);
m_valid = compute_validity();
m_data->host = move(host);
m_data->valid = compute_validity();
}
// https://url.spec.whatwg.org/#concept-host-serializer
ErrorOr<String> URL::serialized_host() const
{
return Parser::serialize_host(m_host);
return Parser::serialize_host(m_data->host);
}
void URL::set_port(Optional<u16> port)
{
if (port == default_port_for_scheme(m_scheme)) {
m_port = {};
if (port == default_port_for_scheme(m_data->scheme)) {
m_data->port = {};
return;
}
m_port = move(port);
m_valid = compute_validity();
m_data->port = move(port);
m_data->valid = compute_validity();
}
void URL::set_paths(Vector<ByteString> const& paths)
{
m_paths.clear_with_capacity();
m_paths.ensure_capacity(paths.size());
m_data->paths.clear_with_capacity();
m_data->paths.ensure_capacity(paths.size());
for (auto const& segment : paths)
m_paths.unchecked_append(String::from_byte_string(percent_encode(segment, PercentEncodeSet::Path)).release_value_but_fixme_should_propagate_errors());
m_valid = compute_validity();
m_data->paths.unchecked_append(String::from_byte_string(percent_encode(segment, PercentEncodeSet::Path)).release_value_but_fixme_should_propagate_errors());
m_data->valid = compute_validity();
}
void URL::append_path(StringView path)
{
m_paths.append(String::from_byte_string(percent_encode(path, PercentEncodeSet::Path)).release_value_but_fixme_should_propagate_errors());
m_data->paths.append(String::from_byte_string(percent_encode(path, PercentEncodeSet::Path)).release_value_but_fixme_should_propagate_errors());
}
// https://url.spec.whatwg.org/#cannot-have-a-username-password-port
bool URL::cannot_have_a_username_or_password_or_port() const
{
// A URL cannot have a username/password/port if its host is null or the empty string, or its scheme is "file".
return m_host.has<Empty>() || m_host == String {} || m_scheme == "file"sv;
return m_data->host.has<Empty>() || m_data->host == String {} || m_data->scheme == "file"sv;
}
// FIXME: This is by no means complete.
// NOTE: This relies on some assumptions about how the spec-defined URL parser works that may turn out to be wrong.
bool URL::compute_validity() const
{
if (m_scheme.is_empty())
if (m_data->scheme.is_empty())
return false;
if (m_cannot_be_a_base_url) {
if (m_paths.size() != 1)
if (m_data->cannot_be_a_base_url) {
if (m_data->paths.size() != 1)
return false;
if (m_paths[0].is_empty())
if (m_data->paths[0].is_empty())
return false;
} else {
if (m_scheme.is_one_of("about", "mailto"))
if (m_data->scheme.is_one_of("about", "mailto"))
return false;
// NOTE: Maybe it is allowed to have a zero-segment path.
if (m_paths.size() == 0)
if (m_data->paths.size() == 0)
return false;
}
// NOTE: A file URL's host should be the empty string for localhost, not null.
if (m_scheme == "file" && m_host.has<Empty>())
if (m_data->scheme == "file" && m_data->host.has<Empty>())
return false;
return true;
@ -251,13 +251,13 @@ ByteString URL::serialize_path(ApplyPercentDecoding apply_percent_decoding) cons
// 1. If url has an opaque path, then return url's path.
// FIXME: Reimplement this step once we modernize the URL implementation to meet the spec.
if (cannot_be_a_base_url())
return m_paths[0].to_byte_string();
return m_data->paths[0].to_byte_string();
// 2. Let output be the empty string.
StringBuilder output;
// 3. For each segment of url's path: append U+002F (/) followed by segment to output.
for (auto const& segment : m_paths) {
for (auto const& segment : m_data->paths) {
output.append('/');
output.append(apply_percent_decoding == ApplyPercentDecoding::Yes ? percent_decode(segment) : segment.to_byte_string());
}
@ -271,23 +271,23 @@ ByteString URL::serialize(ExcludeFragment exclude_fragment) const
{
// 1. Let output be url's scheme and U+003A (:) concatenated.
StringBuilder output;
output.append(m_scheme);
output.append(m_data->scheme);
output.append(':');
// 2. If url's host is non-null:
if (!m_host.has<Empty>()) {
if (!m_data->host.has<Empty>()) {
// 1. Append "//" to output.
output.append("//"sv);
// 2. If url includes credentials, then:
if (includes_credentials()) {
// 1. Append url's username to output.
output.append(m_username);
output.append(m_data->username);
// 2. If url's password is not the empty string, then append U+003A (:), followed by url's password, to output.
if (!m_password.is_empty()) {
if (!m_data->password.is_empty()) {
output.append(':');
output.append(m_password);
output.append(m_data->password);
}
// 3. Append U+0040 (@) to output.
@ -298,34 +298,34 @@ ByteString URL::serialize(ExcludeFragment exclude_fragment) const
output.append(serialized_host().release_value_but_fixme_should_propagate_errors());
// 4. If url's port is non-null, append U+003A (:) followed by url's port, serialized, to output.
if (m_port.has_value())
output.appendff(":{}", *m_port);
if (m_data->port.has_value())
output.appendff(":{}", *m_data->port);
}
// 3. If url's host is null, url does not have an opaque path, url's path's size is greater than 1, and url's path[0] is the empty string, then append U+002F (/) followed by U+002E (.) to output.
// 4. Append the result of URL path serializing url to output.
// FIXME: Implement this closer to spec steps.
if (cannot_be_a_base_url()) {
output.append(m_paths[0]);
output.append(m_data->paths[0]);
} else {
if (m_host.has<Empty>() && m_paths.size() > 1 && m_paths[0].is_empty())
if (m_data->host.has<Empty>() && m_data->paths.size() > 1 && m_data->paths[0].is_empty())
output.append("/."sv);
for (auto& segment : m_paths) {
for (auto& segment : m_data->paths) {
output.append('/');
output.append(segment);
}
}
// 5. If url's query is non-null, append U+003F (?), followed by url's query, to output.
if (m_query.has_value()) {
if (m_data->query.has_value()) {
output.append('?');
output.append(*m_query);
output.append(*m_data->query);
}
// 6. If exclude fragment is false and url's fragment is non-null, then append U+0023 (#), followed by url's fragment, to output.
if (exclude_fragment == ExcludeFragment::No && m_fragment.has_value()) {
if (exclude_fragment == ExcludeFragment::No && m_data->fragment.has_value()) {
output.append('#');
output.append(*m_fragment);
output.append(*m_data->fragment);
}
// 7. Return output.
@ -338,38 +338,38 @@ ByteString URL::serialize(ExcludeFragment exclude_fragment) const
// resulting from percent-decoding those sequences converted to bytes, unless that renders those sequences invisible.
ByteString URL::serialize_for_display() const
{
VERIFY(m_valid);
VERIFY(m_data->valid);
StringBuilder builder;
builder.append(m_scheme);
builder.append(m_data->scheme);
builder.append(':');
if (!m_host.has<Empty>()) {
if (!m_data->host.has<Empty>()) {
builder.append("//"sv);
builder.append(serialized_host().release_value_but_fixme_should_propagate_errors());
if (m_port.has_value())
builder.appendff(":{}", *m_port);
if (m_data->port.has_value())
builder.appendff(":{}", *m_data->port);
}
if (cannot_be_a_base_url()) {
builder.append(m_paths[0]);
builder.append(m_data->paths[0]);
} else {
if (m_host.has<Empty>() && m_paths.size() > 1 && m_paths[0].is_empty())
if (m_data->host.has<Empty>() && m_data->paths.size() > 1 && m_data->paths[0].is_empty())
builder.append("/."sv);
for (auto& segment : m_paths) {
for (auto& segment : m_data->paths) {
builder.append('/');
builder.append(segment);
}
}
if (m_query.has_value()) {
if (m_data->query.has_value()) {
builder.append('?');
builder.append(*m_query);
builder.append(*m_data->query);
}
if (m_fragment.has_value()) {
if (m_data->fragment.has_value()) {
builder.append('#');
builder.append(*m_fragment);
builder.append(*m_data->fragment);
}
return builder.to_byte_string();
@ -384,27 +384,27 @@ ErrorOr<String> URL::to_string() const
// https://url.spec.whatwg.org/#concept-url-origin
ByteString URL::serialize_origin() const
{
VERIFY(m_valid);
VERIFY(m_data->valid);
if (m_scheme == "blob"sv) {
if (m_data->scheme == "blob"sv) {
// TODO: 1. If URL's blob URL entry is non-null, then return URL's blob URL entry's environment's origin.
// 2. Let url be the result of parsing URL's path[0].
VERIFY(!m_paths.is_empty());
URL url = m_paths[0];
VERIFY(!m_data->paths.is_empty());
URL url = m_data->paths[0];
// 3. Return a new opaque origin, if url is failure, and url's origin otherwise.
if (!url.is_valid())
return "null";
return url.serialize_origin();
} else if (!m_scheme.is_one_of("ftp"sv, "http"sv, "https"sv, "ws"sv, "wss"sv)) { // file: "Unfortunate as it is, this is left as an exercise to the reader. When in doubt, return a new opaque origin."
} else if (!m_data->scheme.is_one_of("ftp"sv, "http"sv, "https"sv, "ws"sv, "wss"sv)) { // file: "Unfortunate as it is, this is left as an exercise to the reader. When in doubt, return a new opaque origin."
return "null";
}
StringBuilder builder;
builder.append(m_scheme);
builder.append(m_data->scheme);
builder.append("://"sv);
builder.append(serialized_host().release_value_but_fixme_should_propagate_errors());
if (m_port.has_value())
builder.appendff(":{}", *m_port);
if (m_data->port.has_value())
builder.appendff(":{}", *m_data->port);
return builder.to_byte_string();
}
@ -412,7 +412,7 @@ bool URL::equals(URL const& other, ExcludeFragment exclude_fragments) const
{
if (this == &other)
return true;
if (!m_valid || !other.m_valid)
if (!m_data->valid || !other.m_data->valid)
return false;
return serialize(exclude_fragments) == other.serialize(exclude_fragments);
}

View file

@ -78,6 +78,34 @@ enum class SpaceAsPlus {
ByteString percent_encode(StringView input, PercentEncodeSet set = PercentEncodeSet::Userinfo, SpaceAsPlus = SpaceAsPlus::No);
ByteString percent_decode(StringView input);
template<typename T>
class CopyOnWrite {
public:
CopyOnWrite()
: m_value(adopt_ref(*new T))
{
}
T& mutable_value()
{
if (m_value->ref_count() > 1)
m_value = m_value->clone();
return *m_value;
}
T const& value() const { return *m_value; }
operator T const&() const { return value(); }
operator T&() { return mutable_value(); }
T const* operator->() const { return &value(); }
T* operator->() { return &mutable_value(); }
T const* ptr() const { return m_value.ptr(); }
T* ptr() { return m_value.ptr(); }
private:
NonnullRefPtr<T> m_value;
};
// https://url.spec.whatwg.org/#url-representation
// A URL is a struct that represents a universal identifier. To disambiguate from a valid URL string it can also be referred to as a URL record.
class URL {
@ -95,26 +123,26 @@ public:
{
}
// Read-only accessors. Every member in this group is const; the inline ones only read
// stored fields and return them (by const reference where the return type says so).

// The stored validity flag.
bool is_valid() const { return m_valid; }
bool is_valid() const { return m_data->valid; }
// The stored scheme string.
String const& scheme() const { return m_scheme; }
String const& scheme() const { return m_data->scheme; }
// Fallible (ErrorOr) accessors; defined out of line.
// NOTE(review): presumably these return a decoded form, in contrast to raw_username()/raw_password() - confirm in the implementation.
ErrorOr<String> username() const;
ErrorOr<String> password() const;
// The stored host.
Host const& host() const { return m_host; }
Host const& host() const { return m_data->host; }
ErrorOr<String> serialized_host() const;
ByteString basename() const;
// Query, fragment and port are optional; an empty Optional is returned when they are unset.
Optional<String> const& query() const { return m_query; }
Optional<String> const& fragment() const { return m_fragment; }
Optional<u16> port() const { return m_port; }
Optional<String> const& query() const { return m_data->query; }
Optional<String> const& fragment() const { return m_data->fragment; }
Optional<u16> port() const { return m_data->port; }
ByteString path_segment_at_index(size_t index) const;
// Number of entries in the stored path segment list.
size_t path_segment_count() const { return m_paths.size(); }
size_t path_segment_count() const { return m_data->paths.size(); }
// port_or_default(): the explicit port if one is set; otherwise the default port for the
// stored scheme; otherwise 0 when the scheme has no default port.
u16 port_or_default() const { return m_port.value_or(default_port_for_scheme(m_scheme).value_or(0)); }
bool cannot_be_a_base_url() const { return m_cannot_be_a_base_url; }
u16 port_or_default() const { return m_data->port.value_or(default_port_for_scheme(m_data->scheme).value_or(0)); }
bool cannot_be_a_base_url() const { return m_data->cannot_be_a_base_url; }
bool cannot_have_a_username_or_password_or_port() const;
// includes_credentials(): true when the stored username or the stored password is non-empty.
// is_special(): delegates to is_special_scheme() on the stored scheme.
bool includes_credentials() const { return !m_username.is_empty() || !m_password.is_empty(); }
bool is_special() const { return is_special_scheme(m_scheme); }
bool includes_credentials() const { return !m_data->username.is_empty() || !m_data->password.is_empty(); }
bool is_special() const { return is_special_scheme(m_data->scheme); }
void set_scheme(String);
ErrorOr<void> set_username(StringView);
@ -122,14 +150,14 @@ public:
// Mutators. The first three and append_path() are defined out of line.
void set_host(Host);
void set_port(Optional<u16>);
void set_paths(Vector<ByteString> const&);
// set_query() and set_fragment() take their argument by value and move it into the stored field;
// passing an empty Optional clears the field.
// The forms that write through m_data go via CopyOnWrite's non-const operator->, which swaps in
// a clone of the data first whenever its reference count is above one.
void set_query(Optional<String> query) { m_query = move(query); }
void set_fragment(Optional<String> fragment) { m_fragment = move(fragment); }
void set_cannot_be_a_base_url(bool value) { m_cannot_be_a_base_url = value; }
void set_query(Optional<String> query) { m_data->query = move(query); }
void set_fragment(Optional<String> fragment) { m_data->fragment = move(fragment); }
void set_cannot_be_a_base_url(bool value) { m_data->cannot_be_a_base_url = value; }
void append_path(StringView);
// Makes the path end in a slash by pushing one empty segment onto the path list.
void append_slash()
{
// NOTE: To indicate that we want to end the path with a slash, we have to append an empty path segment.
m_paths.append(String {});
// Non-const access to m_data: CopyOnWrite clones the data first if it is currently shared.
m_data->paths.append(String {});
}
ByteString serialize_path(ApplyPercentDecoding = ApplyPercentDecoding::Yes) const;
@ -145,49 +173,74 @@ public:
URL complete_url(StringView) const;
bool operator==(URL const& other) const { return equals(other, ExcludeFragment::No); }
// Equality with fragments taken into account (ExcludeFragment::No).
//
// URLs that hold the very same data object are equal by construction, so that case is
// answered by a pointer comparison; only otherwise is equals() consulted.
// NOTE: The pointer shortcut answers true even when the shared data is flagged invalid,
// whereas equals() answers false for two distinct objects if either one is invalid.
[[nodiscard]] bool operator==(URL const& other) const
{
    bool const shares_data = m_data.ptr() == other.m_data.ptr();
    return shares_data || equals(other, ExcludeFragment::No);
}
// The username and password strings exactly as stored, returned by const reference.
// NOTE(review): presumably these are the still-encoded forms, as opposed to username()/password() - confirm.
String const& raw_username() const { return m_username; }
String const& raw_password() const { return m_password; }
String const& raw_username() const { return m_data->username; }
String const& raw_password() const { return m_data->password; }
// Getter and setter for the optional blob URL entry. The setter takes its argument by value
// and moves it into place; passing an empty Optional clears the entry.
Optional<BlobURLEntry> const& blob_url_entry() const { return m_blob_url_entry; }
void set_blob_url_entry(Optional<BlobURLEntry> entry) { m_blob_url_entry = move(entry); }
Optional<BlobURLEntry> const& blob_url_entry() const { return m_data->blob_url_entry; }
void set_blob_url_entry(Optional<BlobURLEntry> entry) { m_data->blob_url_entry = move(entry); }
private:
bool compute_validity() const;
bool m_valid { false };
// Backing storage for the fields of a URL record. It is reference-counted (RefCounted<Data>)
// and is held by the enclosing class through a CopyOnWrite<Data> member.
struct Data : public RefCounted<Data> {
// Allocates a fresh Data and copies each field into it, one assignment per field.
// CopyOnWrite::mutable_value() calls this when the instance it holds is shared.
// NOTE: A field added to this struct must also be copied here; otherwise clones would carry
// that field's default value instead of the current one.
NonnullRefPtr<Data> clone()
{
auto clone = adopt_ref(*new Data);
clone->valid = valid;
clone->scheme = scheme;
clone->username = username;
clone->password = password;
clone->host = host;
clone->port = port;
clone->paths = paths;
clone->query = query;
clone->fragment = fragment;
clone->cannot_be_a_base_url = cannot_be_a_base_url;
clone->blob_url_entry = blob_url_entry;
return clone;
}
// A URL's scheme is an ASCII string that identifies the type of URL and can be used to dispatch a URL for further processing after parsing. It is initially the empty string.
String m_scheme;
// Validity flag; false until set otherwise.
bool valid { false };
// A URL's username is an ASCII string identifying a username. It is initially the empty string.
String m_username;
// A URL's scheme is an ASCII string that identifies the type of URL and can be used to dispatch a URL for further processing after parsing. It is initially the empty string.
String scheme;
// A URL's password is an ASCII string identifying a password. It is initially the empty string.
String m_password;
// A URL's username is an ASCII string identifying a username. It is initially the empty string.
String username;
// A URL's host is null or a host. It is initially null.
Host m_host;
// A URL's password is an ASCII string identifying a password. It is initially the empty string.
String password;
// A URL's port is either null or a 16-bit unsigned integer that identifies a networking port. It is initially null.
Optional<u16> m_port;
// A URL's host is null or a host. It is initially null.
Host host;
// A URL's path is either a URL path segment or a list of zero or more URL path segments, usually identifying a location. It is initially « ».
// A URL path segment is an ASCII string. It commonly refers to a directory or a file, but has no predefined meaning.
Vector<String> m_paths;
// A URL's port is either null or a 16-bit unsigned integer that identifies a networking port. It is initially null.
Optional<u16> port;
// A URL's query is either null or an ASCII string. It is initially null.
Optional<String> m_query;
// A URL's path is either a URL path segment or a list of zero or more URL path segments, usually identifying a location. It is initially « ».
// A URL path segment is an ASCII string. It commonly refers to a directory or a file, but has no predefined meaning.
Vector<String> paths;
// A URL's fragment is either null or an ASCII string that can be used for further processing on the resource the URL's other components identify. It is initially null.
Optional<String> m_fragment;
// A URL's query is either null or an ASCII string. It is initially null.
Optional<String> query;
bool m_cannot_be_a_base_url { false };
// A URL's fragment is either null or an ASCII string that can be used for further processing on the resource the URL's other components identify. It is initially null.
Optional<String> fragment;
// https://url.spec.whatwg.org/#concept-url-blob-entry
// A URL also has an associated blob URL entry that is either null or a blob URL entry. It is initially null.
Optional<BlobURLEntry> m_blob_url_entry;
// Cannot-be-a-base-URL flag; false by default.
bool cannot_be_a_base_url { false };
// https://url.spec.whatwg.org/#concept-url-blob-entry
// A URL also has an associated blob URL entry that is either null or a blob URL entry. It is initially null.
Optional<BlobURLEntry> blob_url_entry;
};
CopyOnWrite<Data> m_data;
};
URL create_with_url_or_path(ByteString const&);