LibWeb: Use text encoding from DOM when parsing URLs

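This passes the document's encoding from the DOM down to the URL parser,
so that the matching encoder is used instead of always assuming UTF-8.
When no encoding is given, or no encoder exists for it, the parser still
falls back to UTF-8.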
This commit is contained in:
BenJilks 2024-08-05 21:47:45 +01:00 committed by Tim Ledbetter
parent d80575a410
commit c1958437f9
Notes: github-actions[bot] 2024-08-08 16:51:10 +00:00
5 changed files with 11 additions and 8 deletions

View file

@ -806,8 +806,7 @@ ErrorOr<String> Parser::percent_encode_after_encoding(TextCodec::Encoder& encode
}
// https://url.spec.whatwg.org/#concept-basic-url-parser
// NOTE: This parser falls back to UTF-8 when no encoding is given or no encoder is available for it.
URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Optional<URL> url, Optional<State> state_override)
URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Optional<URL> url, Optional<State> state_override, Optional<StringView> encoding)
{
dbgln_if(URL_PARSER_DEBUG, "URL::Parser::basic_parse: Parsing '{}'", raw_input);
@ -855,7 +854,11 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
State state = state_override.value_or(State::SchemeStart);
// 5. Set encoding to the result of getting an output encoding from encoding.
auto encoder = TextCodec::encoder_for("utf-8"sv);
Optional<TextCodec::Encoder&> encoder = {};
if (encoding.has_value())
encoder = TextCodec::encoder_for(TextCodec::get_output_encoding(*encoding));
if (!encoder.has_value())
encoder = TextCodec::encoder_for("utf-8"sv);
VERIFY(encoder.has_value());
// 6. Let buffer be the empty string.

View file

@ -58,7 +58,7 @@ public:
}
// https://url.spec.whatwg.org/#concept-basic-url-parser
static URL basic_parse(StringView input, Optional<URL> const& base_url = {}, Optional<URL> url = {}, Optional<State> state_override = {});
static URL basic_parse(StringView input, Optional<URL> const& base_url = {}, Optional<URL> url = {}, Optional<State> state_override = {}, Optional<StringView> encoding = {});
// https://url.spec.whatwg.org/#string-percent-encode-after-encoding
static ErrorOr<String> percent_encode_after_encoding(TextCodec::Encoder&, StringView input, PercentEncodeSet percent_encode_set, bool space_as_plus = false);

View file

@ -1034,7 +1034,7 @@ URL::URL Document::parse_url(StringView url) const
auto base_url = this->base_url();
// 2. Return the result of applying the URL parser to url, with baseURL.
return DOMURL::parse(url, base_url);
return DOMURL::parse(url, base_url, Optional<StringView> { m_encoding });
}
void Document::set_needs_layout()

View file

@ -585,12 +585,12 @@ void strip_trailing_spaces_from_an_opaque_path(DOMURL& url)
}
// https://url.spec.whatwg.org/#concept-url-parser
URL::URL parse(StringView input, Optional<URL::URL> const& base_url)
URL::URL parse(StringView input, Optional<URL::URL> const& base_url, Optional<StringView> encoding)
{
// FIXME: We should probably have an extended version of URL::URL for LibWeb instead of standalone functions like this.
// 1. Let url be the result of running the basic URL parser on input with base and encoding.
auto url = URL::Parser::basic_parse(input, base_url);
auto url = URL::Parser::basic_parse(input, base_url, {}, {}, encoding);
// 2. If url is failure, return failure.
if (!url.is_valid())

View file

@ -99,6 +99,6 @@ bool host_is_domain(URL::Host const&);
void strip_trailing_spaces_from_an_opaque_path(DOMURL& url);
// https://url.spec.whatwg.org/#concept-url-parser
URL::URL parse(StringView input, Optional<URL::URL> const& base_url = {});
URL::URL parse(StringView input, Optional<URL::URL> const& base_url = {}, Optional<StringView> encoding = {});
}