LibURL: Use UTF-8 for percent encoding URL fragments

This commit is contained in:
Gingeh 2024-10-20 19:42:10 +11:00 committed by Andrew Kaster
parent 8e342e3e23
commit c10cb8ac8d
Notes: github-actions[bot] 2024-10-23 17:31:54 +00:00
2 changed files with 39 additions and 10 deletions

View file

@ -341,11 +341,38 @@ TEST_CASE(unicode)
// Checks how a non-ASCII code point (the check mark, U+2713) in the query
// component is percent-encoded, both without an explicit encoding argument and
// with "shift_jis" passed as the encoding.
TEST_CASE(query_with_non_ascii)
{
    // No encoding argument: the query is expected to contain the percent-encoded
    // UTF-8 bytes of the check mark (E2 9C 93).
    {
        URL::URL url = URL::Parser::basic_parse("http://example.com/?utf8=✓"sv);
        EXPECT(url.is_valid());
        EXPECT_EQ(url.serialize_path(), "/"sv);
        EXPECT_EQ(url.query(), "utf8=%E2%9C%93");
        EXPECT(!url.fragment().has_value());
    }
    // "shift_jis" passed as the encoding: the expected query is the
    // percent-encoded form of the numeric character reference "&#10003;"
    // (10003 == 0x2713).
    // NOTE(review): presumably because the check mark has no Shift_JIS mapping — confirm.
    {
        URL::URL url = URL::Parser::basic_parse("http://example.com/?shift_jis=✓"sv, {}, nullptr, {}, "shift_jis"sv);
        EXPECT(url.is_valid());
        EXPECT_EQ(url.serialize_path(), "/"sv);
        EXPECT_EQ(url.query(), "shift_jis=%26%2310003%3B");
        EXPECT(!url.fragment().has_value());
    }
}
// Checks that a non-ASCII code point (the check mark) in the fragment component
// yields the same percent-encoded UTF-8 bytes (E2 9C 93) whether or not
// "shift_jis" is passed as the encoding argument.
TEST_CASE(fragment_with_non_ascii)
{
    // Parsed without an explicit encoding argument.
    auto const parsed_without_encoding = URL::Parser::basic_parse("http://example.com/#✓"sv);
    EXPECT(parsed_without_encoding.is_valid());
    EXPECT_EQ(parsed_without_encoding.serialize_path(), "/"sv);
    EXPECT(!parsed_without_encoding.query().has_value());
    EXPECT_EQ(parsed_without_encoding.fragment(), "%E2%9C%93");

    // Parsed with "shift_jis" as the encoding; the expected fragment is unchanged.
    auto const parsed_with_shift_jis = URL::Parser::basic_parse("http://example.com/#✓"sv, {}, nullptr, {}, "shift_jis"sv);
    EXPECT(parsed_with_shift_jis.is_valid());
    EXPECT_EQ(parsed_with_shift_jis.serialize_path(), "/"sv);
    EXPECT(!parsed_with_shift_jis.query().has_value());
    EXPECT_EQ(parsed_with_shift_jis.fragment(), "%E2%9C%93");
}
TEST_CASE(complete_file_url_with_base)

View file

@ -1688,10 +1688,12 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, URL
break;
// -> query state, https://url.spec.whatwg.org/#query-state
case State::Query:
// FIXME: 1. If encoding is not UTF-8 and one of the following is true:
// 1. If encoding is not UTF-8 and one of the following is true:
// * url is not special
// * url's scheme is "ws" or "wss"
// then set encoding to UTF-8.
if (!url->is_special() || url->m_data->scheme == "ws" || url->m_data->scheme == "wss")
encoder = TextCodec::encoder_for("utf-8"sv);
// 2. If one of the following is true:
// * state override is not given and c is U+0023 (#)
@ -1746,7 +1748,7 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, URL
// NOTE: The percent-encode is done on EOF on the entire buffer.
buffer.append_code_point(code_point);
} else {
url->m_data->fragment = percent_encode_after_encoding(*encoder, buffer.string_view(), PercentEncodeSet::Fragment);
url->m_data->fragment = percent_encode_after_encoding(*TextCodec::encoder_for("utf-8"sv), buffer.string_view(), PercentEncodeSet::Fragment);
buffer.clear();
}
break;