From 2a1fc96650607a923a3aac4f3245446af3e138e5 Mon Sep 17 00:00:00 2001 From: Tim Ledbetter Date: Sat, 28 Oct 2023 06:26:20 +0100 Subject: [PATCH] AK: Avoid unnecessary String allocations for URL username and password Previously, `URLParser` was constructing a new String for every character of the URL's username and password. This change improves performance by eliminating those unnecessary String allocations. A URL with a 100,000 character password can now be parsed in ~30ms vs ~8 seconds previously on my machine. --- AK/URLParser.cpp | 23 +++++++++++++++-------- Tests/AK/TestURL.cpp | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 8 deletions(-) diff --git a/AK/URLParser.cpp b/AK/URLParser.cpp index 89e656d16eb..f16e3ea2bab 100644 --- a/AK/URLParser.cpp +++ b/AK/URLParser.cpp @@ -1124,7 +1124,8 @@ URL URLParser::basic_parse(StringView raw_input, Optional const& base_url, // 3. Set atSignSeen to true. at_sign_seen = true; - StringBuilder builder; + StringBuilder username_builder; + StringBuilder password_builder; // 4. For each codePoint in buffer: for (auto c : Utf8View(buffer.string_view())) { @@ -1137,21 +1138,27 @@ URL URLParser::basic_parse(StringView raw_input, Optional const& base_url, // 2. Let encodedCodePoints be the result of running UTF-8 percent-encode codePoint using the userinfo percent-encode set. // NOTE: This is done inside of step 3 and 4 implementation - builder.clear(); // 3. If passwordTokenSeen is true, then append encodedCodePoints to url’s password. if (password_token_seen) { - builder.append(url->m_password); - URL::append_percent_encoded_if_necessary(builder, c, URL::PercentEncodeSet::Userinfo); - url->m_password = builder.to_string().release_value_but_fixme_should_propagate_errors(); + if (password_builder.is_empty()) + password_builder.append(url->m_password); + + URL::append_percent_encoded_if_necessary(password_builder, c, URL::PercentEncodeSet::Userinfo); } // 4. Otherwise, append encodedCodePoints to url’s username. else { - builder.append(url->m_username); - URL::append_percent_encoded_if_necessary(builder, c, URL::PercentEncodeSet::Userinfo); - url->m_username = builder.to_string().release_value_but_fixme_should_propagate_errors(); + if (username_builder.is_empty()) + username_builder.append(url->m_username); + + URL::append_percent_encoded_if_necessary(username_builder, c, URL::PercentEncodeSet::Userinfo); } } + if (username_builder.string_view().length() > url->m_username.bytes().size()) + url->m_username = username_builder.to_string().release_value_but_fixme_should_propagate_errors(); + if (password_builder.string_view().length() > url->m_password.bytes().size()) + url->m_password = password_builder.to_string().release_value_but_fixme_should_propagate_errors(); + // 5. Set buffer to the empty string. buffer.clear(); diff --git a/Tests/AK/TestURL.cpp b/Tests/AK/TestURL.cpp index 04edd8a0a00..8589c896acd 100644 --- a/Tests/AK/TestURL.cpp +++ b/Tests/AK/TestURL.cpp @@ -542,3 +542,44 @@ TEST_CASE(ipv4_address) EXPECT(!url.is_valid()); } } + +TEST_CASE(username_and_password) +{ + { + constexpr auto url_with_username_and_password = "http://username:password@test.com/index.html"sv; + URL url(url_with_username_and_password); + EXPECT(url.is_valid()); + EXPECT_EQ(MUST(url.serialized_host()), "test.com"sv); + EXPECT_EQ(MUST(url.username()), "username"sv); + EXPECT_EQ(MUST(url.password()), "password"sv); + } + + { + constexpr auto url_with_percent_encoded_credentials = "http://username%21%24%25:password%21%24%25@test.com/index.html"sv; + URL url(url_with_percent_encoded_credentials); + EXPECT(url.is_valid()); + EXPECT_EQ(MUST(url.serialized_host()), "test.com"sv); + EXPECT_EQ(MUST(url.username()), "username!$%"sv); + EXPECT_EQ(MUST(url.password()), "password!$%"sv); + } + + { + auto const& username = MUST(String::repeated('a', 50000)); + auto const& url_with_long_username = MUST(String::formatted("http://{}:@test.com/index.html", username)); + URL url(url_with_long_username); + EXPECT(url.is_valid()); + EXPECT_EQ(MUST(url.serialized_host()), "test.com"sv); + EXPECT_EQ(MUST(url.username()), username); + EXPECT(MUST(url.password()).is_empty()); + } + + { + auto const& password = MUST(String::repeated('a', 50000)); + auto const& url_with_long_password = MUST(String::formatted("http://:{}@test.com/index.html", password)); + URL url(url_with_long_password); + EXPECT(url.is_valid()); + EXPECT_EQ(MUST(url.serialized_host()), "test.com"sv); + EXPECT(MUST(url.username()).is_empty()); + EXPECT_EQ(MUST(url.password()), password); + } +}