AK: Make URL percent encoding faster by exploiting ASCII knowledge
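Once we know that a code point must be a valid ASCII character (i.e. it is not in the C0 control percent-encode set, so it lies in the range 0x20..0x7E), we now cast it to `char` and call StringView::contains() with that `char`, avoiding the expensive generic StringView::contains(u32 code_point) checks. This dramatically speeds up URL parsing.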
This commit is contained in:
parent
0ad4be3d78
commit
6c51ba27a2
Notes:
sideshowbarker
2024-07-16 22:34:39 +09:00
Author: https://github.com/awesomekling Commit: https://github.com/SerenityOS/serenity/commit/6c51ba27a2 Pull-request: https://github.com/SerenityOS/serenity/pull/22496 Reviewed-by: https://github.com/LucasChollet
1 changed file with 8 additions and 6 deletions
14
AK/URL.cpp
14
AK/URL.cpp
|
@ -510,23 +510,25 @@ void URL::append_percent_encoded(StringBuilder& builder, u32 code_point)
|
|||
// https://url.spec.whatwg.org/#c0-control-percent-encode-set
|
||||
bool URL::code_point_is_in_percent_encode_set(u32 code_point, URL::PercentEncodeSet set)
|
||||
{
|
||||
// NOTE: Once we've checked for presence in the C0Control set, we know that the code point is
|
||||
// a valid ASCII character in the range 0x20..0x7E, so we can safely cast it to char.
|
||||
switch (set) {
|
||||
case URL::PercentEncodeSet::C0Control:
|
||||
return code_point < 0x20 || code_point > 0x7E;
|
||||
case URL::PercentEncodeSet::Fragment:
|
||||
return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::C0Control) || " \"<>`"sv.contains(code_point);
|
||||
return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::C0Control) || " \"<>`"sv.contains(static_cast<char>(code_point));
|
||||
case URL::PercentEncodeSet::Query:
|
||||
return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::C0Control) || " \"#<>"sv.contains(code_point);
|
||||
return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::C0Control) || " \"#<>"sv.contains(static_cast<char>(code_point));
|
||||
case URL::PercentEncodeSet::SpecialQuery:
|
||||
return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::Query) || code_point == '\'';
|
||||
case URL::PercentEncodeSet::Path:
|
||||
return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::Query) || "?`{}"sv.contains(code_point);
|
||||
return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::Query) || "?`{}"sv.contains(static_cast<char>(code_point));
|
||||
case URL::PercentEncodeSet::Userinfo:
|
||||
return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::Path) || "/:;=@[\\]^|"sv.contains(code_point);
|
||||
return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::Path) || "/:;=@[\\]^|"sv.contains(static_cast<char>(code_point));
|
||||
case URL::PercentEncodeSet::Component:
|
||||
return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::Userinfo) || "$%&+,"sv.contains(code_point);
|
||||
return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::Userinfo) || "$%&+,"sv.contains(static_cast<char>(code_point));
|
||||
case URL::PercentEncodeSet::ApplicationXWWWFormUrlencoded:
|
||||
return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::Component) || "!'()~"sv.contains(code_point);
|
||||
return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::Component) || "!'()~"sv.contains(static_cast<char>(code_point));
|
||||
case URL::PercentEncodeSet::EncodeURI:
|
||||
// NOTE: This is the same percent encode set that JS encodeURI() uses.
|
||||
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURI
|
||||
|
|
Loading…
Add table
Reference in a new issue