From 4b1deb6fe12b4c7f0434a415b5f203e9ebf08e06 Mon Sep 17 00:00:00 2001 From: Gingeh <39150378+Gingeh@users.noreply.github.com> Date: Wed, 20 Nov 2024 20:55:46 +1100 Subject: [PATCH] LibWeb: Don't skip filtering when CSS contains null or surrogates --- Libraries/LibWeb/CSS/Parser/Tokenizer.cpp | 12 ++--- .../css/css-syntax/input-preprocessing.txt | Bin 0 -> 423 bytes .../css/css-syntax/input-preprocessing.html | 46 ++++++++++++++++++ 3 files changed, 52 insertions(+), 6 deletions(-) create mode 100644 Tests/LibWeb/Text/expected/wpt-import/css/css-syntax/input-preprocessing.txt create mode 100644 Tests/LibWeb/Text/input/wpt-import/css/css-syntax/input-preprocessing.html diff --git a/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp b/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp index 443fc93af50..083c823d405 100644 --- a/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp +++ b/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp @@ -204,15 +204,15 @@ Vector Tokenizer::tokenize(StringView input, StringView encoding) auto decoded_input = MUST(decoder->to_utf8(input)); - // OPTIMIZATION: If the input doesn't contain any CR or FF, we can skip the filtering - bool const contains_cr_or_ff = [&] { - for (auto byte : decoded_input.bytes()) { - if (byte == '\r' || byte == '\f') + // OPTIMIZATION: If the input doesn't contain any filterable characters, we can skip the filtering + bool const contains_filterable = [&] { + for (auto code_point : decoded_input.code_points()) { + if (code_point == '\r' || code_point == '\f' || code_point == 0x00 || is_unicode_surrogate(code_point)) return true; } return false; }(); - if (!contains_cr_or_ff) { + if (!contains_filterable) { return decoded_input; } @@ -242,7 +242,7 @@ Vector Tokenizer::tokenize(StringView input, StringView encoding) } else if (code_point == '\f') { builder.append('\n'); // Replace any U+0000 NULL or surrogate code points in input with U+FFFD REPLACEMENT CHARACTER (�). - } else if (code_point == 0x00 || (code_point >= 0xD800 && code_point <= 0xDFFF)) { + } else if (code_point == 0x00 || is_unicode_surrogate(code_point)) { builder.append_code_point(REPLACEMENT_CHARACTER); } else { builder.append_code_point(code_point); diff --git a/Tests/LibWeb/Text/expected/wpt-import/css/css-syntax/input-preprocessing.txt b/Tests/LibWeb/Text/expected/wpt-import/css/css-syntax/input-preprocessing.txt new file mode 100644 index 0000000000000000000000000000000000000000..17dd1f4da87a149abc0f9bc36d5aa7bc2d6053c1 GIT binary patch literal 423 zcmaKmO$x#=6oj+QJ4MLWP2GuW5kdUHp94huDngS$U$XFU;*q?9ucX#gLC7MRZ!$Cc zXsu~(01MMNBvKSih%{B3CBP1w=m6$Ex>Ah`6_5e}@NH{|V1~h5YhoM4I>-Z4wKf*z zii6ArTa(n4_d=@^p1nm kPKqK6_}*0|$DqF&uG7i#cVp+tia4WR($SRjGbvmC0^b#oNdN!< literal 0 HcmV?d00001 diff --git a/Tests/LibWeb/Text/input/wpt-import/css/css-syntax/input-preprocessing.html b/Tests/LibWeb/Text/input/wpt-import/css/css-syntax/input-preprocessing.html new file mode 100644 index 00000000000..4db4a32d6c3 --- /dev/null +++ b/Tests/LibWeb/Text/input/wpt-import/css/css-syntax/input-preprocessing.html @@ -0,0 +1,46 @@ + +Input Preprocessing + + + + + + + +