8 달 전 · 4b1deb6fe1
--- a/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp
+++ b/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp
@@ -204,15 +204,15 @@ Vector<Token> Tokenizer::tokenize(StringView input, StringView encoding)
 
				 
			
 
				         auto decoded_input = MUST(decoder->to_utf8(input));
			
 
				 
			
 
				-        // OPTIMIZATION: If the input doesn't contain any CR or FF, we can skip the filtering
			
 
				-        bool const contains_cr_or_ff = [&] {
			
 
				-            for (auto byte : decoded_input.bytes()) {
			
 
				-                if (byte == '\r' || byte == '\f')
			
 
				+        // OPTIMIZATION: If the input doesn't contain any filterable characters, we can skip the filtering
			
 
				+        bool const contains_filterable = [&] {
			
 
				+            for (auto code_point : decoded_input.code_points()) {
			
 
				+                if (code_point == '\r' || code_point == '\f' || code_point == 0x00 || is_unicode_surrogate(code_point))
			
 
				                     return true;
			
 
				             }
			
 
				             return false;
			
 
				         }();
			
 
				-        if (!contains_cr_or_ff) {
			
 
				+        if (!contains_filterable) {
			
 
				             return decoded_input;
			
 
				         }
			
 
				 
			
@@ -242,7 +242,7 @@ Vector<Token> Tokenizer::tokenize(StringView input, StringView encoding)
 
				                 } else if (code_point == '\f') {
			
 
				                     builder.append('\n');
			
 
				                     // Replace any U+0000 NULL or surrogate code points in input with U+FFFD REPLACEMENT CHARACTER (�).
			
 
				-                } else if (code_point == 0x00 || (code_point >= 0xD800 && code_point <= 0xDFFF)) {
			
 
				+                } else if (code_point == 0x00 || is_unicode_surrogate(code_point)) {
			
 
				                     builder.append_code_point(REPLACEMENT_CHARACTER);
			
 
				                 } else {
			
 
				                     builder.append_code_point(code_point);
			
--- a/Tests/LibWeb/Text/expected/wpt-import/css/css-syntax/input-preprocessing.txt
+++ b/Tests/LibWeb/Text/expected/wpt-import/css/css-syntax/input-preprocessing.txt
--- a/Tests/LibWeb/Text/input/wpt-import/css/css-syntax/input-preprocessing.html
+++ b/Tests/LibWeb/Text/input/wpt-import/css/css-syntax/input-preprocessing.html
@@ -0,0 +1,46 @@
 
				+<!doctype html>
			
 
				+<title>Input Preprocessing</title>
			
 
				+<script src="../../resources/testharness.js"></script>
			
 
				+<script src="../../resources/testharnessreport.js"></script>
			
 
				+<style>
			
 
				+
			
 
				+foo { color: blue; }
			
 
				+
			
 
				+</style>
			
 
				+
			
 
				+<meta name="author" title="Tab Atkins-Bittner">
			
 
				+<link rel=help href="https://drafts.csswg.org/css-syntax/#input-preprocessing">
			
 
				+
			
 
				+<script>
			
 
				+
			
 
				+function roundtripIdent(str) { // Assigns `str` as the selector of the first style rule and returns the selector text read back, or "parse error" if the assignment did not take effect.
			
 
				+    const rule = document.styleSheets[0].cssRules[0]; // first rule of the first stylesheet in the document
			
 
				+    rule.selectorText = "original-ident"; // sentinel value, used below to detect whether the next assignment changed anything
			
 
				+    rule.selectorText = str;
			
 
				+    // Check for parse error: if the sentinel is still what reads back, assigning `str` had no effect.
			
 
				+    if(rule.selectorText == "original-ident") return "parse error";
			
 
				+    return rule.selectorText; // the selector text as read back after assigning `str`
			
 
				+}
			
 
				+function testParsing(input, expected) { // Registers one test() case asserting that roundtripIdent(input) equals `expected`.
			
 
				+    test(()=>{
			
 
				+        assert_equals(roundtripIdent(input), expected); // compare the round-tripped selector text with the expected string
			
 
				+    }, `"${input}" becomes "${expected}"`);
			
 
				+}
			
 
				+
			
 
				+/* Can't figure out how to test the newline normalization... */
			
 
				+
			
 
				+/* NULL becomes FFFD */
			
 
				+testParsing("foo\x00", "foo\ufffd");
			
 
				+testParsing("f\x00oo", "f\ufffdoo");
			
 
				+testParsing("\x00foo", "\ufffdfoo");
			
 
				+testParsing("\x00", "\ufffd");
			
 
				+testParsing("\x00\x00\x00", "\ufffd\ufffd\ufffd");
			
 
				+
			
 
				+/* surrogates become FFFD */
			
 
				+testParsing("foo\ud800", "foo\ufffd");
			
 
				+testParsing("f\ud800oo", "f\ufffdoo");
			
 
				+testParsing("\ud800foo", "\ufffdfoo");
			
 
				+testParsing("\ud800", "\ufffd");
			
 
				+testParsing("\ud800\ud800\ud800", "\ufffd\ufffd\ufffd");
			
 
				+
			
 
				+</script>