vor 3 Jahren · ef7d80ced2
--- a/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp
+++ b/Userland/Libraries/LibWeb/CSS/Parser/Parser.cpp
@@ -9,6 +9,7 @@
 
				 
			
 
				 #include <AK/CharacterTypes.h>
			
 
				 #include <AK/Debug.h>
			
 
				+#include <AK/GenericLexer.h>
			
 
				 #include <AK/NonnullRefPtrVector.h>
			
 
				 #include <AK/SourceLocation.h>
			
 
				 #include <LibWeb/CSS/CSSFontFaceRule.h>
			
@@ -2707,6 +2708,266 @@ Optional<Ratio> Parser::parse_ratio(TokenStream<ComponentValue>& tokens)
 
				     return Ratio { static_cast<float>(first_number.token().number_value()) };
			
 
				 }
			
 
				 
			
 
				+// https://www.w3.org/TR/css-syntax-3/#urange-syntax
			
 
				+Optional<UnicodeRange> Parser::parse_unicode_range(TokenStream<ComponentValue>& tokens)
			
 
				+{
			
 
				+    tokens.skip_whitespace();
			
 
				+    auto position = tokens.position();
			
 
				+
			
 
				+    auto error = [&]() -> Optional<UnicodeRange> {
			
 
				+        tokens.rewind_to_position(position);
			
 
				+        return {};
			
 
				+    };
			
 
				+
			
 
				+    // <urange> =
			
 
				+    //  u '+' <ident-token> '?'* |
			
 
				+    //  u <dimension-token> '?'* |
			
 
				+    //  u <number-token> '?'* |
			
 
				+    //  u <number-token> <dimension-token> |
			
 
				+    //  u <number-token> <number-token> |
			
 
				+    //  u '+' '?'+
			
 
				+    // (All with no whitespace in between tokens.)
			
 
				+
			
 
				+    // NOTE: Parsing this is different from usual. We take these steps:
			
 
				+    // 1. Match the grammar above against the tokens.
			
 
				+    // 2. Convert the matching tokens back into a string using their original representation.
			
 
				+    // 3. Then, parse that string according to the spec algorithm.
			
 
				+
			
 
				+    auto is_question_mark = [](ComponentValue const& component_value) {
			
 
				+        return component_value.is(Token::Type::Delim) && component_value.token().delim() == '?';
			
 
				+    };
			
 
				+
			
 
				+    auto is_ending_token = [](ComponentValue const& component_value) {
			
 
				+        return component_value.is(Token::Type::EndOfFile)
			
 
				+            || component_value.is(Token::Type::Comma)
			
 
				+            || component_value.is(Token::Type::Semicolon)
			
 
				+            || component_value.is(Token::Type::Whitespace);
			
 
				+    };
			
 
				+
			
 
				+    // All options start with 'u'/'U'.
			
 
				+    auto& u = tokens.next_token();
			
 
				+    if (!(u.is(Token::Type::Ident) && u.token().ident().equals_ignoring_case("u"))) {
			
 
				+        dbgln_if(CSS_PARSER_DEBUG, "CSSParser: <urange> does not start with 'u'");
			
 
				+        return error();
			
 
				+    }
			
 
				+
			
 
				+    auto& second_token = tokens.next_token();
			
 
				+    auto after_second_token = tokens.position();
			
 
				+
			
 
				+    //  u '+' <ident-token> '?'* |
			
 
				+    //  u '+' '?'+
			
 
				+    if (second_token.is(Token::Type::Delim) && second_token.token().delim() == '+') {
			
 
				+        auto& third_token = tokens.next_token();
			
 
				+        if (third_token.is(Token::Type::Ident) || is_question_mark(third_token)) {
			
 
				+            while (is_question_mark(tokens.peek_token()))
			
 
				+                tokens.next_token();
			
 
				+            if (is_ending_token(tokens.peek_token()))
			
 
				+                return create_unicode_range_from_tokens(tokens, position, tokens.position());
			
 
				+        }
			
 
				+
			
 
				+        tokens.rewind_to_position(after_second_token);
			
 
				+    }
			
 
				+
			
 
				+    //  u <dimension-token> '?'*
			
 
				+    if (second_token.is(Token::Type::Dimension)) {
			
 
				+        while (is_question_mark(tokens.peek_token()))
			
 
				+            tokens.next_token();
			
 
				+        if (is_ending_token(tokens.peek_token()))
			
 
				+            return create_unicode_range_from_tokens(tokens, position, tokens.position());
			
 
				+
			
 
				+        tokens.rewind_to_position(after_second_token);
			
 
				+    }
			
 
				+
			
 
				+    //  u <number-token> '?'* |
			
 
				+    //  u <number-token> <dimension-token> |
			
 
				+    //  u <number-token> <number-token>
			
 
				+    if (second_token.is(Token::Type::Number)) {
			
 
				+        if (is_ending_token(tokens.peek_token()))
			
 
				+            return create_unicode_range_from_tokens(tokens, position, tokens.position());
			
 
				+
			
 
				+        auto& third_token = tokens.next_token();
			
 
				+        if (is_question_mark(third_token)) {
			
 
				+            while (is_question_mark(tokens.peek_token()))
			
 
				+                tokens.next_token();
			
 
				+            if (is_ending_token(tokens.peek_token()))
			
 
				+                return create_unicode_range_from_tokens(tokens, position, tokens.position());
			
 
				+        } else if (third_token.is(Token::Type::Dimension)) {
			
 
				+            if (is_ending_token(tokens.peek_token()))
			
 
				+                return create_unicode_range_from_tokens(tokens, position, tokens.position());
			
 
				+        } else if (third_token.is(Token::Type::Number)) {
			
 
				+            if (is_ending_token(tokens.peek_token()))
			
 
				+                return create_unicode_range_from_tokens(tokens, position, tokens.position());
			
 
				+        }
			
 
				+
			
 
				+        tokens.rewind_to_position(after_second_token);
			
 
				+    }
			
 
				+
			
 
				+    if constexpr (CSS_PARSER_DEBUG) {
			
 
				+        dbgln("CSSParser: Tokens did not match <urange> grammar.");
			
 
				+        tokens.dump_all_tokens();
			
 
				+    }
			
 
				+    return error();
			
 
				+}
			
 
				+
			
 
				+Optional<UnicodeRange> Parser::create_unicode_range_from_tokens(TokenStream<ComponentValue>& tokens, int start_position, int end_position)
			
 
				+{
			
 
				+    auto error = [&]() -> Optional<UnicodeRange> {
			
 
				+        tokens.rewind_to_position(start_position);
			
 
				+        return {};
			
 
				+    };
			
 
				+
			
 
				+    auto make_valid_unicode_range = [&](u32 start_value, u32 end_value) -> Optional<UnicodeRange> {
			
 
				+        // https://www.w3.org/TR/css-syntax-3/#maximum-allowed-code-point
			
 
				+        constexpr u32 maximum_allowed_code_point = 0x10FFFF;
			
 
				+
			
 
				+        // To determine what codepoints the <urange> represents:
			
 
				+        // 1. If end value is greater than the maximum allowed code point,
			
 
				+        //    the <urange> is invalid and a syntax error.
			
 
				+        if (end_value > maximum_allowed_code_point) {
			
 
				+            dbgln_if(CSS_PARSER_DEBUG, "CSSParser: Invalid <urange>: end_value ({}) > maximum ({})", end_value, maximum_allowed_code_point);
			
 
				+            return error();
			
 
				+        }
			
 
				+
			
 
				+        // 2. If start value is greater than end value, the <urange> is invalid and a syntax error.
			
 
				+        if (start_value > end_value) {
			
 
				+            dbgln_if(CSS_PARSER_DEBUG, "CSSParser: Invalid <urange>: start_value ({}) > end_value ({})", start_value, end_value);
			
 
				+            return error();
			
 
				+        }
			
 
				+
			
 
				+        // 3. Otherwise, the <urange> represents a contiguous range of codepoints from start value to end value, inclusive.
			
 
				+        return UnicodeRange { start_value, end_value };
			
 
				+    };
			
 
				+
			
 
				+    // 1. Skipping the first u token, concatenate the representations of all the tokens in the production together.
			
 
				+    //    Let this be text.
			
 
				+    StringBuilder text_builder;
			
 
				+    tokens.rewind_to_position(start_position);
			
 
				+    (void)tokens.next_token(); // Skip the 'u'
			
 
				+    while (tokens.position() != end_position) {
			
 
				+        // FIXME: This should use the "representation", that is, the original text that produced the token.
			
 
				+        //        See: https://www.w3.org/TR/css-syntax-3/#representation
			
 
				+        //        We don't have a way to get that, so instead, we're relying on Token::to_string(), and
			
 
				+        //        handling specific cases where that's not enough.
			
 
				+        auto& token = tokens.next_token();
			
 
				+        // Integers like `+34` get serialized as `34`, so manually include the `+` sign.
			
 
				+        if (token.is(Token::Type::Number) && token.token().number().is_integer_with_explicit_sign()) {
			
 
				+            auto int_value = token.token().number().integer_value();
			
 
				+            if (int_value >= 0)
			
 
				+                text_builder.append('+');
			
 
				+            text_builder.append(String::number(int_value));
			
 
				+        } else {
			
 
				+            text_builder.append(token.to_string());
			
 
				+        }
			
 
				+    }
			
 
				+    auto text = text_builder.string_view();
			
 
				+    GenericLexer lexer { text };
			
 
				+
			
 
				+    // 2. If the first character of text is U+002B PLUS SIGN, consume it.
			
 
				+    //    Otherwise, this is an invalid <urange>, and this algorithm must exit.
			
 
				+    if (lexer.next_is('+')) {
			
 
				+        lexer.consume();
			
 
				+    } else {
			
 
				+        dbgln_if(CSS_PARSER_DEBUG, "CSSParser: Second character of <urange> was not '+'; got: '{}'", lexer.consume());
			
 
				+        return error();
			
 
				+    }
			
 
				+
			
 
				+    // 3. Consume as many hex digits from text as possible.
			
 
				+    //    then consume as many U+003F QUESTION MARK (?) code points as possible.
			
 
				+    auto hex_digits = lexer.consume_while(is_ascii_hex_digit);
			
 
				+    auto question_marks = lexer.consume_while([](auto it) { return it == '?'; });
			
 
				+    //    If zero code points were consumed, or more than six code points were consumed,
			
 
				+    //    this is an invalid <urange>, and this algorithm must exit.
			
 
				+    size_t consumed_code_points = hex_digits.length() + question_marks.length();
			
 
				+    if (consumed_code_points == 0 || consumed_code_points > 6) {
			
 
				+        dbgln_if(CSS_PARSER_DEBUG, "CSSParser: <urange> start value had {} digits/?s, expected between 1 and 6.", consumed_code_points);
			
 
				+        return error();
			
 
				+    }
			
 
				+    StringView start_value_code_points { hex_digits.characters_without_null_termination(), consumed_code_points };
			
 
				+
			
 
				+    //    If any U+003F QUESTION MARK (?) code points were consumed, then:
			
 
				+    if (question_marks.length() > 0) {
			
 
				+        // 1. If there are any code points left in text, this is an invalid <urange>,
			
 
				+        //    and this algorithm must exit.
			
 
				+        if (lexer.tell_remaining() != 0) {
			
 
				+            dbgln_if(CSS_PARSER_DEBUG, "CSSParser: <urange> invalid; had {} code points left over.", lexer.tell_remaining());
			
 
				+            return error();
			
 
				+        }
			
 
				+
			
 
				+        // 2. Interpret the consumed code points as a hexadecimal number,
			
 
				+        //    with the U+003F QUESTION MARK (?) code points replaced by U+0030 DIGIT ZERO (0) code points.
			
 
				+        //    This is the start value.
			
 
				+        auto start_value_string = start_value_code_points.replace("?", "0", true);
			
 
				+        auto maybe_start_value = AK::StringUtils::convert_to_uint_from_hex<u32>(start_value_string);
			
 
				+        if (!maybe_start_value.has_value()) {
			
 
				+            dbgln_if(CSS_PARSER_DEBUG, "CSSParser: <urange> ?-converted start value did not parse as hex number.");
			
 
				+            return error();
			
 
				+        }
			
 
				+        u32 start_value = maybe_start_value.release_value();
			
 
				+
			
 
				+        // 3. Interpret the consumed code points as a hexadecimal number again,
			
 
				+        //    with the U+003F QUESTION MARK (?) code points replaced by U+0046 LATIN CAPITAL LETTER F (F) code points.
			
 
				+        //    This is the end value.
			
 
				+        auto end_value_string = start_value_code_points.replace("?", "F", true);
			
 
				+        auto maybe_end_value = AK::StringUtils::convert_to_uint_from_hex<u32>(end_value_string);
			
 
				+        if (!maybe_end_value.has_value()) {
			
 
				+            dbgln_if(CSS_PARSER_DEBUG, "CSSParser: <urange> ?-converted end value did not parse as hex number.");
			
 
				+            return error();
			
 
				+        }
			
 
				+        u32 end_value = maybe_end_value.release_value();
			
 
				+
			
 
				+        // 4. Exit this algorithm.
			
 
				+        return make_valid_unicode_range(start_value, end_value);
			
 
				+    }
			
 
				+    //   Otherwise, interpret the consumed code points as a hexadecimal number. This is the start value.
			
 
				+    auto maybe_start_value = AK::StringUtils::convert_to_uint_from_hex<u32>(start_value_code_points);
			
 
				+    if (!maybe_start_value.has_value()) {
			
 
				+        dbgln_if(CSS_PARSER_DEBUG, "CSSParser: <urange> start value did not parse as hex number.");
			
 
				+        return error();
			
 
				+    }
			
 
				+    u32 start_value = maybe_start_value.release_value();
			
 
				+
			
 
				+    // 4. If there are no code points left in text, The end value is the same as the start value.
			
 
				+    //    Exit this algorithm.
			
 
				+    if (lexer.tell_remaining() == 0)
			
 
				+        return make_valid_unicode_range(start_value, start_value);
			
 
				+
			
 
				+    // 5. If the next code point in text is U+002D HYPHEN-MINUS (-), consume it.
			
 
				+    if (lexer.next_is('-')) {
			
 
				+        lexer.consume();
			
 
				+    }
			
 
				+    //    Otherwise, this is an invalid <urange>, and this algorithm must exit.
			
 
				+    else {
			
 
				+        dbgln_if(CSS_PARSER_DEBUG, "CSSParser: <urange> start and end values not separated by '-'.");
			
 
				+        return error();
			
 
				+    }
			
 
				+
			
 
				+    // 6. Consume as many hex digits as possible from text.
			
 
				+    auto end_hex_digits = lexer.consume_while(is_ascii_hex_digit);
			
 
				+
			
 
				+    //   If zero hex digits were consumed, or more than 6 hex digits were consumed,
			
 
				+    //   this is an invalid <urange>, and this algorithm must exit.
			
 
				+    if (end_hex_digits.length() == 0 || end_hex_digits.length() > 6) {
			
 
				+        dbgln_if(CSS_PARSER_DEBUG, "CSSParser: <urange> end value had {} digits, expected between 1 and 6.", end_hex_digits.length());
			
 
				+        return error();
			
 
				+    }
			
 
				+
			
 
				+    //   If there are any code points left in text, this is an invalid <urange>, and this algorithm must exit.
			
 
				+    if (lexer.tell_remaining() != 0) {
			
 
				+        dbgln_if(CSS_PARSER_DEBUG, "CSSParser: <urange> invalid; had {} code points left over.", lexer.tell_remaining());
			
 
				+        return error();
			
 
				+    }
			
 
				+
			
 
				+    // 7. Interpret the consumed code points as a hexadecimal number. This is the end value.
			
 
				+    auto maybe_end_value = AK::StringUtils::convert_to_uint_from_hex<u32>(end_hex_digits);
			
 
				+    if (!maybe_end_value.has_value()) {
			
 
				+        dbgln_if(CSS_PARSER_DEBUG, "CSSParser: <urange> end value did not parse as hex number.");
			
 
				+        return error();
			
 
				+    }
			
 
				+    u32 end_value = maybe_end_value.release_value();
			
 
				+
			
 
				+    return make_valid_unicode_range(start_value, end_value);
			
 
				+}
			
 
				+
			
 
				 RefPtr<StyleValue> Parser::parse_dimension_value(ComponentValue const& component_value)
			
 
				 {
			
 
				     // Numbers with no units can be lengths, in two situations:
			
--- a/Userland/Libraries/LibWeb/CSS/Parser/Parser.h
+++ b/Userland/Libraries/LibWeb/CSS/Parser/Parser.h
@@ -1,6 +1,6 @@
 
				 /*
			
 
				  * Copyright (c) 2020-2021, the SerenityOS developers.
			
 
				- * Copyright (c) 2021, Sam Atkins <atkinssj@serenityos.org>
			
 
				+ * Copyright (c) 2021-2022, Sam Atkins <atkinssj@serenityos.org>
			
 
				  *
			
 
				  * SPDX-License-Identifier: BSD-2-Clause
			
 
				  */
			
@@ -26,6 +26,7 @@
 
				 #include <LibWeb/CSS/Selector.h>
			
 
				 #include <LibWeb/CSS/StyleValue.h>
			
 
				 #include <LibWeb/CSS/Supports.h>
			
 
				+#include <LibWeb/CSS/UnicodeRange.h>
			
 
				 
			
 
				 namespace Web::CSS {
			
 
				 
			
@@ -276,6 +277,8 @@ private:
 
				     Optional<Color> parse_color(ComponentValue const&);
			
 
				     Optional<Length> parse_length(ComponentValue const&);
			
 
				     Optional<Ratio> parse_ratio(TokenStream<ComponentValue>&);
			
 
				+    Optional<UnicodeRange> parse_unicode_range(TokenStream<ComponentValue>&);
			
 
				+    Optional<UnicodeRange> create_unicode_range_from_tokens(TokenStream<ComponentValue>&, int start_position, int end_position);
			
 
				 
			
 
				     enum class AllowedDataUrlType {
			
 
				         None,