vor 3 Jahren · 154ed3994c
--- a/Tests/LibRegex/Regex.cpp
+++ b/Tests/LibRegex/Regex.cpp
@@ -593,6 +593,12 @@ TEST_CASE(ECMA262_parse)
 
				         { "a{9007199254740992,9007199254740992}"sv, regex::Error::InvalidBraceContent },
			
 
				         { "(?<a>a)(?<a>b)"sv, regex::Error::DuplicateNamedCapture },
			
 
				         { "(?<a>a)(?<b>b)(?<a>c)"sv, regex::Error::DuplicateNamedCapture },
			
 
				+        { "(?<1a>a)"sv, regex::Error::InvalidNameForCaptureGroup },
			
 
				+        { "(?<\\a>a)"sv, regex::Error::InvalidNameForCaptureGroup },
			
 
				+        { "(?<\ta>a)"sv, regex::Error::InvalidNameForCaptureGroup },
			
 
				+        { "(?<$$_$$>a)"sv },
			
 
				+        { "(?<ÿ>a)"sv },
			
 
				+        { "(?<𝓑𝓻𝓸𝔀𝓷>a)"sv },
			
 
				     };
			
 
				 
			
 
				     for (auto& test : tests) {
			
--- a/Userland/Libraries/LibRegex/RegexParser.cpp
+++ b/Userland/Libraries/LibRegex/RegexParser.cpp
@@ -2036,29 +2036,109 @@ bool ECMA262Parser::parse_unicode_property_escape(PropertyEscape& property, bool
 
				 
			
 
				 FlyString ECMA262Parser::read_capture_group_specifier(bool take_starting_angle_bracket)
			
 
				 {
			
 
				+    static auto id_start_category = Unicode::property_from_string("ID_Start"sv);
			
 
				+    static auto id_continue_category = Unicode::property_from_string("ID_Continue"sv);
			
 
				+    static constexpr const u32 REPLACEMENT_CHARACTER = 0xFFFD;
			
 
				+    constexpr const u32 ZERO_WIDTH_NON_JOINER { 0x200C };
			
 
				+    constexpr const u32 ZERO_WIDTH_JOINER { 0x200D };
			
 
				+
			
 
				     if (take_starting_angle_bracket && !consume("<"))
			
 
				         return {};
			
 
				 
			
 
				     StringBuilder builder;
			
 
				+
			
 
				+    auto consume_code_point = [&] {
			
 
				+        Utf8View utf_8_view { m_parser_state.lexer.source().substring_view(m_parser_state.lexer.tell() - 1) };
			
 
				+        if (utf_8_view.is_empty())
			
 
				+            return REPLACEMENT_CHARACTER;
			
 
				+        u32 code_point = *utf_8_view.begin();
			
 
				+        auto characters = utf_8_view.byte_offset_of(1);
			
 
				+
			
 
				+        while (characters-- > 0)
			
 
				+            consume();
			
 
				+
			
 
				+        return code_point;
			
 
				+    };
			
 
				+
			
 
				+    {
			
 
				+        // The first character is limited to: https://tc39.es/ecma262/#prod-RegExpIdentifierStart
			
 
				+        //  RegExpIdentifierStart[UnicodeMode] ::
			
 
				+        //      IdentifierStartChar
			
 
				+        //      \ RegExpUnicodeEscapeSequence[+UnicodeMode]
			
 
				+        //      [~UnicodeMode] UnicodeLeadSurrogate UnicodeTrailSurrogate
			
 
				+
			
 
				+        auto code_point = consume_code_point();
			
 
				+
			
 
				+        if (code_point == '\\' && match('u')) {
			
 
				+            consume();
			
 
				+
			
 
				+            if (auto maybe_code_point = consume_escaped_code_point(true); maybe_code_point.has_value()) {
			
 
				+                code_point = *maybe_code_point;
			
 
				+            } else {
			
 
				+                set_error(Error::InvalidNameForCaptureGroup);
			
 
				+                return {};
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+        if (is_ascii(code_point)) {
			
 
				+            // The only ASCII code points with the ID_Start property are the letters; '$' and '_' are additionally allowed by IdentifierStartChar.
			
 
				+            if (!is_ascii_alpha(code_point) && code_point != '$' && code_point != '_') {
			
 
				+                set_error(Error::InvalidNameForCaptureGroup);
			
 
				+                return {};
			
 
				+            }
			
 
				+        } else if (id_start_category.has_value() && !Unicode::code_point_has_property(code_point, *id_start_category)) {
			
 
				+            set_error(Error::InvalidNameForCaptureGroup);
			
 
				+            return {};
			
 
				+        }
			
 
				+        builder.append_code_point(code_point);
			
 
				+    }
			
 
				+
			
 
				+    bool hit_end = false;
			
 
				+
			
 
				+    // Any following characters are limited to:
			
 
				+    //  RegExpIdentifierPart[UnicodeMode] ::
			
 
				+    //      IdentifierPartChar
			
 
				+    //      \ RegExpUnicodeEscapeSequence[+UnicodeMode]
			
 
				+    //      [~UnicodeMode] UnicodeLeadSurrogate UnicodeTrailSurrogate
			
 
				+
			
 
				     while (match(TokenType::Char) || match(TokenType::Dollar) || match(TokenType::LeftCurly) || match(TokenType::RightCurly)) {
			
 
				-        auto c = m_parser_state.current_token.value();
			
 
				-        if (c == ">")
			
 
				+        auto code_point = consume_code_point();
			
 
				+
			
 
				+        if (code_point == '>') {
			
 
				+            hit_end = true;
			
 
				             break;
			
 
				+        }
			
 
				 
			
 
				-        if (try_skip("\\u"sv)) {
			
 
				-            if (auto code_point = consume_escaped_code_point(true); code_point.has_value()) {
			
 
				-                builder.append_code_point(*code_point);
			
 
				+        if (code_point == '\\') {
			
 
				+            if (!try_skip("u")) {
			
 
				+                set_error(Error::InvalidNameForCaptureGroup);
			
 
				+                return {};
			
 
				+            }
			
 
				+            if (auto maybe_code_point = consume_escaped_code_point(true); maybe_code_point.has_value()) {
			
 
				+                code_point = *maybe_code_point;
			
 
				             } else {
			
 
				                 set_error(Error::InvalidNameForCaptureGroup);
			
 
				                 return {};
			
 
				             }
			
 
				-        } else {
			
 
				-            builder.append(consume().value());
			
 
				         }
			
 
				+
			
 
				+        if (is_ascii(code_point)) {
			
 
				+            // The only ASCII code points with the ID_Continue property are the letters, digits and '_'; '$' is additionally allowed by IdentifierPartChar.
			
 
				+            if (!is_ascii_alphanumeric(code_point) && code_point != '$' && code_point != '_') {
			
 
				+                set_error(Error::InvalidNameForCaptureGroup);
			
 
				+                return {};
			
 
				+            }
			
 
				+        } else if (code_point != ZERO_WIDTH_JOINER && code_point != ZERO_WIDTH_NON_JOINER) {
			
 
				+            if (id_continue_category.has_value() && !Unicode::code_point_has_property(code_point, *id_continue_category)) {
			
 
				+                set_error(Error::InvalidNameForCaptureGroup);
			
 
				+                return {};
			
 
				+            }
			
 
				+        }
			
 
				+        builder.append_code_point(code_point);
			
 
				     }
			
 
				 
			
 
				     FlyString name = builder.build();
			
 
				-    if (!consume(">") || name.is_empty())
			
 
				+    if (!hit_end || name.is_empty())
			
 
				         set_error(Error::InvalidNameForCaptureGroup);
			
 
				 
			
 
				     return name;