Procházet zdrojové kódy

LibRegex: In non-Unicode mode, parse \u{4} as a repetition pattern

Timothy Flynn před 4 roky
rodič
revize
a9716ad44e

+ 2 - 0
Tests/LibRegex/Regex.cpp

@@ -526,6 +526,7 @@ TEST_CASE(ECMA262_parse)
         { "\\uxxxx"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
         { "\\ud83d"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
         { "\\ud83d\\uxxxx"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
+        { "\\u{0}"sv },
         { "\\u{0}"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
         { "\\u{10ffff}"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
         { "\\u{10ffff"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
@@ -635,6 +636,7 @@ TEST_CASE(ECMA262_match)
         { "(a{3}){2}"sv, "aaaabaa"sv, false },
         { "(a{4}){2}"sv, "aaaaaaaa"sv },
         { "(a{4}){2}"sv, "aaaaaabaa"sv, false },
+        { "\\u{4}"sv, "uuuu" },
         // ECMA262, B.1.4. Regular Expression Pattern extensions for browsers
         { "{"sv, "{"sv, true, ECMAScriptFlags::BrowserExtended },
         { "\\5"sv, "\5"sv, true, ECMAScriptFlags::BrowserExtended },

+ 6 - 4
Userland/Libraries/LibRegex/RegexParser.cpp

@@ -1489,13 +1489,15 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini
 
     if (try_skip("u")) {
         if (match(TokenType::LeftCurly)) {
-            consume();
-
             if (!unicode) {
-                // FIXME: In non-Unicode mode, this should be parsed as a repetition symbol (repeating the 'u').
-                TODO();
+                // In non-Unicode mode, "\u{N}" is not a code point escape: emit a literal 'u' and leave the '{' unconsumed so that "{N}" is subsequently parsed as a repetition of that 'u'.
+                match_length_minimum += 1;
+                stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)'u' } });
+                return true;
             }
 
+            consume();
+
             auto code_point = read_digits(ReadDigitsInitialZeroState::Allow, true, 6);
             if (code_point.has_value() && is_unicode(*code_point) && match(TokenType::RightCurly)) {
                 consume();