LibRegex: In non-Unicode mode, parse \u{4} as a repetition pattern

This commit is contained in:
Timothy Flynn 2021-08-15 10:29:03 -04:00 committed by Ali Mohammad Pur
parent e85c62e3a0
commit a9716ad44e
Notes: sideshowbarker 2024-07-18 05:34:07 +09:00
2 changed files with 8 additions and 4 deletions

View file

@ -526,6 +526,7 @@ TEST_CASE(ECMA262_parse)
{ "\\uxxxx"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
{ "\\ud83d"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
{ "\\ud83d\\uxxxx"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
{ "\\u{0}"sv },
{ "\\u{0}"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
{ "\\u{10ffff}"sv, regex::Error::NoError, ECMAScriptFlags::Unicode },
{ "\\u{10ffff"sv, regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
@ -635,6 +636,7 @@ TEST_CASE(ECMA262_match)
{ "(a{3}){2}"sv, "aaaabaa"sv, false },
{ "(a{4}){2}"sv, "aaaaaaaa"sv },
{ "(a{4}){2}"sv, "aaaaaabaa"sv, false },
{ "\\u{4}"sv, "uuuu" },
// ECMA262, B.1.4. Regular Expression Pattern extensions for browsers
{ "{"sv, "{"sv, true, ECMAScriptFlags::BrowserExtended },
{ "\\5"sv, "\5"sv, true, ECMAScriptFlags::BrowserExtended },

View file

@ -1489,13 +1489,15 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini
if (try_skip("u")) {
if (match(TokenType::LeftCurly)) {
consume();
if (!unicode) {
// FIXME: In non-Unicode mode, this should be parsed as a repetition symbol (repeating the 'u').
TODO();
// In non-Unicode mode, this should be parsed as a repetition symbol (repeating the 'u').
match_length_minimum += 1;
stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)'u' } });
return true;
}
consume();
auto code_point = read_digits(ReadDigitsInitialZeroState::Allow, true, 6);
if (code_point.has_value() && is_unicode(*code_point) && match(TokenType::RightCurly)) {
consume();