LibRegex: Disallow invalid interval quantifiers in Unicode mode

Fixes all remaining 'built-ins/RegExp/property-escapes' test262 tests.
This commit is contained in:
Timothy Flynn 2021-08-10 16:35:45 -04:00 committed by Andreas Kling
parent a98d3a1a85
commit df14d11a11
Notes: sideshowbarker 2024-07-18 07:06:40 +09:00
3 changed files with 63 additions and 45 deletions

View file

@ -522,6 +522,9 @@ TEST_CASE(ECMA262_parse)
{ "\\p{hello friends}", regex::Error::InvalidNameForProperty, ECMAScriptFlags::Unicode },
{ "\\p{Prepended_Concatenation_Mark}", regex::Error::InvalidNameForProperty, ECMAScriptFlags::Unicode },
{ "\\p{ASCII}", regex::Error::NoError, ECMAScriptFlags::Unicode },
{ "\\\\p{1}", regex::Error::NoError, ECMAScriptFlags::Unicode },
{ "\\\\p{AsCiI}", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
{ "\\\\p{ASCII}", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
};
for (auto& test : tests) {

View file

@ -1121,7 +1121,7 @@ Optional<unsigned> ECMA262Parser::read_digits(ECMA262Parser::ReadDigitsInitialZe
return str.to_uint();
}
bool ECMA262Parser::parse_quantifier(ByteCode& stack, size_t& match_length_minimum, bool, bool)
bool ECMA262Parser::parse_quantifier(ByteCode& stack, size_t& match_length_minimum, bool unicode, bool)
{
enum class Repetition {
OneOrMore,
@ -1144,52 +1144,13 @@ bool ECMA262Parser::parse_quantifier(ByteCode& stack, size_t& match_length_minim
consume();
repetition_mark = Repetition::Optional;
} else if (match(TokenType::LeftCurly)) {
consume();
auto chars_consumed = 1;
repetition_mark = Repetition::Explicit;
auto low_bound_string = read_digits_as_string();
chars_consumed += low_bound_string.length();
auto low_bound = low_bound_string.to_uint();
if (!low_bound.has_value()) {
if (!m_should_use_browser_extended_grammar && done())
return set_error(Error::MismatchingBrace);
back(chars_consumed + !done());
return true;
}
repeat_min = low_bound.value();
if (match(TokenType::Comma)) {
consume();
++chars_consumed;
auto high_bound_string = read_digits_as_string();
auto high_bound = high_bound_string.to_uint();
if (high_bound.has_value()) {
repeat_max = high_bound.value();
chars_consumed += high_bound_string.length();
if (!parse_interval_quantifier(repeat_min, repeat_max)) {
if (unicode) {
// Invalid interval quantifiers are disallowed in Unicode mode - they must be escaped with '\{'.
set_error(Error::InvalidPattern);
}
} else {
repeat_max = repeat_min;
}
if (!match(TokenType::RightCurly)) {
if (!m_should_use_browser_extended_grammar && done())
return set_error(Error::MismatchingBrace);
back(chars_consumed + !done());
return true;
}
consume();
++chars_consumed;
if (repeat_max.has_value()) {
if (repeat_min.value() > repeat_max.value())
set_error(Error::InvalidBraceContent);
return !has_error();
}
} else {
return true;
@ -1223,6 +1184,59 @@ bool ECMA262Parser::parse_quantifier(ByteCode& stack, size_t& match_length_minim
return true;
}
// Parses an interval quantifier of the form '{min}', '{min,}' or '{min,max}'.
// The current token must be the opening '{' (enforced with VERIFY).
//
// On success returns true, with repeat_min set to the lower bound and repeat_max set to:
//   - the same value as repeat_min for '{min}',
//   - the parsed upper bound for '{min,max}',
//   - nothing (left unassigned) for '{min,}' or when the upper bound cannot be converted.
// If min > max, Error::InvalidBraceContent is recorded but true is still returned, so callers
// need to check has_error() afterwards.
//
// If the text does not form an interval, everything consumed here is handed back via back() and
// false is returned. The exception is running out of input outside the browser-extended grammar,
// in which case the result of set_error(Error::MismatchingBrace) is returned instead.
bool ECMA262Parser::parse_interval_quantifier(Optional<size_t>& repeat_min, Optional<size_t>& repeat_max)
{
    VERIFY(match(TokenType::LeftCurly));
    consume();

    // Number of characters consumed so far, so that a failed parse can be undone with back().
    size_t chars_consumed = 1;

    auto low_bound_string = read_digits_as_string();
    chars_consumed += low_bound_string.length();

    auto low_bound = low_bound_string.to_uint();
    if (!low_bound.has_value()) {
        if (!m_should_use_browser_extended_grammar && done())
            return set_error(Error::MismatchingBrace);

        // NOTE(review): the extra `!done()` presumably accounts for the current lookahead token - confirm against back().
        back(chars_consumed + !done());
        return false;
    }

    repeat_min = low_bound.value();

    if (match(TokenType::Comma)) {
        consume();
        ++chars_consumed;

        auto high_bound_string = read_digits_as_string();
        // Count the digits unconditionally: they have been consumed even when to_uint() below
        // yields no value, and back() must be able to rewind over them.
        chars_consumed += high_bound_string.length();

        auto high_bound = high_bound_string.to_uint();
        if (high_bound.has_value())
            repeat_max = high_bound.value();
    } else {
        repeat_max = repeat_min;
    }

    if (!match(TokenType::RightCurly)) {
        if (!m_should_use_browser_extended_grammar && done())
            return set_error(Error::MismatchingBrace);

        back(chars_consumed + !done());
        return false;
    }

    consume();

    if (repeat_max.has_value() && repeat_min.value() > repeat_max.value())
        set_error(Error::InvalidBraceContent);

    return true;
}
bool ECMA262Parser::parse_atom(ByteCode& stack, size_t& match_length_minimum, bool unicode, bool named)
{
if (match(TokenType::EscapeSequence)) {

View file

@ -228,6 +228,7 @@ private:
// Parsing helpers, each reporting success through its bool return value.
// NOTE(review): judging by the out-of-line definitions of parse_quantifier and parse_atom, the
// unnamed ByteCode& / size_t& parameters are the bytecode `stack` and `match_length_minimum`;
// confirm that the remaining helpers follow the same convention.
bool parse_assertion(ByteCode&, size_t&, bool unicode, bool named);
bool parse_atom(ByteCode&, size_t&, bool unicode, bool named);
bool parse_quantifier(ByteCode&, size_t&, bool unicode, bool named);
// Parses a '{min}', '{min,}' or '{min,max}' interval starting at the current '{' token and stores
// the parsed bounds in repeat_min / repeat_max (repeat_max is not assigned for the '{min,}' form).
bool parse_interval_quantifier(Optional<size_t>& repeat_min, Optional<size_t>& repeat_max);
bool parse_atom_escape(ByteCode&, size_t&, bool unicode, bool named);
bool parse_character_class(ByteCode&, size_t&, bool unicode, bool named);
bool parse_capture_group(ByteCode&, size_t&, bool unicode, bool named);