LibRegex: Disallow invalid interval qualifiers in Unicode mode
Fixes all remaining 'built-ins/RegExp/property-escapes' test262 tests.
This commit is contained in:
parent
a98d3a1a85
commit
df14d11a11
Notes:
sideshowbarker
2024-07-18 07:06:40 +09:00
Author: https://github.com/trflynn89 Commit: https://github.com/SerenityOS/serenity/commit/df14d11a11b Pull-request: https://github.com/SerenityOS/serenity/pull/9333
3 changed files with 63 additions and 45 deletions
|
@ -522,6 +522,9 @@ TEST_CASE(ECMA262_parse)
|
|||
{ "\\p{hello friends}", regex::Error::InvalidNameForProperty, ECMAScriptFlags::Unicode },
|
||||
{ "\\p{Prepended_Concatenation_Mark}", regex::Error::InvalidNameForProperty, ECMAScriptFlags::Unicode },
|
||||
{ "\\p{ASCII}", regex::Error::NoError, ECMAScriptFlags::Unicode },
|
||||
{ "\\\\p{1}", regex::Error::NoError, ECMAScriptFlags::Unicode },
|
||||
{ "\\\\p{AsCiI}", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
|
||||
{ "\\\\p{ASCII}", regex::Error::InvalidPattern, ECMAScriptFlags::Unicode },
|
||||
};
|
||||
|
||||
for (auto& test : tests) {
|
||||
|
|
|
@ -1121,7 +1121,7 @@ Optional<unsigned> ECMA262Parser::read_digits(ECMA262Parser::ReadDigitsInitialZe
|
|||
return str.to_uint();
|
||||
}
|
||||
|
||||
bool ECMA262Parser::parse_quantifier(ByteCode& stack, size_t& match_length_minimum, bool, bool)
|
||||
bool ECMA262Parser::parse_quantifier(ByteCode& stack, size_t& match_length_minimum, bool unicode, bool)
|
||||
{
|
||||
enum class Repetition {
|
||||
OneOrMore,
|
||||
|
@ -1144,52 +1144,13 @@ bool ECMA262Parser::parse_quantifier(ByteCode& stack, size_t& match_length_minim
|
|||
consume();
|
||||
repetition_mark = Repetition::Optional;
|
||||
} else if (match(TokenType::LeftCurly)) {
|
||||
consume();
|
||||
auto chars_consumed = 1;
|
||||
repetition_mark = Repetition::Explicit;
|
||||
|
||||
auto low_bound_string = read_digits_as_string();
|
||||
chars_consumed += low_bound_string.length();
|
||||
|
||||
auto low_bound = low_bound_string.to_uint();
|
||||
|
||||
if (!low_bound.has_value()) {
|
||||
if (!m_should_use_browser_extended_grammar && done())
|
||||
return set_error(Error::MismatchingBrace);
|
||||
|
||||
back(chars_consumed + !done());
|
||||
return true;
|
||||
}
|
||||
|
||||
repeat_min = low_bound.value();
|
||||
|
||||
if (match(TokenType::Comma)) {
|
||||
consume();
|
||||
++chars_consumed;
|
||||
auto high_bound_string = read_digits_as_string();
|
||||
auto high_bound = high_bound_string.to_uint();
|
||||
if (high_bound.has_value()) {
|
||||
repeat_max = high_bound.value();
|
||||
chars_consumed += high_bound_string.length();
|
||||
if (!parse_interval_quantifier(repeat_min, repeat_max)) {
|
||||
if (unicode) {
|
||||
// Invalid interval quantifiers are disallowed in Unicode mod - they must be esacped with '\{'.
|
||||
set_error(Error::InvalidPattern);
|
||||
}
|
||||
} else {
|
||||
repeat_max = repeat_min;
|
||||
}
|
||||
|
||||
if (!match(TokenType::RightCurly)) {
|
||||
if (!m_should_use_browser_extended_grammar && done())
|
||||
return set_error(Error::MismatchingBrace);
|
||||
|
||||
back(chars_consumed + !done());
|
||||
return true;
|
||||
}
|
||||
|
||||
consume();
|
||||
++chars_consumed;
|
||||
|
||||
if (repeat_max.has_value()) {
|
||||
if (repeat_min.value() > repeat_max.value())
|
||||
set_error(Error::InvalidBraceContent);
|
||||
return !has_error();
|
||||
}
|
||||
} else {
|
||||
return true;
|
||||
|
@ -1223,6 +1184,59 @@ bool ECMA262Parser::parse_quantifier(ByteCode& stack, size_t& match_length_minim
|
|||
return true;
|
||||
}
|
||||
|
||||
bool ECMA262Parser::parse_interval_quantifier(Optional<size_t>& repeat_min, Optional<size_t>& repeat_max)
|
||||
{
|
||||
VERIFY(match(TokenType::LeftCurly));
|
||||
consume();
|
||||
auto chars_consumed = 1;
|
||||
|
||||
auto low_bound_string = read_digits_as_string();
|
||||
chars_consumed += low_bound_string.length();
|
||||
|
||||
auto low_bound = low_bound_string.to_uint();
|
||||
|
||||
if (!low_bound.has_value()) {
|
||||
if (!m_should_use_browser_extended_grammar && done())
|
||||
return set_error(Error::MismatchingBrace);
|
||||
|
||||
back(chars_consumed + !done());
|
||||
return false;
|
||||
}
|
||||
|
||||
repeat_min = low_bound.value();
|
||||
|
||||
if (match(TokenType::Comma)) {
|
||||
consume();
|
||||
++chars_consumed;
|
||||
auto high_bound_string = read_digits_as_string();
|
||||
auto high_bound = high_bound_string.to_uint();
|
||||
if (high_bound.has_value()) {
|
||||
repeat_max = high_bound.value();
|
||||
chars_consumed += high_bound_string.length();
|
||||
}
|
||||
} else {
|
||||
repeat_max = repeat_min;
|
||||
}
|
||||
|
||||
if (!match(TokenType::RightCurly)) {
|
||||
if (!m_should_use_browser_extended_grammar && done())
|
||||
return set_error(Error::MismatchingBrace);
|
||||
|
||||
back(chars_consumed + !done());
|
||||
return false;
|
||||
}
|
||||
|
||||
consume();
|
||||
++chars_consumed;
|
||||
|
||||
if (repeat_max.has_value()) {
|
||||
if (repeat_min.value() > repeat_max.value())
|
||||
set_error(Error::InvalidBraceContent);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ECMA262Parser::parse_atom(ByteCode& stack, size_t& match_length_minimum, bool unicode, bool named)
|
||||
{
|
||||
if (match(TokenType::EscapeSequence)) {
|
||||
|
|
|
@ -228,6 +228,7 @@ private:
|
|||
bool parse_assertion(ByteCode&, size_t&, bool unicode, bool named);
|
||||
bool parse_atom(ByteCode&, size_t&, bool unicode, bool named);
|
||||
bool parse_quantifier(ByteCode&, size_t&, bool unicode, bool named);
|
||||
bool parse_interval_quantifier(Optional<size_t>& repeat_min, Optional<size_t>& repeat_max);
|
||||
bool parse_atom_escape(ByteCode&, size_t&, bool unicode, bool named);
|
||||
bool parse_character_class(ByteCode&, size_t&, bool unicode, bool named);
|
||||
bool parse_capture_group(ByteCode&, size_t&, bool unicode, bool named);
|
||||
|
|
Loading…
Add table
Reference in a new issue