LibRegex: Use match_ordinary_characters() in ECMA262Parser::parse_atom()

Otherwise we would only match TokenType::Char, making all of these invalid:

- /foo,bar/
- /foo\/bar/
- /foo=bar/
- /foo-bar/
- /foo:bar/

Fixes #4243.
This commit is contained in:
Linus Groh 2020-11-29 19:10:46 +00:00 committed by Andreas Kling
parent 1279d2256c
commit eea7cabdbc
Notes: sideshowbarker 2024-07-19 01:10:57 +09:00
3 changed files with 16 additions and 15 deletions

View file

@ -154,6 +154,19 @@ Parser::Result Parser::parse(Optional<AllOptions> regex_options)
};
}
// Reports whether the current token is one that is accepted as an ordinary
// character: TokenType::Char itself, or one of Comma, Slash, EqualSign,
// HyphenMinus and Colon.
//
// NOTE: This method must not be called during bracket and repetition parsing!
// FIXME: Add assertion for that?
ALWAYS_INLINE bool Parser::match_ordinary_characters()
{
    switch (m_parser_state.current_token.type()) {
    case TokenType::Char:
    case TokenType::Comma:
    case TokenType::Slash:
    case TokenType::EqualSign:
    case TokenType::HyphenMinus:
    case TokenType::Colon:
        return true;
    default:
        // Every other token type is not an ordinary character.
        return false;
    }
}
// =============================
// PosixExtended Parser
// =============================
@ -172,19 +185,6 @@ ALWAYS_INLINE bool PosixExtendedParser::match_repetition_symbol()
|| type == TokenType::LeftCurly);
}
// Returns true when the current token is accepted as an ordinary character,
// i.e. its type is Char, Comma, Slash, EqualSign, HyphenMinus or Colon.
//
// NOTE: This method must not be called during bracket and repetition parsing!
// FIXME: Add assertion for that?
ALWAYS_INLINE bool PosixExtendedParser::match_ordinary_characters()
{
    switch (m_parser_state.current_token.type()) {
    case TokenType::Char:
    case TokenType::Comma:
    case TokenType::Slash:
    case TokenType::EqualSign:
    case TokenType::HyphenMinus:
    case TokenType::Colon:
        return true;
    default:
        // Any remaining token type is rejected.
        return false;
    }
}
ALWAYS_INLINE bool PosixExtendedParser::parse_repetition_symbol(ByteCode& bytecode_to_repeat, size_t& match_length_minimum)
{
if (match(TokenType::LeftCurly)) {
@ -964,7 +964,7 @@ bool ECMA262Parser::parse_atom(ByteCode& stack, size_t& match_length_minimum, bo
return false;
}
if (match(TokenType::Char)) {
if (match_ordinary_characters()) {
auto token = consume().value();
match_length_minimum += 1;
stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)token[0] } });

View file

@ -90,6 +90,7 @@ protected:
ALWAYS_INLINE bool match(TokenType type) const;
ALWAYS_INLINE bool match(char ch) const;
ALWAYS_INLINE bool match_ordinary_characters();
ALWAYS_INLINE Token consume();
ALWAYS_INLINE Token consume(TokenType type, Error error);
ALWAYS_INLINE bool consume(const String&);
@ -145,7 +146,6 @@ public:
private:
ALWAYS_INLINE bool match_repetition_symbol();
ALWAYS_INLINE bool match_ordinary_characters();
bool parse_internal(ByteCode&, size_t&) override;

View file

@ -488,6 +488,7 @@ TEST_CASE(ECMA262_parse)
"^[\\w+/_-]+[=]{0,2}$", // #4189
"^(?:[^<]*(<[\\w\\W]+>)[^>]*$|#([\\w\\-]*)$)", // #4189
"\\/", // #4189
",/=-:", // #4243
"\\x", // Even invalid escapes are allowed if ~unicode.
};