mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-22 07:30:19 +00:00
LibJS: Force the lexer to parse a regex when expecting a statement
This commit is contained in:
parent
05444103e3
commit
106f9e30d7
Notes:
sideshowbarker
2024-07-18 05:37:03 +09:00
Author: https://github.com/davidot Commit: https://github.com/SerenityOS/serenity/commit/106f9e30d70 Pull-request: https://github.com/SerenityOS/serenity/pull/9085 Reviewed-by: https://github.com/IdanHo Reviewed-by: https://github.com/linusg ✅ Reviewed-by: https://github.com/trflynn89
4 changed files with 103 additions and 21 deletions
|
@ -360,10 +360,15 @@ bool Lexer::slash_means_division() const
|
|||
return type == TokenType::BigIntLiteral
|
||||
|| type == TokenType::BoolLiteral
|
||||
|| type == TokenType::BracketClose
|
||||
|| type == TokenType::CurlyClose
|
||||
|| type == TokenType::Identifier
|
||||
|| type == TokenType::In
|
||||
|| type == TokenType::Instanceof
|
||||
|| type == TokenType::MinusMinus
|
||||
|| type == TokenType::NullLiteral
|
||||
|| type == TokenType::NumericLiteral
|
||||
|| type == TokenType::ParenClose
|
||||
|| type == TokenType::PlusPlus
|
||||
|| type == TokenType::RegexLiteral
|
||||
|| type == TokenType::StringLiteral
|
||||
|| type == TokenType::TemplateLiteralEnd
|
||||
|
@ -563,27 +568,7 @@ Token Lexer::next()
|
|||
}
|
||||
} else if (m_current_char == '/' && !slash_means_division()) {
|
||||
consume();
|
||||
token_type = TokenType::RegexLiteral;
|
||||
|
||||
while (!is_eof()) {
|
||||
if (m_current_char == '[') {
|
||||
m_regex_is_in_character_class = true;
|
||||
} else if (m_current_char == ']') {
|
||||
m_regex_is_in_character_class = false;
|
||||
} else if (!m_regex_is_in_character_class && m_current_char == '/') {
|
||||
break;
|
||||
}
|
||||
|
||||
if (match('\\', '/') || match('\\', '[') || match('\\', '\\') || (m_regex_is_in_character_class && match('\\', ']')))
|
||||
consume();
|
||||
consume();
|
||||
}
|
||||
|
||||
if (is_eof()) {
|
||||
token_type = TokenType::UnterminatedRegexLiteral;
|
||||
} else {
|
||||
consume();
|
||||
}
|
||||
token_type = consume_regex_literal();
|
||||
} else if (m_eof) {
|
||||
if (unterminated_comment) {
|
||||
token_type = TokenType::Invalid;
|
||||
|
@ -677,4 +662,73 @@ Token Lexer::next()
|
|||
return m_current_token;
|
||||
}
|
||||
|
||||
// Re-lexes the current token as a regular expression literal.
//
// The current token must be Slash or SlashEquals (enforced with VERIFY). The
// regex body is scanned with consume_regex_literal(), and m_current_token is
// replaced by a token whose value spans from the leading '/' up to wherever
// the scan stopped. The replacement keeps the old token's trivia, line number
// and column. Returns the new current token.
Token Lexer::force_slash_as_regex()
{
    auto const original_type = m_current_token.type();
    bool const began_as_slash_equals = original_type == TokenType::SlashEquals;
    VERIFY(original_type == TokenType::Slash || began_as_slash_equals);

    VERIFY(m_position > 0);
    size_t body_start = m_position - 1;

    if (began_as_slash_equals) {
        // The '=' was swallowed as part of "/=", but it is really the first
        // character of the regex body: step back so it gets scanned again.
        // NOTE(review): only m_position and m_current_char are rewound here; if
        // consume() maintains other bookkeeping (e.g. a column counter) it is
        // not rewound - confirm that is intended.
        VERIFY(m_source[body_start - 1] == '=');
        --body_start;
        --m_position;
        m_current_char = '=';
    }

    auto const regex_type = consume_regex_literal();

    // body_start indexes the first character after the opening '/', so the
    // token text starts one character earlier to include that '/'.
    auto const literal_text = m_source.substring_view(body_start - 1, m_position - body_start);

    m_current_token = Token(
        regex_type,
        "",
        m_current_token.trivia(),
        literal_text,
        m_filename,
        m_current_token.line_number(),
        m_current_token.line_column(),
        m_position);

    if constexpr (LEXER_DEBUG) {
        dbgln("------------------------------");
        dbgln("Token: {}", m_current_token.name());
        dbgln("Trivia: _{}_", m_current_token.trivia());
        dbgln("Value: _{}_", m_current_token.value());
        dbgln("Line: {}, Column: {}", m_current_token.line_number(), m_current_token.line_column());
        dbgln("------------------------------");
    }

    return m_current_token;
}
|
||||
|
||||
// Scans the body of a regular expression literal, starting at the current
// character (the opening '/' has already been consumed by the caller).
//
// Returns TokenType::RegexLiteral when the scan stops on a '/' outside a
// character class, consuming that '/' as well. Otherwise (end of input or a
// line terminator was reached first) returns
// TokenType::UnterminatedRegexLiteral.
TokenType Lexer::consume_regex_literal()
{
    while (!is_eof()) {
        // Stop at a line terminator, or at a '/' that is not inside [...].
        if (is_line_terminator() || (!m_regex_is_in_character_class && m_current_char == '/'))
            break;

        // Track whether we are inside a character class, where '/' does not
        // terminate the literal.
        if (m_current_char == '[')
            m_regex_is_in_character_class = true;
        else if (m_current_char == ']')
            m_regex_is_in_character_class = false;

        // For an escaped '/', '[' or '\' (and ']' while inside a class), consume
        // the backslash here so the escaped character is consumed right below
        // and never inspected by the checks above on the next iteration.
        if (match('\\', '/') || match('\\', '[') || match('\\', '\\') || (m_regex_is_in_character_class && match('\\', ']')))
            consume();
        consume();
    }

    // NOTE(review): m_regex_is_in_character_class is left as-is when the scan
    // ends without a closing '/'; confirm later scans do not depend on it
    // being reset.
    if (m_current_char == '/') {
        consume();
        return TokenType::RegexLiteral;
    }

    return TokenType::UnterminatedRegexLiteral;
}
|
||||
|
||||
}
|
||||
|
|
|
@ -25,6 +25,8 @@ public:
|
|||
|
||||
void disallow_html_comments() { m_allow_html_comments = false; };
|
||||
|
||||
Token force_slash_as_regex();
|
||||
|
||||
private:
|
||||
void consume();
|
||||
bool consume_exponent();
|
||||
|
@ -47,6 +49,8 @@ private:
|
|||
bool match_numeric_literal_separator_followed_by(Callback) const;
|
||||
bool slash_means_division() const;
|
||||
|
||||
TokenType consume_regex_literal();
|
||||
|
||||
StringView m_source;
|
||||
size_t m_position { 0 };
|
||||
Token m_current_token;
|
||||
|
|
|
@ -400,6 +400,10 @@ NonnullRefPtr<Statement> Parser::parse_statement(AllowLabelledFunction allow_lab
|
|||
case TokenType::Semicolon:
|
||||
consume();
|
||||
return create_ast_node<EmptyStatement>({ m_state.current_token.filename(), rule_start.position(), position() });
|
||||
case TokenType::Slash:
|
||||
case TokenType::SlashEquals:
|
||||
m_state.current_token = m_state.lexer.force_slash_as_regex();
|
||||
[[fallthrough]];
|
||||
default:
|
||||
if (match_identifier_name()) {
|
||||
auto result = try_parse_labelled_statement(allow_labelled_function);
|
||||
|
@ -2556,6 +2560,8 @@ bool Parser::match_expression() const
|
|||
|| type == TokenType::This
|
||||
|| type == TokenType::Super
|
||||
|| type == TokenType::RegexLiteral
|
||||
|| type == TokenType::Slash // Wrongly recognized regex by lexer
|
||||
|| type == TokenType::SlashEquals // Wrongly recognized regex by lexer (/=a/ is a valid regex)
|
||||
|| type == TokenType::Yield
|
||||
|| match_unary_prefixed_expression();
|
||||
}
|
||||
|
|
|
@ -3,4 +3,22 @@ test("slash token resolution in lexer", () => {
|
|||
expect("``/foo/").not.toEval();
|
||||
expect("1/foo/").not.toEval();
|
||||
expect("1/foo").toEval();
|
||||
|
||||
expect("{} /foo/").toEval();
|
||||
expect("{} /=/").toEval();
|
||||
expect("{} /=a/").toEval();
|
||||
expect("{} /* */ /=a/").toEval();
|
||||
expect("{} /* /a/ */ /=a/").toEval();
|
||||
|
||||
expect("(function () {} / 1)").toEval();
|
||||
expect("(function () {} / 1)").toEval();
|
||||
|
||||
expect("+a++ / 1").toEval();
|
||||
expect("+a-- / 1").toEval();
|
||||
expect("a.in / b").toEval();
|
||||
expect("a.instanceof / b").toEval();
|
||||
|
||||
// FIXME: Even more 'reserved' words are valid however the cases below do still need to pass.
|
||||
//expect("a.void / b").toEval();
|
||||
expect("void / b/").toEval();
|
||||
});
|
||||
|
|
Loading…
Reference in a new issue