LibJS: Force the lexer to parse a regex when expecting a statement

This commit is contained in:
davidot 2021-07-29 23:28:28 +02:00 committed by Linus Groh
parent 05444103e3
commit 106f9e30d7
Notes: sideshowbarker 2024-07-18 05:37:03 +09:00
4 changed files with 103 additions and 21 deletions

View file

@ -360,10 +360,15 @@ bool Lexer::slash_means_division() const
return type == TokenType::BigIntLiteral
|| type == TokenType::BoolLiteral
|| type == TokenType::BracketClose
|| type == TokenType::CurlyClose
|| type == TokenType::Identifier
|| type == TokenType::In
|| type == TokenType::Instanceof
|| type == TokenType::MinusMinus
|| type == TokenType::NullLiteral
|| type == TokenType::NumericLiteral
|| type == TokenType::ParenClose
|| type == TokenType::PlusPlus
|| type == TokenType::RegexLiteral
|| type == TokenType::StringLiteral
|| type == TokenType::TemplateLiteralEnd
@ -563,27 +568,7 @@ Token Lexer::next()
}
} else if (m_current_char == '/' && !slash_means_division()) {
consume();
token_type = TokenType::RegexLiteral;
while (!is_eof()) {
if (m_current_char == '[') {
m_regex_is_in_character_class = true;
} else if (m_current_char == ']') {
m_regex_is_in_character_class = false;
} else if (!m_regex_is_in_character_class && m_current_char == '/') {
break;
}
if (match('\\', '/') || match('\\', '[') || match('\\', '\\') || (m_regex_is_in_character_class && match('\\', ']')))
consume();
consume();
}
if (is_eof()) {
token_type = TokenType::UnterminatedRegexLiteral;
} else {
consume();
}
token_type = consume_regex_literal();
} else if (m_eof) {
if (unterminated_comment) {
token_type = TokenType::Invalid;
@ -677,4 +662,73 @@ Token Lexer::next()
return m_current_token;
}
// Re-lexes the current Slash or SlashEquals token as the beginning of a regex literal.
//
// The current token must be TokenType::Slash or TokenType::SlashEquals (enforced with VERIFY).
// The replacement token reuses the trivia, line number and line column of the token it replaces,
// is stored as the lexer's current token, and is also returned to the caller.
Token Lexer::force_slash_as_regex()
{
    auto const original_type = m_current_token.type();
    VERIFY(original_type == TokenType::Slash || original_type == TokenType::SlashEquals);
    VERIFY(m_position > 0);

    // The token text handed to Token() below begins at index value_start - 1, i.e. at the '/'.
    size_t value_start = m_position - 1;

    if (original_type == TokenType::SlashEquals) {
        // The '=' was lexed as part of the operator but is really the first character of the
        // regex body: step back by one so consume_regex_literal() starts scanning at the '='.
        VERIFY(m_source[value_start - 1] == '=');
        value_start -= 1;
        m_position -= 1;
        m_current_char = '=';
    }

    auto const regex_type = consume_regex_literal();
    auto const regex_text = m_source.substring_view(value_start - 1, m_position - value_start);

    m_current_token = Token(
        regex_type,
        "",
        m_current_token.trivia(),
        regex_text,
        m_filename,
        m_current_token.line_number(),
        m_current_token.line_column(),
        m_position);

    if constexpr (LEXER_DEBUG) {
        dbgln("------------------------------");
        dbgln("Token: {}", m_current_token.name());
        dbgln("Trivia: _{}_", m_current_token.trivia());
        dbgln("Value: _{}_", m_current_token.value());
        dbgln("Line: {}, Column: {}", m_current_token.line_number(), m_current_token.line_column());
        dbgln("------------------------------");
    }

    return m_current_token;
}
// Consumes the body of a regex literal, starting at the current character. Callers are expected
// to have already moved past the opening '/'.
//
// Scanning stops at end of input, when is_line_terminator() reports true, or at the first '/'
// that is not inside a [...] character class. A backslash followed by '/', '[' or '\\' (and by
// ']' while inside a character class) is consumed together with the character it escapes, so
// the escaped character is never interpreted as a delimiter.
//
// Returns TokenType::RegexLiteral when the scan stopped on a '/' (which is consumed as well),
// otherwise TokenType::UnterminatedRegexLiteral.
//
// NOTE(review): when the scan ends while still inside a character class,
// m_regex_is_in_character_class is left set by this function — confirm whether callers rely on
// it being reset before the next regex literal is lexed.
TokenType Lexer::consume_regex_literal()
{
    while (!is_eof()) {
        if (is_line_terminator() || (!m_regex_is_in_character_class && m_current_char == '/'))
            break;

        if (m_current_char == '[')
            m_regex_is_in_character_class = true;
        else if (m_current_char == ']')
            m_regex_is_in_character_class = false;

        // For an escape sequence, skip the backslash here; the escaped character itself is
        // consumed by the unconditional consume() below.
        if (match('\\', '/') || match('\\', '[') || match('\\', '\\') || (m_regex_is_in_character_class && match('\\', ']')))
            consume();
        consume();
    }

    if (m_current_char == '/') {
        consume();
        return TokenType::RegexLiteral;
    }

    return TokenType::UnterminatedRegexLiteral;
}
}

View file

@ -25,6 +25,8 @@ public:
void disallow_html_comments() { m_allow_html_comments = false; };
Token force_slash_as_regex();
private:
void consume();
bool consume_exponent();
@ -47,6 +49,8 @@ private:
bool match_numeric_literal_separator_followed_by(Callback) const;
bool slash_means_division() const;
TokenType consume_regex_literal();
StringView m_source;
size_t m_position { 0 };
Token m_current_token;

View file

@ -400,6 +400,10 @@ NonnullRefPtr<Statement> Parser::parse_statement(AllowLabelledFunction allow_lab
case TokenType::Semicolon:
consume();
return create_ast_node<EmptyStatement>({ m_state.current_token.filename(), rule_start.position(), position() });
case TokenType::Slash:
case TokenType::SlashEquals:
m_state.current_token = m_state.lexer.force_slash_as_regex();
[[fallthrough]];
default:
if (match_identifier_name()) {
auto result = try_parse_labelled_statement(allow_labelled_function);
@ -2556,6 +2560,8 @@ bool Parser::match_expression() const
|| type == TokenType::This
|| type == TokenType::Super
|| type == TokenType::RegexLiteral
|| type == TokenType::Slash // Wrongly recognized regex by lexer
|| type == TokenType::SlashEquals // Wrongly recognized regex by lexer (/=a/ is a valid regex)
|| type == TokenType::Yield
|| match_unary_prefixed_expression();
}

View file

@ -3,4 +3,22 @@ test("slash token resolution in lexer", () => {
expect("``/foo/").not.toEval();
expect("1/foo/").not.toEval();
expect("1/foo").toEval();
expect("{} /foo/").toEval();
expect("{} /=/").toEval();
expect("{} /=a/").toEval();
expect("{} /* */ /=a/").toEval();
expect("{} /* /a/ */ /=a/").toEval();
expect("(function () {} / 1)").toEval();
expect("(function () {} / 1)").toEval();
expect("+a++ / 1").toEval();
expect("+a-- / 1").toEval();
expect("a.in / b").toEval();
expect("a.instanceof / b").toEval();
// FIXME: Even more 'reserved' words are valid; however, the cases below do still need to pass.
//expect("a.void / b").toEval();
expect("void / b/").toEval();
});