Forráskód Böngészése

LibJS: Parse slashes after reserved identifiers correctly

Previously we were unable to parse code like `yield/2` because `/2`
was parsed as a regex. At the same time `for (a in / b/)` was parsed
as a division.

This is solved by defaulting to division in the lexer, but calling
`force_slash_as_regex()` from the parser whenever an IdentifierName
is parsed as a ReservedWord.
Simon Wanner 2 éve
szülő
commit
a2efecac03

+ 3 - 8
Userland/Libraries/LibJS/Lexer.cpp

@@ -500,23 +500,18 @@ bool Lexer::is_numeric_literal_start() const
 bool Lexer::slash_means_division() const
 {
     auto type = m_current_token.type();
-    return type == TokenType::BigIntLiteral
-        || type == TokenType::BoolLiteral
+    return m_current_token.is_identifier_name()
+        || type == TokenType::BigIntLiteral
         || type == TokenType::BracketClose
         || type == TokenType::CurlyClose
-        || type == TokenType::Identifier
-        || type == TokenType::In
-        || type == TokenType::Instanceof
         || type == TokenType::MinusMinus
-        || type == TokenType::NullLiteral
         || type == TokenType::NumericLiteral
         || type == TokenType::ParenClose
         || type == TokenType::PlusPlus
         || type == TokenType::PrivateIdentifier
         || type == TokenType::RegexLiteral
         || type == TokenType::StringLiteral
-        || type == TokenType::TemplateLiteralEnd
-        || type == TokenType::This;
+        || type == TokenType::TemplateLiteralEnd;
 }
 
 Token Lexer::next()

+ 27 - 15
Userland/Libraries/LibJS/Parser.cpp

@@ -1458,11 +1458,11 @@ Parser::PrimaryExpressionParseResult Parser::parse_primary_expression()
     case TokenType::BigIntLiteral:
         return { create_ast_node<BigIntLiteral>({ m_source_code, rule_start.position(), position() }, consume().value()) };
     case TokenType::BoolLiteral:
-        return { create_ast_node<BooleanLiteral>({ m_source_code, rule_start.position(), position() }, consume().bool_value()) };
+        return { create_ast_node<BooleanLiteral>({ m_source_code, rule_start.position(), position() }, consume_and_allow_division().bool_value()) };
     case TokenType::StringLiteral:
         return { parse_string_literal(consume()) };
     case TokenType::NullLiteral:
-        consume();
+        consume_and_allow_division();
         return { create_ast_node<NullLiteral>({ m_source_code, rule_start.position(), position() }) };
     case TokenType::CurlyOpen:
         return { parse_object_expression() };
@@ -2168,7 +2168,7 @@ Parser::ExpressionResult Parser::parse_secondary_expression(NonnullRefPtr<Expres
             expected("IdentifierName");
         }
 
-        return create_ast_node<MemberExpression>({ m_source_code, rule_start.position(), position() }, move(lhs), create_ast_node<Identifier>({ m_source_code, rule_start.position(), position() }, consume().DeprecatedFlyString_value()));
+        return create_ast_node<MemberExpression>({ m_source_code, rule_start.position(), position() }, move(lhs), create_ast_node<Identifier>({ m_source_code, rule_start.position(), position() }, consume_and_allow_division().DeprecatedFlyString_value()));
     case TokenType::BracketOpen: {
         consume(TokenType::BracketOpen);
         auto expression = create_ast_node<MemberExpression>({ m_source_code, rule_start.position(), position() }, move(lhs), parse_expression(0), true);
@@ -4010,6 +4010,18 @@ bool Parser::done() const
 }
 
 Token Parser::consume()
+{
+    auto old_token = m_state.current_token;
+    m_state.current_token = m_state.lexer.next();
+
+    // If an IdentifierName is not parsed as an Identifier, a slash after it should start a regex rather than a division.
+    if (old_token.is_identifier_name() && (m_state.current_token.type() == TokenType::Slash || m_state.current_token.type() == TokenType::SlashEquals)) {
+        m_state.current_token = m_state.lexer.force_slash_as_regex();
+    }
+    return old_token;
+}
+
+Token Parser::consume_and_allow_division()
 {
     auto old_token = m_state.current_token;
     m_state.current_token = m_state.lexer.next();
@@ -4057,26 +4069,26 @@ Token Parser::consume_identifier()
     if (match(TokenType::Let)) {
         if (m_state.strict_mode)
             syntax_error("'let' is not allowed as an identifier in strict mode");
-        return consume();
+        return consume_and_allow_division();
     }
 
     if (match(TokenType::Yield)) {
         if (m_state.strict_mode || m_state.in_generator_function_context)
             syntax_error("Identifier must not be a reserved word in strict mode ('yield')");
-        return consume();
+        return consume_and_allow_division();
     }
 
     if (match(TokenType::Await)) {
         if (m_program_type == Program::Type::Module || m_state.await_expression_is_valid || m_state.in_class_static_init_block)
             syntax_error("Identifier must not be a reserved word in modules ('await')");
-        return consume();
+        return consume_and_allow_division();
     }
 
     if (match(TokenType::Async))
-        return consume();
+        return consume_and_allow_division();
 
     expected("Identifier");
-    return consume();
+    return consume_and_allow_division();
 }
 
 // https://tc39.es/ecma262/#prod-IdentifierReference
@@ -4092,33 +4104,33 @@ Token Parser::consume_identifier_reference()
         if (m_program_type == Program::Type::Module && name == "await"sv)
             syntax_error("'await' is not allowed as an identifier in module");
 
-        return consume();
+        return consume_and_allow_division();
     }
 
     // See note in Parser::parse_identifier().
     if (match(TokenType::Let)) {
         if (m_state.strict_mode)
             syntax_error("'let' is not allowed as an identifier in strict mode");
-        return consume();
+        return consume_and_allow_division();
     }
 
     if (match(TokenType::Yield)) {
         if (m_state.strict_mode)
             syntax_error("Identifier reference may not be 'yield' in strict mode");
-        return consume();
+        return consume_and_allow_division();
     }
 
     if (match(TokenType::Await)) {
         if (m_program_type == Program::Type::Module)
             syntax_error("'await' is not allowed as an identifier in module");
-        return consume();
+        return consume_and_allow_division();
     }
 
     if (match(TokenType::Async))
-        return consume();
+        return consume_and_allow_division();
 
     expected(Token::name(TokenType::Identifier));
-    return consume();
+    return consume_and_allow_division();
 }
 
 Token Parser::consume(TokenType expected_type)
@@ -4126,7 +4138,7 @@ Token Parser::consume(TokenType expected_type)
     if (!match(expected_type)) {
         expected(Token::name(expected_type));
     }
-    auto token = consume();
+    auto token = expected_type == TokenType::Identifier ? consume_and_allow_division() : consume();
     if (expected_type == TokenType::Identifier) {
         if (m_state.strict_mode && is_strict_reserved_word(token.value()))
             syntax_error(DeprecatedString::formatted("Identifier must not be a reserved word in strict mode ('{}')", token.value()));

+ 1 - 0
Userland/Libraries/LibJS/Parser.h

@@ -244,6 +244,7 @@ private:
     void expected(char const* what);
     void syntax_error(DeprecatedString const& message, Optional<Position> = {});
     Token consume();
+    Token consume_and_allow_division();
     Token consume_identifier();
     Token consume_identifier_reference();
     Token consume(TokenType type);

+ 6 - 0
Userland/Libraries/LibJS/Tests/modules/basic-modules.js

@@ -210,6 +210,12 @@ describe("in- and exports", () => {
     test("can have top level using declarations which trigger at the end of running a module", () => {
         expectModulePassed("./top-level-dispose.mjs");
     });
+
+    test("can export default a RegExp", () => {
+        const result = expectModulePassed("./default-regexp-export.mjs");
+        expect(result.default).toBeInstanceOf(RegExp);
+        expect(result.default.toString()).toBe(/foo/.toString());
+    });
 });
 
 describe("loops", () => {

+ 3 - 0
Userland/Libraries/LibJS/Tests/modules/default-regexp-export.mjs

@@ -0,0 +1,3 @@
+export default /foo/;
+
+export let passed = true;

+ 25 - 2
Userland/Libraries/LibJS/Tests/syntax/slash-after-block.js

@@ -19,7 +19,30 @@ test("slash token resolution in lexer", () => {
     expect("a.instanceof / b").toEval();
     expect("class A { #name; d = a.#name / b; }").toEval();
 
-    // FIXME: Even more 'reserved' words are valid however the cases below do still need to pass.
-    //expect("a.void / b").toEval();
+    expect("async / b").toEval();
+    expect("a.delete / b").toEval();
+    expect("delete / b/").toEval();
+    expect("a.in / b").toEval();
+    expect("for (a in / b/) {}").toEval();
+    expect("a.instanceof / b").toEval();
+    expect("a instanceof / b/").toEval();
+    expect("new / b/").toEval();
+    expect("null / b").toEval();
+    expect("for (a of / b/) {}").toEval();
+    expect("a.return / b").toEval();
+    expect("function foo() { return / b/ }").toEval();
+    expect("throw / b/").toEval();
+    expect("a.typeof / b").toEval();
+    expect("a.void / b").toEval();
     expect("void / b/").toEval();
+
+    expect("await / b").toEval();
+    expect("await / b/").not.toEval();
+    expect("async function foo() { await / b }").not.toEval();
+    expect("async function foo() { await / b/ }").toEval();
+
+    expect("yield / b").toEval();
+    expect("yield / b/").not.toEval();
+    expect("function* foo() { yield / b }").not.toEval();
+    expect("function* foo() { yield / b/ }").toEval();
 });