2020-03-11 18:27:43 +00:00
/*
2021-05-29 10:38:28 +00:00
* Copyright ( c ) 2020 , Stephan Unverwerth < s . unverwerth @ serenityos . org >
2022-01-16 22:51:28 +00:00
* Copyright ( c ) 2021 - 2022 , David Tuin < davidot @ serenityos . org >
2024-10-04 11:19:50 +00:00
* Copyright ( c ) 2023 , Andreas Kling < andreas @ ladybird . org >
2020-03-11 18:27:43 +00:00
*
2021-04-22 08:24:48 +00:00
* SPDX - License - Identifier : BSD - 2 - Clause
2020-03-11 18:27:43 +00:00
*/
# pragma once
2022-02-16 06:34:59 +00:00
# include <AK/Assertions.h>
2020-10-08 17:49:08 +00:00
# include <AK/HashTable.h>
2020-03-18 10:23:53 +00:00
# include <AK/NonnullRefPtr.h>
2020-05-26 11:57:50 +00:00
# include <AK/StringBuilder.h>
2020-05-28 05:22:08 +00:00
# include <LibJS/AST.h>
# include <LibJS/Lexer.h>
2022-11-23 11:39:23 +00:00
# include <LibJS/ParserError.h>
2022-01-15 16:26:06 +00:00
# include <LibJS/Runtime/FunctionConstructor.h>
2020-12-28 17:15:22 +00:00
# include <LibJS/SourceRange.h>
2022-02-16 06:34:59 +00:00
# include <LibJS/Token.h>
# include <initializer_list>
2020-05-14 15:26:01 +00:00
# include <stdio.h>
2020-03-11 18:27:43 +00:00
namespace JS {
2020-03-12 22:02:41 +00:00
// Associativity of an operator: the result type of
// Parser::operator_associativity() and the `associate` argument of the
// expression-parsing functions.
enum class Associativity {
    Left,
    Right,
};
2020-10-20 16:56:49 +00:00
struct FunctionNodeParseOptions {
2022-09-01 22:46:37 +00:00
enum : u16 {
2020-10-20 16:56:49 +00:00
CheckForFunctionAndName = 1 < < 0 ,
AllowSuperPropertyLookup = 1 < < 1 ,
AllowSuperConstructorCall = 1 < < 2 ,
2020-10-20 17:32:51 +00:00
IsGetterFunction = 1 < < 3 ,
IsSetterFunction = 1 < < 4 ,
2020-10-25 11:14:04 +00:00
IsArrowFunction = 1 < < 5 ,
2021-06-14 10:22:59 +00:00
IsGeneratorFunction = 1 < < 6 ,
2021-11-09 18:39:22 +00:00
IsAsyncFunction = 1 < < 7 ,
2022-09-01 22:46:37 +00:00
HasDefaultExportName = 1 < < 8 ,
2020-10-20 16:56:49 +00:00
} ;
} ;
2021-09-22 10:44:56 +00:00
class ScopePusher ;
2020-03-11 18:27:43 +00:00
class Parser {
public :
2022-04-09 23:55:45 +00:00
// Optional starting flags for a Parser; handed to the constructor through its
// `initial_state_for_eval` parameter. Every flag is off unless set explicitly.
struct EvalInitialState {
    bool in_eval_function_context = false;
    bool allow_super_property_lookup = false;
    bool allow_super_constructor_call = false;
    bool in_class_field_initializer = false;
};
explicit Parser ( Lexer lexer , Program : : Type program_type = Program : : Type : : Script , Optional < EvalInitialState > initial_state_for_eval = { } ) ;
2020-03-11 18:27:43 +00:00
2021-06-20 03:13:53 +00:00
NonnullRefPtr < Program > parse_program ( bool starts_in_strict_mode = false ) ;
2020-03-11 18:27:43 +00:00
2020-03-19 10:52:56 +00:00
template < typename FunctionNodeType >
2022-09-01 22:46:37 +00:00
NonnullRefPtr < FunctionNodeType > parse_function_node ( u16 parse_options = FunctionNodeParseOptions : : CheckForFunctionAndName , Optional < Position > const & function_start = { } ) ;
2022-11-23 12:12:36 +00:00
Vector < FunctionParameter > parse_formal_parameters ( int & function_length , u16 parse_options = 0 ) ;
2021-09-17 23:11:32 +00:00
// Type of the `is_var_declaration` argument of parse_binding_pattern(),
// which defaults to No.
enum class AllowDuplicates {
    Yes,
    No,
};
// Type of the `allow_member_expressions` argument of parse_binding_pattern(),
// which defaults to No.
enum class AllowMemberExpressions {
    Yes,
    No,
};
2023-02-19 21:07:52 +00:00
RefPtr < BindingPattern const > parse_binding_pattern ( AllowDuplicates is_var_declaration = AllowDuplicates : : No , AllowMemberExpressions allow_member_expressions = AllowMemberExpressions : : No ) ;
2020-03-19 10:52:56 +00:00
2021-06-14 11:16:41 +00:00
struct PrimaryExpressionParseResult {
2023-02-19 21:07:52 +00:00
NonnullRefPtr < Expression const > result ;
2021-06-14 11:16:41 +00:00
bool should_continue_parsing_as_expression { true } ;
} ;
2023-02-19 21:07:52 +00:00
NonnullRefPtr < Declaration const > parse_declaration ( ) ;
2021-07-24 23:01:22 +00:00
// Argument type used by parse_statement(), parse_statement_list() and
// try_parse_labelled_statement(); the first two default it to No.
enum class AllowLabelledFunction {
    No,
    Yes,
};
2023-02-19 21:07:52 +00:00
NonnullRefPtr < Statement const > parse_statement ( AllowLabelledFunction allow_labelled_function = AllowLabelledFunction : : No ) ;
NonnullRefPtr < BlockStatement const > parse_block_statement ( ) ;
2024-05-22 10:04:50 +00:00
NonnullRefPtr < FunctionBody const > parse_function_body ( Vector < FunctionParameter > const & parameters , FunctionKind function_kind , FunctionParsingInsights & ) ;
2023-02-19 21:07:52 +00:00
NonnullRefPtr < ReturnStatement const > parse_return_statement ( ) ;
2022-12-20 21:09:57 +00:00
// Argument type of parse_variable_declaration() and parse_using_declaration();
// both default it to No.
enum class IsForLoopVariableDeclaration {
    No,
    Yes,
};
2023-02-19 21:07:52 +00:00
NonnullRefPtr < VariableDeclaration const > parse_variable_declaration ( IsForLoopVariableDeclaration is_for_loop_variable_declaration = IsForLoopVariableDeclaration : : No ) ;
2024-06-13 19:31:31 +00:00
[ [ nodiscard ] ] RefPtr < Identifier const > parse_lexical_binding ( Optional < DeclarationKind > = { } ) ;
2023-02-19 21:07:52 +00:00
NonnullRefPtr < UsingDeclaration const > parse_using_declaration ( IsForLoopVariableDeclaration is_for_loop_variable_declaration = IsForLoopVariableDeclaration : : No ) ;
NonnullRefPtr < Statement const > parse_for_statement ( ) ;
2021-11-23 15:09:28 +00:00
// Type of the `is_await` argument of parse_for_in_of_statement().
enum class IsForAwaitLoop {
    No,
    Yes,
};
2022-02-16 06:34:59 +00:00
struct ForbiddenTokens {
ForbiddenTokens ( std : : initializer_list < TokenType > const & forbidden ) ;
ForbiddenTokens merge ( ForbiddenTokens other ) const ;
bool allows ( TokenType token ) const ;
ForbiddenTokens forbid ( std : : initializer_list < TokenType > const & forbidden ) const ;
private :
void forbid_tokens ( std : : initializer_list < TokenType > const & forbidden ) ;
bool m_forbid_in_token : 1 { false } ;
bool m_forbid_logical_tokens : 1 { false } ;
bool m_forbid_coalesce_token : 1 { false } ;
bool m_forbid_paren_open : 1 { false } ;
bool m_forbid_question_mark_period : 1 { false } ;
bool m_forbid_equals : 1 { false } ;
} ;
// Pairs a parsed expression with a ForbiddenTokens value; this is the return
// type of parse_secondary_expression().
struct ExpressionResult {
2023-02-19 21:07:52 +00:00
// Accepts a pointer to an already-const node of any type T and moves it into
// the `expression` member. `forbidden` defaults to `{ }`.
template < typename T >
ExpressionResult ( NonnullRefPtr < T const > expression , ForbiddenTokens forbidden = { } )
: expression ( move ( expression ) )
, forbidden ( forbidden )
{
}
2022-02-16 06:34:59 +00:00
// Same as above for a pointer to a non-const node; the pointer is moved into
// the `NonnullRefPtr < Expression const >` member.
template < typename T >
ExpressionResult ( NonnullRefPtr < T > expression , ForbiddenTokens forbidden = { } )
2023-02-19 21:07:52 +00:00
: expression ( move ( expression ) )
2022-02-16 06:34:59 +00:00
, forbidden ( forbidden )
{
}
2023-02-19 21:07:52 +00:00
// The parsed expression.
NonnullRefPtr < Expression const > expression ;
2022-02-16 06:34:59 +00:00
// The ForbiddenTokens value supplied at construction.
ForbiddenTokens forbidden ;
} ;
2023-02-19 21:07:52 +00:00
NonnullRefPtr < Statement const > parse_for_in_of_statement ( NonnullRefPtr < ASTNode const > lhs , IsForAwaitLoop is_await ) ;
NonnullRefPtr < IfStatement const > parse_if_statement ( ) ;
NonnullRefPtr < ThrowStatement const > parse_throw_statement ( ) ;
NonnullRefPtr < TryStatement const > parse_try_statement ( ) ;
NonnullRefPtr < CatchClause const > parse_catch_clause ( ) ;
NonnullRefPtr < SwitchStatement const > parse_switch_statement ( ) ;
NonnullRefPtr < SwitchCase const > parse_switch_case ( ) ;
NonnullRefPtr < BreakStatement const > parse_break_statement ( ) ;
NonnullRefPtr < ContinueStatement const > parse_continue_statement ( ) ;
NonnullRefPtr < DoWhileStatement const > parse_do_while_statement ( ) ;
NonnullRefPtr < WhileStatement const > parse_while_statement ( ) ;
NonnullRefPtr < WithStatement const > parse_with_statement ( ) ;
NonnullRefPtr < DebuggerStatement const > parse_debugger_statement ( ) ;
NonnullRefPtr < ConditionalExpression const > parse_conditional_expression ( NonnullRefPtr < Expression const > test , ForbiddenTokens ) ;
NonnullRefPtr < OptionalChain const > parse_optional_chain ( NonnullRefPtr < Expression const > base ) ;
NonnullRefPtr < Expression const > parse_expression ( int min_precedence , Associativity associate = Associativity : : Right , ForbiddenTokens forbidden = { } ) ;
2021-06-14 11:16:41 +00:00
PrimaryExpressionParseResult parse_primary_expression ( ) ;
2023-02-19 21:07:52 +00:00
NonnullRefPtr < Expression const > parse_unary_prefixed_expression ( ) ;
NonnullRefPtr < RegExpLiteral const > parse_regexp_literal ( ) ;
NonnullRefPtr < ObjectExpression const > parse_object_expression ( ) ;
NonnullRefPtr < ArrayExpression const > parse_array_expression ( ) ;
2022-08-17 00:04:27 +00:00
// Type of the `string_literal_type` argument of parse_string_literal(),
// which defaults to Normal.
enum class StringLiteralType {
    Normal,
    NonTaggedTemplate,
    TaggedTemplate,
};
2023-02-19 21:07:52 +00:00
NonnullRefPtr < StringLiteral const > parse_string_literal ( Token const & token , StringLiteralType string_literal_type = StringLiteralType : : Normal , bool * contains_invalid_escape = nullptr ) ;
NonnullRefPtr < TemplateLiteral const > parse_template_literal ( bool is_tagged ) ;
ExpressionResult parse_secondary_expression ( NonnullRefPtr < Expression const > , int min_precedence , Associativity associate = Associativity : : Right , ForbiddenTokens forbidden = { } ) ;
NonnullRefPtr < Expression const > parse_call_expression ( NonnullRefPtr < Expression const > ) ;
NonnullRefPtr < NewExpression const > parse_new_expression ( ) ;
NonnullRefPtr < ClassDeclaration const > parse_class_declaration ( ) ;
NonnullRefPtr < ClassExpression const > parse_class_expression ( bool expect_class_name ) ;
NonnullRefPtr < YieldExpression const > parse_yield_expression ( ) ;
NonnullRefPtr < AwaitExpression const > parse_await_expression ( ) ;
NonnullRefPtr < Expression const > parse_property_key ( ) ;
NonnullRefPtr < AssignmentExpression const > parse_assignment_expression ( AssignmentOp , NonnullRefPtr < Expression const > lhs , int min_precedence , Associativity , ForbiddenTokens forbidden = { } ) ;
NonnullRefPtr < Identifier const > parse_identifier ( ) ;
NonnullRefPtr < ImportStatement const > parse_import_statement ( Program & program ) ;
NonnullRefPtr < ExportStatement const > parse_export_statement ( Program & program ) ;
RefPtr < FunctionExpression const > try_parse_arrow_function_expression ( bool expect_parens , bool is_async = false ) ;
RefPtr < LabelledStatement const > try_parse_labelled_statement ( AllowLabelledFunction allow_function ) ;
RefPtr < MetaProperty const > try_parse_new_target_expression ( ) ;
RefPtr < MetaProperty const > try_parse_import_meta_expression ( ) ;
NonnullRefPtr < ImportCall const > parse_import_call ( ) ;
2020-11-02 21:27:42 +00:00
2021-09-14 02:26:31 +00:00
Vector < CallExpression : : Argument > parse_arguments ( ) ;
2021-06-19 12:43:09 +00:00
// True when the parser state holds at least one recorded error.
bool has_errors() const
{
    return m_state.errors.size() != 0;
}
2022-11-23 11:39:23 +00:00
// Read-only access to the errors stored in the parser state.
Vector < ParserError > const & errors ( ) const { return m_state . errors ; }
2021-07-19 15:56:21 +00:00
void print_errors ( bool print_hint = true ) const
2020-05-14 15:26:01 +00:00
{
2021-06-19 12:43:09 +00:00
for ( auto & error : m_state . errors ) {
2021-07-19 15:56:21 +00:00
if ( print_hint ) {
auto hint = error . source_location_hint ( m_state . lexer . source ( ) ) ;
if ( ! hint . is_empty ( ) )
warnln ( " {} " , hint ) ;
}
2023-12-16 14:19:34 +00:00
warnln ( " SyntaxError: {} " , error . to_byte_string ( ) ) ;
2020-12-06 14:50:39 +00:00
}
2020-05-14 15:26:01 +00:00
}
2020-03-11 18:27:43 +00:00
2021-04-11 20:41:51 +00:00
// Per-token memo entry; instances are stored in m_token_memoizations keyed by
// a size_t. The flag is default-initialized to false, like every other
// boolean state flag in this header, so a default-constructed entry never
// carries an indeterminate value.
struct TokenMemoization {
    bool try_parse_arrow_function_expression_failed { false };
};
2022-01-15 16:26:06 +00:00
// Needs to mess with m_state, and we're not going to expose a non-const getter for that :^)
2024-11-14 15:01:23 +00:00
friend ThrowCompletionOr < GC : : Ref < ECMAScriptFunctionObject > > FunctionConstructor : : create_dynamic_function ( VM & , FunctionObject & , FunctionObject * , FunctionKind , ReadonlySpan < String > parameter_args , String const & body_arg ) ;
2022-01-15 16:26:06 +00:00
2024-05-22 10:04:50 +00:00
static Parser parse_function_body_from_string ( ByteString const & body_string , u16 parse_options , Vector < FunctionParameter > const & parameters , FunctionKind kind , FunctionParsingInsights & ) ;
2023-07-07 21:14:03 +00:00
2020-03-11 18:27:43 +00:00
private :
2020-04-13 14:42:54 +00:00
friend class ScopePusher ;
2021-11-26 23:01:23 +00:00
void parse_script ( Program & program , bool starts_in_strict_mode ) ;
void parse_module ( Program & program ) ;
2020-04-13 14:42:54 +00:00
2020-03-12 22:02:41 +00:00
Associativity operator_associativity ( TokenType ) const ;
2020-03-11 18:27:43 +00:00
bool match_expression ( ) const ;
2020-03-14 18:45:51 +00:00
bool match_unary_prefixed_expression ( ) const ;
2022-02-16 06:34:59 +00:00
bool match_secondary_expression ( ForbiddenTokens forbidden = { } ) const ;
2020-03-11 18:27:43 +00:00
bool match_statement ( ) const ;
2021-08-14 15:42:30 +00:00
bool match_export_or_import ( ) const ;
2023-12-02 15:20:01 +00:00
bool match_with_clause ( ) const ;
2022-12-20 21:09:57 +00:00
// Type of the `allow_using` argument of match_declaration(), which defaults
// to No.
enum class AllowUsingDeclaration {
    No,
    Yes,
};
bool match_declaration ( AllowUsingDeclaration allow_using = AllowUsingDeclaration : : No ) const ;
2021-10-07 22:38:24 +00:00
bool try_match_let_declaration ( ) const ;
2022-12-20 21:09:57 +00:00
bool try_match_using_declaration ( ) const ;
2021-10-07 22:38:24 +00:00
bool match_variable_declaration ( ) const ;
2021-07-11 11:04:55 +00:00
bool match_identifier ( ) const ;
2022-12-23 00:45:29 +00:00
bool token_is_identifier ( Token const & ) const ;
2020-04-18 18:31:27 +00:00
bool match_identifier_name ( ) const ;
2020-06-08 18:31:21 +00:00
bool match_property_key ( ) const ;
2021-10-12 20:45:52 +00:00
bool is_private_identifier_valid ( ) const ;
2020-03-11 18:27:43 +00:00
bool match ( TokenType type ) const ;
bool done ( ) const ;
2022-04-01 17:58:27 +00:00
void expected ( char const * what ) ;
2023-12-16 14:19:34 +00:00
void syntax_error ( ByteString const & message , Optional < Position > = { } ) ;
2020-03-11 18:27:43 +00:00
Token consume ( ) ;
2023-05-27 22:08:52 +00:00
Token consume_and_allow_division ( ) ;
2021-07-11 11:04:55 +00:00
Token consume_identifier ( ) ;
Token consume_identifier_reference ( ) ;
2020-03-11 18:27:43 +00:00
Token consume ( TokenType type ) ;
2020-10-19 17:01:28 +00:00
Token consume_and_validate_numeric_literal ( ) ;
2020-04-17 13:05:58 +00:00
void consume_or_insert_semicolon ( ) ;
2020-03-30 13:24:43 +00:00
void save_state ( ) ;
void load_state ( ) ;
2020-12-29 13:17:39 +00:00
void discard_saved_state ( ) ;
2020-11-02 21:03:19 +00:00
Position position ( ) const ;
2020-03-11 18:27:43 +00:00
2023-02-19 21:07:52 +00:00
RefPtr < BindingPattern const > synthesize_binding_pattern ( Expression const & expression ) ;
2021-09-17 23:11:32 +00:00
2022-01-16 22:51:28 +00:00
Token next_token ( size_t steps = 1 ) const ;
2021-08-28 15:04:37 +00:00
2023-01-09 00:23:00 +00:00
void check_identifier_name_for_assignment_validity ( DeprecatedFlyString const & , bool force_strict = false ) ;
2021-07-11 11:04:55 +00:00
2022-04-01 17:58:27 +00:00
bool try_parse_arrow_function_expression_failed_at_position ( Position const & ) const ;
void set_try_parse_arrow_function_expression_failed_at_position ( Position const & , bool ) ;
2021-04-11 20:41:51 +00:00
2021-09-18 21:02:50 +00:00
bool match_invalid_escaped_keyword ( ) const ;
2021-09-22 10:44:56 +00:00
bool parse_directive ( ScopeNode & body ) ;
void parse_statement_list ( ScopeNode & output_node , AllowLabelledFunction allow_labelled_functions = AllowLabelledFunction : : No ) ;
2022-01-16 22:51:28 +00:00
2023-01-09 00:23:00 +00:00
DeprecatedFlyString consume_string_value ( ) ;
2022-01-16 22:51:28 +00:00
ModuleRequest parse_module_request ( ) ;
2021-09-22 10:44:56 +00:00
2020-12-28 17:15:22 +00:00
struct RulePosition {
2020-12-29 05:12:02 +00:00
AK_MAKE_NONCOPYABLE ( RulePosition ) ;
AK_MAKE_NONMOVABLE ( RulePosition ) ;
public :
2020-12-28 17:15:22 +00:00
RulePosition ( Parser & parser , Position position )
: m_parser ( parser )
, m_position ( position )
{
2020-12-29 13:17:39 +00:00
m_parser . m_rule_starts . append ( position ) ;
2020-12-28 17:15:22 +00:00
}
~ RulePosition ( )
{
2020-12-29 13:17:39 +00:00
auto last = m_parser . m_rule_starts . take_last ( ) ;
2021-02-23 19:42:32 +00:00
VERIFY ( last . line = = m_position . line ) ;
VERIFY ( last . column = = m_position . column ) ;
2020-12-28 17:15:22 +00:00
}
2022-04-01 17:58:27 +00:00
Position const & position ( ) const { return m_position ; }
2020-12-28 17:15:22 +00:00
private :
Parser & m_parser ;
Position m_position ;
} ;
// Creates a RulePosition for this parser at the current position ( ). The
// return value is list-initialized in place because RulePosition is neither
// copyable nor movable, and it is marked nodiscard.
[ [ nodiscard ] ] RulePosition push_start ( ) { return { * this , position ( ) } ; }
2020-03-30 13:24:43 +00:00
struct ParserState {
2021-06-19 12:43:09 +00:00
Lexer lexer ;
Token current_token ;
2023-11-16 11:21:20 +00:00
bool previous_token_was_period { false } ;
2022-11-23 11:39:23 +00:00
Vector < ParserError > errors ;
2021-10-07 23:55:24 +00:00
ScopePusher * current_scope_pusher { nullptr } ;
2021-06-14 07:30:43 +00:00
2021-09-18 21:01:54 +00:00
HashMap < StringView , Optional < Position > > labels_in_scope ;
2022-11-27 01:24:38 +00:00
HashMap < size_t , Position > invalid_property_range_in_object_expression ;
2021-10-12 20:45:52 +00:00
HashTable < StringView > * referenced_private_names { nullptr } ;
2021-06-19 12:43:09 +00:00
bool strict_mode { false } ;
bool allow_super_property_lookup { false } ;
bool allow_super_constructor_call { false } ;
bool in_function_context { false } ;
2023-07-12 02:02:27 +00:00
bool initiated_by_eval { false } ;
2022-04-09 23:55:45 +00:00
bool in_eval_function_context { false } ; // This controls if we allow new.target or not. Note that eval("return") is not allowed, so we have to have a separate state variable for eval.
2021-08-21 09:31:36 +00:00
bool in_formal_parameter_context { false } ;
2023-07-12 02:02:27 +00:00
bool in_catch_parameter_context { false } ;
2021-06-19 12:43:09 +00:00
bool in_generator_function_context { false } ;
2021-11-26 22:50:32 +00:00
bool await_expression_is_valid { false } ;
2021-06-19 12:43:09 +00:00
bool in_arrow_function_context { false } ;
bool in_break_context { false } ;
bool in_continue_context { false } ;
bool string_legacy_octal_escape_sequence_in_scope { false } ;
2021-08-28 15:11:05 +00:00
bool in_class_field_initializer { false } ;
2021-11-09 20:52:21 +00:00
bool in_class_static_init_block { false } ;
LibJS: Add an optimization to avoid needless arguments object creation
This gives FunctionNode a "might need arguments object" boolean flag and
sets it based on the simplest possible heuristic for this: if we don't
encounter an identifier called "arguments" or "eval" up to the next
(nested) function declaration or expression, we won't need an arguments
object. Otherwise, we *might* need one - the final decision is made in
the FunctionDeclarationInstantiation AO.
Now, this is obviously not perfect. Even if you avoid eval, something
like `foo.arguments` will still trigger a false positive - but it's a
start and already massively cuts down on needlessly allocated objects,
especially in real-world code that is often minified, and so a full
"arguments" identifier will be an actual arguments object more often
than not.
To illustrate the actual impact of this change, here's the number of
allocated arguments objects during a full test-js run:
Before:
- Unmapped arguments objects: 78765
- Mapped arguments objects: 2455
After:
- Unmapped arguments objects: 18
- Mapped arguments objects: 37
This results in a ~5% speedup of test-js on my Linux host machine, and
about 3.5% on i686 Serenity in QEMU (warm runs, average of 5).
The following microbenchmark (calling an empty function 1M times) runs
25% faster on Linux and 45% on Serenity:
function foo() {}
for (var i = 0; i < 1_000_000; ++i)
foo();
test262 reports no changes in either direction, apart from a speedup :^)
2021-10-05 07:44:58 +00:00
bool function_might_need_arguments_object { false } ;
2020-03-30 13:24:43 +00:00
2021-08-14 15:30:37 +00:00
ParserState ( Lexer , Program : : Type ) ;
2020-03-30 13:24:43 +00:00
} ;
2024-06-13 19:31:31 +00:00
[ [ nodiscard ] ] NonnullRefPtr < Identifier const > create_identifier_and_register_in_current_scope ( SourceRange range , DeprecatedFlyString string , Optional < DeclarationKind > = { } ) ;
2023-07-04 22:14:41 +00:00
2023-02-19 21:07:52 +00:00
NonnullRefPtr < SourceCode const > m_source_code ;
2020-12-29 13:17:39 +00:00
Vector < Position > m_rule_starts ;
2021-06-19 12:43:09 +00:00
ParserState m_state ;
2023-01-09 00:23:00 +00:00
DeprecatedFlyString m_filename ;
2020-05-02 18:46:39 +00:00
Vector < ParserState > m_saved_state ;
2024-09-02 09:42:48 +00:00
HashMap < size_t , TokenMemoization > m_token_memoizations ;
2021-08-14 15:30:37 +00:00
Program : : Type m_program_type ;
2020-03-11 18:27:43 +00:00
} ;
}