2020-03-11 18:27:43 +00:00
/*
2021-05-29 10:38:28 +00:00
* Copyright ( c ) 2020 , Stephan Unverwerth < s . unverwerth @ serenityos . org >
2022-04-30 20:22:52 +00:00
* Copyright ( c ) 2020 - 2022 , Linus Groh < linusg @ serenityos . org >
2022-01-16 22:51:28 +00:00
* Copyright ( c ) 2021 - 2022 , David Tuin < davidot @ serenityos . org >
2021-10-07 16:43:22 +00:00
* Copyright ( c ) 2021 , Ali Mohammad Pur < mpfard @ serenityos . org >
2021-11-09 20:52:21 +00:00
* Copyright ( c ) 2021 , Idan Horowitz < idan . horowitz @ serenityos . org >
2023-02-19 21:07:52 +00:00
* Copyright ( c ) 2023 , Andreas Kling < kling @ serenityos . org >
2020-03-11 18:27:43 +00:00
*
2021-04-22 08:24:48 +00:00
* SPDX - License - Identifier : BSD - 2 - Clause
2020-03-11 18:27:43 +00:00
*/
# include "Parser.h"
2021-06-16 22:57:01 +00:00
# include <AK/Array.h>
2021-06-13 08:47:09 +00:00
# include <AK/CharacterTypes.h>
2021-05-10 11:01:38 +00:00
# include <AK/HashTable.h>
2020-04-13 14:42:54 +00:00
# include <AK/ScopeGuard.h>
2020-03-11 18:27:43 +00:00
# include <AK/StdLibExtras.h>
2020-10-15 18:46:52 +00:00
# include <AK/TemporaryChange.h>
2021-07-29 14:34:37 +00:00
# include <LibJS/Runtime/RegExpObject.h>
# include <LibRegex/Regex.h>
2020-03-11 18:27:43 +00:00
namespace JS {
2020-03-12 22:02:41 +00:00
2020-04-13 14:42:54 +00:00
// Tracks one scope while the parser is inside it. Constructing a ScopePusher makes it the
// parser's current scope (m_state.current_scope_pusher) and remembers the previous one as its
// parent. The destructor resolves the identifiers registered in this scope, forwards
// hoistable functions, and restores the parent as the current scope.
// Instances are created through the static factory functions below; the constructor is private.
class ScopePusher {
2022-11-15 00:39:07 +00:00
// NOTE: We really only need ModuleTopLevel and NotModuleTopLevel as the only
// difference seems to be in https://tc39.es/ecma262/#sec-static-semantics-varscopeddeclarations
// where ModuleItemList only does the VarScopedDeclaration and not the
// TopLevelVarScopedDeclarations.
enum class ScopeLevel {
NotTopLevel ,
ScriptTopLevel ,
ModuleTopLevel ,
FunctionTopLevel ,
StaticInitTopLevel
} ;
2023-07-07 21:14:03 +00:00
public :
2023-07-04 22:14:41 +00:00
// Kind of syntactic construct that opened the scope; one value per factory function below.
enum class ScopeType {
Function ,
Program ,
Block ,
ForLoop ,
With ,
Catch ,
ClassStaticInit ,
ClassField ,
ClassDeclaration ,
} ;
2021-09-22 10:44:56 +00:00
private :
2023-07-04 22:14:41 +00:00
// Installs this scope as the parser's current scope and links it to the previous one.
// Every scope type except Function must end up with a ScopeNode: either `node`, or (only
// for a NotTopLevel scope with a parent) the parent's node. Function scopes start with
// m_node == nullptr; a node can be attached later through set_scope_node().
ScopePusher ( Parser & parser , ScopeNode * node , ScopeLevel scope_level , ScopeType type )
2020-04-13 14:42:54 +00:00
: m_parser ( parser )
2022-11-15 00:39:07 +00:00
, m_scope_level ( scope_level )
2023-07-04 22:14:41 +00:00
, m_type ( type )
2020-04-13 14:42:54 +00:00
{
2021-09-22 10:44:56 +00:00
// exchange() stores `this` as the current scope and hands back the previous one.
m_parent_scope = exchange ( m_parser . m_state . current_scope_pusher , this ) ;
2023-07-07 21:14:03 +00:00
if ( type ! = ScopeType : : Function ) {
VERIFY ( node | | ( m_parent_scope & & scope_level = = ScopeLevel : : NotTopLevel ) ) ;
if ( ! node )
m_node = m_parent_scope - > m_node ;
else
m_node = node ;
}
2021-07-04 01:15:52 +00:00
2022-11-15 00:39:07 +00:00
// A top-level scope is its own top-level scope; any other scope shares its parent's.
if ( ! is_top_level ( ) )
2021-09-22 10:44:56 +00:00
m_top_level_scope = m_parent_scope - > m_top_level_scope ;
else
m_top_level_scope = this ;
2020-04-13 14:42:54 +00:00
}
2022-11-15 00:39:07 +00:00
// True for every scope level except NotTopLevel.
bool is_top_level ( )
{
return m_scope_level ! = ScopeLevel : : NotTopLevel ;
}
2021-09-22 10:44:56 +00:00
public :
2023-07-12 02:02:27 +00:00
// Opens a function scope (FunctionTopLevel, no ScopeNode yet). If `function_name` is
// given, it is recorded in m_bound_names.
static ScopePusher function_scope ( Parser & parser , RefPtr < Identifier const > function_name = nullptr )
2020-04-13 14:42:54 +00:00
{
2023-07-12 02:02:27 +00:00
ScopePusher scope_pusher ( parser , nullptr , ScopeLevel : : FunctionTopLevel , ScopeType : : Function ) ;
if ( function_name ) {
scope_pusher . m_bound_names . set ( function_name - > string ( ) ) ;
}
return scope_pusher ;
2021-09-22 10:44:56 +00:00
}
2021-07-04 01:15:52 +00:00
2021-09-22 10:44:56 +00:00
// Opens the scope for `program`; the scope level follows the program type (Script or Module).
static ScopePusher program_scope ( Parser & parser , Program & program )
{
2023-07-04 22:14:41 +00:00
return ScopePusher ( parser , & program , program . type ( ) = = Program : : Type : : Script ? ScopeLevel : : ScriptTopLevel : ScopeLevel : : ModuleTopLevel , ScopeType : : Program ) ;
2021-09-22 10:44:56 +00:00
}
2021-07-05 19:45:34 +00:00
2021-09-22 10:44:56 +00:00
// Opens a non-top-level Block scope backed by `node`.
static ScopePusher block_scope ( Parser & parser , ScopeNode & node )
{
2023-07-04 22:14:41 +00:00
return ScopePusher ( parser , & node , ScopeLevel : : NotTopLevel , ScopeType : : Block ) ;
2021-07-04 01:15:52 +00:00
}
2023-07-04 22:14:41 +00:00
// Opens a non-top-level ForLoop scope backed by `node`.
static ScopePusher for_loop_scope ( Parser & parser , ScopeNode & node )
2021-07-04 01:15:52 +00:00
{
2023-07-04 22:14:41 +00:00
return ScopePusher ( parser , & node , ScopeLevel : : NotTopLevel , ScopeType : : ForLoop ) ;
}
2021-07-04 01:15:52 +00:00
2023-07-04 22:14:41 +00:00
// Opens a non-top-level With scope backed by `node`.
static ScopePusher with_scope ( Parser & parser , ScopeNode & node )
{
ScopePusher scope_pusher ( parser , & node , ScopeLevel : : NotTopLevel , ScopeType : : With ) ;
2021-09-22 10:44:56 +00:00
return scope_pusher ;
}
2021-07-05 19:45:34 +00:00
2023-02-19 21:07:52 +00:00
// Opens a Catch scope; it has no ScopeNode of its own and reuses the parent's.
// With a binding pattern, each bound identifier becomes a forbidden var name and a bound
// name. Otherwise a non-empty `parameter` is recorded as a var name and a bound name.
static ScopePusher catch_scope ( Parser & parser , RefPtr < BindingPattern const > const & pattern , DeprecatedFlyString const & parameter )
2021-09-22 10:44:56 +00:00
{
2023-07-04 22:14:41 +00:00
ScopePusher scope_pusher ( parser , nullptr , ScopeLevel : : NotTopLevel , ScopeType : : Catch ) ;
2021-09-22 10:44:56 +00:00
if ( pattern ) {
2023-02-27 22:13:37 +00:00
// NOTE: Nothing in the callback throws an exception.
2023-07-20 14:40:14 +00:00
MUST ( pattern - > for_each_bound_identifier ( [ & ] ( auto const & identifier ) {
scope_pusher . m_forbidden_var_names . set ( identifier . string ( ) ) ;
scope_pusher . m_bound_names . set ( identifier . string ( ) ) ;
2023-02-27 22:13:37 +00:00
} ) ) ;
2021-10-08 10:04:13 +00:00
} else if ( ! parameter . is_empty ( ) ) {
scope_pusher . m_var_names . set ( parameter ) ;
2023-07-04 22:14:41 +00:00
scope_pusher . m_bound_names . set ( parameter ) ;
2021-07-05 19:45:34 +00:00
}
2023-07-04 22:14:41 +00:00
2021-09-22 10:44:56 +00:00
return scope_pusher ;
2021-07-05 19:45:34 +00:00
}
2021-10-20 19:29:47 +00:00
// Opens a class static initialization block scope; it counts as a top-level scope (StaticInitTopLevel).
static ScopePusher static_init_block_scope ( Parser & parser , ScopeNode & node )
{
2023-07-04 22:14:41 +00:00
ScopePusher scope_pusher ( parser , & node , ScopeLevel : : StaticInitTopLevel , ScopeType : : ClassStaticInit ) ;
return scope_pusher ;
}
// Opens a non-top-level ClassField scope backed by `node`.
static ScopePusher class_field_scope ( Parser & parser , ScopeNode & node )
{
ScopePusher scope_pusher ( parser , & node , ScopeLevel : : NotTopLevel , ScopeType : : ClassField ) ;
return scope_pusher ;
2021-10-20 19:29:47 +00:00
}
2023-07-04 22:14:41 +00:00
// Opens a ClassDeclaration scope; it has no ScopeNode of its own and reuses the parent's.
// If `class_name` is given, it is recorded in m_bound_names.
static ScopePusher class_declaration_scope ( Parser & parser , RefPtr < Identifier const > class_name )
2021-10-13 17:59:38 +00:00
{
2023-07-04 22:14:41 +00:00
ScopePusher scope_pusher ( parser , nullptr , ScopeLevel : : NotTopLevel , ScopeType : : ClassDeclaration ) ;
if ( class_name ) {
scope_pusher . m_bound_names . set ( class_name - > string ( ) ) ;
}
return scope_pusher ;
2021-10-13 17:59:38 +00:00
}
2023-07-07 21:14:03 +00:00
ScopeType type ( ) const { return m_type ; }
2023-02-19 21:07:52 +00:00
// Records `declaration` in this scope and reports name conflicts as syntax errors via
// throw_identifier_declared(). Three cases:
// - lexical declarations are checked against and added to this scope only;
// - other non-function declarations are treated as var-scoped and walked up to the
//   enclosing top-level scope;
// - function declarations are either var-declared (at script/function/static-init top
//   level) or handled as block-level functions that may be queued for hoisting.
void add_declaration ( NonnullRefPtr < Declaration const > declaration )
2021-07-05 19:45:34 +00:00
{
2021-09-22 10:44:56 +00:00
if ( declaration - > is_lexical_declaration ( ) ) {
2023-02-27 22:13:37 +00:00
// NOTE: Nothing in the callback throws an exception.
2023-07-20 14:40:14 +00:00
MUST ( declaration - > for_each_bound_identifier ( [ & ] ( auto const & identifier ) {
auto const & name = identifier . string ( ) ;
2021-09-22 10:44:56 +00:00
if ( m_var_names . contains ( name ) | | m_forbidden_lexical_names . contains ( name ) | | m_function_names . contains ( name ) )
throw_identifier_declared ( name , declaration ) ;
// A failed insert means the name was already a lexical name in this scope.
if ( m_lexical_names . set ( name ) ! = AK : : HashSetResult : : InsertedNewEntry )
throw_identifier_declared ( name , declaration ) ;
2023-02-27 22:13:37 +00:00
} ) ) ;
2021-09-22 10:44:56 +00:00
m_node - > add_lexical_declaration ( move ( declaration ) ) ;
} else if ( ! declaration - > is_function_declaration ( ) ) {
2023-02-27 22:13:37 +00:00
// NOTE: Nothing in the callback throws an exception.
2023-07-20 14:40:14 +00:00
MUST ( declaration - > for_each_bound_identifier ( [ & ] ( auto const & identifier ) {
auto const & name = identifier . string ( ) ;
2021-09-22 10:44:56 +00:00
// Walk from this scope up to (and including) the nearest top-level scope, checking
// for conflicts and recording the var name in every scope on the way.
ScopePusher * pusher = this ;
while ( true ) {
if ( pusher - > m_lexical_names . contains ( name )
| | pusher - > m_function_names . contains ( name )
| | pusher - > m_forbidden_var_names . contains ( name ) )
throw_identifier_declared ( name , declaration ) ;
pusher - > m_var_names . set ( name ) ;
2022-11-15 00:39:07 +00:00
if ( pusher - > is_top_level ( ) )
2021-09-22 10:44:56 +00:00
break ;
VERIFY ( pusher - > m_parent_scope ! = nullptr ) ;
pusher = pusher - > m_parent_scope ;
}
2022-11-15 00:39:07 +00:00
VERIFY ( pusher - > is_top_level ( ) & & pusher - > m_node ) ;
2021-09-22 10:44:56 +00:00
pusher - > m_node - > add_var_scoped_declaration ( declaration ) ;
2023-02-27 22:13:37 +00:00
} ) ) ;
2021-09-22 10:44:56 +00:00
// NOTE(review): the declaration is also added once per bound identifier inside the
// callback above, so the top-level node may receive it more than once; confirm intended.
VERIFY ( m_top_level_scope ) ;
m_top_level_scope - > m_node - > add_var_scoped_declaration ( move ( declaration ) ) ;
} else {
2022-11-15 00:39:07 +00:00
if ( m_scope_level ! = ScopeLevel : : NotTopLevel & & m_scope_level ! = ScopeLevel : : ModuleTopLevel ) {
// Only non-top levels and Module don't var declare the top functions
2023-02-27 22:13:37 +00:00
// NOTE: Nothing in the callback throws an exception.
2023-07-20 14:40:14 +00:00
MUST ( declaration - > for_each_bound_identifier ( [ & ] ( auto const & identifier ) {
m_var_names . set ( identifier . string ( ) ) ;
2023-02-27 22:13:37 +00:00
} ) ) ;
2021-09-22 10:44:56 +00:00
m_node - > add_var_scoped_declaration ( move ( declaration ) ) ;
} else {
VERIFY ( is < FunctionDeclaration > ( * declaration ) ) ;
auto & function_declaration = static_cast < FunctionDeclaration const & > ( * declaration ) ;
2023-07-04 22:14:41 +00:00
auto function_name = function_declaration . name ( ) ;
2021-09-22 10:44:56 +00:00
if ( m_var_names . contains ( function_name ) | | m_lexical_names . contains ( function_name ) )
throw_identifier_declared ( function_name , declaration ) ;
2022-01-14 23:30:02 +00:00
// Functions that are not FunctionKind::Normal, and all functions in strict mode,
// are recorded as lexical names and are never queued for hoisting.
if ( function_declaration . kind ( ) ! = FunctionKind : : Normal | | m_parser . m_state . strict_mode ) {
2021-09-22 10:44:56 +00:00
if ( m_function_names . contains ( function_name ) )
throw_identifier_declared ( function_name , declaration ) ;
m_lexical_names . set ( function_name ) ;
m_node - > add_lexical_declaration ( move ( declaration ) ) ;
return ;
}
// Otherwise remember the function name and queue the function as a hoisting
// candidate; the destructor decides whether it is actually hoisted.
m_function_names . set ( function_name ) ;
if ( ! m_lexical_names . contains ( function_name ) )
2023-02-19 21:07:52 +00:00
m_functions_to_hoist . append ( static_ptr_cast < FunctionDeclaration const > ( declaration ) ) ;
2021-09-22 10:44:56 +00:00
m_node - > add_lexical_declaration ( move ( declaration ) ) ;
2021-07-05 19:45:34 +00:00
}
}
2021-09-22 10:44:56 +00:00
}
2021-10-07 16:43:22 +00:00
// Returns the nearest scope (starting with this one) whose function parameters have been
// set via set_function_parameters(), or nullptr if there is none.
ScopePusher const * last_function_scope ( ) const
{
for ( auto scope_ptr = this ; scope_ptr ; scope_ptr = scope_ptr - > m_parent_scope ) {
if ( scope_ptr - > m_function_parameters . has_value ( ) )
return scope_ptr ;
}
return nullptr ;
}
2022-11-23 12:12:36 +00:00
// Dereferences m_function_parameters without checking that it holds a value.
Vector < FunctionParameter > const & function_parameters ( ) const
2021-10-07 16:43:22 +00:00
{
return * m_function_parameters ;
}
ScopePusher * parent_scope ( ) { return m_parent_scope ; }
ScopePusher const * parent_scope ( ) const { return m_parent_scope ; }
2023-01-09 00:23:00 +00:00
// True if `name` is a lexical name or var name of this scope, or names one of the
// functions currently queued for hoisting. Parent scopes are not consulted.
[[nodiscard]] bool has_declaration ( DeprecatedFlyString const & name ) const
2021-10-07 16:43:22 +00:00
{
return m_lexical_names . contains ( name ) | | m_var_names . contains ( name ) | | ! m_functions_to_hoist . find_if ( [ & name ] ( auto & function ) { return function - > name ( ) = = name ; } ) . is_end ( ) ;
}
bool contains_direct_call_to_eval ( ) const { return m_contains_direct_call_to_eval ; }
bool contains_access_to_arguments_object ( ) const { return m_contains_access_to_arguments_object ; }
2023-07-04 22:14:41 +00:00
// Marks this scope as containing a direct eval call, which also disables the local and
// global variable optimizations guarded by m_screwed_by_eval_in_scope_chain.
void set_contains_direct_call_to_eval ( )
{
m_contains_direct_call_to_eval = true ;
2023-07-12 02:02:27 +00:00
m_screwed_by_eval_in_scope_chain = true ;
2023-07-04 22:14:41 +00:00
}
2021-10-07 16:43:22 +00:00
void set_contains_access_to_arguments_object ( ) { m_contains_access_to_arguments_object = true ; }
2023-07-07 21:14:03 +00:00
void set_scope_node ( ScopeNode * node ) { m_node = node ; }
// Stores a copy of `parameters` and records the names they bind. Plain identifier
// parameters are registered as identifiers, become candidates for local variables, and
// are forbidden as lexical names. Identifiers bound by a binding pattern are only
// forbidden as lexical names.
void set_function_parameters ( Vector < FunctionParameter > const & parameters )
{
m_function_parameters = parameters ;
for ( auto & parameter : parameters ) {
parameter . binding . visit (
[ & ] ( Identifier const & identifier ) {
register_identifier ( identifier ) ;
m_function_parameters_candidates_for_local_variables . set ( identifier . string ( ) ) ;
m_forbidden_lexical_names . set ( identifier . string ( ) ) ;
} ,
[ & ] ( NonnullRefPtr < BindingPattern const > const & binding_pattern ) {
// NOTE: Nothing in the callback throws an exception.
2023-07-20 14:40:14 +00:00
MUST ( binding_pattern - > for_each_bound_identifier ( [ & ] ( auto const & identifier ) {
m_forbidden_lexical_names . set ( identifier . string ( ) ) ;
2023-07-07 21:14:03 +00:00
} ) ) ;
} ) ;
}
}
2021-10-07 16:43:22 +00:00
2021-09-22 10:44:56 +00:00
// Closes the scope, in this order:
// 1. propagates the eval / arguments / await flags to the parent;
// 2. if the scope has no ScopeNode, restores the parent as current scope and stops;
// 3. for every registered identifier group, marks the identifiers as global or local,
//    or merges the group into the parent scope;
// 4. hands hoistable functions to the ScopeNode (top-level) or to the parent scope;
// 5. restores the parent as the parser's current scope.
~ ScopePusher ( )
{
2022-11-15 00:39:07 +00:00
VERIFY ( is_top_level ( ) | | m_parent_scope ) ;
2021-09-22 10:44:56 +00:00
2023-07-07 21:14:03 +00:00
// These flags do not propagate out of a scope whose function parameters have been set.
if ( m_parent_scope & & ! m_function_parameters . has_value ( ) ) {
m_parent_scope - > m_contains_access_to_arguments_object | = m_contains_access_to_arguments_object ;
m_parent_scope - > m_contains_direct_call_to_eval | = m_contains_direct_call_to_eval ;
m_parent_scope - > m_contains_await_expression | = m_contains_await_expression ;
}
2023-07-12 02:02:27 +00:00
// Unlike the flags above, this one reaches the parent even from a scope with function parameters.
if ( m_parent_scope & & m_contains_direct_call_to_eval ) {
m_parent_scope - > m_screwed_by_eval_in_scope_chain = true ;
2023-07-07 21:14:03 +00:00
}
if ( ! m_node ) {
m_parser . m_state . current_scope_pusher = m_parent_scope ;
return ;
}
2023-07-04 22:14:41 +00:00
for ( auto & it : m_identifier_groups ) {
auto const & identifier_group_name = it . key ;
auto & identifier_group = it . value ;
2023-07-12 02:02:27 +00:00
if ( m_parser . m_state . in_catch_parameter_context ) {
// NOTE: The parser currently cannot determine if an identifier captured by a function belongs to the environment created by a catch parameter.
// As a result, any identifiers used inside the catch parameter are not considered as candidates for optimization in local or global variable access.
continue ;
}
2023-07-04 22:14:41 +00:00
if ( identifier_group_name = = " arguments " sv ) {
// NOTE: arguments is a special variable that should not be treated as a candidate to become local
continue ;
}
// Var names only count as declared here in a top-level scope; lexical and function
// names count in any scope.
bool scope_has_declaration = false ;
2023-07-12 23:44:48 +00:00
if ( is_top_level ( ) & & m_var_names . contains ( identifier_group_name ) )
scope_has_declaration = true ;
else if ( m_lexical_names . contains ( identifier_group_name ) | | m_function_names . contains ( identifier_group_name ) )
scope_has_declaration = true ;
2023-07-04 22:14:41 +00:00
2023-07-08 17:31:41 +00:00
bool hoistable_function_declaration = false ;
for ( auto const & function_declaration : m_functions_to_hoist ) {
if ( function_declaration - > name ( ) = = identifier_group_name )
hoistable_function_declaration = true ;
}
2023-07-04 22:14:41 +00:00
// Identifiers bound by the class name or catch parameter are dropped here: they are
// neither optimized nor forwarded to the parent scope.
if ( ( m_type = = ScopeType : : ClassDeclaration | | m_type = = ScopeType : : Catch ) & & m_bound_names . contains ( identifier_group_name ) ) {
2023-07-12 02:02:27 +00:00
// NOTE: Currently, the parser cannot recognize that assigning a named function expression creates a scope with a binding for the function name.
// As a result, function names are not considered as candidates for optimization in global variable access.
2023-07-04 22:14:41 +00:00
continue ;
}
2023-07-12 02:02:27 +00:00
if ( m_type = = ScopeType : : Function & & m_bound_names . contains ( identifier_group_name ) ) {
// NOTE: Currently the parser can't determine that a named function expression assignment creates a scope with a binding for the function name, so function names are not considered as candidates to be optimized in global variable access.
identifier_group . might_be_variable_in_lexical_scope_in_named_function_assignment = true ;
}
2023-07-04 22:14:41 +00:00
if ( m_type = = ScopeType : : ClassDeclaration | | m_type = = ScopeType : : Catch ) {
// NOTE: Class declaration and catch scopes do not have own ScopeNode hence can't contain declaration of any variable
scope_has_declaration = false ;
}
2023-07-12 02:02:27 +00:00
if ( m_type = = ScopeType : : Function ) {
if ( ! m_contains_access_to_arguments_object & & m_function_parameters_candidates_for_local_variables . contains ( identifier_group_name ) ) {
scope_has_declaration = true ;
} else if ( m_forbidden_lexical_names . contains ( identifier_group_name ) ) {
// NOTE: If an identifier is used as a function parameter that cannot be optimized locally or globally, it is simply ignored.
continue ;
}
}
if ( m_type = = ScopeType : : Function & & hoistable_function_declaration ) {
// NOTE: Hoistable function declarations are currently not optimized into global or local variables, but future improvements may change that.
continue ;
2023-07-06 19:25:13 +00:00
}
2023-07-12 02:02:27 +00:00
// Program scope: whatever reaches this point is marked global unless one of the
// conditions below rules it out. This branch is taken whether or not the program
// scope itself declares the name.
if ( m_type = = ScopeType : : Program ) {
auto can_use_global_for_identifier = true ;
if ( identifier_group . used_inside_with_statement )
can_use_global_for_identifier = false ;
else if ( identifier_group . might_be_variable_in_lexical_scope_in_named_function_assignment )
can_use_global_for_identifier = false ;
else if ( m_screwed_by_eval_in_scope_chain )
can_use_global_for_identifier = false ;
else if ( m_parser . m_state . initiated_by_eval )
can_use_global_for_identifier = false ;
if ( can_use_global_for_identifier ) {
for ( auto & identifier : identifier_group . identifiers )
identifier - > set_is_global ( ) ;
}
} else if ( scope_has_declaration ) {
2023-07-08 17:31:41 +00:00
// Declared in this scope: the group stops here and is not forwarded to the parent.
// It becomes a local variable of the enclosing function only if no nested function
// captured it, it is not used inside `with`, and no eval is in the scope chain.
if ( hoistable_function_declaration )
2023-07-04 22:14:41 +00:00
continue ;
2023-07-12 02:02:27 +00:00
if ( ! identifier_group . captured_by_nested_function & & ! identifier_group . used_inside_with_statement ) {
2023-07-04 22:14:41 +00:00
auto function_scope = last_function_scope ( ) ;
2023-07-12 02:02:27 +00:00
if ( ! function_scope | | m_screwed_by_eval_in_scope_chain ) {
2023-07-04 22:14:41 +00:00
continue ;
}
auto local_variable_index = function_scope - > m_node - > add_local_variable ( identifier_group_name ) ;
for ( auto & identifier : identifier_group . identifiers )
identifier - > set_local_variable_index ( local_variable_index ) ;
}
} else {
2023-07-12 02:02:27 +00:00
// Not declared here: forward the group to the parent scope, first noting whether it
// crossed a function boundary or a `with` statement on the way out.
if ( m_function_parameters . has_value ( ) | | m_type = = ScopeType : : ClassField | | m_type = = ScopeType : : ClassStaticInit ) {
2023-07-04 22:14:41 +00:00
// NOTE: Class fields and class static initialization sections implicitly create functions
identifier_group . captured_by_nested_function = true ;
}
2023-07-12 02:02:27 +00:00
if ( m_type = = ScopeType : : With )
identifier_group . used_inside_with_statement = true ;
2023-07-04 22:14:41 +00:00
if ( m_parent_scope ) {
// Merge into the parent's existing group for this name, or copy the group over.
if ( auto maybe_parent_scope_identifier_group = m_parent_scope - > m_identifier_groups . get ( identifier_group_name ) ; maybe_parent_scope_identifier_group . has_value ( ) ) {
maybe_parent_scope_identifier_group . value ( ) . identifiers . extend ( identifier_group . identifiers ) ;
if ( identifier_group . captured_by_nested_function )
maybe_parent_scope_identifier_group . value ( ) . captured_by_nested_function = true ;
2023-07-12 02:02:27 +00:00
if ( identifier_group . used_inside_with_statement )
maybe_parent_scope_identifier_group . value ( ) . used_inside_with_statement = true ;
if ( identifier_group . might_be_variable_in_lexical_scope_in_named_function_assignment )
maybe_parent_scope_identifier_group . value ( ) . might_be_variable_in_lexical_scope_in_named_function_assignment = true ;
2023-07-04 22:14:41 +00:00
} else {
m_parent_scope - > m_identifier_groups . set ( identifier_group_name , identifier_group ) ;
}
}
}
}
2023-07-08 17:31:41 +00:00
// Hoisting: a queued function is dropped if its name is a lexical name or forbidden var
// name of this scope. A top-level scope hands it to its ScopeNode; any other scope passes
// it to the parent unless the parent has a lexical or function name of the same name.
for ( size_t i = 0 ; i < m_functions_to_hoist . size ( ) ; i + + ) {
auto const & function_declaration = m_functions_to_hoist [ i ] ;
if ( m_lexical_names . contains ( function_declaration - > name ( ) ) | | m_forbidden_var_names . contains ( function_declaration - > name ( ) ) )
continue ;
if ( is_top_level ( ) ) {
m_node - > add_hoisted_function ( move ( m_functions_to_hoist [ i ] ) ) ;
} else {
if ( ! m_parent_scope - > m_lexical_names . contains ( function_declaration - > name ( ) ) & & ! m_parent_scope - > m_function_names . contains ( function_declaration - > name ( ) ) )
m_parent_scope - > m_functions_to_hoist . append ( move ( m_functions_to_hoist [ i ] ) ) ;
}
}
2021-09-22 10:44:56 +00:00
VERIFY ( m_parser . m_state . current_scope_pusher = = this ) ;
m_parser . m_state . current_scope_pusher = m_parent_scope ;
}
2022-01-18 17:55:19 +00:00
void set_contains_await_expression ( )
{
m_contains_await_expression = true ;
}
bool contains_await_expression ( ) const
{
return m_contains_await_expression ;
}
2022-12-20 21:09:57 +00:00
// False only for the top-level scope of a script.
bool can_have_using_declaration ( ) const
{
return m_scope_level ! = ScopeLevel : : ScriptTopLevel ;
}
2023-07-04 22:14:41 +00:00
// Adds `id` to the identifier group keyed by its string, creating the group on first
// use. The groups are resolved in the destructor.
void register_identifier ( NonnullRefPtr < Identifier > id )
{
if ( auto maybe_identifier_group = m_identifier_groups . get ( id - > string ( ) ) ; maybe_identifier_group . has_value ( ) ) {
maybe_identifier_group . value ( ) . identifiers . append ( id ) ;
} else {
m_identifier_groups . set ( id - > string ( ) , IdentifierGroup {
. captured_by_nested_function = false ,
. identifiers = { id } ,
} ) ;
}
}
2021-09-22 10:44:56 +00:00
private :
2023-02-19 21:07:52 +00:00
// Despite the name, this does not throw: it reports a syntax error through the parser,
// positioned at the start of the declaration's source range.
void throw_identifier_declared ( DeprecatedFlyString const & name , NonnullRefPtr < Declaration const > const & declaration )
2021-09-22 10:44:56 +00:00
{
2022-12-04 18:02:33 +00:00
m_parser . syntax_error ( DeprecatedString : : formatted ( " Identifier '{}' already declared " , name ) , declaration - > source_range ( ) . start ) ;
2020-04-13 14:42:54 +00:00
}
Parser & m_parser ;
2021-09-22 10:44:56 +00:00
// ScopeNode receiving this scope's declarations; may be the parent's node, or null for a
// Function scope until set_scope_node() is called.
ScopeNode * m_node { nullptr } ;
2022-11-15 00:39:07 +00:00
ScopeLevel m_scope_level { ScopeLevel : : NotTopLevel } ;
2023-07-04 22:14:41 +00:00
ScopeType m_type ;
2021-09-22 10:44:56 +00:00
// Scope that was current when this one was constructed (null for the outermost scope).
ScopePusher * m_parent_scope { nullptr } ;
// Nearest enclosing scope (possibly this one) for which is_top_level() is true.
ScopePusher * m_top_level_scope { nullptr } ;
2023-01-09 00:23:00 +00:00
// Names declared in this scope, by declaration category (see add_declaration()).
HashTable < DeprecatedFlyString > m_lexical_names ;
HashTable < DeprecatedFlyString > m_var_names ;
HashTable < DeprecatedFlyString > m_function_names ;
2021-09-22 10:44:56 +00:00
2023-01-09 00:23:00 +00:00
// Names that may not be declared lexically / as var in this scope (function parameters
// and catch binding pattern identifiers, respectively).
HashTable < DeprecatedFlyString > m_forbidden_lexical_names ;
HashTable < DeprecatedFlyString > m_forbidden_var_names ;
2023-03-06 13:17:01 +00:00
// Function declarations queued as hoisting candidates; processed in the destructor.
Vector < NonnullRefPtr < FunctionDeclaration const > > m_functions_to_hoist ;
2021-10-07 16:43:22 +00:00
2023-07-04 22:14:41 +00:00
// Names bound by the construct that opened the scope: function name, class name, or catch parameter.
HashTable < DeprecatedFlyString > m_bound_names ;
2023-07-06 19:25:13 +00:00
// Plain identifier parameters recorded by set_function_parameters().
HashTable < DeprecatedFlyString > m_function_parameters_candidates_for_local_variables ;
2023-07-04 22:14:41 +00:00
// All Identifier nodes with the same name seen in this scope (including those merged in
// from child scopes), plus the facts gathered about how they were used.
struct IdentifierGroup {
bool captured_by_nested_function { false } ;
2023-07-12 02:02:27 +00:00
bool used_inside_with_statement { false } ;
bool might_be_variable_in_lexical_scope_in_named_function_assignment { false } ;
2023-07-04 22:14:41 +00:00
Vector < NonnullRefPtr < Identifier > > identifiers ;
} ;
HashMap < DeprecatedFlyString , IdentifierGroup > m_identifier_groups ;
2022-11-23 12:12:36 +00:00
// Set only through set_function_parameters(); its presence is what last_function_scope()
// and the destructor use to recognize a function scope.
Optional < Vector < FunctionParameter > > m_function_parameters ;
2021-10-07 16:43:22 +00:00
bool m_contains_access_to_arguments_object { false } ;
bool m_contains_direct_call_to_eval { false } ;
2022-01-18 17:55:19 +00:00
bool m_contains_await_expression { false } ;
2023-07-12 02:02:27 +00:00
// True once this scope or a scope nested in it contains a direct call to eval.
bool m_screwed_by_eval_in_scope_chain { false } ;
2020-04-13 14:42:54 +00:00
} ;
2020-08-18 16:46:36 +00:00
// Lookup table mapping a TokenType to its operator precedence. The constructor expands the
// static { token, precedence } list below into an array indexed by the token's numeric value.
class OperatorPrecedenceTable {
public :
// Value-initializes the lookup array (all entries 0) and then fills in one slot per
// entry of m_operator_precedence.
constexpr OperatorPrecedenceTable ( )
: m_token_precedence ( )
{
for ( size_t i = 0 ; i < array_size ( m_operator_precedence ) ; + + i ) {
auto & op = m_operator_precedence [ i ] ;
m_token_precedence [ static_cast < size_t > ( op . token ) ] = op . precedence ;
}
}
2020-03-30 13:24:43 +00:00
2020-08-18 16:46:36 +00:00
// Returns the precedence stored for `token`. A stored value of 0 means the table has no
// entry for that token (every listed precedence is at least 1); this is treated as an
// internal error: a warning is printed and VERIFY_NOT_REACHED() is hit.
constexpr int get ( TokenType token ) const
{
int p = m_token_precedence [ static_cast < size_t > ( token ) ] ;
if ( p = = 0 ) {
2020-12-06 16:55:19 +00:00
warnln ( " Internal Error: No precedence for operator {} " , Token : : name ( token ) ) ;
2021-02-23 19:42:32 +00:00
VERIFY_NOT_REACHED ( ) ;
2020-08-18 16:46:36 +00:00
return - 1 ;
}
2020-03-12 22:02:41 +00:00
2020-08-18 16:46:36 +00:00
return p ;
}
2020-03-12 22:02:41 +00:00
2023-08-08 02:38:46 +00:00
// Precedence of `token` when used as a unary operator. Minus and Plus get 17, the value
// the table assigns to the other prefix operators (ExclamationMark, Tilde, Typeof, ...),
// instead of their binary precedence of 14; every other token defers to get().
constexpr int get_unary ( TokenType token ) const
{
constexpr int operator_precedence_unary_plus_minus = 17 ;
switch ( token ) {
case TokenType : : Minus :
case TokenType : : Plus :
return operator_precedence_unary_plus_minus ;
default :
return get ( token ) ;
}
}
2020-08-18 16:46:36 +00:00
private :
// Precedence per token, indexed by static_cast<size_t>(TokenType); 0 means "no entry".
int m_token_precedence [ cs_num_of_js_tokens ] ;
2020-03-12 22:02:41 +00:00
2020-08-18 16:46:36 +00:00
struct OperatorPrecedence {
TokenType token ;
int precedence ;
} ;
2020-03-12 22:02:41 +00:00
2020-08-18 16:46:36 +00:00
// Source list for the lookup array, grouped by precedence value from 20 down to 1.
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
static constexpr const OperatorPrecedence m_operator_precedence [ ] = {
{ TokenType : : Period , 20 } ,
{ TokenType : : BracketOpen , 20 } ,
{ TokenType : : ParenOpen , 20 } ,
{ TokenType : : QuestionMarkPeriod , 20 } ,
2020-03-12 22:02:41 +00:00
2020-08-18 16:46:36 +00:00
{ TokenType : : New , 19 } ,
2020-03-12 22:02:41 +00:00
2020-08-18 16:46:36 +00:00
{ TokenType : : PlusPlus , 18 } ,
{ TokenType : : MinusMinus , 18 } ,
2020-03-12 22:02:41 +00:00
2020-08-18 16:46:36 +00:00
{ TokenType : : ExclamationMark , 17 } ,
{ TokenType : : Tilde , 17 } ,
{ TokenType : : Typeof , 17 } ,
{ TokenType : : Void , 17 } ,
{ TokenType : : Delete , 17 } ,
{ TokenType : : Await , 17 } ,
2020-03-12 22:02:41 +00:00
2020-08-18 16:46:36 +00:00
{ TokenType : : DoubleAsterisk , 16 } ,
2020-03-12 22:02:41 +00:00
2020-08-18 16:46:36 +00:00
{ TokenType : : Asterisk , 15 } ,
{ TokenType : : Slash , 15 } ,
{ TokenType : : Percent , 15 } ,
2020-03-12 22:02:41 +00:00
2020-08-18 16:46:36 +00:00
// Binary plus/minus; the unary forms are handled in get_unary().
{ TokenType : : Plus , 14 } ,
{ TokenType : : Minus , 14 } ,
2020-03-12 22:02:41 +00:00
2020-08-18 16:46:36 +00:00
{ TokenType : : ShiftLeft , 13 } ,
{ TokenType : : ShiftRight , 13 } ,
{ TokenType : : UnsignedShiftRight , 13 } ,
2020-03-12 22:02:41 +00:00
2020-08-18 16:46:36 +00:00
{ TokenType : : LessThan , 12 } ,
{ TokenType : : LessThanEquals , 12 } ,
{ TokenType : : GreaterThan , 12 } ,
{ TokenType : : GreaterThanEquals , 12 } ,
{ TokenType : : In , 12 } ,
{ TokenType : : Instanceof , 12 } ,
2020-03-12 22:02:41 +00:00
2020-08-18 16:46:36 +00:00
{ TokenType : : EqualsEquals , 11 } ,
{ TokenType : : ExclamationMarkEquals , 11 } ,
{ TokenType : : EqualsEqualsEquals , 11 } ,
{ TokenType : : ExclamationMarkEqualsEquals , 11 } ,
2020-03-12 22:02:41 +00:00
2020-08-18 16:46:36 +00:00
{ TokenType : : Ampersand , 10 } ,
2020-03-12 22:02:41 +00:00
2020-08-18 16:46:36 +00:00
{ TokenType : : Caret , 9 } ,
2020-03-12 22:02:41 +00:00
2020-08-18 16:46:36 +00:00
{ TokenType : : Pipe , 8 } ,
2020-03-12 22:02:41 +00:00
2020-08-18 16:46:36 +00:00
{ TokenType : : DoubleQuestionMark , 7 } ,
2020-03-12 22:02:41 +00:00
2020-08-18 16:46:36 +00:00
{ TokenType : : DoubleAmpersand , 6 } ,
2020-03-12 22:02:41 +00:00
2020-08-18 16:46:36 +00:00
{ TokenType : : DoublePipe , 5 } ,
{ TokenType : : QuestionMark , 4 } ,
// Assignment operators all share precedence 3.
{ TokenType : : Equals , 3 } ,
{ TokenType : : PlusEquals , 3 } ,
{ TokenType : : MinusEquals , 3 } ,
{ TokenType : : DoubleAsteriskEquals , 3 } ,
{ TokenType : : AsteriskEquals , 3 } ,
{ TokenType : : SlashEquals , 3 } ,
{ TokenType : : PercentEquals , 3 } ,
{ TokenType : : ShiftLeftEquals , 3 } ,
{ TokenType : : ShiftRightEquals , 3 } ,
{ TokenType : : UnsignedShiftRightEquals , 3 } ,
{ TokenType : : AmpersandEquals , 3 } ,
{ TokenType : : CaretEquals , 3 } ,
2020-10-05 15:49:43 +00:00
{ TokenType : : PipeEquals , 3 } ,
{ TokenType : : DoubleAmpersandEquals , 3 } ,
{ TokenType : : DoublePipeEquals , 3 } ,
{ TokenType : : DoubleQuestionMarkEquals , 3 } ,
2020-08-18 16:46:36 +00:00
{ TokenType : : Yield , 2 } ,
{ TokenType : : Comma , 1 } ,
} ;
} ;
// The precedence table instance defined for this file; as a constexpr object it is
// initialized by OperatorPrecedenceTable's constexpr constructor during constant evaluation.
constexpr OperatorPrecedenceTable g_operator_precedence ;
2020-03-12 22:02:41 +00:00
2021-08-14 15:30:37 +00:00
// Takes ownership of the lexer and primes the state with its first token.
// For module programs, HTML-like comments are disallowed on the lexer before that first
// token is read.
Parser::ParserState::ParserState(Lexer l, Program::Type program_type)
    : lexer(move(l))
{
    bool const parsing_module = program_type == Program::Type::Module;
    if (parsing_module)
        lexer.disallow_html_comments();

    current_token = lexer.next();
}
}
2020-03-12 22:02:41 +00:00
2022-04-09 23:55:45 +00:00
// Builds a parser for the given lexer and program type.
// m_source_code is created from the lexer's filename and source before the lexer is moved
// into the parser state. When `initial_state_for_eval` is supplied, the parser is flagged
// as initiated by eval and the eval-related context flags are copied from it; otherwise
// the state is left as ParserState constructed it.
Parser::Parser(Lexer lexer, Program::Type program_type, Optional<EvalInitialState> initial_state_for_eval)
    : m_source_code(SourceCode::create(lexer.filename(), String::from_deprecated_string(lexer.source()).release_value_but_fixme_should_propagate_errors()))
    , m_state(move(lexer), program_type)
    , m_program_type(program_type)
{
    if (!initial_state_for_eval.has_value())
        return;

    auto const& eval_state = initial_state_for_eval.value();
    m_state.initiated_by_eval = true;
    m_state.in_eval_function_context = eval_state.in_eval_function_context;
    m_state.allow_super_property_lookup = eval_state.allow_super_property_lookup;
    m_state.allow_super_constructor_call = eval_state.allow_super_constructor_call;
    m_state.in_class_field_initializer = eval_state.in_class_field_initializer;
}
// Reports the associativity the parser uses for an operator token.
// The tokens enumerated below are left-associative; every other token is reported as
// right-associative.
Associativity Parser::operator_associativity(TokenType type) const
{
    bool is_left_associative = false;

    switch (type) {
    // Member access, calls and optional chaining
    case TokenType::Period:
    case TokenType::BracketOpen:
    case TokenType::ParenOpen:
    case TokenType::QuestionMarkPeriod:
    // Arithmetic
    case TokenType::Asterisk:
    case TokenType::Slash:
    case TokenType::Percent:
    case TokenType::Plus:
    case TokenType::Minus:
    // Shifts
    case TokenType::ShiftLeft:
    case TokenType::ShiftRight:
    case TokenType::UnsignedShiftRight:
    // Relational
    case TokenType::LessThan:
    case TokenType::LessThanEquals:
    case TokenType::GreaterThan:
    case TokenType::GreaterThanEquals:
    case TokenType::In:
    case TokenType::Instanceof:
    // Equality
    case TokenType::EqualsEquals:
    case TokenType::ExclamationMarkEquals:
    case TokenType::EqualsEqualsEquals:
    case TokenType::ExclamationMarkEqualsEquals:
    // Keyword operators
    case TokenType::Typeof:
    case TokenType::Void:
    case TokenType::Delete:
    case TokenType::Await:
    // Bitwise
    case TokenType::Ampersand:
    case TokenType::Caret:
    case TokenType::Pipe:
    // Logical and nullish coalescing
    case TokenType::DoubleQuestionMark:
    case TokenType::DoubleAmpersand:
    case TokenType::DoublePipe:
    // Sequence
    case TokenType::Comma:
        is_left_associative = true;
        break;
    default:
        break;
    }

    return is_left_associative ? Associativity::Left : Associativity::Right;
}
2021-09-22 10:44:56 +00:00
bool Parser : : parse_directive ( ScopeNode & body )
{
bool found_use_strict = false ;
while ( ! done ( ) & & match ( TokenType : : StringLiteral ) ) {
2022-11-27 01:21:25 +00:00
auto raw_value = m_state . current_token . original_value ( ) ;
2021-09-22 10:44:56 +00:00
// It cannot be a labelled function since we hit a string literal.
auto statement = parse_statement ( AllowLabelledFunction : : No ) ;
body . append ( statement ) ;
VERIFY ( is < ExpressionStatement > ( * statement ) ) ;
auto & expression = static_cast < ExpressionStatement const & > ( * statement ) . expression ( ) ;
if ( ! is < StringLiteral > ( expression ) )
break ;
2022-11-27 01:21:25 +00:00
if ( raw_value . is_one_of ( " 'use strict' " sv , " \" use strict \" " ) ) {
2021-09-22 10:44:56 +00:00
found_use_strict = true ;
if ( m_state . string_legacy_octal_escape_sequence_in_scope )
syntax_error ( " Octal escape sequence in string literal not allowed in strict mode " ) ;
break ;
}
}
m_state . string_legacy_octal_escape_sequence_in_scope = false ;
return found_use_strict ;
}
2021-06-20 03:13:53 +00:00
// Parses the entire source into a new Program node and returns it.
// Depending on m_program_type the source is parsed as a script or as a module;
// `starts_in_strict_mode` is only forwarded to the script path.
// The program's end offset is updated once parsing has finished.
NonnullRefPtr<Program> Parser::parse_program(bool starts_in_strict_mode)
{
    auto rule_start = push_start();
    auto program = adopt_ref(*new Program({ m_source_code, rule_start.position(), position() }, m_program_type));

    // Program-level scope, kept alive for the whole parse.
    ScopePusher top_level_scope = ScopePusher::program_scope(*this, *program);

    bool const is_script = m_program_type == Program::Type::Script;
    if (!is_script)
        parse_module(program);
    else
        parse_script(program, starts_in_strict_mode);

    // The node was created before anything was consumed, so fix up its end now.
    program->set_end_offset({}, position().offset);
    return program;
}
void Parser : : parse_script ( Program & program , bool starts_in_strict_mode )
{
bool strict_before = m_state . strict_mode ;
if ( starts_in_strict_mode )
2021-09-22 10:44:56 +00:00
m_state . strict_mode = true ;
bool has_use_strict = parse_directive ( program ) ;
if ( m_state . strict_mode | | has_use_strict ) {
2021-11-26 23:01:23 +00:00
program . set_strict_mode ( ) ;
2021-06-20 03:13:53 +00:00
m_state . strict_mode = true ;
}
2020-05-28 05:22:08 +00:00
2021-11-26 23:01:23 +00:00
parse_statement_list ( program , AllowLabelledFunction : : Yes ) ;
if ( ! done ( ) ) {
expected ( " statement or declaration " ) ;
consume ( ) ;
}
m_state . strict_mode = strict_before ;
}
void Parser : : parse_module ( Program & program )
{
TemporaryChange strict_mode_rollback ( m_state . strict_mode , true ) ;
TemporaryChange await_expression_valid_rollback ( m_state . await_expression_is_valid , true ) ;
// Since strict mode is already enabled we skip any directive parsing.
2020-03-11 18:27:43 +00:00
while ( ! done ( ) ) {
2021-09-22 10:44:56 +00:00
parse_statement_list ( program , AllowLabelledFunction : : Yes ) ;
2021-07-11 23:25:32 +00:00
2021-09-22 10:44:56 +00:00
if ( done ( ) )
break ;
2021-07-11 23:25:32 +00:00
2021-09-22 10:44:56 +00:00
if ( match_export_or_import ( ) ) {
2021-08-14 15:42:30 +00:00
VERIFY ( m_state . current_token . type ( ) = = TokenType : : Export | | m_state . current_token . type ( ) = = TokenType : : Import ) ;
if ( m_state . current_token . type ( ) = = TokenType : : Export )
2021-11-26 23:01:23 +00:00
program . append_export ( parse_export_statement ( program ) ) ;
2021-08-14 15:42:30 +00:00
else
2021-11-26 23:01:23 +00:00
program . append_import ( parse_import_statement ( program ) ) ;
2021-08-14 15:42:30 +00:00
2020-03-11 18:27:43 +00:00
} else {
2020-10-22 22:30:07 +00:00
expected ( " statement or declaration " ) ;
2020-03-11 18:27:43 +00:00
consume ( ) ;
}
}
2021-09-22 10:44:56 +00:00
2022-01-18 17:55:19 +00:00
VERIFY ( m_state . current_scope_pusher ) ;
if ( m_state . current_scope_pusher - > contains_await_expression ( ) )
program . set_has_top_level_await ( ) ;
2021-11-26 23:01:23 +00:00
for ( auto & export_statement : program . exports ( ) ) {
2023-03-06 13:17:01 +00:00
if ( export_statement - > has_statement ( ) )
2021-11-26 23:01:23 +00:00
continue ;
2023-03-06 13:17:01 +00:00
for ( auto & entry : export_statement - > entries ( ) ) {
2022-11-23 11:16:51 +00:00
if ( entry . is_module_request ( ) | | entry . kind = = ExportEntry : : Kind : : EmptyNamedExport )
2021-11-26 23:01:23 +00:00
return ;
auto const & exported_name = entry . local_or_import_name ;
bool found = false ;
2023-02-27 22:13:37 +00:00
// NOTE: Nothing in the callback throws an exception.
2023-07-20 14:54:20 +00:00
MUST ( program . for_each_lexically_declared_identifier ( [ & ] ( auto const & identifier ) {
if ( identifier . string ( ) = = exported_name )
2021-11-26 23:01:23 +00:00
found = true ;
2023-02-27 22:13:37 +00:00
} ) ) ;
2021-11-26 23:01:23 +00:00
if ( found )
continue ;
2023-02-27 22:13:37 +00:00
// NOTE: Nothing in the callback throws an exception.
2023-07-20 15:00:43 +00:00
MUST ( program . for_each_var_declared_identifier ( [ & ] ( auto const & identifier ) {
if ( identifier . string ( ) = = exported_name )
2021-11-26 23:01:23 +00:00
found = true ;
2023-02-27 22:13:37 +00:00
} ) ) ;
2022-09-01 21:13:45 +00:00
for ( auto & import : program . imports ( ) ) {
2023-03-06 13:17:01 +00:00
if ( import - > has_bound_name ( exported_name ) ) {
2022-09-01 21:13:45 +00:00
found = true ;
break ;
}
}
2021-11-26 23:01:23 +00:00
if ( ! found )
2023-03-06 13:17:01 +00:00
syntax_error ( DeprecatedString : : formatted ( " '{}' in export is not declared " , exported_name ) , export_statement - > source_range ( ) . start ) ;
2021-11-26 23:01:23 +00:00
}
}
2020-03-11 18:27:43 +00:00
}
2023-02-19 21:07:52 +00:00
// Parses a declaration starting at the current token: an (async) function, a
// class, a let/const declaration or a `using` declaration.
// If the current token starts none of these, an error is reported, one token is
// consumed and an ErrorDeclaration node is returned instead.
NonnullRefPtr<Declaration const> Parser::parse_declaration()
{
    auto rule_start = push_start();
    auto const token_type = m_state.current_token.type();

    // `async function ...` needs one token of lookahead.
    if (token_type == TokenType::Async && next_token().type() == TokenType::Function)
        return parse_function_node<FunctionDeclaration>();

    if (token_type == TokenType::Class)
        return parse_class_declaration();

    if (token_type == TokenType::Function)
        return parse_function_node<FunctionDeclaration>();

    if (token_type == TokenType::Let || token_type == TokenType::Const)
        return parse_variable_declaration();

    // `using` arrives as a plain identifier token, so it is recognized by its text.
    if (token_type == TokenType::Identifier && m_state.current_token.original_value() == "using"sv) {
        // The error is reported but the declaration is still parsed.
        if (!m_state.current_scope_pusher->can_have_using_declaration())
            syntax_error("'using' not allowed outside of block, for loop or function");
        return parse_using_declaration();
    }

    expected("declaration");
    consume();
    return create_ast_node<ErrorDeclaration>({ m_source_code, rule_start.position(), position() });
}
2023-02-19 21:07:52 +00:00
// Parses a single statement, dispatching on the type of the current token.
// Tokens without a dedicated case are tried as a labelled statement first
// (`allow_labelled_function` is forwarded to that attempt) and then as an
// expression statement. If neither applies, an error is reported, one token is
// consumed and an ErrorStatement node is returned.
NonnullRefPtr<Statement const> Parser::parse_statement(AllowLabelledFunction allow_labelled_function)
{
    auto rule_start = push_start();

    switch (m_state.current_token.type()) {
    case TokenType::CurlyOpen:
        return parse_block_statement();
    case TokenType::Return:
        return parse_return_statement();
    case TokenType::Var: {
        // var declarations are additionally registered with the current scope.
        auto var_declaration = parse_variable_declaration();
        m_state.current_scope_pusher->add_declaration(var_declaration);
        return var_declaration;
    }
    case TokenType::For:
        return parse_for_statement();
    case TokenType::If:
        return parse_if_statement();
    case TokenType::Throw:
        return parse_throw_statement();
    case TokenType::Try:
        return parse_try_statement();
    case TokenType::Break:
        return parse_break_statement();
    case TokenType::Continue:
        return parse_continue_statement();
    case TokenType::Switch:
        return parse_switch_statement();
    case TokenType::Do:
        return parse_do_while_statement();
    case TokenType::While:
        return parse_while_statement();
    case TokenType::With:
        // The error is reported but the statement is still parsed.
        if (m_state.strict_mode)
            syntax_error("'with' statement not allowed in strict mode");
        return parse_with_statement();
    case TokenType::Debugger:
        return parse_debugger_statement();
    case TokenType::Semicolon:
        consume();
        return create_ast_node<EmptyStatement>({ m_source_code, rule_start.position(), position() });
    case TokenType::Slash:
    case TokenType::SlashEquals:
        // At the start of a statement a slash is re-lexed as a regex, then handled like any other expression.
        m_state.current_token = m_state.lexer.force_slash_as_regex();
        [[fallthrough]];
    default: {
        if (match_invalid_escaped_keyword())
            syntax_error("Keyword must not contain escaped characters");

        if (match_identifier_name()) {
            if (auto labelled_statement = try_parse_labelled_statement(allow_labelled_function); !labelled_statement.is_null())
                return labelled_statement.release_nonnull();
        }

        if (!match_expression()) {
            expected("statement");
            consume();
            return create_ast_node<ErrorStatement>({ m_source_code, rule_start.position(), position() });
        }

        // Declarations are not permitted where a single statement is expected; report them,
        // then carry on and parse the tokens as an expression.
        if (match(TokenType::Async)) {
            auto upcoming_token = next_token();
            if (upcoming_token.type() == TokenType::Function && !upcoming_token.trivia_contains_line_terminator())
                syntax_error("Async function declaration not allowed in single-statement context");
        } else if (match(TokenType::Function) || match(TokenType::Class)) {
            syntax_error(DeprecatedString::formatted("{} declaration not allowed in single-statement context", m_state.current_token.name()));
        } else if (match(TokenType::Let) && next_token().type() == TokenType::BracketOpen) {
            syntax_error(DeprecatedString::formatted("let followed by [ is not allowed in single-statement context"));
        }

        auto expression = parse_expression(0);
        consume_or_insert_semicolon();
        return create_ast_node<ExpressionStatement>({ m_source_code, rule_start.position(), position() }, move(expression));
    }
    }
}
2021-09-18 21:02:50 +00:00
bool Parser : : match_invalid_escaped_keyword ( ) const
{
if ( m_state . current_token . type ( ) ! = TokenType : : EscapedKeyword )
return false ;
auto token_value = m_state . current_token . value ( ) ;
2021-11-26 22:25:10 +00:00
if ( token_value = = " await " sv )
2021-11-26 22:50:32 +00:00
return m_program_type = = Program : : Type : : Module | | m_state . await_expression_is_valid ;
2021-11-26 22:25:10 +00:00
if ( token_value = = " async " sv )
return false ;
if ( token_value = = " yield " sv )
return m_state . in_generator_function_context ;
if ( m_state . strict_mode )
2021-09-18 21:02:50 +00:00
return true ;
2021-11-26 22:25:10 +00:00
return token_value ! = " let " sv ;
2021-09-18 21:02:50 +00:00
}
2022-07-11 17:32:29 +00:00
static constexpr AK : : Array < StringView , 9 > strict_reserved_words = { " implements " sv , " interface " sv , " let " sv , " package " sv , " private " sv , " protected " sv , " public " sv , " static " sv , " yield " sv } ;
2021-07-11 23:27:35 +00:00
static bool is_strict_reserved_word ( StringView str )
{
2021-11-10 23:55:02 +00:00
return any_of ( strict_reserved_words , [ & str ] ( StringView word ) {
2021-07-11 23:27:35 +00:00
return word = = str ;
} ) ;
}
2022-11-23 12:12:36 +00:00
// Returns true if every parameter is a plain identifier binding: not a rest
// parameter, without a default value, and not a binding pattern.
// An empty parameter list is simple.
static bool is_simple_parameter_list(Vector<FunctionParameter> const& parameters)
{
    for (auto const& parameter : parameters) {
        if (parameter.is_rest)
            return false;
        if (!parameter.default_value.is_null())
            return false;
        if (!parameter.binding.has<NonnullRefPtr<Identifier const>>())
            return false;
    }
    return true;
}
2023-02-19 21:07:52 +00:00
// Speculatively parses an arrow function expression at the current token.
// Returns nullptr if the tokens do not form an arrow function. Once the parser
// state has been saved, every failure path restores it through load_state(), so
// the caller can try a different production.
//   expect_parens: the parameters are parenthesized. Without `is_async` the
//                  opening parenthesis has already been consumed by the caller,
//                  so the most recently pushed rule start is reused.
//   is_async:      the current token must be `async`; it is consumed here
//                  (together with the opening parenthesis when `expect_parens` is set).
RefPtr<FunctionExpression const> Parser::try_parse_arrow_function_expression(bool expect_parens, bool is_async)
{
    if (is_async)
        VERIFY(match(TokenType::Async));

    if (!expect_parens && !is_async) {
        // NOTE: Fast path: bail out early when this cannot possibly be an arrow function, to
        //       avoid the expensive parser state save/load mechanism. The same checks are
        //       repeated in the "real" !expect_parens branch below.
        if (!match_identifier() && !match(TokenType::Yield) && !match(TokenType::Await))
            return nullptr;
        auto lookahead = next_token();
        if (lookahead.trivia_contains_line_terminator() || lookahead.type() != TokenType::Arrow)
            return nullptr;
    }

    save_state();
    auto rule_start = (expect_parens && !is_async)
        // The opening parenthesis was consumed for us, so the rule starts there.
        ? RulePosition { *this, m_rule_starts.last() }
        // Nothing has been consumed yet, so the rule starts right here.
        : push_start();

    // Restores the saved parser state on every early return below; disarmed on success.
    ArmedScopeGuard state_rollback_guard = [&] {
        load_state();
    };

    auto function_kind = FunctionKind::Normal;

    if (is_async) {
        consume(TokenType::Async);
        function_kind = FunctionKind::Async;
        if (m_state.current_token.trivia_contains_line_terminator())
            return nullptr;

        // With `async` in front, the opening parenthesis has not been consumed by anyone yet.
        if (expect_parens) {
            VERIFY(match(TokenType::ParenOpen));
            consume(TokenType::ParenOpen);
        }
    }

    Vector<FunctionParameter> parameters;
    i32 function_length = -1;
    bool contains_direct_call_to_eval = false;

    // Parses parameters, arrow and body inside a function scope that ends with the lambda.
    auto parsed_body = [&]() -> RefPtr<FunctionBody const> {
        ScopePusher function_scope = ScopePusher::function_scope(*this);

        if (expect_parens) {
            // Parenthesized parameters share the parsing logic of regular functions (multiple
            // parameters, default values, rest parameter, optional trailing comma). A new
            // "Unexpected token" error afterwards means this is not an arrow function after all;
            // other new errors (e.g. a duplicate parameter name) must not abort the attempt.
            auto const error_count_before = m_state.errors.size();
            TemporaryChange in_async_context(m_state.await_expression_is_valid, is_async || m_state.await_expression_is_valid);

            parameters = parse_formal_parameters(function_length, FunctionNodeParseOptions::IsArrowFunction | (is_async ? FunctionNodeParseOptions::IsAsyncFunction : 0));

            bool const has_new_error = m_state.errors.size() > error_count_before;
            if (has_new_error && m_state.errors[error_count_before].message.starts_with("Unexpected token"sv))
                return nullptr;
            if (!match(TokenType::ParenClose))
                return nullptr;
            consume();
        } else {
            // Without parentheses the only valid form is a single identifier followed by the arrow.
            if (!match_identifier() && !match(TokenType::Yield) && !match(TokenType::Await))
                return nullptr;

            auto parameter_token = consume_identifier_reference();
            if (m_state.strict_mode && parameter_token.value().is_one_of("arguments"sv, "eval"sv))
                syntax_error("BindingIdentifier may not be 'arguments' or 'eval' in strict mode");
            if (is_async && parameter_token.value() == "await"sv)
                syntax_error("'await' is a reserved identifier in async functions");

            auto identifier = create_ast_node<Identifier const>({ m_source_code, rule_start.position(), position() }, parameter_token.DeprecatedFlyString_value());
            parameters.append({ identifier, {} });
        }

        // A line break between the parameters and the arrow makes this not an arrow function;
        // ASI kicks in instead (and then fails with "Unexpected token Arrow").
        if (m_state.current_token.trivia_contains_line_terminator())
            return nullptr;
        if (!match(TokenType::Arrow))
            return nullptr;
        consume();

        // -1 means parse_formal_parameters() did not determine a length.
        if (function_length == -1)
            function_length = parameters.size();

        // The body starts with an empty label set; the enclosing labels come back afterwards.
        auto old_labels_in_scope = move(m_state.labels_in_scope);
        ScopeGuard guard([&]() {
            m_state.labels_in_scope = move(old_labels_in_scope);
        });

        TemporaryChange change(m_state.in_arrow_function_context, true);
        TemporaryChange async_context_change(m_state.await_expression_is_valid, is_async);
        TemporaryChange in_class_static_init_block_change(m_state.in_class_static_init_block, false);

        if (match(TokenType::CurlyOpen)) {
            // Block body: a regular function body with statements.
            consume(TokenType::CurlyOpen);
            auto block_body = parse_function_body(parameters, function_kind, contains_direct_call_to_eval);
            consume(TokenType::CurlyClose);
            return block_body;
        }

        // Anything that is neither a block nor an expression is an invalid arrow function body.
        if (!match_expression())
            return nullptr;

        // Expression body.
        // FIXME: We synthesize a block with a return statement
        //        for arrow function bodies which are a single expression.
        //        Esprima generates a single "ArrowFunctionExpression"
        //        with a "body" property.
        auto return_block = create_ast_node<FunctionBody>({ m_source_code, rule_start.position(), position() });
        VERIFY(m_state.current_scope_pusher->type() == ScopePusher::ScopeType::Function);
        m_state.current_scope_pusher->set_scope_node(return_block);
        m_state.current_scope_pusher->set_function_parameters(parameters);

        auto return_expression = parse_expression(2);
        return_block->append<ReturnStatement const>({ m_source_code, rule_start.position(), position() }, move(return_expression));
        if (m_state.strict_mode)
            const_cast<FunctionBody&>(*return_block).set_strict_mode();
        contains_direct_call_to_eval = m_state.current_scope_pusher->contains_direct_call_to_eval();
        return return_block;
    }();

    if (parsed_body.is_null())
        return nullptr;

    auto local_variables_names = parsed_body->local_variables_names();

    // From here on the arrow function is accepted: keep the consumed tokens.
    state_rollback_guard.disarm();
    discard_saved_state();
    auto body = parsed_body.release_nonnull();

    // Read once, before `body` is handed over to the new node below.
    bool const body_is_strict = body->in_strict_mode();

    if (body_is_strict) {
        // Plain identifier parameters are re-validated under strict-mode rules; binding patterns are left alone here.
        for (auto& parameter : parameters) {
            parameter.binding.visit(
                [&](Identifier const& identifier) {
                    check_identifier_name_for_assignment_validity(identifier.string(), true);
                },
                [&](auto const&) {});
        }
    }

    // The source text runs from the rule start up to the current token, excluding that token's trivia.
    auto function_start_offset = rule_start.position().offset;
    auto function_end_offset = position().offset - m_state.current_token.trivia().length();
    auto source_length = function_end_offset - function_start_offset;
    auto source_text = DeprecatedString { m_state.lexer.source().substring_view(function_start_offset, source_length) };

    return create_ast_node<FunctionExpression>(
        { m_source_code, rule_start.position(), position() }, nullptr, move(source_text),
        move(body), move(parameters), function_length, function_kind, body_is_strict,
        /* might_need_arguments_object */ false, contains_direct_call_to_eval, move(local_variables_names), /* is_arrow_function */ true);
}
2023-02-19 21:07:52 +00:00
RefPtr < LabelledStatement const > Parser : : try_parse_labelled_statement ( AllowLabelledFunction allow_function )
2020-05-28 18:09:19 +00:00
{
2021-09-18 17:39:20 +00:00
{
// NOTE: This is a fast path where we try to fail early to avoid the expensive save_state+load_state.
2021-10-07 22:38:24 +00:00
if ( next_token ( ) . type ( ) ! = TokenType : : Colon )
2021-09-18 17:39:20 +00:00
return { } ;
}
2020-05-28 18:09:19 +00:00
save_state ( ) ;
2020-12-29 05:12:02 +00:00
auto rule_start = push_start ( ) ;
2020-05-28 18:09:19 +00:00
ArmedScopeGuard state_rollback_guard = [ & ] {
load_state ( ) ;
} ;
2021-08-21 09:27:20 +00:00
if ( m_state . current_token . value ( ) = = " yield " sv & & ( m_state . strict_mode | | m_state . in_generator_function_context ) ) {
2021-09-18 21:02:50 +00:00
return { } ;
}
2021-11-26 22:50:32 +00:00
if ( m_state . current_token . value ( ) = = " await " sv & & ( m_program_type = = Program : : Type : : Module | | m_state . await_expression_is_valid | | m_state . in_class_static_init_block ) ) {
2021-07-24 23:01:22 +00:00
return { } ;
}
2021-09-18 21:01:54 +00:00
auto identifier = [ & ] {
if ( m_state . current_token . value ( ) = = " await " sv ) {
return consume ( ) . value ( ) ;
}
return consume_identifier_reference ( ) . value ( ) ;
} ( ) ;
2020-05-28 18:09:19 +00:00
if ( ! match ( TokenType : : Colon ) )
return { } ;
consume ( TokenType : : Colon ) ;
if ( ! match_statement ( ) )
return { } ;
2021-07-24 23:01:22 +00:00
2021-09-18 21:01:54 +00:00
state_rollback_guard . disarm ( ) ;
discard_saved_state ( ) ;
if ( m_state . strict_mode & & identifier = = " let " sv ) {
syntax_error ( " Strict mode reserved word 'let' is not allowed in label " , rule_start . position ( ) ) ;
return { } ;
}
2021-07-24 23:01:22 +00:00
if ( match ( TokenType : : Function ) & & ( allow_function = = AllowLabelledFunction : : No | | m_state . strict_mode ) ) {
syntax_error ( " Not allowed to declare a function here " ) ;
return { } ;
}
if ( m_state . labels_in_scope . contains ( identifier ) )
2022-12-04 18:02:33 +00:00
syntax_error ( DeprecatedString : : formatted ( " Label '{}' has already been declared " , identifier ) ) ;
2021-07-24 23:01:22 +00:00
2023-02-19 21:07:52 +00:00
RefPtr < Statement const > labelled_item ;
2021-07-24 23:01:22 +00:00
2021-09-18 21:01:54 +00:00
auto is_iteration_statement = false ;
2021-07-24 23:01:22 +00:00
if ( match ( TokenType : : Function ) ) {
2021-09-18 21:01:54 +00:00
m_state . labels_in_scope . set ( identifier , { } ) ;
2021-07-24 23:01:22 +00:00
auto function_declaration = parse_function_node < FunctionDeclaration > ( ) ;
2021-09-22 10:44:56 +00:00
VERIFY ( m_state . current_scope_pusher ) ;
m_state . current_scope_pusher - > add_declaration ( function_declaration ) ;
2021-07-24 23:01:22 +00:00
if ( function_declaration - > kind ( ) = = FunctionKind : : Generator )
syntax_error ( " Generator functions cannot be defined in labelled statements " ) ;
2021-11-09 18:39:22 +00:00
if ( function_declaration - > kind ( ) = = FunctionKind : : Async )
syntax_error ( " Async functions cannot be defined in labelled statements " ) ;
2021-07-24 23:01:22 +00:00
LibJS: Replace the custom unwind mechanism with completions :^)
This includes:
- Parsing proper LabelledStatements with try_parse_labelled_statement()
- Removing LabelableStatement
- Implementing the LoopEvaluation semantics via loop_evaluation() in
each IterationStatement subclass; and IterationStatement evaluation
via {For,ForIn,ForOf,ForAwaitOf,While,DoWhile}Statement::execute()
- Updating ReturnStatement, BreakStatement and ContinueStatement to
return the appropriate completion types
- Basically reimplementing TryStatement and SwitchStatement according to
the spec, using completions
- Honoring result completion types in AsyncBlockStart and
OrdinaryCallEvaluateBody
- Removing any uses of the VM unwind mechanism - most importantly,
VM::throw_exception() now exclusively sets an exception and no longer
triggers any unwinding mechanism.
However, we already did a good job updating all of LibWeb and userland
applications to not use it, and the few remaining uses elsewhere don't
rely on unwinding AFAICT.
2022-01-05 18:11:16 +00:00
labelled_item = move ( function_declaration ) ;
2021-07-24 23:01:22 +00:00
} else {
2021-09-18 21:01:54 +00:00
m_state . labels_in_scope . set ( identifier , { } ) ;
LibJS: Replace the custom unwind mechanism with completions :^)
This includes:
- Parsing proper LabelledStatements with try_parse_labelled_statement()
- Removing LabelableStatement
- Implementing the LoopEvaluation semantics via loop_evaluation() in
each IterationStatement subclass; and IterationStatement evaluation
via {For,ForIn,ForOf,ForAwaitOf,While,DoWhile}Statement::execute()
- Updating ReturnStatement, BreakStatement and ContinueStatement to
return the appropriate completion types
- Basically reimplementing TryStatement and SwitchStatement according to
the spec, using completions
- Honoring result completion types in AsyncBlockStart and
OrdinaryCallEvaluateBody
- Removing any uses of the VM unwind mechanism - most importantly,
VM::throw_exception() now exclusively sets an exception and no longer
triggers any unwinding mechanism.
However, we already did a good job updating all of LibWeb and userland
applications to not use it, and the few remaining uses elsewhere don't
rely on unwinding AFAICT.
2022-01-05 18:11:16 +00:00
labelled_item = parse_statement ( allow_function ) ;
// Extract the innermost statement from a potentially nested chain of LabelledStatements.
auto statement = labelled_item ;
while ( is < LabelledStatement > ( * statement ) )
2023-02-19 21:07:52 +00:00
statement = static_cast < LabelledStatement const & > ( * statement ) . labelled_item ( ) ;
LibJS: Replace the custom unwind mechanism with completions :^)
This includes:
- Parsing proper LabelledStatements with try_parse_labelled_statement()
- Removing LabelableStatement
- Implementing the LoopEvaluation semantics via loop_evaluation() in
each IterationStatement subclass; and IterationStatement evaluation
via {For,ForIn,ForOf,ForAwaitOf,While,DoWhile}Statement::execute()
- Updating ReturnStatement, BreakStatement and ContinueStatement to
return the appropriate completion types
- Basically reimplementing TryStatement and SwitchStatement according to
the spec, using completions
- Honoring result completion types in AsyncBlockStart and
OrdinaryCallEvaluateBody
- Removing any uses of the VM unwind mechanism - most importantly,
VM::throw_exception() now exclusively sets an exception and no longer
triggers any unwinding mechanism.
However, we already did a good job updating all of LibWeb and userland
applications to not use it, and the few remaining uses elsewhere don't
rely on unwinding AFAICT.
2022-01-05 18:11:16 +00:00
if ( is < IterationStatement > ( * statement ) )
2021-09-18 21:01:54 +00:00
is_iteration_statement = true ;
}
if ( ! is_iteration_statement ) {
if ( auto entry = m_state . labels_in_scope . find ( identifier ) ; entry ! = m_state . labels_in_scope . end ( ) & & entry - > value . has_value ( ) )
syntax_error ( " labelled continue statement cannot use non iterating statement " , m_state . labels_in_scope . get ( identifier ) . value ( ) ) ;
2021-07-24 23:01:22 +00:00
}
2021-06-19 12:43:09 +00:00
m_state . labels_in_scope . remove ( identifier ) ;
2020-05-28 18:09:19 +00:00
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < LabelledStatement > ( { m_source_code , rule_start . position ( ) , position ( ) } , identifier , labelled_item . release_nonnull ( ) ) ;
2020-05-28 18:09:19 +00:00
}
2023-02-19 21:07:52 +00:00
// Speculatively parses the `new.target` meta property.
//
// Returns an empty RefPtr when:
//   - the token after the current one is not a Period (checked before any state is saved),
//   - the token following `new .` is not an Identifier, or
//   - that identifier's original source text is not "target".
// Otherwise returns a MetaProperty node of type NewTarget whose source range runs from
// rule_start.position() to position() at the time the node is created.
//
// NOTE(review): the first consume() expects TokenType::New, so callers presumably only invoke this
// while the current token is `new` -- confirm against the call site.
RefPtr < MetaProperty const > Parser : : try_parse_new_target_expression ( )
2020-11-02 21:27:42 +00:00
{
2021-11-26 22:41:59 +00:00
// Fast path: without a Period as the next token this cannot be `new.target`, so bail out
// before calling save_state() and having to load_state() again.
if ( next_token ( ) . type ( ) ! = TokenType : : Period )
return { } ;
2020-11-02 21:27:42 +00:00
save_state ( ) ;
2020-12-29 05:12:02 +00:00
auto rule_start = push_start ( ) ;
2020-11-02 21:27:42 +00:00
// The guard's callback calls load_state(); it is only disarmed on the success path below.
// NOTE(review): assumes ArmedScopeGuard runs its callback at scope exit unless disarm()ed -- confirm.
ArmedScopeGuard state_rollback_guard = [ & ] {
load_state ( ) ;
} ;
consume ( TokenType : : New ) ;
2021-11-26 22:41:59 +00:00
consume ( TokenType : : Period ) ;
2020-11-02 21:27:42 +00:00
if ( ! match ( TokenType : : Identifier ) )
return { } ;
2021-08-21 09:27:20 +00:00
// The string 'target' cannot have escapes so we check original value.
if ( consume ( ) . original_value ( ) ! = " target " sv )
2020-11-02 21:27:42 +00:00
return { } ;
// Success: cancel the rollback guard and discard the state saved above.
state_rollback_guard . disarm ( ) ;
2020-12-29 13:17:39 +00:00
discard_saved_state ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < MetaProperty > ( { m_source_code , rule_start . position ( ) , position ( ) } , MetaProperty : : Type : : NewTarget ) ;
2020-11-02 21:27:42 +00:00
}
2023-02-19 21:07:52 +00:00
// Speculatively parses the `import.meta` meta property.
//
// Returns an empty RefPtr when:
//   - the token after the current one is not a Period (checked before any state is saved),
//   - the token following `import .` is not an Identifier, or
//   - that identifier's original source text is not "meta".
// Otherwise returns a MetaProperty node of type ImportMeta whose source range runs from
// rule_start.position() to position() at the time the node is created.
//
// NOTE(review): the first consume() expects TokenType::Import, so callers presumably only invoke
// this while the current token is `import` -- confirm against the call site.
RefPtr < MetaProperty const > Parser : : try_parse_import_meta_expression ( )
2021-11-26 22:45:10 +00:00
{
// Fast path: without a Period as the next token this cannot be `import.meta`, so bail out
// before calling save_state() and having to load_state() again.
if ( next_token ( ) . type ( ) ! = TokenType : : Period )
return { } ;
save_state ( ) ;
auto rule_start = push_start ( ) ;
// The guard's callback calls load_state(); it is only disarmed on the success path below.
// NOTE(review): assumes ArmedScopeGuard runs its callback at scope exit unless disarm()ed -- confirm.
ArmedScopeGuard state_rollback_guard = [ & ] {
load_state ( ) ;
} ;
consume ( TokenType : : Import ) ;
consume ( TokenType : : Period ) ;
if ( ! match ( TokenType : : Identifier ) )
return { } ;
// The string 'meta' cannot have escapes so we check original value.
if ( consume ( ) . original_value ( ) ! = " meta " sv )
return { } ;
// Success: cancel the rollback guard and discard the state saved above.
state_rollback_guard . disarm ( ) ;
discard_saved_state ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < MetaProperty > ( { m_source_code , rule_start . position ( ) , position ( ) } , MetaProperty : : Type : : ImportMeta ) ;
2021-11-26 22:45:10 +00:00
}
2023-02-19 21:07:52 +00:00
// Parses a dynamic import call: `import(` argument [`,` [options [`,`]]] `)`.
//
// Unlike the try_parse_* functions this one does not save or roll back parser state: it consumes
// `import` and `(` unconditionally, parses the first expression, then optionally a comma, an
// options expression and one more trailing comma, and finally consumes `)`.
// Returns an ImportCall node holding the argument and the options expression; `options` stays
// null when no second expression was present (including the `import(x,)` form).
NonnullRefPtr < ImportCall const > Parser : : parse_import_call ( )
2021-11-26 22:45:10 +00:00
{
auto rule_start = push_start ( ) ;
// We use the extended definition:
// ImportCall[Yield, Await]:
// import(AssignmentExpression[+In, ?Yield, ?Await] ,opt)
// import(AssignmentExpression[+In, ?Yield, ?Await] ,AssignmentExpression[+In, ?Yield, ?Await] ,opt)
// From https://tc39.es/proposal-import-assertions/#sec-evaluate-import-call
consume ( TokenType : : Import ) ;
consume ( TokenType : : ParenOpen ) ;
// NOTE(review): the literal 2 is presumably a minimum precedence that stops at the comma so
// each argument is a single AssignmentExpression -- confirm against parse_expression().
auto argument = parse_expression ( 2 ) ;
2023-02-19 21:07:52 +00:00
RefPtr < Expression const > options ;
2021-11-26 22:45:10 +00:00
if ( match ( TokenType : : Comma ) ) {
consume ( TokenType : : Comma ) ;
// A ParenClose directly after the first comma means that comma was a trailing one and
// there is no options expression.
if ( ! match ( TokenType : : ParenClose ) ) {
options = parse_expression ( 2 ) ;
// Second optional comma
if ( match ( TokenType : : Comma ) )
consume ( TokenType : : Comma ) ;
}
}
consume ( TokenType : : ParenClose ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < ImportCall > ( { m_source_code , rule_start . position ( ) , position ( ) } , move ( argument ) , move ( options ) ) ;
2021-11-26 22:45:10 +00:00
}
2023-02-19 21:07:52 +00:00
// Parses a class declaration and wraps the resulting class expression in a ClassDeclaration node.
//
// The class itself is parsed by parse_class_expression(true); the `true` is passed as that
// function's `expect_class_name` parameter.
// Returns a ClassDeclaration node whose source range runs from rule_start.position() to
// position() as observed after the class expression has been parsed.
NonnullRefPtr < ClassDeclaration const > Parser : : parse_class_declaration ( )
2020-06-08 18:31:21 +00:00
{
2020-12-28 17:15:22 +00:00
auto rule_start = push_start ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
// Parse the class before building the source range: position() and the parse call used to be
// sibling arguments of one call, and C++ does not specify the order in which function arguments
// are evaluated, so the end position could be sampled either before or after the class was consumed.
auto class_expression = parse_class_expression ( true ) ;
return create_ast_node < ClassDeclaration > ( { m_source_code , rule_start . position ( ) , position ( ) } , move ( class_expression ) ) ;
2020-06-08 18:31:21 +00:00
}
2023-02-19 21:07:52 +00:00
NonnullRefPtr < ClassExpression const > Parser : : parse_class_expression ( bool expect_class_name )
2020-06-08 18:31:21 +00:00
{
2020-12-28 17:15:22 +00:00
auto rule_start = push_start ( ) ;
2020-06-08 18:31:21 +00:00
// Classes are always in strict mode.
2021-06-19 12:43:09 +00:00
TemporaryChange strict_mode_rollback ( m_state . strict_mode , true ) ;
2020-06-08 18:31:21 +00:00
consume ( TokenType : : Class ) ;
2023-03-06 13:17:01 +00:00
Vector < NonnullRefPtr < ClassElement const > > elements ;
2023-02-19 21:07:52 +00:00
RefPtr < Expression const > super_class ;
RefPtr < FunctionExpression const > constructor ;
2023-01-09 00:23:00 +00:00
HashTable < DeprecatedFlyString > found_private_names ;
2020-06-08 18:31:21 +00:00
2023-07-04 22:14:41 +00:00
RefPtr < Identifier const > class_name ;
2023-06-30 13:51:39 +00:00
if ( expect_class_name | | match_identifier ( ) | | match ( TokenType : : Yield ) | | match ( TokenType : : Await ) ) {
2023-07-04 22:14:41 +00:00
class_name = create_identifier_and_register_in_current_scope ( { m_source_code , rule_start . position ( ) , position ( ) } , consume_identifier_reference ( ) . DeprecatedFlyString_value ( ) ) ;
2023-06-30 13:51:39 +00:00
}
2020-06-08 18:31:21 +00:00
2023-07-04 22:14:41 +00:00
ScopePusher class_declaration_scope = ScopePusher : : class_declaration_scope ( * this , class_name ) ;
2023-06-30 13:51:39 +00:00
if ( class_name )
check_identifier_name_for_assignment_validity ( class_name - > string ( ) , true ) ;
if ( m_state . in_class_static_init_block & & class_name & & class_name - > string ( ) = = " await " sv )
2021-11-26 22:29:05 +00:00
syntax_error ( " Identifier must not be a reserved word in modules ('await') " ) ;
2020-06-08 18:31:21 +00:00
if ( match ( TokenType : : Extends ) ) {
consume ( ) ;
2021-06-14 11:16:41 +00:00
auto [ expression , should_continue_parsing ] = parse_primary_expression ( ) ;
2021-07-17 22:19:03 +00:00
// Basically a (much) simplified parse_secondary_expression().
for ( ; ; ) {
if ( match ( TokenType : : TemplateLiteralStart ) ) {
auto template_literal = parse_template_literal ( true ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
expression = create_ast_node < TaggedTemplateLiteral > ( { m_source_code , rule_start . position ( ) , position ( ) } , move ( expression ) , move ( template_literal ) ) ;
2021-07-17 22:19:03 +00:00
continue ;
}
if ( match ( TokenType : : BracketOpen ) | | match ( TokenType : : Period ) | | match ( TokenType : : ParenOpen ) ) {
auto precedence = g_operator_precedence . get ( m_state . current_token . type ( ) ) ;
2022-02-16 06:34:59 +00:00
expression = parse_secondary_expression ( move ( expression ) , precedence ) . expression ;
2021-07-17 22:19:03 +00:00
continue ;
}
break ;
}
2021-06-14 11:16:41 +00:00
super_class = move ( expression ) ;
( void ) should_continue_parsing ;
2020-06-08 18:31:21 +00:00
}
consume ( TokenType : : CurlyOpen ) ;
2021-10-12 20:45:52 +00:00
HashTable < StringView > referenced_private_names ;
HashTable < StringView > * outer_referenced_private_names = m_state . referenced_private_names ;
m_state . referenced_private_names = & referenced_private_names ;
ScopeGuard restore_private_name_table = [ & ] {
m_state . referenced_private_names = outer_referenced_private_names ;
} ;
2020-06-08 18:31:21 +00:00
while ( ! done ( ) & & ! match ( TokenType : : CurlyClose ) ) {
2023-02-19 21:07:52 +00:00
RefPtr < Expression const > property_key ;
2020-06-08 18:31:21 +00:00
bool is_static = false ;
bool is_constructor = false ;
2021-07-02 10:07:00 +00:00
bool is_generator = false ;
2021-11-09 18:39:22 +00:00
bool is_async = false ;
2020-06-08 18:31:21 +00:00
auto method_kind = ClassMethod : : Kind : : Method ;
if ( match ( TokenType : : Semicolon ) ) {
consume ( ) ;
continue ;
}
2022-01-18 23:46:16 +00:00
auto function_start = position ( ) ;
2021-11-09 18:39:22 +00:00
if ( match ( TokenType : : Async ) ) {
2021-11-26 22:30:29 +00:00
auto lookahead_token = next_token ( ) ;
2022-02-17 17:03:41 +00:00
// If async is followed by a Semicolon or CurlyClose it is a field (CurlyClose indicates end of class)
// Otherwise if it is followed by a ParenOpen it is a function named async
if ( lookahead_token . type ( ) ! = TokenType : : Semicolon & & lookahead_token . type ( ) ! = TokenType : : CurlyClose & & lookahead_token . type ( ) ! = TokenType : : ParenOpen
2021-11-26 22:30:29 +00:00
& & ! lookahead_token . trivia_contains_line_terminator ( ) ) {
consume ( ) ;
is_async = true ;
}
2021-11-09 18:39:22 +00:00
}
2021-07-02 10:07:00 +00:00
if ( match ( TokenType : : Asterisk ) ) {
consume ( ) ;
is_generator = true ;
}
2021-08-28 15:11:05 +00:00
StringView name ;
2021-10-12 20:45:52 +00:00
if ( match_property_key ( ) | | match ( TokenType : : PrivateIdentifier ) ) {
2021-11-09 18:39:22 +00:00
if ( ! is_generator & & ! is_async & & m_state . current_token . original_value ( ) = = " static " sv ) {
2021-07-02 10:07:00 +00:00
if ( match ( TokenType : : Identifier ) ) {
consume ( ) ;
is_static = true ;
2022-01-18 23:46:16 +00:00
function_start = position ( ) ;
2021-11-09 18:39:22 +00:00
if ( match ( TokenType : : Async ) ) {
consume ( ) ;
is_async = true ;
}
2021-07-02 10:07:00 +00:00
if ( match ( TokenType : : Asterisk ) ) {
consume ( ) ;
is_generator = true ;
}
}
2020-06-08 18:31:21 +00:00
}
if ( match ( TokenType : : Identifier ) ) {
2021-08-28 15:11:05 +00:00
auto identifier_name = m_state . current_token . original_value ( ) ;
2020-06-08 18:31:21 +00:00
2021-08-28 15:11:05 +00:00
if ( identifier_name = = " get " sv ) {
2020-06-08 18:31:21 +00:00
method_kind = ClassMethod : : Kind : : Getter ;
consume ( ) ;
2021-08-28 15:11:05 +00:00
} else if ( identifier_name = = " set " sv ) {
2020-06-08 18:31:21 +00:00
method_kind = ClassMethod : : Kind : : Setter ;
consume ( ) ;
}
}
2021-10-12 20:45:52 +00:00
if ( match_property_key ( ) | | match ( TokenType : : PrivateIdentifier ) ) {
2021-06-19 12:43:09 +00:00
switch ( m_state . current_token . type ( ) ) {
2020-06-08 18:31:21 +00:00
case TokenType : : Identifier :
name = consume ( ) . value ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
property_key = create_ast_node < StringLiteral > ( { m_source_code , rule_start . position ( ) , position ( ) } , name ) ;
2020-06-08 18:31:21 +00:00
break ;
2021-10-12 20:45:52 +00:00
case TokenType : : PrivateIdentifier :
name = consume ( ) . value ( ) ;
if ( name = = " #constructor " )
syntax_error ( " Private property with name '#constructor' is not allowed " ) ;
if ( method_kind ! = ClassMethod : : Kind : : Method ) {
// It is a Syntax Error if PrivateBoundIdentifiers of ClassElementList contains any duplicate entries,
// unless the name is used once for a getter and once for a setter and in no other entries,
// and the getter and setter are either both static or both non-static.
for ( auto & element : elements ) {
2023-03-06 13:17:01 +00:00
auto private_name = element - > private_bound_identifier ( ) ;
2021-10-12 20:45:52 +00:00
if ( ! private_name . has_value ( ) | | private_name . value ( ) ! = name )
continue ;
2023-03-06 13:17:01 +00:00
if ( element - > class_element_kind ( ) ! = ClassElement : : ElementKind : : Method
| | element - > is_static ( ) ! = is_static ) {
2022-12-04 18:02:33 +00:00
syntax_error ( DeprecatedString : : formatted ( " Duplicate private field or method named '{}' " , name ) ) ;
2021-10-12 20:45:52 +00:00
break ;
}
2023-03-06 13:17:01 +00:00
VERIFY ( is < ClassMethod > ( * element ) ) ;
auto & class_method_element = static_cast < ClassMethod const & > ( * element ) ;
2021-10-12 20:45:52 +00:00
if ( class_method_element . kind ( ) = = ClassMethod : : Kind : : Method | | class_method_element . kind ( ) = = method_kind ) {
2022-12-04 18:02:33 +00:00
syntax_error ( DeprecatedString : : formatted ( " Duplicate private field or method named '{}' " , name ) ) ;
2021-10-12 20:45:52 +00:00
break ;
}
}
found_private_names . set ( name ) ;
} else if ( found_private_names . set ( name ) ! = AK : : HashSetResult : : InsertedNewEntry ) {
2022-12-04 18:02:33 +00:00
syntax_error ( DeprecatedString : : formatted ( " Duplicate private field or method named '{}' " , name ) ) ;
2021-10-12 20:45:52 +00:00
}
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
property_key = create_ast_node < PrivateIdentifier > ( { m_source_code , rule_start . position ( ) , position ( ) } , name ) ;
2021-10-12 20:45:52 +00:00
break ;
2020-06-08 18:31:21 +00:00
case TokenType : : StringLiteral : {
auto string_literal = parse_string_literal ( consume ( ) ) ;
name = string_literal - > value ( ) ;
property_key = move ( string_literal ) ;
break ;
}
default :
property_key = parse_property_key ( ) ;
break ;
}
LibJS: Add an optimization to avoid needless arguments object creation
This gives FunctionNode a "might need arguments object" boolean flag and
sets it based on the simplest possible heuristic for this: if we don't
encounter an identifier called "arguments" or "eval" up to the next
(nested) function declaration or expression, we won't need an arguments
object. Otherwise, we *might* need one - the final decision is made in
the FunctionDeclarationInstantiation AO.
Now, this is obviously not perfect. Even if you avoid eval, something
like `foo.arguments` will still trigger a false positive - but it's a
start and already massively cuts down on needlessly allocated objects,
especially in real-world code that is often minified, and so a full
"arguments" identifier will be an actual arguments object more often
than not.
To illustrate the actual impact of this change, here's the number of
allocated arguments objects during a full test-js run:
Before:
- Unmapped arguments objects: 78765
- Mapped arguments objects: 2455
After:
- Unmapped arguments objects: 18
- Mapped arguments objects: 37
This results in a ~5% speedup of test-js on my Linux host machine, and
about 3.5% on i686 Serenity in QEMU (warm runs, average of 5).
The following microbenchmark (calling an empty function 1M times) runs
25% faster on Linux and 45% on Serenity:
function foo() {}
for (var i = 0; i < 1_000_000; ++i)
foo();
test262 reports no changes in either direction, apart from a speedup :^)
2021-10-05 07:44:58 +00:00
// https://tc39.es/ecma262/#sec-class-definitions-static-semantics-early-errors
2021-07-11 23:30:04 +00:00
// ClassElement : static MethodDefinition
// It is a Syntax Error if PropName of MethodDefinition is "prototype".
if ( is_static & & name = = " prototype " sv )
syntax_error ( " Classes may not have a static property named 'prototype' " ) ;
2021-11-26 22:30:29 +00:00
} else if ( ( match ( TokenType : : ParenOpen ) | | match ( TokenType : : Equals ) | | match ( TokenType : : Semicolon ) | | match ( TokenType : : CurlyClose ) ) & & ( is_static | | is_async | | method_kind ! = ClassMethod : : Kind : : Method ) ) {
2021-07-25 09:50:12 +00:00
switch ( method_kind ) {
case ClassMethod : : Kind : : Method :
2021-11-09 18:39:22 +00:00
if ( is_async ) {
2022-07-11 17:32:29 +00:00
name = " async " sv ;
2021-11-09 18:39:22 +00:00
is_async = false ;
} else {
VERIFY ( is_static ) ;
2022-07-11 17:32:29 +00:00
name = " static " sv ;
2021-11-09 18:39:22 +00:00
is_static = false ;
}
2021-07-25 09:50:12 +00:00
break ;
case ClassMethod : : Kind : : Getter :
2022-07-11 17:32:29 +00:00
name = " get " sv ;
2021-07-25 09:50:12 +00:00
method_kind = ClassMethod : : Kind : : Method ;
break ;
case ClassMethod : : Kind : : Setter :
2022-07-11 17:32:29 +00:00
name = " set " sv ;
2021-07-25 09:50:12 +00:00
method_kind = ClassMethod : : Kind : : Method ;
break ;
}
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
property_key = create_ast_node < StringLiteral > ( { m_source_code , rule_start . position ( ) , position ( ) } , name ) ;
2021-10-20 19:29:47 +00:00
} else if ( match ( TokenType : : CurlyOpen ) & & is_static ) {
auto static_start = push_start ( ) ;
consume ( TokenType : : CurlyOpen ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
auto static_init_block = create_ast_node < FunctionBody > ( { m_source_code , rule_start . position ( ) , position ( ) } ) ;
2021-10-20 19:29:47 +00:00
TemporaryChange break_context_rollback ( m_state . in_break_context , false ) ;
TemporaryChange continue_context_rollback ( m_state . in_continue_context , false ) ;
TemporaryChange function_context_rollback ( m_state . in_function_context , false ) ;
TemporaryChange generator_function_context_rollback ( m_state . in_generator_function_context , false ) ;
2021-11-26 22:50:32 +00:00
TemporaryChange async_function_context_rollback ( m_state . await_expression_is_valid , false ) ;
2021-11-09 20:52:21 +00:00
TemporaryChange class_field_initializer_rollback ( m_state . in_class_field_initializer , true ) ;
TemporaryChange class_static_init_block_rollback ( m_state . in_class_static_init_block , true ) ;
2021-12-19 01:06:22 +00:00
TemporaryChange super_property_access_rollback ( m_state . allow_super_property_lookup , true ) ;
2021-10-20 19:29:47 +00:00
ScopePusher static_init_scope = ScopePusher : : static_init_block_scope ( * this , * static_init_block ) ;
parse_statement_list ( static_init_block ) ;
consume ( TokenType : : CurlyClose ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
elements . append ( create_ast_node < StaticInitializer > ( { m_source_code , static_start . position ( ) , position ( ) } , move ( static_init_block ) , static_init_scope . contains_direct_call_to_eval ( ) ) ) ;
2021-10-20 19:29:47 +00:00
continue ;
2020-06-08 18:31:21 +00:00
} else {
expected ( " property key " ) ;
}
// Constructor may be a StringLiteral or an Identifier.
2021-08-28 15:11:05 +00:00
if ( ! is_static & & name = = " constructor " sv ) {
2020-06-08 18:31:21 +00:00
if ( method_kind ! = ClassMethod : : Kind : : Method )
syntax_error ( " Class constructor may not be an accessor " ) ;
if ( ! constructor . is_null ( ) )
syntax_error ( " Classes may not have more than one constructor " ) ;
2021-07-02 10:07:00 +00:00
if ( is_generator )
syntax_error ( " Class constructor may not be a generator " ) ;
2021-11-09 18:39:22 +00:00
if ( is_async )
syntax_error ( " Class constructor may not be async " ) ;
2020-06-08 18:31:21 +00:00
is_constructor = true ;
}
}
if ( match ( TokenType : : ParenOpen ) ) {
2020-10-20 16:56:49 +00:00
u8 parse_options = FunctionNodeParseOptions : : AllowSuperPropertyLookup ;
2021-07-25 09:50:12 +00:00
if ( ! super_class . is_null ( ) & & ! is_static & & is_constructor )
2020-10-20 16:56:49 +00:00
parse_options | = FunctionNodeParseOptions : : AllowSuperConstructorCall ;
2020-10-20 17:32:51 +00:00
if ( method_kind = = ClassMethod : : Kind : : Getter )
parse_options | = FunctionNodeParseOptions : : IsGetterFunction ;
if ( method_kind = = ClassMethod : : Kind : : Setter )
parse_options | = FunctionNodeParseOptions : : IsSetterFunction ;
2021-07-02 10:07:00 +00:00
if ( is_generator )
parse_options | = FunctionNodeParseOptions : : IsGeneratorFunction ;
2021-11-09 18:39:22 +00:00
if ( is_async )
parse_options | = FunctionNodeParseOptions : : IsAsyncFunction ;
2022-01-18 23:46:16 +00:00
auto function = parse_function_node < FunctionExpression > ( parse_options , function_start ) ;
2020-06-08 18:31:21 +00:00
if ( is_constructor ) {
constructor = move ( function ) ;
} else if ( ! property_key . is_null ( ) ) {
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
elements . append ( create_ast_node < ClassMethod > ( { m_source_code , rule_start . position ( ) , position ( ) } , property_key . release_nonnull ( ) , move ( function ) , method_kind , is_static ) ) ;
2020-06-08 18:31:21 +00:00
} else {
syntax_error ( " No key for class method " ) ;
}
2021-11-09 18:39:22 +00:00
} else if ( is_generator | | is_async ) {
2020-06-08 18:31:21 +00:00
expected ( " ParenOpen " ) ;
consume ( ) ;
2021-08-28 15:11:05 +00:00
} else if ( property_key . is_null ( ) ) {
expected ( " property key " ) ;
consume ( ) ;
} else {
if ( name = = " constructor " sv )
syntax_error ( " Class cannot have field named 'constructor' " ) ;
2023-02-19 21:07:52 +00:00
RefPtr < Expression const > initializer ;
2021-10-13 17:59:38 +00:00
bool contains_direct_call_to_eval = false ;
2021-08-28 15:11:05 +00:00
if ( match ( TokenType : : Equals ) ) {
consume ( ) ;
TemporaryChange super_property_access_rollback ( m_state . allow_super_property_lookup , true ) ;
TemporaryChange field_initializer_rollback ( m_state . in_class_field_initializer , true ) ;
2021-10-13 17:59:38 +00:00
2023-07-04 22:14:41 +00:00
auto class_scope_node = create_ast_node < BlockStatement > ( { m_source_code , rule_start . position ( ) , position ( ) } ) ;
auto class_field_scope = ScopePusher : : class_field_scope ( * this , * class_scope_node ) ;
2021-08-28 15:11:05 +00:00
initializer = parse_expression ( 2 ) ;
2021-10-13 17:59:38 +00:00
contains_direct_call_to_eval = class_field_scope . contains_direct_call_to_eval ( ) ;
2021-08-28 15:11:05 +00:00
}
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
elements . append ( create_ast_node < ClassField > ( { m_source_code , rule_start . position ( ) , position ( ) } , property_key . release_nonnull ( ) , move ( initializer ) , contains_direct_call_to_eval , is_static ) ) ;
2021-08-28 15:11:05 +00:00
consume_or_insert_semicolon ( ) ;
2020-06-08 18:31:21 +00:00
}
}
consume ( TokenType : : CurlyClose ) ;
if ( constructor . is_null ( ) ) {
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
auto constructor_body = create_ast_node < BlockStatement > ( { m_source_code , rule_start . position ( ) , position ( ) } ) ;
2020-06-08 18:31:21 +00:00
if ( ! super_class . is_null ( ) ) {
// Set constructor to the result of parsing the source text
// constructor(... args){ super (...args);}
2022-08-20 15:27:02 +00:00
// However: The most notable distinction is that while the aforementioned ECMAScript
// source text observably calls the @@iterator method on %Array.prototype%,
// this function does not.
// So we use a custom version of SuperCall which doesn't use the @@iterator
// method on %Array.prototype% visibly.
2023-07-06 15:49:38 +00:00
auto argument_name = create_ast_node < Identifier const > ( { m_source_code , rule_start . position ( ) , position ( ) } , " args " ) ;
2021-07-02 17:30:38 +00:00
auto super_call = create_ast_node < SuperCall > (
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
{ m_source_code , rule_start . position ( ) , position ( ) } ,
2022-08-20 15:27:02 +00:00
SuperCall : : IsPartOfSyntheticConstructor : : Yes ,
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
CallExpression : : Argument { create_ast_node < Identifier > ( { m_source_code , rule_start . position ( ) , position ( ) } , " args " ) , true } ) ;
LibJS: Replace the custom unwind mechanism with completions :^)
This includes:
- Parsing proper LabelledStatements with try_parse_labelled_statement()
- Removing LabelableStatement
- Implementing the LoopEvaluation semantics via loop_evaluation() in
each IterationStatement subclass; and IterationStatement evaluation
via {For,ForIn,ForOf,ForAwaitOf,While,DoWhile}Statement::execute()
- Updating ReturnStatement, BreakStatement and ContinueStatement to
return the appropriate completion types
- Basically reimplementing TryStatement and SwitchStatement according to
the spec, using completions
- Honoring result completion types in AsyncBlockStart and
OrdinaryCallEvaluateBody
- Removing any uses of the VM unwind mechanism - most importantly,
VM::throw_exception() now exclusively sets an exception and no longer
triggers any unwinding mechanism.
However, we already did a good job updating all of LibWeb and userland
applications to not use it, and the few remaining uses elsewhere don't
rely on unwinding AFAICT.
2022-01-05 18:11:16 +00:00
// NOTE: While the JS approximation above doesn't do `return super(...args)`, the
// abstract closure is expected to capture and return the result, so we do need a
// return statement here to create the correct completion.
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
constructor_body - > append ( create_ast_node < ReturnStatement > ( { m_source_code , rule_start . position ( ) , position ( ) } , move ( super_call ) ) ) ;
2020-06-08 18:31:21 +00:00
2021-06-10 23:08:05 +00:00
constructor = create_ast_node < FunctionExpression > (
2023-07-04 22:14:41 +00:00
{ m_source_code , rule_start . position ( ) , position ( ) } , class_name , " " ,
2022-11-23 12:12:36 +00:00
move ( constructor_body ) , Vector { FunctionParameter { move ( argument_name ) , nullptr , true } } , 0 , FunctionKind : : Normal ,
2023-07-04 22:14:41 +00:00
/* is_strict_mode */ true , /* might_need_arguments_object */ false , /* contains_direct_call_to_eval */ false , /* local_variables_names */ Vector < DeprecatedFlyString > { } ) ;
2020-06-08 18:31:21 +00:00
} else {
2021-06-10 23:08:05 +00:00
constructor = create_ast_node < FunctionExpression > (
2023-07-04 22:14:41 +00:00
{ m_source_code , rule_start . position ( ) , position ( ) } , class_name , " " ,
2022-11-23 12:12:36 +00:00
move ( constructor_body ) , Vector < FunctionParameter > { } , 0 , FunctionKind : : Normal ,
2023-07-04 22:14:41 +00:00
/* is_strict_mode */ true , /* might_need_arguments_object */ false , /* contains_direct_call_to_eval */ false , /* local_variables_names */ Vector < DeprecatedFlyString > { } ) ;
2020-06-08 18:31:21 +00:00
}
}
2021-10-12 20:45:52 +00:00
// We could be in a subclass defined within the main class, so we must move all non-declared private names to the outer class.
for ( auto & private_name : referenced_private_names ) {
if ( found_private_names . contains ( private_name ) )
continue ;
if ( outer_referenced_private_names )
outer_referenced_private_names - > set ( private_name ) ;
else // FIXME: Make these error appear in the appropriate places.
2022-12-04 18:02:33 +00:00
syntax_error ( DeprecatedString : : formatted ( " Reference to undeclared private field or method '{}' " , private_name ) ) ;
2021-10-12 20:45:52 +00:00
}
2022-01-18 23:46:16 +00:00
auto function_start_offset = rule_start . position ( ) . offset ;
auto function_end_offset = position ( ) . offset - m_state . current_token . trivia ( ) . length ( ) ;
2022-12-04 18:02:33 +00:00
auto source_text = DeprecatedString { m_state . lexer . source ( ) . substring_view ( function_start_offset , function_end_offset - function_start_offset ) } ;
2022-01-18 23:46:16 +00:00
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < ClassExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , move ( class_name ) , move ( source_text ) , move ( constructor ) , move ( super_class ) , move ( elements ) ) ;
2020-06-08 18:31:21 +00:00
}
2021-06-14 11:16:41 +00:00
Parser : : PrimaryExpressionParseResult Parser : : parse_primary_expression ( )
2020-03-11 18:27:43 +00:00
{
2020-12-28 17:15:22 +00:00
auto rule_start = push_start ( ) ;
2020-03-25 08:51:54 +00:00
if ( match_unary_prefixed_expression ( ) )
2021-06-14 11:16:41 +00:00
return { parse_unary_prefixed_expression ( ) } ;
2020-03-25 08:51:54 +00:00
2023-02-19 21:07:52 +00:00
// Helper that attempts to parse an arrow function expression for the given start position.
// - position: passed to try_parse_arrow_function_expression_failed_at_position() to decide
//   whether to attempt the parse at all, and used to record a new failure.
// - expect_paren, is_async: forwarded unchanged to try_parse_arrow_function_expression().
// Returns the parsed FunctionExpression on success; returns nullptr when the attempt is
// skipped or when the parse produces no result.
auto try_arrow_function_parse_or_fail = [ this ] ( Position const & position , bool expect_paren , bool is_async = false ) - > RefPtr < FunctionExpression const > {
2021-11-14 23:47:16 +00:00
// Bail out early if a failure is already reported for this position.
if ( try_parse_arrow_function_expression_failed_at_position ( position ) )
return nullptr ;
auto arrow_function = try_parse_arrow_function_expression ( expect_paren , is_async ) ;
if ( arrow_function )
return arrow_function ;
// No arrow function was produced: mark this position as failed before returning.
set_try_parse_arrow_function_expression_failed_at_position ( position , true ) ;
return nullptr ;
} ;
2021-06-19 12:43:09 +00:00
switch ( m_state . current_token . type ( ) ) {
2020-03-11 18:27:43 +00:00
case TokenType : : ParenOpen : {
2021-04-11 20:41:51 +00:00
auto paren_position = position ( ) ;
2020-03-11 18:27:43 +00:00
consume ( TokenType : : ParenOpen ) ;
2021-11-14 23:47:16 +00:00
if ( ( match ( TokenType : : ParenClose ) | | match_identifier ( ) | | match ( TokenType : : TripleDot ) | | match ( TokenType : : CurlyOpen ) | | match ( TokenType : : BracketOpen ) ) ) {
if ( auto arrow_function_result = try_arrow_function_parse_or_fail ( paren_position , true ) )
2021-07-28 15:11:33 +00:00
return { arrow_function_result . release_nonnull ( ) , false } ;
2020-03-30 13:26:09 +00:00
}
2020-03-12 22:02:41 +00:00
auto expression = parse_expression ( 0 ) ;
2020-03-11 18:27:43 +00:00
consume ( TokenType : : ParenClose ) ;
2023-06-15 09:43:48 +00:00
if ( is < NewExpression > ( * expression ) ) {
auto & new_expression = static_cast < NewExpression & > ( * static_cast < NonnullRefPtr < Expression > > ( expression ) ) ;
new_expression . set_inside_parens ( ) ;
} else if ( is < FunctionExpression > ( * expression ) ) {
2023-02-19 21:07:52 +00:00
auto & function = static_cast < FunctionExpression const & > ( * expression ) ;
2021-07-28 23:45:55 +00:00
if ( function . kind ( ) = = FunctionKind : : Generator & & function . name ( ) = = " yield " sv )
syntax_error ( " function is not allowed to be called 'yield' in this context " , function . source_range ( ) . start ) ;
2021-11-09 18:39:22 +00:00
if ( function . kind ( ) = = FunctionKind : : Async & & function . name ( ) = = " await " sv )
syntax_error ( " function is not allowed to be called 'await' in this context " , function . source_range ( ) . start ) ;
2021-03-22 11:44:07 +00:00
}
2021-06-14 11:16:41 +00:00
return { move ( expression ) } ;
2020-03-11 18:27:43 +00:00
}
2020-04-12 22:42:14 +00:00
case TokenType : : This :
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return { create_ast_node < ThisExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } ) } ;
2020-06-08 18:31:21 +00:00
case TokenType : : Class :
2021-06-14 11:16:41 +00:00
return { parse_class_expression ( false ) } ;
2020-06-08 18:31:21 +00:00
case TokenType : : Super :
consume ( ) ;
2021-06-19 12:43:09 +00:00
if ( ! m_state . allow_super_property_lookup )
2020-06-08 18:31:21 +00:00
syntax_error ( " 'super' keyword unexpected here " ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return { create_ast_node < SuperExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } ) } ;
2021-08-21 09:27:20 +00:00
case TokenType : : EscapedKeyword :
2021-11-26 22:25:10 +00:00
if ( match_invalid_escaped_keyword ( ) )
2021-08-28 15:04:37 +00:00
syntax_error ( " Keyword must not contain escaped characters " ) ;
2021-08-21 09:27:20 +00:00
[[fallthrough]] ;
2020-03-30 13:26:09 +00:00
case TokenType : : Identifier : {
2021-06-10 21:08:30 +00:00
read_as_identifier : ;
2021-11-14 23:47:16 +00:00
if ( auto arrow_function_result = try_arrow_function_parse_or_fail ( position ( ) , false ) )
return { arrow_function_result . release_nonnull ( ) , false } ;
2021-04-11 20:41:51 +00:00
2021-08-28 15:04:37 +00:00
auto string = m_state . current_token . value ( ) ;
2021-07-11 23:27:35 +00:00
// This could be 'eval' or 'arguments' and thus needs a custom check (`eval[1] = true`)
if ( m_state . strict_mode & & ( string = = " let " | | is_strict_reserved_word ( string ) ) )
2022-12-04 18:02:33 +00:00
syntax_error ( DeprecatedString : : formatted ( " Identifier must not be a reserved word in strict mode ('{}') " , string ) ) ;
2021-08-28 15:04:37 +00:00
return { parse_identifier ( ) } ;
2020-03-30 13:26:09 +00:00
}
2020-03-11 18:27:43 +00:00
case TokenType : : NumericLiteral :
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return { create_ast_node < NumericLiteral > ( { m_source_code , rule_start . position ( ) , position ( ) } , consume_and_validate_numeric_literal ( ) . double_value ( ) ) } ;
2020-06-06 00:14:10 +00:00
case TokenType : : BigIntLiteral :
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return { create_ast_node < BigIntLiteral > ( { m_source_code , rule_start . position ( ) , position ( ) } , consume ( ) . value ( ) ) } ;
2020-03-11 18:27:43 +00:00
case TokenType : : BoolLiteral :
2023-05-27 22:08:52 +00:00
return { create_ast_node < BooleanLiteral > ( { m_source_code , rule_start . position ( ) , position ( ) } , consume_and_allow_division ( ) . bool_value ( ) ) } ;
2020-03-12 12:05:06 +00:00
case TokenType : : StringLiteral :
2021-06-14 11:16:41 +00:00
return { parse_string_literal ( consume ( ) ) } ;
2020-03-15 21:32:34 +00:00
case TokenType : : NullLiteral :
2023-05-27 22:08:52 +00:00
consume_and_allow_division ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return { create_ast_node < NullLiteral > ( { m_source_code , rule_start . position ( ) , position ( ) } ) } ;
2020-03-11 18:27:43 +00:00
case TokenType : : CurlyOpen :
2021-06-14 11:16:41 +00:00
return { parse_object_expression ( ) } ;
2021-11-14 23:47:16 +00:00
case TokenType : : Async : {
auto lookahead_token = next_token ( ) ;
// No valid async function (arrow or not) can have a line terminator after the async since asi would kick in.
if ( lookahead_token . trivia_contains_line_terminator ( ) )
2021-11-09 18:39:22 +00:00
goto read_as_identifier ;
2021-11-14 23:47:16 +00:00
if ( lookahead_token . type ( ) = = TokenType : : Function )
return { parse_function_node < FunctionExpression > ( ) } ;
if ( lookahead_token . type ( ) = = TokenType : : ParenOpen ) {
if ( auto arrow_function_result = try_arrow_function_parse_or_fail ( position ( ) , true , true ) )
return { arrow_function_result . release_nonnull ( ) , false } ;
} else if ( lookahead_token . is_identifier_name ( ) ) {
if ( auto arrow_function_result = try_arrow_function_parse_or_fail ( position ( ) , false , true ) )
return { arrow_function_result . release_nonnull ( ) , false } ;
}
goto read_as_identifier ;
}
2020-03-19 10:52:56 +00:00
case TokenType : : Function :
2021-06-14 11:16:41 +00:00
return { parse_function_node < FunctionExpression > ( ) } ;
2020-03-20 19:29:57 +00:00
case TokenType : : BracketOpen :
2021-06-14 11:16:41 +00:00
return { parse_array_expression ( ) } ;
2020-06-03 23:05:49 +00:00
case TokenType : : RegexLiteral :
2021-06-14 11:16:41 +00:00
return { parse_regexp_literal ( ) } ;
LibJS: Add template literals
Adds fully functioning template literals. Because template literals
contain expressions, most of the work has to be done in the Lexer rather
than the Parser. And because of the complexity of template literals
(expressions, nesting, escapes, etc), the Lexer needs to have some
template-related state.
When entering a new template literal, a TemplateLiteralStart token is
emitted. When inside a literal, all text will be parsed up until a '${'
or '`' (or EOF, but that's a syntax error) is seen, and then a
TemplateLiteralExprStart token is emitted. At this point, the Lexer
proceeds as normal, however it keeps track of the number of opening
and closing curly braces it has seen in order to determine the close
of the expression. Once it finds a matching curly brace for the '${',
a TemplateLiteralExprEnd token is emitted and the state is updated
accordingly.
When the Lexer is inside of a template literal, but not an expression,
and sees a '`', this must be the closing grave: a TemplateLiteralEnd
token is emitted.
The state required to correctly parse template strings consists of a
vector (for nesting) of two pieces of information: whether or not we
are in a template expression (as opposed to a template string); and
the count of the number of unmatched open curly braces we have seen
(only applicable if the Lexer is currently in a template expression).
TODO: Add support for template literal newlines in the JS REPL (this will
cause a syntax error currently):
> `foo
> bar`
'foo
bar'
2020-05-03 22:41:14 +00:00
case TokenType : : TemplateLiteralStart :
2021-06-14 11:16:41 +00:00
return { parse_template_literal ( false ) } ;
2020-11-02 21:27:42 +00:00
case TokenType : : New : {
auto new_start = position ( ) ;
auto new_target_result = try_parse_new_target_expression ( ) ;
if ( ! new_target_result . is_null ( ) ) {
2022-04-09 23:55:45 +00:00
if ( ! m_state . in_function_context & & ! m_state . in_eval_function_context & & ! m_state . in_class_static_init_block )
2020-11-02 21:27:42 +00:00
syntax_error ( " 'new.target' not allowed outside of a function " , new_start ) ;
2021-06-14 11:16:41 +00:00
return { new_target_result . release_nonnull ( ) } ;
2020-11-02 21:27:42 +00:00
}
2021-06-14 11:16:41 +00:00
return { parse_new_expression ( ) } ;
2020-11-02 21:27:42 +00:00
}
2021-11-26 22:45:10 +00:00
case TokenType : : Import : {
auto lookahead_token = next_token ( ) ;
if ( lookahead_token . type ( ) = = TokenType : : ParenOpen )
return { parse_import_call ( ) } ;
2021-12-29 11:15:29 +00:00
if ( lookahead_token . type ( ) = = TokenType : : Period ) {
if ( auto import_meta = try_parse_import_meta_expression ( ) ) {
if ( m_program_type ! = Program : : Type : : Module )
syntax_error ( " import.meta is only allowed in modules " ) ;
return { import_meta . release_nonnull ( ) } ;
}
} else {
consume ( ) ;
expected ( " import.meta or import call " ) ;
2021-11-26 22:45:10 +00:00
}
break ;
}
2021-06-10 21:08:30 +00:00
case TokenType : : Yield :
2021-06-19 12:43:09 +00:00
if ( ! m_state . in_generator_function_context )
2021-06-10 21:08:30 +00:00
goto read_as_identifier ;
2021-06-14 11:16:41 +00:00
return { parse_yield_expression ( ) , false } ;
2021-11-09 20:52:21 +00:00
case TokenType : : Await :
2021-11-26 22:50:32 +00:00
if ( ! m_state . await_expression_is_valid )
2021-11-09 20:52:21 +00:00
goto read_as_identifier ;
return { parse_await_expression ( ) } ;
2021-10-14 00:05:24 +00:00
case TokenType : : PrivateIdentifier :
if ( ! is_private_identifier_valid ( ) )
2022-12-04 18:02:33 +00:00
syntax_error ( DeprecatedString : : formatted ( " Reference to undeclared private field or method '{}' " , m_state . current_token . value ( ) ) ) ;
2022-11-17 09:26:27 +00:00
if ( next_token ( ) . type ( ) ! = TokenType : : In )
syntax_error ( " Cannot have a private identifier in expression if not followed by 'in' " ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return { create_ast_node < PrivateIdentifier > ( { m_source_code , rule_start . position ( ) , position ( ) } , consume ( ) . value ( ) ) } ;
2020-03-11 18:27:43 +00:00
default :
2021-07-11 11:04:55 +00:00
if ( match_identifier_name ( ) )
goto read_as_identifier ;
2021-11-26 22:45:10 +00:00
break ;
2020-03-11 18:27:43 +00:00
}
2021-11-26 22:45:10 +00:00
expected ( " primary expression " ) ;
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return { create_ast_node < ErrorExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } ) } ;
2020-03-11 18:27:43 +00:00
}
2023-02-19 21:07:52 +00:00
// Parses a regular expression literal token, plus an optional trailing flags token, into a
// RegExpLiteral node. Invalid flags, an invalid pattern, or a pattern that fails to compile are
// each reported through syntax_error(); a node is still returned in every case.
NonnullRefPtr<RegExpLiteral const> Parser::parse_regexp_literal()
{
    auto rule_start = push_start();

    // The token text still carries its delimiters: drop the leading and the trailing slash.
    auto const literal_text = consume().value();
    auto pattern = literal_text.substring_view(1, literal_text.length() - 2);

    auto flags = DeprecatedString::empty();
    auto parsed_flags = RegExpObject::default_flags;
    if (match(TokenType::RegexFlags)) {
        auto flags_start = position();
        flags = consume().value();

        // On error the default flags stay in effect.
        if (auto flags_or_error = regex_flags_from_string(flags); !flags_or_error.is_error())
            parsed_flags = flags_or_error.release_value();
        else
            syntax_error(flags_or_error.release_error(), flags_start);
    }

    // An unparsable pattern is reported and then replaced by the empty pattern.
    auto parsed_pattern = DeprecatedString::empty();
    auto const is_unicode = parsed_flags.has_flag_set(ECMAScriptFlags::Unicode);
    auto const is_unicode_sets = parsed_flags.has_flag_set(ECMAScriptFlags::UnicodeSets);
    if (auto pattern_or_error = parse_regex_pattern(pattern, is_unicode, is_unicode_sets); pattern_or_error.is_error())
        syntax_error(pattern_or_error.release_error().error, rule_start.position());
    else
        parsed_pattern = pattern_or_error.release_value();

    auto parsed_regex = Regex<ECMA262>::parse_pattern(parsed_pattern, parsed_flags);
    if (parsed_regex.error != regex::Error::NoError) {
        // A temporary Regex is built only to obtain the textual description of the error.
        auto const compile_error = Regex<ECMA262>(parsed_regex, parsed_pattern, parsed_flags).error_string();
        syntax_error(DeprecatedString::formatted(" RegExp compile error: {} ", compile_error), rule_start.position());
    }

    SourceRange range { m_source_code, rule_start.position(), position() };
    return create_ast_node<RegExpLiteral>(move(range), move(parsed_regex), move(parsed_pattern), move(parsed_flags), pattern.to_deprecated_string(), move(flags));
}
2022-11-30 00:47:25 +00:00
// Returns true if `expression` is an Identifier or a MemberExpression. A CallExpression is
// accepted as well, but only while `allow_web_reality_call_expression` is set.
static bool is_simple_assignment_target(Expression const& expression, bool allow_web_reality_call_expression = true)
{
    if (is<Identifier>(expression) || is<MemberExpression>(expression))
        return true;

    return allow_web_reality_call_expression && is<CallExpression>(expression);
}
2023-02-19 21:07:52 +00:00
// Parses an expression that begins with a prefix operator: `++`, `--`, `!`, `~`, unary `+` and
// `-`, `typeof`, `void` or `delete`. The operator must be the current token; any other token is
// reported through expected(), consumed, and turned into an ErrorExpression.
//
// The returned node's source range runs from the start of this rule to the parser position
// reached after the operand has been parsed.
NonnullRefPtr<Expression const> Parser::parse_unary_prefixed_expression()
{
    auto rule_start = push_start();
    auto precedence = g_operator_precedence.get_unary(m_state.current_token.type());
    auto associativity = operator_associativity(m_state.current_token.type());

    // NOTE: The operand is parsed into a local before the node is created. The order in which
    //       function arguments are evaluated is unspecified, so calling position() and
    //       parse_expression() inside the same argument list would make the end of the source
    //       range depend on the compiler.
    auto parse_operand_into_unary = [&](UnaryOp op) -> NonnullRefPtr<Expression const> {
        auto operand = parse_expression(precedence, associativity);
        return create_ast_node<UnaryExpression>({ m_source_code, rule_start.position(), position() }, op, move(operand));
    };

    // In strict mode, an identifier operand of `++` / `--` goes through the assignment-name check.
    auto check_update_target_in_strict_mode = [&](Expression const& target) {
        if (m_state.strict_mode && is<Identifier>(target)) {
            auto& identifier = static_cast<Identifier const&>(target);
            check_identifier_name_for_assignment_validity(identifier.string());
        }
    };

    switch (m_state.current_token.type()) {
    case TokenType::PlusPlus: {
        consume();
        auto rhs_start = position();
        auto rhs = parse_expression(precedence, associativity);
        if (!is_simple_assignment_target(*rhs))
            syntax_error(DeprecatedString::formatted(" Right-hand side of prefix increment operator must be identifier or member expression, got {} ", rhs->class_name()), rhs_start);
        check_update_target_in_strict_mode(*rhs);
        return create_ast_node<UpdateExpression>({ m_source_code, rule_start.position(), position() }, UpdateOp::Increment, move(rhs), true);
    }
    case TokenType::MinusMinus: {
        consume();
        auto rhs_start = position();
        auto rhs = parse_expression(precedence, associativity);
        if (!is_simple_assignment_target(*rhs))
            syntax_error(DeprecatedString::formatted(" Right-hand side of prefix decrement operator must be identifier or member expression, got {} ", rhs->class_name()), rhs_start);
        check_update_target_in_strict_mode(*rhs);
        return create_ast_node<UpdateExpression>({ m_source_code, rule_start.position(), position() }, UpdateOp::Decrement, move(rhs), true);
    }
    case TokenType::ExclamationMark:
        consume();
        return parse_operand_into_unary(UnaryOp::Not);
    case TokenType::Tilde:
        consume();
        return parse_operand_into_unary(UnaryOp::BitwiseNot);
    case TokenType::Plus:
        consume();
        return parse_operand_into_unary(UnaryOp::Plus);
    case TokenType::Minus:
        consume();
        return parse_operand_into_unary(UnaryOp::Minus);
    case TokenType::Typeof:
        consume();
        return parse_operand_into_unary(UnaryOp::Typeof);
    case TokenType::Void:
        consume();
        // FIXME: This check is really hiding the fact that we don't deal with different expressions correctly.
        if (match(TokenType::Yield) && m_state.in_generator_function_context)
            syntax_error(" 'yield' is not an identifier in generator function context ");
        return parse_operand_into_unary(UnaryOp::Void);
    case TokenType::Delete: {
        consume();
        auto rhs_start = position();
        auto rhs = parse_expression(precedence, associativity);
        if (is<Identifier>(*rhs) && m_state.strict_mode) {
            syntax_error(" Delete of an unqualified identifier in strict mode. ", rhs_start);
        }
        if (is<MemberExpression>(*rhs)) {
            auto& member_expression = static_cast<MemberExpression const&>(*rhs);
            if (member_expression.ends_in_private_name())
                syntax_error(" Private fields cannot be deleted ");
        }
        return create_ast_node<UnaryExpression>({ m_source_code, rule_start.position(), position() }, UnaryOp::Delete, move(rhs));
    }
    default:
        expected(" primary expression ");
        consume();
        return create_ast_node<ErrorExpression>({ m_source_code, rule_start.position(), position() });
    }
}
2023-02-19 21:07:52 +00:00
// Parses a property key and returns it as an expression node:
//   - a string literal            -> the result of parse_string_literal()
//   - a numeric / BigInt literal  -> NumericLiteral / BigIntLiteral
//   - `[` expression `]`          -> the enclosed expression (computed key)
//   - anything else               -> a StringLiteral holding the token's value; expected() is
//                                    called first if the token is not an identifier name.
NonnullRefPtr<Expression const> Parser::parse_property_key()
{
    auto rule_start = push_start();

    if (match(TokenType::StringLiteral))
        return parse_string_literal(consume());

    // NOTE: In the branches below the token is consumed into a local before the node is created.
    //       The order in which function arguments are evaluated is unspecified, so calling
    //       position() and consume() inside the same argument list would make the end of the
    //       source range depend on the compiler.
    if (match(TokenType::NumericLiteral)) {
        auto token = consume();
        return create_ast_node<NumericLiteral>({ m_source_code, rule_start.position(), position() }, token.double_value());
    }

    if (match(TokenType::BigIntLiteral)) {
        auto token = consume();
        return create_ast_node<BigIntLiteral>({ m_source_code, rule_start.position(), position() }, token.value());
    }

    if (match(TokenType::BracketOpen)) {
        consume(TokenType::BracketOpen);
        auto result = parse_expression(2);
        consume(TokenType::BracketClose);
        return result;
    }

    // The token is consumed even when it is not an identifier name, after the error is reported.
    if (!match_identifier_name())
        expected(" IdentifierName ");
    auto token = consume();
    return create_ast_node<StringLiteral>({ m_source_code, rule_start.position(), position() }, token.value());
}
2023-02-19 21:07:52 +00:00
// Parses an object literal, from the opening '{' through the closing '}', and returns
// it as an ObjectExpression node.
//
// Each loop iteration handles one property definition:
//   - `...expr`                         -> ObjectProperty::Type::Spread
//   - `[async] [*] key(...) { ... }`    -> method (optionally async and/or generator)
//   - `get key() {}` / `set key(v) {}`  -> ObjectProperty::Type::Getter / Setter
//   - `key: value`                      -> KeyValue, or ProtoSetter for a non-computed "__proto__"
//   - `identifier`                      -> shorthand property
//   - `key = expr`                      -> not a valid object literal on its own; the expression is
//                                          parsed and discarded, and the range of the first such
//                                          expression is recorded in
//                                          m_state.invalid_property_range_in_object_expression,
//                                          keyed by the start offset of this object expression.
NonnullRefPtr<ObjectExpression const> Parser::parse_object_expression()
{
    auto rule_start = push_start();
    consume(TokenType::CurlyOpen);

    Vector<NonnullRefPtr<ObjectProperty>> properties;
    ObjectProperty::Type property_type;
    Optional<SourceRange> invalid_object_literal_property_range;

    // Error recovery: discard tokens until something that may start the next property.
    // NOTE(review): this stops at ',' or '{' but not at '}' - confirm that is intended.
    auto skip_to_next_property = [&] {
        while (!done() && !match(TokenType::Comma) && !match(TokenType::CurlyOpen))
            consume();
    };

    // It is a Syntax Error if PropertyNameList of PropertyDefinitionList contains any duplicate
    // entries for "__proto__" and at least two of those entries were obtained from productions of
    // the form PropertyDefinition : PropertyKey : AssignmentExpression .
    bool has_direct_proto_property = false;

    while (!done() && !match(TokenType::CurlyClose)) {
        property_type = ObjectProperty::Type::KeyValue;
        RefPtr<Expression const> property_key;
        RefPtr<Expression const> property_value;
        FunctionKind function_kind { FunctionKind::Normal };

        if (match(TokenType::TripleDot)) {
            consume();
            // For a spread element the "key" slot carries the spread expression; there is no value.
            property_key = parse_expression(2);
            properties.append(create_ast_node<ObjectProperty>({ m_source_code, rule_start.position(), position() }, *property_key, nullptr, ObjectProperty::Type::Spread, false));
            if (!match(TokenType::Comma))
                break;
            consume(TokenType::Comma);
            continue;
        }

        // Remember the type of the first token of this property (used for the "__proto__" check
        // below) and where a potential method definition starts.
        auto type = m_state.current_token.type();
        auto function_start = position();

        if (match(TokenType::Async)) {
            auto lookahead_token = next_token();

            // `async` is only treated as a modifier when the following token cannot make it a
            // plain property named "async" and is not separated from it by a line terminator.
            if (lookahead_token.type() != TokenType::ParenOpen && lookahead_token.type() != TokenType::Colon
                && lookahead_token.type() != TokenType::Comma && lookahead_token.type() != TokenType::CurlyClose
                && lookahead_token.type() != TokenType::Async
                && !lookahead_token.trivia_contains_line_terminator()) {
                consume(TokenType::Async);
                function_kind = FunctionKind::Async;
            }
        }

        if (match(TokenType::Asterisk)) {
            consume();
            property_type = ObjectProperty::Type::KeyValue;
            property_key = parse_property_key();
            VERIFY(function_kind == FunctionKind::Normal || function_kind == FunctionKind::Async);
            function_kind = function_kind == FunctionKind::Normal ? FunctionKind::Generator : FunctionKind::AsyncGenerator;
        } else if (match_identifier()) {
            auto identifier = consume();
            if (identifier.original_value() == "get"sv && match_property_key()) {
                property_type = ObjectProperty::Type::Getter;
                property_key = parse_property_key();
            } else if (identifier.original_value() == "set"sv && match_property_key()) {
                property_type = ObjectProperty::Type::Setter;
                property_key = parse_property_key();
            } else {
                // Plain identifier: prepare both a key and a value so that the shorthand form
                // `{ foo }` can be emitted below if no ':' or '(' follows.
                property_key = create_ast_node<StringLiteral>({ m_source_code, rule_start.position(), position() }, identifier.value());
                property_value = create_identifier_and_register_in_current_scope({ m_source_code, rule_start.position(), position() }, identifier.DeprecatedFlyString_value());
            }
        } else {
            property_key = parse_property_key();
        }

        // 4. Else if propKey is the String value "__proto__" and if IsComputedPropertyKey of PropertyName is false, then
        //    a. Let isProtoSetter be true.
        bool is_proto = (type == TokenType::StringLiteral || type == TokenType::Identifier) && is<StringLiteral>(*property_key) && static_cast<StringLiteral const&>(*property_key).value() == "__proto__";

        if (property_type == ObjectProperty::Type::Getter || property_type == ObjectProperty::Type::Setter) {
            if (!match(TokenType::ParenOpen)) {
                expected("'(' for object getter or setter property");
                skip_to_next_property();
                continue;
            }
        }

        if (match(TokenType::Equals)) {
            // Not a valid object literal, but a valid assignment target
            consume();
            // Parse the expression and throw it away
            auto expression = parse_expression(2);
            if (!invalid_object_literal_property_range.has_value())
                invalid_object_literal_property_range = expression->source_range();
        } else if (match(TokenType::ParenOpen)) {
            VERIFY(property_key);

            u8 parse_options = FunctionNodeParseOptions::AllowSuperPropertyLookup;
            if (property_type == ObjectProperty::Type::Getter)
                parse_options |= FunctionNodeParseOptions::IsGetterFunction;
            if (property_type == ObjectProperty::Type::Setter)
                parse_options |= FunctionNodeParseOptions::IsSetterFunction;
            if (function_kind == FunctionKind::Generator || function_kind == FunctionKind::AsyncGenerator)
                parse_options |= FunctionNodeParseOptions::IsGeneratorFunction;
            if (function_kind == FunctionKind::Async || function_kind == FunctionKind::AsyncGenerator)
                parse_options |= FunctionNodeParseOptions::IsAsyncFunction;

            auto function = parse_function_node<FunctionExpression>(parse_options, function_start);
            properties.append(create_ast_node<ObjectProperty>({ m_source_code, rule_start.position(), position() }, *property_key, function, property_type, true));
        } else if (match(TokenType::Colon)) {
            if (!property_key) {
                expected("a property name");
                skip_to_next_property();
                continue;
            }
            consume();

            if (is_proto) {
                if (has_direct_proto_property)
                    syntax_error("Property name '__proto__' must not appear more than once in object literal");
                has_direct_proto_property = true;
            }
            if (is_proto && property_type == ObjectProperty::Type::KeyValue)
                property_type = ObjectProperty::Type::ProtoSetter;

            auto rhs_expression = parse_expression(2);
            // Evaluate this before rhs_expression is moved into the node.
            bool is_method = is<FunctionExpression>(*rhs_expression);
            properties.append(create_ast_node<ObjectProperty>({ m_source_code, rule_start.position(), position() }, *property_key, move(rhs_expression), property_type, is_method));
        } else if (property_key && property_value) {
            // Shorthand property: the identifier doubles as an identifier reference, so strict
            // mode reserved words are rejected here.
            if (m_state.strict_mode && is<StringLiteral>(*property_key)) {
                auto& string_literal = static_cast<StringLiteral const&>(*property_key);
                if (is_strict_reserved_word(string_literal.value()))
                    syntax_error(DeprecatedString::formatted("'{}' is a reserved keyword", string_literal.value()));
            }

            properties.append(create_ast_node<ObjectProperty>({ m_source_code, rule_start.position(), position() }, *property_key, *property_value, property_type, false));
        } else {
            expected("a property");
            skip_to_next_property();
            continue;
        }

        if (!match(TokenType::Comma))
            break;

        consume(TokenType::Comma);
    }

    consume(TokenType::CurlyClose);

    if (invalid_object_literal_property_range.has_value()) {
        size_t object_expression_offset = rule_start.position().offset;
        VERIFY(!m_state.invalid_property_range_in_object_expression.contains(object_expression_offset));
        m_state.invalid_property_range_in_object_expression.set(object_expression_offset, invalid_object_literal_property_range->start);
    }

    properties.shrink_to_fit();
    return create_ast_node<ObjectExpression>(
        { m_source_code, rule_start.position(), position() },
        move(properties));
}
2023-02-19 21:07:52 +00:00
// Parses an array literal, from the opening '[' through the closing ']', and returns it
// as an ArrayExpression node.
//
// Each element is either a spread (`...expr`, wrapped in a SpreadExpression), a regular
// expression, or a hole: a comma with no expression before it appends a null RefPtr.
NonnullRefPtr<ArrayExpression const> Parser::parse_array_expression()
{
    auto rule_start = push_start();
    consume(TokenType::BracketOpen);

    Vector<RefPtr<Expression const>> elements;
    while (match_expression() || match(TokenType::TripleDot) || match(TokenType::Comma)) {
        // Stays null when the current token is a comma, i.e. for an elided element.
        RefPtr<Expression const> expression;

        if (match(TokenType::TripleDot)) {
            consume(TokenType::TripleDot);
            expression = create_ast_node<SpreadExpression>({ m_source_code, rule_start.position(), position() }, parse_expression(2));
        } else if (match_expression()) {
            expression = parse_expression(2);
        }

        elements.append(expression);

        if (!match(TokenType::Comma))
            break;
        consume(TokenType::Comma);
    }

    consume(TokenType::BracketClose);

    elements.shrink_to_fit();
    return create_ast_node<ArrayExpression>({ m_source_code, rule_start.position(), position() }, move(elements));
}
2023-02-19 21:07:52 +00:00
// Builds a StringLiteral node from the cooked value of an already consumed token.
//
// token:                   the token whose string value is decoded via Token::string_value().
// string_literal_type:     whether the token is a normal string, or part of a tagged or
//                          non-tagged template literal; selects the octal-escape diagnostic.
// contains_invalid_escape: optional out-parameter. When non-null (only valid for tagged
//                          templates), an invalid escape sets it to true instead of
//                          reporting a syntax error.
//
// A legacy octal escape always sets m_state.string_legacy_octal_escape_sequence_in_scope; it
// produces no diagnostic for a normal string literal outside strict mode.
NonnullRefPtr<StringLiteral const> Parser::parse_string_literal(Token const& token, StringLiteralType string_literal_type, bool* contains_invalid_escape)
{
    auto rule_start = push_start();
    auto status = Token::StringValueStatus::Ok;
    auto string = token.string_value(status);

    // NOTE: Tagged templates should not fail on invalid strings as their raw contents can still be accessed.
    if (status != Token::StringValueStatus::Ok) {
        DeprecatedString message;
        if (status == Token::StringValueStatus::LegacyOctalEscapeSequence) {
            m_state.string_legacy_octal_escape_sequence_in_scope = true;
            // It is a Syntax Error if the [Tagged] parameter was not set and Template{Head, Middle, Tail} Contains NotEscapeSequence.
            if (string_literal_type != StringLiteralType::Normal)
                message = "Octal escape sequence not allowed in template literal";
            else if (m_state.strict_mode)
                message = "Octal escape sequence in string literal not allowed in strict mode";
        } else if (status == Token::StringValueStatus::MalformedHexEscape || status == Token::StringValueStatus::MalformedUnicodeEscape) {
            auto type = status == Token::StringValueStatus::MalformedUnicodeEscape ? "unicode" : "hexadecimal";
            message = DeprecatedString::formatted("Malformed {} escape sequence", type);
        } else if (status == Token::StringValueStatus::UnicodeEscapeOverflow) {
            message = "Unicode code_point must not be greater than 0x10ffff in escape sequence";
        } else {
            VERIFY_NOT_REACHED();
        }

        // An empty message means the escape is tolerated in the current mode.
        if (!message.is_empty()) {
            if (contains_invalid_escape != nullptr) {
                VERIFY(string_literal_type == StringLiteralType::TaggedTemplate);
                *contains_invalid_escape = true;
            } else {
                syntax_error(message, Position { token.line_number(), token.line_column() });
            }
        }
    }

    return create_ast_node<StringLiteral>({ m_source_code, rule_start.position(), position() }, string);
}
2023-02-19 21:07:52 +00:00
NonnullRefPtr < TemplateLiteral const > Parser : : parse_template_literal ( bool is_tagged )
LibJS: Add template literals
Adds fully functioning template literals. Because template literals
contain expressions, most of the work has to be done in the Lexer rather
than the Parser. And because of the complexity of template literals
(expressions, nesting, escapes, etc), the Lexer needs to have some
template-related state.
When entering a new template literal, a TemplateLiteralStart token is
emitted. When inside a literal, all text will be parsed up until a '${'
or '`' (or EOF, but that's a syntax error) is seen, and then a
TemplateLiteralExprStart token is emitted. At this point, the Lexer
proceeds as normal, however it keeps track of the number of opening
and closing curly braces it has seen in order to determine the close
of the expression. Once it finds a matching curly brace for the '${',
a TemplateLiteralExprEnd token is emitted and the state is updated
accordingly.
When the Lexer is inside of a template literal, but not an expression,
and sees a '`', this must be the closing grave: a TemplateLiteralEnd
token is emitted.
The state required to correctly parse template strings consists of a
vector (for nesting) of two pieces of information: whether or not we
are in a template expression (as opposed to a template string); and
the count of the number of unmatched open curly braces we have seen
(only applicable if the Lexer is currently in a template expression).
TODO: Add support for template literal newlines in the JS REPL (this will
cause a syntax error currently):
> `foo
> bar`
'foo
bar'
2020-05-03 22:41:14 +00:00
{
2020-12-28 17:15:22 +00:00
auto rule_start = push_start ( ) ;
LibJS: Add template literals
Adds fully functioning template literals. Because template literals
contain expressions, most of the work has to be done in the Lexer rather
than the Parser. And because of the complexity of template literals
(expressions, nesting, escapes, etc), the Lexer needs to have some
template-related state.
When entering a new template literal, a TemplateLiteralStart token is
emitted. When inside a literal, all text will be parsed up until a '${'
or '`' (or EOF, but that's a syntax error) is seen, and then a
TemplateLiteralExprStart token is emitted. At this point, the Lexer
proceeds as normal, however it keeps track of the number of opening
and closing curly braces it has seen in order to determine the close
of the expression. Once it finds a matching curly brace for the '${',
a TemplateLiteralExprEnd token is emitted and the state is updated
accordingly.
When the Lexer is inside of a template literal, but not an expression,
and sees a '`', this must be the closing grave: a TemplateLiteralEnd
token is emitted.
The state required to correctly parse template strings consists of a
vector (for nesting) of two pieces of information: whether or not we
are in a template expression (as opposed to a template string); and
the count of the number of unmatched open curly braces we have seen
(only applicable if the Lexer is currently in a template expression).
TODO: Add support for template literal newlines in the JS REPL (this will
cause a syntax error currently):
> `foo
> bar`
'foo
bar'
2020-05-03 22:41:14 +00:00
consume ( TokenType : : TemplateLiteralStart ) ;
2023-03-06 13:17:01 +00:00
Vector < NonnullRefPtr < Expression const > > expressions ;
Vector < NonnullRefPtr < Expression const > > raw_strings ;
2020-05-06 23:34:14 +00:00
2020-12-28 17:15:22 +00:00
auto append_empty_string = [ this , & rule_start , & expressions , & raw_strings , is_tagged ] ( ) {
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
auto string_literal = create_ast_node < StringLiteral > ( { m_source_code , rule_start . position ( ) , position ( ) } , " " ) ;
2020-05-06 23:34:14 +00:00
expressions . append ( string_literal ) ;
if ( is_tagged )
raw_strings . append ( string_literal ) ;
} ;
LibJS: Add template literals
Adds fully functioning template literals. Because template literals
contain expressions, most of the work has to be done in the Lexer rather
than the Parser. And because of the complexity of template literals
(expressions, nesting, escapes, etc), the Lexer needs to have some
template-related state.
When entering a new template literal, a TemplateLiteralStart token is
emitted. When inside a literal, all text will be parsed up until a '${'
or '`' (or EOF, but that's a syntax error) is seen, and then a
TemplateLiteralExprStart token is emitted. At this point, the Lexer
proceeds as normal, however it keeps track of the number of opening
and closing curly braces it has seen in order to determine the close
of the expression. Once it finds a matching curly brace for the '${',
a TemplateLiteralExprEnd token is emitted and the state is updated
accordingly.
When the Lexer is inside of a template literal, but not an expression,
and sees a '`', this must be the closing grave: a TemplateLiteralEnd
token is emitted.
The state required to correctly parse template strings consists of a
vector (for nesting) of two pieces of information: whether or not we
are in a template expression (as opposed to a template string); and
the count of the number of unmatched open curly braces we have seen
(only applicable if the Lexer is currently in a template expression).
TODO: Add support for template literal newlines in the JS REPL (this will
cause a syntax error currently):
> `foo
> bar`
'foo
bar'
2020-05-03 22:41:14 +00:00
2020-05-06 09:17:35 +00:00
if ( ! match ( TokenType : : TemplateLiteralString ) )
2020-05-06 23:34:14 +00:00
append_empty_string ( ) ;
2020-05-06 09:17:35 +00:00
2020-06-03 22:36:25 +00:00
while ( ! done ( ) & & ! match ( TokenType : : TemplateLiteralEnd ) & & ! match ( TokenType : : UnterminatedTemplateLiteral ) ) {
LibJS: Add template literals
Adds fully functioning template literals. Because template literals
contain expressions, most of the work has to be done in the Lexer rather
than the Parser. And because of the complexity of template literals
(expressions, nesting, escapes, etc), the Lexer needs to have some
template-related state.
When entering a new template literal, a TemplateLiteralStart token is
emitted. When inside a literal, all text will be parsed up until a '${'
or '`' (or EOF, but that's a syntax error) is seen, and then a
TemplateLiteralExprStart token is emitted. At this point, the Lexer
proceeds as normal, however it keeps track of the number of opening
and closing curly braces it has seen in order to determine the close
of the expression. Once it finds a matching curly brace for the '${',
a TemplateLiteralExprEnd token is emitted and the state is updated
accordingly.
When the Lexer is inside of a template literal, but not an expression,
and sees a '`', this must be the closing grave: a TemplateLiteralEnd
token is emitted.
The state required to correctly parse template strings consists of a
vector (for nesting) of two pieces of information: whether or not we
are in a template expression (as opposed to a template string); and
the count of the number of unmatched open curly braces we have seen
(only applicable if the Lexer is currently in a template expression).
TODO: Add support for template literal newlines in the JS REPL (this will
cause a syntax error currently):
> `foo
> bar`
'foo
bar'
2020-05-03 22:41:14 +00:00
if ( match ( TokenType : : TemplateLiteralString ) ) {
2020-05-06 23:34:14 +00:00
auto token = consume ( ) ;
2022-08-17 00:04:27 +00:00
bool contains_invalid_escape = false ;
auto parsed_string_value = parse_string_literal ( token ,
is_tagged ? StringLiteralType : : TaggedTemplate : StringLiteralType : : NonTaggedTemplate ,
is_tagged ? & contains_invalid_escape : nullptr ) ;
// An invalid string leads to a cooked value of `undefined` but still gives the raw string.
if ( contains_invalid_escape )
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
expressions . append ( create_ast_node < NullLiteral > ( { m_source_code , rule_start . position ( ) , position ( ) } ) ) ;
2022-08-17 00:04:27 +00:00
else
expressions . append ( move ( parsed_string_value ) ) ;
2020-05-06 23:34:14 +00:00
if ( is_tagged )
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
raw_strings . append ( create_ast_node < StringLiteral > ( { m_source_code , rule_start . position ( ) , position ( ) } , token . raw_template_value ( ) ) ) ;
LibJS: Add template literals
Adds fully functioning template literals. Because template literals
contain expressions, most of the work has to be done in the Lexer rather
than the Parser. And because of the complexity of template literals
(expressions, nesting, escapes, etc), the Lexer needs to have some
template-related state.
When entering a new template literal, a TemplateLiteralStart token is
emitted. When inside a literal, all text will be parsed up until a '${'
or '`' (or EOF, but that's a syntax error) is seen, and then a
TemplateLiteralExprStart token is emitted. At this point, the Lexer
proceeds as normal, however it keeps track of the number of opening
and closing curly braces it has seen in order to determine the close
of the expression. Once it finds a matching curly brace for the '${',
a TemplateLiteralExprEnd token is emitted and the state is updated
accordingly.
When the Lexer is inside of a template literal, but not an expression,
and sees a '`', this must be the closing grave: a TemplateLiteralEnd
token is emitted.
The state required to correctly parse template strings consists of a
vector (for nesting) of two pieces of information: whether or not we
are in a template expression (as opposed to a template string); and
the count of the number of unmatched open curly braces we have seen
(only applicable if the Lexer is currently in a template expression).
TODO: Add support for template literal newlines in the JS REPL (this will
cause a syntax error currently):
> `foo
> bar`
'foo
bar'
2020-05-03 22:41:14 +00:00
} else if ( match ( TokenType : : TemplateLiteralExprStart ) ) {
consume ( TokenType : : TemplateLiteralExprStart ) ;
if ( match ( TokenType : : TemplateLiteralExprEnd ) ) {
syntax_error ( " Empty template literal expression block " ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < TemplateLiteral > ( { m_source_code , rule_start . position ( ) , position ( ) } , expressions ) ;
LibJS: Add template literals
Adds fully functioning template literals. Because template literals
contain expressions, most of the work has to be done in the Lexer rather
than the Parser. And because of the complexity of template literals
(expressions, nesting, escapes, etc), the Lexer needs to have some
template-related state.
When entering a new template literal, a TemplateLiteralStart token is
emitted. When inside a literal, all text will be parsed up until a '${'
or '`' (or EOF, but that's a syntax error) is seen, and then a
TemplateLiteralExprStart token is emitted. At this point, the Lexer
proceeds as normal, however it keeps track of the number of opening
and closing curly braces it has seen in order to determine the close
of the expression. Once it finds a matching curly brace for the '${',
a TemplateLiteralExprEnd token is emitted and the state is updated
accordingly.
When the Lexer is inside of a template literal, but not an expression,
and sees a '`', this must be the closing grave: a TemplateLiteralEnd
token is emitted.
The state required to correctly parse template strings consists of a
vector (for nesting) of two pieces of information: whether or not we
are in a template expression (as opposed to a template string); and
the count of the number of unmatched open curly braces we have seen
(only applicable if the Lexer is currently in a template expression).
TODO: Add support for template literal newlines in the JS REPL (this will
cause a syntax error currently):
> `foo
> bar`
'foo
bar'
2020-05-03 22:41:14 +00:00
}
expressions . append ( parse_expression ( 0 ) ) ;
if ( match ( TokenType : : UnterminatedTemplateLiteral ) ) {
syntax_error ( " Unterminated template literal " ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < TemplateLiteral > ( { m_source_code , rule_start . position ( ) , position ( ) } , expressions ) ;
LibJS: Add template literals
Adds fully functioning template literals. Because template literals
contain expressions, most of the work has to be done in the Lexer rather
than the Parser. And because of the complexity of template literals
(expressions, nesting, escapes, etc), the Lexer needs to have some
template-related state.
When entering a new template literal, a TemplateLiteralStart token is
emitted. When inside a literal, all text will be parsed up until a '${'
or '`' (or EOF, but that's a syntax error) is seen, and then a
TemplateLiteralExprStart token is emitted. At this point, the Lexer
proceeds as normal, however it keeps track of the number of opening
and closing curly braces it has seen in order to determine the close
of the expression. Once it finds a matching curly brace for the '${',
a TemplateLiteralExprEnd token is emitted and the state is updated
accordingly.
When the Lexer is inside of a template literal, but not an expression,
and sees a '`', this must be the closing grave: a TemplateLiteralEnd
token is emitted.
The state required to correctly parse template strings consists of a
vector (for nesting) of two pieces of information: whether or not we
are in a template expression (as opposed to a template string); and
the count of the number of unmatched open curly braces we have seen
(only applicable if the Lexer is currently in a template expression).
TODO: Add support for template literal newlines in the JS REPL (this will
cause a syntax error currently):
> `foo
> bar`
'foo
bar'
2020-05-03 22:41:14 +00:00
}
consume ( TokenType : : TemplateLiteralExprEnd ) ;
2020-05-06 09:17:35 +00:00
if ( ! match ( TokenType : : TemplateLiteralString ) )
2020-05-06 23:34:14 +00:00
append_empty_string ( ) ;
2020-06-03 22:36:25 +00:00
} else {
expected ( " Template literal string or expression " ) ;
break ;
LibJS: Add template literals
Adds fully functioning template literals. Because template literals
contain expressions, most of the work has to be done in the Lexer rather
than the Parser. And because of the complexity of template literals
(expressions, nesting, escapes, etc), the Lexer needs to have some
template-related state.
When entering a new template literal, a TemplateLiteralStart token is
emitted. When inside a literal, all text will be parsed up until a '${'
or '`' (or EOF, but that's a syntax error) is seen, and then a
TemplateLiteralExprStart token is emitted. At this point, the Lexer
proceeds as normal, however it keeps track of the number of opening
and closing curly braces it has seen in order to determine the close
of the expression. Once it finds a matching curly brace for the '${',
a TemplateLiteralExprEnd token is emitted and the state is updated
accordingly.
When the Lexer is inside of a template literal, but not an expression,
and sees a '`', this must be the closing grave: a TemplateLiteralEnd
token is emitted.
The state required to correctly parse template strings consists of a
vector (for nesting) of two pieces of information: whether or not we
are in a template expression (as opposed to a template string); and
the count of the number of unmatched open curly braces we have seen
(only applicable if the Lexer is currently in a template expression).
TODO: Add support for template literal newlines in the JS REPL (this will
cause a syntax error currently):
> `foo
> bar`
'foo
bar'
2020-05-03 22:41:14 +00:00
}
}
if ( match ( TokenType : : UnterminatedTemplateLiteral ) ) {
syntax_error ( " Unterminated template literal " ) ;
} else {
consume ( TokenType : : TemplateLiteralEnd ) ;
}
2020-05-06 23:34:14 +00:00
if ( is_tagged )
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < TemplateLiteral > ( { m_source_code , rule_start . position ( ) , position ( ) } , expressions , raw_strings ) ;
return create_ast_node < TemplateLiteral > ( { m_source_code , rule_start . position ( ) , position ( ) } , expressions ) ;
LibJS: Add template literals
Adds fully functioning template literals. Because template literals
contain expressions, most of the work has to be done in the Lexer rather
than the Parser. And because of the complexity of template literals
(expressions, nesting, escapes, etc), the Lexer needs to have some
template-related state.
When entering a new template literal, a TemplateLiteralStart token is
emitted. When inside a literal, all text will be parsed up until a '${'
or '`' (or EOF, but that's a syntax error) is seen, and then a
TemplateLiteralExprStart token is emitted. At this point, the Lexer
proceeds as normal, however it keeps track of the number of opening
and closing curly braces it has seen in order to determine the close
of the expression. Once it finds a matching curly brace for the '${',
a TemplateLiteralExprEnd token is emitted and the state is updated
accordingly.
When the Lexer is inside of a template literal, but not an expression,
and sees a '`', this must be the closing grave: a TemplateLiteralEnd
token is emitted.
The state required to correctly parse template strings consists of a
vector (for nesting) of two pieces of information: whether or not we
are in a template expression (as opposed to a template string); and
the count of the number of unmatched open curly braces we have seen
(only applicable if the Lexer is currently in a template expression).
TODO: Add support for template literal newlines in the JS REPL (this will
cause a syntax error currently):
> `foo
> bar`
'foo
bar'
2020-05-03 22:41:14 +00:00
}
2023-02-19 21:07:52 +00:00
// Parses a single expression: one primary expression, any template literals that tag it,
// then secondary (operator) expressions for as long as their precedence allows, and finally
// an optional comma-separated sequence.
//
// min_precedence: an operator with lower precedence than this ends the secondary-expression loop;
//                 a comma sequence is only collected when this is <= 1.
// associativity:  with Associativity::Left, an operator whose precedence equals min_precedence
//                 also ends the loop.
// forbidden:      handed to match_secondary_expression(); grows via merge() after each
//                 secondary expression parsed here.
//
// Returns the resulting expression node (possibly wrapped in TaggedTemplateLiteral or
// SequenceExpression nodes created below).
NonnullRefPtr < Expression const > Parser : : parse_expression ( int min_precedence , Associativity associativity , ForbiddenTokens forbidden )
2020-03-11 18:27:43 +00:00
{
2020-12-28 17:15:22 +00:00
auto rule_start = push_start ( ) ;
2021-06-14 11:16:41 +00:00
// parse_primary_expression() also reports whether secondary expressions may follow.
auto [ expression , should_continue_parsing ] = parse_primary_expression ( ) ;
2021-07-11 10:18:30 +00:00
// Emits a syntax error when `expression` is an ObjectExpression whose start offset has an
// entry recorded in m_state.invalid_property_range_in_object_expression.
auto check_for_invalid_object_property = [ & ] ( auto & expression ) {
if ( is < ObjectExpression > ( * expression ) ) {
2022-11-27 01:24:38 +00:00
if ( auto start_offset = m_state . invalid_property_range_in_object_expression . get ( expression - > start_offset ( ) ) ; start_offset . has_value ( ) )
syntax_error ( " Invalid property in object literal " , start_offset . value ( ) ) ;
2021-07-11 10:18:30 +00:00
}
} ;
2021-10-07 16:43:22 +00:00
// For an identifier, look for a declaration of that name in every scope from the current one
// up to (but excluding) the parent of the last function scope. If none declares it and the
// name is `arguments`, flag the current scope as accessing the arguments object.
if ( is < Identifier > ( * expression ) & & m_state . current_scope_pusher ) {
2023-02-19 21:07:52 +00:00
auto identifier_instance = static_ptr_cast < Identifier const > ( expression ) ;
2021-10-07 16:43:22 +00:00
auto function_scope = m_state . current_scope_pusher - > last_function_scope ( ) ;
// With no function scope the walk stops at nullptr instead of at a specific parent scope.
auto function_parent_scope = function_scope ? function_scope - > parent_scope ( ) : nullptr ;
bool has_not_been_declared_as_variable = true ;
for ( auto scope = m_state . current_scope_pusher ; scope ! = function_parent_scope ; scope = scope - > parent_scope ( ) ) {
if ( scope - > has_declaration ( identifier_instance - > string ( ) ) ) {
has_not_been_declared_as_variable = false ;
break ;
}
}
if ( has_not_been_declared_as_variable ) {
if ( identifier_instance - > string ( ) = = " arguments " sv )
m_state . current_scope_pusher - > set_contains_access_to_arguments_object ( ) ;
}
}
2020-05-06 09:17:35 +00:00
// Each template literal that immediately follows wraps the expression built so far in a
// TaggedTemplateLiteral; the loop handles several in a row.
while ( match ( TokenType : : TemplateLiteralStart ) ) {
2020-05-06 23:34:14 +00:00
auto template_literal = parse_template_literal ( true ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
expression = create_ast_node < TaggedTemplateLiteral > ( { m_source_code , rule_start . position ( ) , position ( ) } , move ( expression ) , move ( template_literal ) ) ;
2020-05-06 09:17:35 +00:00
}
2021-06-14 11:16:41 +00:00
if ( should_continue_parsing ) {
2023-04-01 19:44:32 +00:00
// Keep the caller's forbidden set: every secondary expression is parsed with it, while the
// merged `forbidden` is only consulted by this loop's own match_secondary_expression() check.
auto original_forbidden = forbidden ;
2021-06-14 11:16:41 +00:00
while ( match_secondary_expression ( forbidden ) ) {
2021-06-19 12:43:09 +00:00
int new_precedence = g_operator_precedence . get ( m_state . current_token . type ( ) ) ;
2021-06-14 11:16:41 +00:00
// Stop if the upcoming operator has lower precedence than allowed, or equal precedence
// under left associativity.
if ( new_precedence < min_precedence )
break ;
if ( new_precedence = = min_precedence & & associativity = = Associativity : : Left )
break ;
2021-07-11 10:18:30 +00:00
// Validate the current expression before it is moved into the secondary expression.
check_for_invalid_object_property ( expression ) ;
2020-03-12 22:02:41 +00:00
2021-06-19 12:43:09 +00:00
Associativity new_associativity = operator_associativity ( m_state . current_token . type ( ) ) ;
2023-04-01 19:44:32 +00:00
auto result = parse_secondary_expression ( move ( expression ) , new_precedence , new_associativity , original_forbidden ) ;
2022-02-16 06:34:59 +00:00
expression = result . expression ;
forbidden = forbidden . merge ( result . forbidden ) ;
2021-08-14 15:02:15 +00:00
// Template literals following the new expression tag it as well, except when that
// expression is an UpdateExpression.
while ( match ( TokenType : : TemplateLiteralStart ) & & ! is < UpdateExpression > ( * expression ) ) {
2021-06-14 11:16:41 +00:00
auto template_literal = parse_template_literal ( true ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
expression = create_ast_node < TaggedTemplateLiteral > ( { m_source_code , rule_start . position ( ) , position ( ) } , move ( expression ) , move ( template_literal ) ) ;
2021-06-14 11:16:41 +00:00
}
2020-05-06 09:17:35 +00:00
}
2020-03-11 18:27:43 +00:00
}
2021-07-11 10:18:30 +00:00
2021-08-14 15:08:09 +00:00
// A SuperExpression left as the final result of this call is reported as a syntax error.
if ( is < SuperExpression > ( * expression ) )
syntax_error ( " 'super' keyword unexpected here " ) ;
2021-07-11 10:18:30 +00:00
// Final validation, covering the case where the loop above never ran or ended early.
check_for_invalid_object_property ( expression ) ;
2021-10-07 16:43:22 +00:00
// A call whose callee is the identifier `eval` marks the current scope as containing a
// direct call to eval.
if ( is < CallExpression > ( * expression ) & & m_state . current_scope_pusher ) {
2023-02-19 21:07:52 +00:00
auto & callee = static_ptr_cast < CallExpression const > ( expression ) - > callee ( ) ;
2023-07-21 06:17:01 +00:00
if ( is < Identifier > ( callee ) & & static_cast < Identifier const & > ( callee ) . string ( ) = = " eval " sv ) {
m_state . current_scope_pusher - > set_contains_direct_call_to_eval ( ) ;
2021-10-07 16:43:22 +00:00
}
}
2020-05-11 16:27:31 +00:00
// Comma-separated expressions are gathered into one SequenceExpression. The operands are
// parsed with min_precedence 2, so those nested calls do not take this branch themselves.
if ( match ( TokenType : : Comma ) & & min_precedence < = 1 ) {
2023-03-06 13:17:01 +00:00
Vector < NonnullRefPtr < Expression const > > expressions ;
2020-05-11 16:27:31 +00:00
expressions . append ( expression ) ;
while ( match ( TokenType : : Comma ) ) {
consume ( ) ;
expressions . append ( parse_expression ( 2 ) ) ;
}
2022-11-26 19:45:06 +00:00
expressions . shrink_to_fit ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
expression = create_ast_node < SequenceExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , move ( expressions ) ) ;
2020-05-11 16:27:31 +00:00
}
2020-03-11 18:27:43 +00:00
return expression ;
}
2023-02-19 21:07:52 +00:00
Parser : : ExpressionResult Parser : : parse_secondary_expression ( NonnullRefPtr < Expression const > lhs , int min_precedence , Associativity associativity , ForbiddenTokens forbidden )
2020-03-11 18:27:43 +00:00
{
2020-12-28 17:15:22 +00:00
auto rule_start = push_start ( ) ;
2021-06-19 12:43:09 +00:00
switch ( m_state . current_token . type ( ) ) {
2020-03-11 18:27:43 +00:00
case TokenType : : Plus :
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < BinaryExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , BinaryOp : : Addition , move ( lhs ) , parse_expression ( min_precedence , associativity , forbidden ) ) ;
2020-03-12 12:09:15 +00:00
case TokenType : : PlusEquals :
2022-02-15 10:11:56 +00:00
return parse_assignment_expression ( AssignmentOp : : AdditionAssignment , move ( lhs ) , min_precedence , associativity , forbidden ) ;
2020-03-11 18:27:43 +00:00
case TokenType : : Minus :
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < BinaryExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , BinaryOp : : Subtraction , move ( lhs ) , parse_expression ( min_precedence , associativity , forbidden ) ) ;
2020-03-12 12:09:15 +00:00
case TokenType : : MinusEquals :
2022-02-15 10:11:56 +00:00
return parse_assignment_expression ( AssignmentOp : : SubtractionAssignment , move ( lhs ) , min_precedence , associativity , forbidden ) ;
2020-03-12 12:04:52 +00:00
case TokenType : : Asterisk :
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < BinaryExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , BinaryOp : : Multiplication , move ( lhs ) , parse_expression ( min_precedence , associativity , forbidden ) ) ;
2020-03-12 12:09:15 +00:00
case TokenType : : AsteriskEquals :
2022-02-15 10:11:56 +00:00
return parse_assignment_expression ( AssignmentOp : : MultiplicationAssignment , move ( lhs ) , min_precedence , associativity , forbidden ) ;
2020-03-12 12:04:52 +00:00
case TokenType : : Slash :
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < BinaryExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , BinaryOp : : Division , move ( lhs ) , parse_expression ( min_precedence , associativity , forbidden ) ) ;
2020-03-12 12:09:15 +00:00
case TokenType : : SlashEquals :
2022-02-15 10:11:56 +00:00
return parse_assignment_expression ( AssignmentOp : : DivisionAssignment , move ( lhs ) , min_precedence , associativity , forbidden ) ;
2020-04-04 19:17:34 +00:00
case TokenType : : Percent :
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < BinaryExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , BinaryOp : : Modulo , move ( lhs ) , parse_expression ( min_precedence , associativity , forbidden ) ) ;
2020-05-04 22:07:05 +00:00
case TokenType : : PercentEquals :
2022-02-15 10:11:56 +00:00
return parse_assignment_expression ( AssignmentOp : : ModuloAssignment , move ( lhs ) , min_precedence , associativity , forbidden ) ;
2020-04-12 22:42:14 +00:00
case TokenType : : DoubleAsterisk :
2020-04-05 12:40:00 +00:00
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < BinaryExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , BinaryOp : : Exponentiation , move ( lhs ) , parse_expression ( min_precedence , associativity , forbidden ) ) ;
2020-05-04 22:03:35 +00:00
case TokenType : : DoubleAsteriskEquals :
2022-02-15 10:11:56 +00:00
return parse_assignment_expression ( AssignmentOp : : ExponentiationAssignment , move ( lhs ) , min_precedence , associativity , forbidden ) ;
2020-03-12 12:10:27 +00:00
case TokenType : : GreaterThan :
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < BinaryExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , BinaryOp : : GreaterThan , move ( lhs ) , parse_expression ( min_precedence , associativity , forbidden ) ) ;
2020-03-12 12:07:08 +00:00
case TokenType : : GreaterThanEquals :
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < BinaryExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , BinaryOp : : GreaterThanEquals , move ( lhs ) , parse_expression ( min_precedence , associativity , forbidden ) ) ;
2020-03-12 12:10:27 +00:00
case TokenType : : LessThan :
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < BinaryExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , BinaryOp : : LessThan , move ( lhs ) , parse_expression ( min_precedence , associativity , forbidden ) ) ;
2020-03-12 12:07:08 +00:00
case TokenType : : LessThanEquals :
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < BinaryExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , BinaryOp : : LessThanEquals , move ( lhs ) , parse_expression ( min_precedence , associativity , forbidden ) ) ;
2020-03-12 12:11:33 +00:00
case TokenType : : EqualsEqualsEquals :
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < BinaryExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , BinaryOp : : StrictlyEquals , move ( lhs ) , parse_expression ( min_precedence , associativity , forbidden ) ) ;
2020-03-12 12:11:33 +00:00
case TokenType : : ExclamationMarkEqualsEquals :
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < BinaryExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , BinaryOp : : StrictlyInequals , move ( lhs ) , parse_expression ( min_precedence , associativity , forbidden ) ) ;
2020-03-15 22:23:38 +00:00
case TokenType : : EqualsEquals :
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < BinaryExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , BinaryOp : : LooselyEquals , move ( lhs ) , parse_expression ( min_precedence , associativity , forbidden ) ) ;
2020-03-15 22:23:38 +00:00
case TokenType : : ExclamationMarkEquals :
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < BinaryExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , BinaryOp : : LooselyInequals , move ( lhs ) , parse_expression ( min_precedence , associativity , forbidden ) ) ;
2020-04-23 15:06:01 +00:00
case TokenType : : In :
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < BinaryExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , BinaryOp : : In , move ( lhs ) , parse_expression ( min_precedence , associativity ) ) ;
2020-03-28 15:56:54 +00:00
case TokenType : : Instanceof :
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < BinaryExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , BinaryOp : : InstanceOf , move ( lhs ) , parse_expression ( min_precedence , associativity , forbidden ) ) ;
2020-04-03 12:02:31 +00:00
case TokenType : : Ampersand :
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < BinaryExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , BinaryOp : : BitwiseAnd , move ( lhs ) , parse_expression ( min_precedence , associativity , forbidden ) ) ;
2020-05-04 21:34:45 +00:00
case TokenType : : AmpersandEquals :
2022-02-15 10:11:56 +00:00
return parse_assignment_expression ( AssignmentOp : : BitwiseAndAssignment , move ( lhs ) , min_precedence , associativity , forbidden ) ;
2020-04-03 12:02:31 +00:00
case TokenType : : Pipe :
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < BinaryExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , BinaryOp : : BitwiseOr , move ( lhs ) , parse_expression ( min_precedence , associativity , forbidden ) ) ;
2020-05-04 21:34:45 +00:00
case TokenType : : PipeEquals :
2022-02-15 10:11:56 +00:00
return parse_assignment_expression ( AssignmentOp : : BitwiseOrAssignment , move ( lhs ) , min_precedence , associativity , forbidden ) ;
2020-04-03 12:02:31 +00:00
case TokenType : : Caret :
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < BinaryExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , BinaryOp : : BitwiseXor , move ( lhs ) , parse_expression ( min_precedence , associativity , forbidden ) ) ;
2020-05-04 21:34:45 +00:00
case TokenType : : CaretEquals :
2022-02-15 10:11:56 +00:00
return parse_assignment_expression ( AssignmentOp : : BitwiseXorAssignment , move ( lhs ) , min_precedence , associativity , forbidden ) ;
2020-04-23 12:36:14 +00:00
case TokenType : : ShiftLeft :
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < BinaryExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , BinaryOp : : LeftShift , move ( lhs ) , parse_expression ( min_precedence , associativity , forbidden ) ) ;
2020-04-23 12:36:14 +00:00
case TokenType : : ShiftLeftEquals :
2022-02-15 10:11:56 +00:00
return parse_assignment_expression ( AssignmentOp : : LeftShiftAssignment , move ( lhs ) , min_precedence , associativity , forbidden ) ;
2020-04-23 12:45:19 +00:00
case TokenType : : ShiftRight :
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < BinaryExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , BinaryOp : : RightShift , move ( lhs ) , parse_expression ( min_precedence , associativity , forbidden ) ) ;
2020-04-23 12:45:19 +00:00
case TokenType : : ShiftRightEquals :
2022-02-15 10:11:56 +00:00
return parse_assignment_expression ( AssignmentOp : : RightShiftAssignment , move ( lhs ) , min_precedence , associativity , forbidden ) ;
2020-04-23 14:43:10 +00:00
case TokenType : : UnsignedShiftRight :
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < BinaryExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , BinaryOp : : UnsignedRightShift , move ( lhs ) , parse_expression ( min_precedence , associativity , forbidden ) ) ;
2020-04-23 14:43:10 +00:00
case TokenType : : UnsignedShiftRightEquals :
2022-02-15 10:11:56 +00:00
return parse_assignment_expression ( AssignmentOp : : UnsignedRightShiftAssignment , move ( lhs ) , min_precedence , associativity , forbidden ) ;
2020-03-11 18:27:43 +00:00
case TokenType : : ParenOpen :
return parse_call_expression ( move ( lhs ) ) ;
case TokenType : : Equals :
2022-02-15 10:11:56 +00:00
return parse_assignment_expression ( AssignmentOp : : Assignment , move ( lhs ) , min_precedence , associativity , forbidden ) ;
2020-03-12 12:05:57 +00:00
case TokenType : : Period :
consume ( ) ;
2021-10-12 20:45:52 +00:00
if ( match ( TokenType : : PrivateIdentifier ) ) {
if ( ! is_private_identifier_valid ( ) )
2022-12-04 18:02:33 +00:00
syntax_error ( DeprecatedString : : formatted ( " Reference to undeclared private field or method '{}' " , m_state . current_token . value ( ) ) ) ;
2021-10-12 20:45:52 +00:00
else if ( is < SuperExpression > ( * lhs ) )
2022-12-04 18:02:33 +00:00
syntax_error ( DeprecatedString : : formatted ( " Cannot access private field or method '{}' on super " , m_state . current_token . value ( ) ) ) ;
2021-10-12 20:45:52 +00:00
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < MemberExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , move ( lhs ) , create_ast_node < PrivateIdentifier > ( { m_source_code , rule_start . position ( ) , position ( ) } , consume ( ) . value ( ) ) ) ;
2021-10-12 20:45:52 +00:00
} else if ( ! match_identifier_name ( ) ) {
2020-04-18 18:31:27 +00:00
expected ( " IdentifierName " ) ;
2021-10-12 20:45:52 +00:00
}
2023-05-27 22:08:52 +00:00
return create_ast_node < MemberExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , move ( lhs ) , create_ast_node < Identifier > ( { m_source_code , rule_start . position ( ) , position ( ) } , consume_and_allow_division ( ) . DeprecatedFlyString_value ( ) ) ) ;
2020-03-20 19:51:03 +00:00
case TokenType : : BracketOpen : {
consume ( TokenType : : BracketOpen ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
auto expression = create_ast_node < MemberExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , move ( lhs ) , parse_expression ( 0 ) , true ) ;
2020-03-20 19:51:03 +00:00
consume ( TokenType : : BracketClose ) ;
return expression ;
}
2020-03-12 11:45:45 +00:00
case TokenType : : PlusPlus :
2022-11-30 00:47:25 +00:00
if ( ! is_simple_assignment_target ( * lhs ) )
2022-12-04 18:02:33 +00:00
syntax_error ( DeprecatedString : : formatted ( " Left-hand side of postfix increment operator must be identifier or member expression, got {} " , lhs - > class_name ( ) ) ) ;
2021-07-11 23:27:35 +00:00
if ( m_state . strict_mode & & is < Identifier > ( * lhs ) ) {
2023-02-19 21:07:52 +00:00
auto & identifier = static_cast < Identifier const & > ( * lhs ) ;
2021-07-11 23:27:35 +00:00
auto & name = identifier . string ( ) ;
check_identifier_name_for_assignment_validity ( name ) ;
}
2020-03-12 11:45:45 +00:00
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < UpdateExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , UpdateOp : : Increment , move ( lhs ) ) ;
2020-03-12 11:45:45 +00:00
case TokenType : : MinusMinus :
2022-11-30 00:47:25 +00:00
if ( ! is_simple_assignment_target ( * lhs ) )
2022-12-04 18:02:33 +00:00
syntax_error ( DeprecatedString : : formatted ( " Left-hand side of postfix increment operator must be identifier or member expression, got {} " , lhs - > class_name ( ) ) ) ;
2021-07-11 23:27:35 +00:00
if ( m_state . strict_mode & & is < Identifier > ( * lhs ) ) {
2023-02-19 21:07:52 +00:00
auto & identifier = static_cast < Identifier const & > ( * lhs ) ;
2021-07-11 23:27:35 +00:00
auto & name = identifier . string ( ) ;
check_identifier_name_for_assignment_validity ( name ) ;
}
2020-03-12 11:45:45 +00:00
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < UpdateExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , UpdateOp : : Decrement , move ( lhs ) ) ;
2022-02-16 06:34:59 +00:00
case TokenType : : DoubleAmpersand : {
2020-03-15 21:35:22 +00:00
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
auto expression = create_ast_node < LogicalExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , LogicalOp : : And , move ( lhs ) , parse_expression ( min_precedence , associativity , forbidden . forbid ( { TokenType : : DoubleQuestionMark } ) ) ) ;
2022-02-16 06:34:59 +00:00
return { expression , { TokenType : : DoubleQuestionMark } } ;
}
2020-10-05 15:49:43 +00:00
case TokenType : : DoubleAmpersandEquals :
2022-02-15 10:11:56 +00:00
return parse_assignment_expression ( AssignmentOp : : AndAssignment , move ( lhs ) , min_precedence , associativity , forbidden ) ;
2022-02-16 06:34:59 +00:00
case TokenType : : DoublePipe : {
2020-03-15 21:35:22 +00:00
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
auto expression = create_ast_node < LogicalExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , LogicalOp : : Or , move ( lhs ) , parse_expression ( min_precedence , associativity , forbidden . forbid ( { TokenType : : DoubleQuestionMark } ) ) ) ;
2022-02-16 06:34:59 +00:00
return { expression , { TokenType : : DoubleQuestionMark } } ;
}
2020-10-05 15:49:43 +00:00
case TokenType : : DoublePipeEquals :
2022-02-15 10:11:56 +00:00
return parse_assignment_expression ( AssignmentOp : : OrAssignment , move ( lhs ) , min_precedence , associativity , forbidden ) ;
2022-02-16 06:34:59 +00:00
case TokenType : : DoubleQuestionMark : {
2020-04-17 23:49:11 +00:00
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
auto expression = create_ast_node < LogicalExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } , LogicalOp : : NullishCoalescing , move ( lhs ) , parse_expression ( min_precedence , associativity , forbidden . forbid ( { TokenType : : DoubleAmpersand , TokenType : : DoublePipe } ) ) ) ;
2022-02-16 06:34:59 +00:00
return { expression , { TokenType : : DoubleAmpersand , TokenType : : DoublePipe } } ;
}
2020-10-05 15:49:43 +00:00
case TokenType : : DoubleQuestionMarkEquals :
2022-02-15 10:11:56 +00:00
return parse_assignment_expression ( AssignmentOp : : NullishAssignment , move ( lhs ) , min_precedence , associativity , forbidden ) ;
2020-04-03 10:14:28 +00:00
case TokenType : : QuestionMark :
2022-02-15 10:11:56 +00:00
return parse_conditional_expression ( move ( lhs ) , forbidden ) ;
2023-06-15 09:43:48 +00:00
case TokenType : : QuestionMarkPeriod : {
auto const * lhs_expression = lhs . ptr ( ) ;
if ( is < NewExpression > ( lhs_expression ) ) {
auto const & new_expression = static_cast < NewExpression const & > ( * lhs_expression ) ;
if ( ! new_expression . is_parenthesized ( ) & & ! new_expression . is_inside_parens ( ) ) {
syntax_error ( " 'new' cannot be used with optional chaining " , position ( ) ) ;
consume ( ) ;
return lhs ;
}
2021-09-14 02:26:31 +00:00
}
return parse_optional_chain ( move ( lhs ) ) ;
2023-06-15 09:43:48 +00:00
}
2020-03-11 18:27:43 +00:00
default :
2020-10-22 22:30:07 +00:00
expected ( " secondary expression " ) ;
2020-03-11 18:27:43 +00:00
consume ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < ErrorExpression > ( { m_source_code , rule_start . position ( ) , position ( ) } ) ;
2020-03-11 18:27:43 +00:00
}
}
2021-10-12 20:45:52 +00:00
bool Parser : : is_private_identifier_valid ( ) const
{
VERIFY ( match ( TokenType : : PrivateIdentifier ) ) ;
if ( ! m_state . referenced_private_names )
return false ;
// We might not have hit the declaration yet so class will check this in the end
m_state . referenced_private_names - > set ( m_state . current_token . value ( ) ) ;
return true ;
}
2023-02-19 21:07:52 +00:00
// Re-parses the source text covered by an already parsed array or object literal as a
// binding pattern (used for destructuring assignment).
//
// - expression: must be an ArrayExpression or ObjectExpression (verified).
// - Returns whatever the nested parser's parse_binding_pattern() yields, which may be null.
//
// Side effects: syntax errors previously recorded inside the expression's range are dropped,
// and any errors produced by the nested parser are appended to this parser's error list.
RefPtr<BindingPattern const> Parser::synthesize_binding_pattern(Expression const& expression)
{
    VERIFY(is<ArrayExpression>(expression) || is<ObjectExpression>(expression));

    // Resolve the source range once and reuse it below. AST nodes only store offsets, and
    // turning them into line/column information is not necessarily fast, so avoid doing
    // it again for every field we need.
    auto const range = expression.source_range();

    // Clear any syntax error that has occurred in the range that 'expression' spans.
    m_state.errors.remove_all_matching([&range](auto const& error) {
        return error.position.has_value() && range.contains(*error.position);
    });

    // Make a parser and parse the source for this expression as a binding pattern.
    // NOTE: There's currently a fundamental problem that we pass the *next* (a.k.a. `current_token`)
    //       token's position to most nodes' SourceRange when using `rule_start.position(), position()`.
    //       This means that `source` will contain the subsequent token's trivia, if any (which is fine).
    auto source = m_state.lexer.source().substring_view(range.start.offset, range.end.offset - range.start.offset);

    Lexer lexer { source, m_state.lexer.filename(), range.start.line, range.start.column };
    Parser parser { lexer };

    // The nested parser has to see the same context as this one, so mirror the relevant state.
    parser.m_state.current_scope_pusher = m_state.current_scope_pusher;
    parser.m_state.strict_mode = m_state.strict_mode;
    parser.m_state.allow_super_property_lookup = m_state.allow_super_property_lookup;
    parser.m_state.allow_super_constructor_call = m_state.allow_super_constructor_call;
    parser.m_state.in_function_context = m_state.in_function_context;
    parser.m_state.in_formal_parameter_context = m_state.in_formal_parameter_context;
    parser.m_state.in_generator_function_context = m_state.in_generator_function_context;
    parser.m_state.await_expression_is_valid = m_state.await_expression_is_valid;
    parser.m_state.in_arrow_function_context = m_state.in_arrow_function_context;
    parser.m_state.in_break_context = m_state.in_break_context;
    parser.m_state.in_continue_context = m_state.in_continue_context;
    parser.m_state.string_legacy_octal_escape_sequence_in_scope = m_state.string_legacy_octal_escape_sequence_in_scope;
    parser.m_state.in_class_field_initializer = m_state.in_class_field_initializer;
    parser.m_state.in_class_static_init_block = m_state.in_class_static_init_block;
    parser.m_state.referenced_private_names = m_state.referenced_private_names;

    auto result = parser.parse_binding_pattern(AllowDuplicates::Yes, AllowMemberExpressions::Yes);
    if (parser.has_errors())
        m_state.errors.extend(parser.errors());
    return result;
}
2023-02-19 21:07:52 +00:00
// Parses the operator and right-hand side of an assignment whose left-hand side has
// already been parsed.
//
// - assignment_op: the operation to record on the resulting node.
// - lhs: the already parsed target expression.
// - min_precedence, associativity, forbidden: forwarded to parse_expression() for the right-hand side.
//
// The current token must be one of the assignment operator tokens (verified); it is consumed here.
// For a plain `=` with an array or object literal on the left, the literal is first re-parsed as
// a binding pattern; if that yields a pattern, the assignment targets the pattern instead.
NonnullRefPtr<AssignmentExpression const> Parser::parse_assignment_expression(AssignmentOp assignment_op, NonnullRefPtr<Expression const> lhs, int min_precedence, Associativity associativity, ForbiddenTokens forbidden)
{
    auto rule_start = push_start();
    VERIFY(match(TokenType::Equals)
        || match(TokenType::PlusEquals)
        || match(TokenType::MinusEquals)
        || match(TokenType::AsteriskEquals)
        || match(TokenType::SlashEquals)
        || match(TokenType::PercentEquals)
        || match(TokenType::DoubleAsteriskEquals)
        || match(TokenType::AmpersandEquals)
        || match(TokenType::PipeEquals)
        || match(TokenType::CaretEquals)
        || match(TokenType::ShiftLeftEquals)
        || match(TokenType::ShiftRightEquals)
        || match(TokenType::UnsignedShiftRightEquals)
        || match(TokenType::DoubleAmpersandEquals)
        || match(TokenType::DoublePipeEquals)
        || match(TokenType::DoubleQuestionMarkEquals));
    consume();

    // Destructuring assignment: `[a, b] = ...` / `({ a, b } = ...)`.
    if (assignment_op == AssignmentOp::Assignment && (is<ArrayExpression>(*lhs) || is<ObjectExpression>(*lhs))) {
        if (auto binding_pattern = synthesize_binding_pattern(*lhs); binding_pattern) {
            // NOTE(review): unlike the general path below, `forbidden` is not forwarded here.
            auto rhs = parse_expression(min_precedence, associativity);
            return create_ast_node<AssignmentExpression>(
                { m_source_code, rule_start.position(), position() },
                assignment_op,
                binding_pattern.release_nonnull(),
                move(rhs));
        }
    }

    // Note: The web reality is that all but &&=, ||= and ??= do allow left hand side CallExpressions.
    //       These are the exception as they are newer.
    bool const is_logical_assignment = assignment_op == AssignmentOp::AndAssignment
        || assignment_op == AssignmentOp::OrAssignment
        || assignment_op == AssignmentOp::NullishAssignment;
    bool const has_web_reality_assignment_target_exceptions = !is_logical_assignment;

    if (!is_simple_assignment_target(*lhs, has_web_reality_assignment_target_exceptions)) {
        syntax_error("Invalid left-hand side in assignment");
    } else if (m_state.strict_mode && is<Identifier>(*lhs)) {
        // In strict mode the target identifier's name is checked separately.
        auto const& target_name = static_cast<Identifier const&>(*lhs).string();
        check_identifier_name_for_assignment_validity(target_name);
    }

    auto rhs = parse_expression(min_precedence, associativity, forbidden);
    return create_ast_node<AssignmentExpression>(
        { m_source_code, rule_start.position(), position() },
        assignment_op,
        move(lhs),
        move(rhs));
}
2023-02-19 21:07:52 +00:00
// Consumes an identifier token and returns the Identifier node created (and registered in the
// current scope) by create_identifier_and_register_in_current_scope().
// Reports a syntax error when the name is `arguments` inside a class field initializer.
NonnullRefPtr<Identifier const> Parser::parse_identifier()
{
    // Remember where the identifier begins before consuming it.
    auto start_position = position();
    auto identifier_token = consume_identifier();

    bool const is_arguments = identifier_token.value() == "arguments"sv;
    if (m_state.in_class_field_initializer && is_arguments)
        syntax_error("'arguments' is not allowed in class field initializer");

    return create_identifier_and_register_in_current_scope(
        { m_source_code, start_position, position() },
        identifier_token.DeprecatedFlyString_value());
}
2021-09-14 02:26:31 +00:00
// Parses a parenthesized, comma-separated argument list: `( arg, ...spread, arg )`.
// Each entry is stored together with a flag telling whether it was preceded by `...`.
// A comma directly before the closing parenthesis is accepted.
Vector<CallExpression::Argument> Parser::parse_arguments()
{
    Vector<CallExpression::Argument> arguments;

    consume(TokenType::ParenOpen);
    for (;;) {
        // Stop as soon as the next token can start neither an expression nor a spread.
        if (!match_expression() && !match(TokenType::TripleDot))
            break;

        bool const is_spread = match(TokenType::TripleDot);
        if (is_spread)
            consume();

        // NOTE(review): the precedence of 2 presumably keeps the comma operator from being
        //               swallowed into a single argument; confirm against the operator table.
        arguments.append({ parse_expression(2), is_spread });

        if (!match(TokenType::Comma))
            break;
        consume();
    }
    consume(TokenType::ParenClose);

    return arguments;
}
2023-02-19 21:07:52 +00:00
// Parses the argument list of a call whose callee (`lhs`) has already been parsed.
// Produces a SuperCall node when the callee is `super`, otherwise a CallExpression.
// Reports a syntax error for a `super` callee when super constructor calls are not allowed here.
NonnullRefPtr<Expression const> Parser::parse_call_expression(NonnullRefPtr<Expression const> lhs)
{
    auto rule_start = push_start();

    bool const callee_is_super = is<SuperExpression>(*lhs);
    if (callee_is_super && !m_state.allow_super_constructor_call)
        syntax_error("'super' keyword unexpected here");

    auto arguments = parse_arguments();

    if (!callee_is_super) {
        return CallExpression::create(
            { m_source_code, rule_start.position(), position() },
            move(lhs),
            arguments.span(),
            InvocationStyleEnum::Parenthesized,
            InsideParenthesesEnum::NotInsideParentheses);
    }

    return create_ast_node<SuperCall>({ m_source_code, rule_start.position(), position() }, move(arguments));
}
2023-02-19 21:07:52 +00:00
// Parses 'new' <callee> [ '(' arguments ')' ].
// The callee is parsed with the precedence of 'new' (right-associative), with '(' and '?.'
// passed as forbidden tokens so that a following argument list is not consumed as part
// of the callee. 'new import(...)' is reported as a syntax error. The argument list is
// optional; whether it was present is recorded as the node's invocation style.
NonnullRefPtr<NewExpression const> Parser::parse_new_expression()
{
    auto rule_start = push_start();
    consume(TokenType::New);

    auto callee = parse_expression(g_operator_precedence.get(TokenType::New), Associativity::Right, { TokenType::ParenOpen, TokenType::QuestionMarkPeriod });
    if (is<ImportCall>(*callee))
        syntax_error("Cannot call new on dynamic import", callee->source_range().start);

    Vector<CallExpression::Argument> arguments;

    // Share the argument-list grammar with call expressions instead of duplicating the loop here.
    auto is_parenthesized = match(TokenType::ParenOpen);
    if (is_parenthesized)
        arguments = parse_arguments();

    InvocationStyleEnum invocation_style = is_parenthesized ? InvocationStyleEnum::Parenthesized : InvocationStyleEnum::NotParenthesized;
    return NewExpression::create({ m_source_code, rule_start.position(), position() }, move(callee), move(arguments), invocation_style, InsideParenthesesEnum::NotInsideParentheses);
}
2023-02-19 21:07:52 +00:00
// Parses 'yield', 'yield <expr>' or 'yield* <expr>'.
// An operand (and the '*' marker) is only looked for when no line terminator follows
// the 'yield' keyword. Using 'yield' inside formal parameters is reported as a syntax error.
NonnullRefPtr<YieldExpression const> Parser::parse_yield_expression()
{
    auto rule_start = push_start();

    if (m_state.in_formal_parameter_context)
        syntax_error("'Yield' expression is not allowed in formal parameters of generator function");

    consume(TokenType::Yield);

    RefPtr<Expression const> operand;
    bool is_delegating = false;

    bool const operand_may_follow = !m_state.current_token.trivia_contains_line_terminator();
    if (operand_may_follow) {
        is_delegating = match(TokenType::Asterisk);
        if (is_delegating)
            consume();

        // 'yield*' always takes an operand; plain 'yield' only if an expression (or class) starts here.
        bool const has_operand = is_delegating || match_expression() || match(TokenType::Class);
        if (has_operand)
            operand = parse_expression(2);
    }

    return create_ast_node<YieldExpression>({ m_source_code, rule_start.position(), position() }, move(operand), is_delegating);
}
2023-02-19 21:07:52 +00:00
// Parses 'await <expr>', where the operand is parsed with the precedence and
// associativity of the 'await' operator. Marks the current scope as containing an
// await expression. Using 'await' inside formal parameters is reported as a syntax error.
NonnullRefPtr<AwaitExpression const> Parser::parse_await_expression()
{
    auto rule_start = push_start();

    if (m_state.in_formal_parameter_context)
        syntax_error("'Await' expression is not allowed in formal parameters of an async function");

    consume(TokenType::Await);

    auto await_precedence = g_operator_precedence.get(TokenType::Await);
    auto await_associativity = operator_associativity(TokenType::Await);
    auto operand = parse_expression(await_precedence, await_associativity);

    m_state.current_scope_pusher->set_contains_await_expression();

    return create_ast_node<AwaitExpression>({ m_source_code, rule_start.position(), position() }, move(operand));
}
2023-02-19 21:07:52 +00:00
// Parses 'return [expr] [;]'.
// A 'return' outside of a function or arrow function is reported as a syntax error.
// The resulting node carries a null argument when no expression is present.
NonnullRefPtr<ReturnStatement const> Parser::parse_return_statement()
{
    auto rule_start = push_start();

    if (!(m_state.in_function_context || m_state.in_arrow_function_context))
        syntax_error("'return' not allowed outside of a function");

    consume(TokenType::Return);

    RefPtr<Expression const> argument;

    // Automatic semicolon insertion: a newline directly after 'return' terminates the
    // statement, so neither an expression nor a semicolon is consumed in that case.
    bool const terminated_by_newline = m_state.current_token.trivia_contains_line_terminator();
    if (!terminated_by_newline) {
        if (match_expression())
            argument = parse_expression(0);
        consume_or_insert_semicolon();
    }

    return create_ast_node<ReturnStatement>({ m_source_code, rule_start.position(), position() }, move(argument));
}
2021-09-22 10:44:56 +00:00
void Parser : : parse_statement_list ( ScopeNode & output_node , AllowLabelledFunction allow_labelled_functions )
2020-10-04 00:02:43 +00:00
{
2021-09-22 10:44:56 +00:00
while ( ! done ( ) ) {
2022-12-20 21:09:57 +00:00
if ( match_declaration ( AllowUsingDeclaration : : Yes ) ) {
2021-09-22 10:44:56 +00:00
auto declaration = parse_declaration ( ) ;
VERIFY ( m_state . current_scope_pusher ) ;
m_state . current_scope_pusher - > add_declaration ( declaration ) ;
output_node . append ( move ( declaration ) ) ;
} else if ( match_statement ( ) ) {
output_node . append ( parse_statement ( allow_labelled_functions ) ) ;
} else {
break ;
}
}
2022-11-26 19:45:06 +00:00
output_node . shrink_to_fit ( ) ;
2020-10-04 00:02:43 +00:00
}
2021-09-22 10:44:56 +00:00
// FunctionBody, https://tc39.es/ecma262/#prod-FunctionBody
2023-02-19 21:07:52 +00:00
NonnullRefPtr < FunctionBody const > Parser : : parse_function_body ( Vector < FunctionParameter > const & parameters , FunctionKind function_kind , bool & contains_direct_call_to_eval )
2020-03-11 18:27:43 +00:00
{
2020-12-28 17:15:22 +00:00
auto rule_start = push_start ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
auto function_body = create_ast_node < FunctionBody > ( { m_source_code , rule_start . position ( ) , position ( ) } ) ;
2023-07-07 21:14:03 +00:00
VERIFY ( m_state . current_scope_pusher - > type ( ) = = ScopePusher : : ScopeType : : Function ) ;
m_state . current_scope_pusher - > set_scope_node ( function_body ) ;
m_state . current_scope_pusher - > set_function_parameters ( parameters ) ;
2021-09-22 10:44:56 +00:00
auto has_use_strict = parse_directive ( function_body ) ;
bool previous_strict_mode = m_state . strict_mode ;
if ( has_use_strict ) {
m_state . strict_mode = true ;
function_body - > set_strict_mode ( ) ;
if ( ! is_simple_parameter_list ( parameters ) )
syntax_error ( " Illegal 'use strict' directive in function with non-simple parameter list " ) ;
} else if ( previous_strict_mode ) {
function_body - > set_strict_mode ( ) ;
}
2020-05-28 05:22:08 +00:00
2021-09-22 10:44:56 +00:00
parse_statement_list ( function_body ) ;
2020-05-28 05:22:08 +00:00
2022-01-15 16:07:51 +00:00
// If we're parsing the function body standalone, e.g. via CreateDynamicFunction, we must have reached EOF here.
// Otherwise, we need a closing curly bracket (which is consumed elsewhere). If we get neither, it's an error.
if ( ! match ( TokenType : : Eof ) & & ! match ( TokenType : : CurlyClose ) )
expected ( Token : : name ( TokenType : : CurlyClose ) ) ;
2021-09-22 10:44:56 +00:00
// If the function contains 'use strict' we need to check the parameters (again).
2022-01-14 23:30:02 +00:00
if ( function_body - > in_strict_mode ( ) | | function_kind ! = FunctionKind : : Normal ) {
2021-09-22 10:44:56 +00:00
Vector < StringView > parameter_names ;
for ( auto & parameter : parameters ) {
parameter . binding . visit (
2023-07-06 15:49:38 +00:00
[ & ] ( Identifier const & identifier ) {
auto const & parameter_name = identifier . string ( ) ;
2021-09-22 10:44:56 +00:00
check_identifier_name_for_assignment_validity ( parameter_name , function_body - > in_strict_mode ( ) ) ;
if ( function_kind = = FunctionKind : : Generator & & parameter_name = = " yield " sv )
syntax_error ( " Parameter name 'yield' not allowed in this context " ) ;
2021-11-09 18:39:22 +00:00
if ( function_kind = = FunctionKind : : Async & & parameter_name = = " await " sv )
syntax_error ( " Parameter name 'await' not allowed in this context " ) ;
2021-09-22 10:44:56 +00:00
for ( auto & previous_name : parameter_names ) {
if ( previous_name = = parameter_name ) {
2022-12-04 18:02:33 +00:00
syntax_error ( DeprecatedString : : formatted ( " Duplicate parameter '{}' not allowed in strict mode " , parameter_name ) ) ;
2021-09-22 10:44:56 +00:00
}
2021-07-11 23:25:32 +00:00
}
2021-09-22 10:44:56 +00:00
parameter_names . append ( parameter_name ) ;
} ,
2023-02-19 21:07:52 +00:00
[ & ] ( NonnullRefPtr < BindingPattern const > const & binding ) {
2023-02-27 22:13:37 +00:00
// NOTE: Nothing in the callback throws an exception.
2023-07-20 14:40:14 +00:00
MUST ( binding - > for_each_bound_identifier ( [ & ] ( auto & bound_identifier ) {
auto const & bound_name = bound_identifier . string ( ) ;
2021-09-22 10:44:56 +00:00
if ( function_kind = = FunctionKind : : Generator & & bound_name = = " yield " sv )
syntax_error ( " Parameter name 'yield' not allowed in this context " ) ;
2021-07-11 23:25:32 +00:00
2021-11-09 18:39:22 +00:00
if ( function_kind = = FunctionKind : : Async & & bound_name = = " await " sv )
syntax_error ( " Parameter name 'await' not allowed in this context " ) ;
2021-09-22 10:44:56 +00:00
for ( auto & previous_name : parameter_names ) {
if ( previous_name = = bound_name ) {
2022-12-04 18:02:33 +00:00
syntax_error ( DeprecatedString : : formatted ( " Duplicate parameter '{}' not allowed in strict mode " , bound_name ) ) ;
2021-09-22 10:44:56 +00:00
break ;
}
}
parameter_names . append ( bound_name ) ;
2023-02-27 22:13:37 +00:00
} ) ) ;
2021-09-22 10:44:56 +00:00
} ) ;
2020-03-11 18:27:43 +00:00
}
}
2021-09-22 10:44:56 +00:00
m_state . strict_mode = previous_strict_mode ;
2023-07-07 21:14:03 +00:00
VERIFY ( m_state . current_scope_pusher - > type ( ) = = ScopePusher : : ScopeType : : Function ) ;
contains_direct_call_to_eval = m_state . current_scope_pusher - > contains_direct_call_to_eval ( ) ;
2021-09-22 10:44:56 +00:00
return function_body ;
}
2023-02-19 21:07:52 +00:00
// Parses '{' statement-list '}' into a BlockStatement, with a block scope pushed for
// the duration of the statement list.
// NOTE: The node is created before any token of the block is consumed, so the source
//       range passed to it starts and ends at the position of the opening bracket.
NonnullRefPtr<BlockStatement const> Parser::parse_block_statement()
{
    auto rule_start = push_start();
    auto block_node = create_ast_node<BlockStatement>({ m_source_code, rule_start.position(), position() });
    auto scope_for_block = ScopePusher::block_scope(*this, block_node);

    consume(TokenType::CurlyOpen);
    parse_statement_list(block_node);
    consume(TokenType::CurlyClose);

    return block_node;
}
2020-03-19 10:52:56 +00:00
template < typename FunctionNodeType >
2022-09-01 22:46:37 +00:00
NonnullRefPtr < FunctionNodeType > Parser : : parse_function_node ( u16 parse_options , Optional < Position > const & function_start )
2020-03-11 18:27:43 +00:00
{
2022-01-18 23:46:16 +00:00
auto rule_start = function_start . has_value ( )
? RulePosition { * this , * function_start }
: push_start ( ) ;
2021-02-23 19:42:32 +00:00
VERIFY ( ! ( parse_options & FunctionNodeParseOptions : : IsGetterFunction & & parse_options & FunctionNodeParseOptions : : IsSetterFunction ) ) ;
2020-10-20 17:32:51 +00:00
2021-06-19 12:43:09 +00:00
TemporaryChange super_property_access_rollback ( m_state . allow_super_property_lookup , ! ! ( parse_options & FunctionNodeParseOptions : : AllowSuperPropertyLookup ) ) ;
TemporaryChange super_constructor_call_rollback ( m_state . allow_super_constructor_call , ! ! ( parse_options & FunctionNodeParseOptions : : AllowSuperConstructorCall ) ) ;
2021-07-25 09:51:33 +00:00
TemporaryChange break_context_rollback ( m_state . in_break_context , false ) ;
TemporaryChange continue_context_rollback ( m_state . in_continue_context , false ) ;
2021-08-28 15:11:05 +00:00
TemporaryChange class_field_initializer_rollback ( m_state . in_class_field_initializer , false ) ;
LibJS: Add an optimization to avoid needless arguments object creation
This gives FunctionNode a "might need arguments object" boolean flag and
sets it based on the simplest possible heuristic for this: if we
encounter an identifier called "arguments" or "eval" up to the next
(nested) function declaration or expression, we won't need an arguments
object. Otherwise, we *might* need one - the final decision is made in
the FunctionDeclarationInstantiation AO.
Now, this is obviously not perfect. Even if you avoid eval, something
like `foo.arguments` will still trigger a false positive - but it's a
start and already massively cuts down on needlessly allocated objects,
especially in real-world code that is often minified, and so a full
"arguments" identifier will be an actual arguments object more often
than not.
To illustrate the actual impact of this change, here's the number of
allocated arguments objects during a full test-js run:
Before:
- Unmapped arguments objects: 78765
- Mapped arguments objects: 2455
After:
- Unmapped arguments objects: 18
- Mapped arguments objects: 37
This results in a ~5% speedup of test-js on my Linux host machine, and
about 3.5% on i686 Serenity in QEMU (warm runs, average of 5).
The following microbenchmark (calling an empty function 1M times) runs
25% faster on Linux and 45% on Serenity:
function foo() {}
for (var i = 0; i < 1_000_000; ++i)
foo();
test262 reports no changes in either direction, apart from a speedup :^)
2021-10-05 07:44:58 +00:00
TemporaryChange might_need_arguments_object_rollback ( m_state . function_might_need_arguments_object , false ) ;
2020-06-08 18:31:21 +00:00
2021-07-02 10:41:11 +00:00
constexpr auto is_function_expression = IsSame < FunctionNodeType , FunctionExpression > ;
2021-11-09 18:39:22 +00:00
FunctionKind function_kind ;
if ( ( parse_options & FunctionNodeParseOptions : : IsGeneratorFunction ) ! = 0 & & ( parse_options & FunctionNodeParseOptions : : IsAsyncFunction ) ! = 0 )
2021-11-15 00:53:24 +00:00
function_kind = FunctionKind : : AsyncGenerator ;
2021-11-09 18:39:22 +00:00
else if ( ( parse_options & FunctionNodeParseOptions : : IsGeneratorFunction ) ! = 0 )
function_kind = FunctionKind : : Generator ;
else if ( ( parse_options & FunctionNodeParseOptions : : IsAsyncFunction ) ! = 0 )
function_kind = FunctionKind : : Async ;
else
2022-01-14 23:30:02 +00:00
function_kind = FunctionKind : : Normal ;
2023-07-04 22:14:41 +00:00
RefPtr < Identifier const > name ;
2020-10-20 16:56:49 +00:00
if ( parse_options & FunctionNodeParseOptions : : CheckForFunctionAndName ) {
2022-01-14 23:30:02 +00:00
if ( function_kind = = FunctionKind : : Normal & & match ( TokenType : : Async ) & & ! next_token ( ) . trivia_contains_line_terminator ( ) ) {
2021-11-09 18:39:22 +00:00
function_kind = FunctionKind : : Async ;
consume ( TokenType : : Async ) ;
2022-01-15 16:26:06 +00:00
parse_options | = FunctionNodeParseOptions : : IsAsyncFunction ;
2021-11-09 18:39:22 +00:00
}
2020-10-20 16:56:49 +00:00
consume ( TokenType : : Function ) ;
2021-11-15 00:53:24 +00:00
if ( match ( TokenType : : Asterisk ) ) {
2022-01-14 23:30:02 +00:00
function_kind = function_kind = = FunctionKind : : Normal ? FunctionKind : : Generator : FunctionKind : : AsyncGenerator ;
2021-11-09 18:39:22 +00:00
consume ( TokenType : : Asterisk ) ;
2022-01-15 16:26:06 +00:00
parse_options | = FunctionNodeParseOptions : : IsGeneratorFunction ;
2021-06-14 10:22:59 +00:00
}
2021-06-10 21:08:30 +00:00
2023-07-04 22:14:41 +00:00
if ( parse_options & FunctionNodeParseOptions : : HasDefaultExportName ) {
2023-07-08 17:31:41 +00:00
name = create_identifier_and_register_in_current_scope (
2023-07-04 22:14:41 +00:00
{ m_source_code , rule_start . position ( ) , position ( ) } ,
ExportStatement : : local_name_for_default ) ;
} else if ( FunctionNodeType : : must_have_name ( ) | | match_identifier ( ) ) {
2023-07-08 17:31:41 +00:00
name = create_identifier_and_register_in_current_scope (
2023-07-04 22:14:41 +00:00
{ m_source_code , rule_start . position ( ) , position ( ) } ,
consume_identifier ( ) . DeprecatedFlyString_value ( ) ) ;
} else if ( is_function_expression & & ( match ( TokenType : : Yield ) | | match ( TokenType : : Await ) ) ) {
2023-07-08 17:31:41 +00:00
name = create_identifier_and_register_in_current_scope (
2023-07-04 22:14:41 +00:00
{ m_source_code , rule_start . position ( ) , position ( ) } ,
consume ( ) . DeprecatedFlyString_value ( ) ) ;
}
2021-07-11 23:27:35 +00:00
2023-07-04 22:14:41 +00:00
if ( name ) {
check_identifier_name_for_assignment_validity ( name - > string ( ) ) ;
2021-11-26 22:29:05 +00:00
2023-07-04 22:14:41 +00:00
if ( function_kind = = FunctionKind : : AsyncGenerator & & ( name - > string ( ) = = " await " sv | | name - > string ( ) = = " yield " sv ) )
syntax_error ( DeprecatedString : : formatted ( " async generator function is not allowed to be called '{}' " , name - > string ( ) ) ) ;
2021-12-19 19:19:15 +00:00
2023-07-04 22:14:41 +00:00
if ( m_state . in_class_static_init_block & & name - > string ( ) = = " await " sv )
syntax_error ( " 'await' is a reserved word " ) ;
}
2020-03-19 10:52:56 +00:00
}
2021-11-26 22:29:05 +00:00
TemporaryChange class_static_initializer_rollback ( m_state . in_class_static_init_block , false ) ;
2021-11-15 00:53:24 +00:00
TemporaryChange generator_change ( m_state . in_generator_function_context , function_kind = = FunctionKind : : Generator | | function_kind = = FunctionKind : : AsyncGenerator ) ;
2021-11-26 22:50:32 +00:00
TemporaryChange async_change ( m_state . await_expression_is_valid , function_kind = = FunctionKind : : Async | | function_kind = = FunctionKind : : AsyncGenerator ) ;
2021-07-28 23:45:55 +00:00
2020-05-06 03:02:14 +00:00
i32 function_length = - 1 ;
2023-07-07 21:14:03 +00:00
Vector < FunctionParameter > parameters ;
bool contains_direct_call_to_eval = false ;
auto body = [ & ] {
2023-07-12 02:02:27 +00:00
ScopePusher function_scope = ScopePusher : : function_scope ( * this , name ) ;
2020-10-18 23:26:41 +00:00
2023-07-07 21:14:03 +00:00
consume ( TokenType : : ParenOpen ) ;
parameters = parse_formal_parameters ( function_length , parse_options ) ;
consume ( TokenType : : ParenClose ) ;
2020-10-18 23:26:41 +00:00
2023-07-07 21:14:03 +00:00
if ( function_length = = - 1 )
function_length = parameters . size ( ) ;
2021-07-28 23:45:55 +00:00
2023-07-07 21:14:03 +00:00
TemporaryChange function_context_rollback ( m_state . in_function_context , true ) ;
auto old_labels_in_scope = move ( m_state . labels_in_scope ) ;
ScopeGuard guard ( [ & ] ( ) {
m_state . labels_in_scope = move ( old_labels_in_scope ) ;
} ) ;
consume ( TokenType : : CurlyOpen ) ;
auto body = parse_function_body ( parameters , function_kind , contains_direct_call_to_eval ) ;
return body ;
} ( ) ;
2020-10-18 23:26:41 +00:00
2023-07-04 22:14:41 +00:00
auto local_variables_names = body - > local_variables_names ( ) ;
2022-01-15 16:07:51 +00:00
consume ( TokenType : : CurlyClose ) ;
2021-07-28 23:45:55 +00:00
2021-09-22 10:44:56 +00:00
auto has_strict_directive = body - > in_strict_mode ( ) ;
2021-07-11 23:29:07 +00:00
2023-07-04 22:14:41 +00:00
if ( has_strict_directive & & name )
check_identifier_name_for_assignment_validity ( name - > string ( ) , true ) ;
2021-07-04 01:15:52 +00:00
2022-01-18 23:46:16 +00:00
auto function_start_offset = rule_start . position ( ) . offset ;
auto function_end_offset = position ( ) . offset - m_state . current_token . trivia ( ) . length ( ) ;
2022-12-04 18:02:33 +00:00
auto source_text = DeprecatedString { m_state . lexer . source ( ) . substring_view ( function_start_offset , function_end_offset - function_start_offset ) } ;
2021-06-10 23:08:05 +00:00
return create_ast_node < FunctionNodeType > (
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
{ m_source_code , rule_start . position ( ) , position ( ) } ,
2022-01-18 23:46:16 +00:00
name , move ( source_text ) , move ( body ) , move ( parameters ) , function_length ,
2021-10-08 10:43:38 +00:00
function_kind , has_strict_directive , m_state . function_might_need_arguments_object ,
2023-07-04 22:14:41 +00:00
contains_direct_call_to_eval ,
move ( local_variables_names ) ) ;
2020-10-18 23:26:41 +00:00
}
2022-11-23 12:12:36 +00:00
// Parses a comma-separated formal parameter list. The closing parenthesis is not consumed here.
//
// function_length: written with the number of parameters that precede the first parameter which has a
//                  default initializer or is a rest parameter. It is left untouched if there is no such
//                  parameter, so the caller's initial value survives in that case.
// parse_options:   bitmask of FunctionNodeParseOptions; the arrow function, getter, setter and generator
//                  flags are consulted in this function.
//
// Problems are reported through syntax_error() / expected(); a parameter vector is returned regardless.
Vector<FunctionParameter> Parser::parse_formal_parameters(int& function_length, u16 parse_options)
{
    auto rule_start = push_start();
    bool has_default_parameter = false;
    bool has_rest_parameter = false;
    TemporaryChange formal_parameter_context_change { m_state.in_formal_parameter_context, true };

    Vector<FunctionParameter> parameters;

    // NOTE: Only the first default initializer or rest parameter determines the function's length,
    //       e.g. `(a = 1, b = 2)` has length 0 and `(a, b = 1, ...c)` has length 1.
    //       This must be called before the corresponding has_*_parameter flag is set.
    auto record_function_length_once = [&] {
        if (!has_default_parameter && !has_rest_parameter)
            function_length = parameters.size();
    };

    auto consume_identifier_or_binding_pattern = [&]() -> Variant<NonnullRefPtr<Identifier const>, NonnullRefPtr<BindingPattern const>> {
        if (auto pattern = parse_binding_pattern(AllowDuplicates::No, AllowMemberExpressions::No))
            return pattern.release_nonnull();

        auto token = consume_identifier();
        auto parameter_name = token.DeprecatedFlyString_value();

        check_identifier_name_for_assignment_validity(parameter_name);

        // Look for an earlier parameter binding the same name; only the first clash is reported.
        for (auto& parameter : parameters) {
            bool has_same_name = parameter.binding.visit(
                [&](Identifier const& identifier) {
                    return identifier.string() == parameter_name;
                },
                [&](NonnullRefPtr<BindingPattern const> const& bindings) {
                    bool found_duplicate = false;
                    // NOTE: Nothing in the callback throws an exception.
                    MUST(bindings->for_each_bound_identifier([&](auto& bound_identifier) {
                        if (bound_identifier.string() == parameter_name)
                            found_duplicate = true;
                    }));
                    return found_duplicate;
                });

            if (!has_same_name)
                continue;

            // A duplicate is only an error in the contexts below; otherwise the message stays empty.
            DeprecatedString message;
            if (parse_options & FunctionNodeParseOptions::IsArrowFunction)
                message = DeprecatedString::formatted("Duplicate parameter '{}' not allowed in arrow function", parameter_name);
            else if (m_state.strict_mode)
                message = DeprecatedString::formatted("Duplicate parameter '{}' not allowed in strict mode", parameter_name);
            else if (has_default_parameter || match(TokenType::Equals))
                message = DeprecatedString::formatted("Duplicate parameter '{}' not allowed in function with default parameter", parameter_name);
            else if (has_rest_parameter)
                message = DeprecatedString::formatted("Duplicate parameter '{}' not allowed in function with rest parameter", parameter_name);
            if (!message.is_empty())
                syntax_error(message, Position { token.line_number(), token.line_column() });
            break;
        }
        return create_ast_node<Identifier const>({ m_source_code, rule_start.position(), position() }, parameter_name);
    };

    while (match(TokenType::CurlyOpen) || match(TokenType::BracketOpen) || match_identifier() || match(TokenType::TripleDot)) {
        if (parse_options & FunctionNodeParseOptions::IsGetterFunction)
            syntax_error("Getter function must have no arguments");
        if (parse_options & FunctionNodeParseOptions::IsSetterFunction && (parameters.size() >= 1 || match(TokenType::TripleDot)))
            syntax_error("Setter function must have one argument");

        auto is_rest = false;
        if (match(TokenType::TripleDot)) {
            consume();
            record_function_length_once();
            has_rest_parameter = true;
            is_rest = true;
        }

        auto parameter = consume_identifier_or_binding_pattern();

        RefPtr<Expression const> default_value;
        if (match(TokenType::Equals)) {
            consume();

            if (is_rest)
                syntax_error("Rest parameter may not have a default initializer");

            TemporaryChange change(m_state.in_function_context, true);
            record_function_length_once();
            has_default_parameter = true;
            default_value = parse_expression(2);

            bool is_generator = parse_options & FunctionNodeParseOptions::IsGeneratorFunction;
            if ((is_generator || m_state.strict_mode) && default_value && default_value->fast_is<Identifier>() && static_cast<Identifier const&>(*default_value).string() == "yield"sv)
                syntax_error("Generator function parameter initializer cannot contain a reference to an identifier named \"yield\"");
        }

        parameters.append({ move(parameter), default_value, is_rest });

        // A rest parameter is always the last one, even if a comma follows it.
        if (!match(TokenType::Comma) || is_rest)
            break;
        consume(TokenType::Comma);
    }

    if (parse_options & FunctionNodeParseOptions::IsSetterFunction && parameters.is_empty())
        syntax_error("Setter function must have one argument");

    // If we're parsing the parameters standalone, e.g. via CreateDynamicFunction, we must have reached EOF here.
    // Otherwise, we need a closing parenthesis (which is consumed elsewhere). If we get neither, it's an error.
    if (!match(TokenType::Eof) && !match(TokenType::ParenClose))
        expected(Token::name(TokenType::ParenClose));

    parameters.shrink_to_fit();
    return parameters;
}
2023-01-09 00:23:00 +00:00
// Table of 36 reserved words, listed in alphabetical order.
static AK::Array<DeprecatedFlyString, 36> s_reserved_words = {
    "break", "case", "catch", "class", "const", "continue",
    "debugger", "default", "delete", "do", "else", "enum",
    "export", "extends", "false", "finally", "for", "function",
    "if", "import", "in", "instanceof", "new", "null",
    "return", "super", "switch", "this", "throw", "true",
    "try", "typeof", "var", "void", "while", "with"
};
2021-07-28 23:49:25 +00:00
2023-02-19 21:07:52 +00:00
// Parses an object (`{ ... }`) or array (`[ ... ]`) binding pattern starting at the current token.
//
// allow_duplicates:         with AllowDuplicates::No, a name bound twice inside the pattern is a syntax error.
// allow_member_expressions: with AllowMemberExpressions::Yes, targets are parsed as expressions, so member
//                           expressions and nested array/object expressions are accepted as targets.
//
// Returns a null RefPtr if the current token opens neither kind of pattern (nothing is consumed in that
// case), and also on several unrecoverable errors after they have been reported.
RefPtr<BindingPattern const> Parser::parse_binding_pattern(Parser::AllowDuplicates allow_duplicates, Parser::AllowMemberExpressions allow_member_expressions)
{
    auto rule_start = push_start();

    using BindingAlias = decltype(BindingPattern::BindingEntry::alias);

    TokenType closing_token;
    bool is_object = true;

    if (match(TokenType::BracketOpen)) {
        consume();
        closing_token = TokenType::BracketClose;
        is_object = false;
    } else if (match(TokenType::CurlyOpen)) {
        consume();
        closing_token = TokenType::CurlyClose;
    } else {
        return {};
    }

    // Parses an expression and stores it in `alias` if it is a valid target; reports a syntax error and
    // leaves `alias` untouched otherwise. Only used when member expressions are allowed.
    auto parse_assignment_target_into = [&](BindingAlias& alias) {
        auto expression_position = position();
        auto expression = parse_expression(2, Associativity::Right, { TokenType::Equals });
        if (is<ArrayExpression>(*expression) || is<ObjectExpression>(*expression)) {
            if (auto synthesized_binding_pattern = synthesize_binding_pattern(*expression))
                alias = synthesized_binding_pattern.release_nonnull();
            else
                syntax_error("Invalid destructuring assignment target", expression_position);
        } else if (is<MemberExpression>(*expression)) {
            alias = static_ptr_cast<MemberExpression const>(expression);
        } else if (is<Identifier>(*expression)) {
            alias = static_ptr_cast<Identifier const>(expression);
        } else {
            syntax_error("Invalid destructuring assignment target", expression_position);
        }
    };

    Vector<BindingPattern::BindingEntry> entries;

    while (!match(closing_token)) {
        // An elision in an array pattern, e.g. the hole in `[a, , b]`.
        if (!is_object && match(TokenType::Comma)) {
            consume();
            entries.append(BindingPattern::BindingEntry {});
            continue;
        }

        auto is_rest = false;

        if (match(TokenType::TripleDot)) {
            consume();
            is_rest = true;
        }

        decltype(BindingPattern::BindingEntry::name) name = Empty {};
        BindingAlias alias = Empty {};
        RefPtr<Expression const> initializer = {};

        if (is_object) {
            bool needs_alias = false;
            if (allow_member_expressions == AllowMemberExpressions::Yes && is_rest) {
                auto expression_position = position();
                auto expression = parse_expression(2, Associativity::Right, { TokenType::Equals });
                if (is<MemberExpression>(*expression))
                    alias = static_ptr_cast<MemberExpression const>(expression);
                else if (is<Identifier>(*expression))
                    name = static_ptr_cast<Identifier const>(expression);
                else
                    syntax_error("Invalid destructuring assignment target", expression_position);
            } else if (match_identifier_name() || match(TokenType::StringLiteral) || match(TokenType::NumericLiteral) || match(TokenType::BigIntLiteral)) {
                // A literal property name cannot itself be bound, so it must be followed by `: target`.
                if (match(TokenType::StringLiteral) || match(TokenType::NumericLiteral) || match(TokenType::BigIntLiteral))
                    needs_alias = true;

                if (match(TokenType::StringLiteral)) {
                    auto token = consume(TokenType::StringLiteral);
                    auto string_literal = parse_string_literal(token);

                    name = create_identifier_and_register_in_current_scope({ m_source_code, rule_start.position(), position() }, string_literal->value());
                } else if (match(TokenType::BigIntLiteral)) {
                    auto string_value = consume().DeprecatedFlyString_value();
                    VERIFY(string_value.ends_with("n"sv));
                    // Strip the trailing 'n' to get the property key.
                    name = create_identifier_and_register_in_current_scope({ m_source_code, rule_start.position(), position() }, DeprecatedFlyString(string_value.view().substring_view(0, string_value.length() - 1)));
                } else {
                    // NOTE: Consume first; the order in which sibling function arguments are evaluated is
                    //       unspecified, and position() must be read after the token has been consumed.
                    auto property_name = consume().DeprecatedFlyString_value();
                    name = create_identifier_and_register_in_current_scope({ m_source_code, rule_start.position(), position() }, move(property_name));
                }
            } else if (match(TokenType::BracketOpen)) {
                // Computed property name: `{ [expression]: target }`.
                consume();
                auto expression = parse_expression(0);

                name = move(expression);
                consume(TokenType::BracketClose);
            } else {
                expected("identifier or computed property name");
                return {};
            }

            if (!is_rest && match(TokenType::Colon)) {
                consume();
                if (allow_member_expressions == AllowMemberExpressions::Yes) {
                    parse_assignment_target_into(alias);
                } else if (match(TokenType::CurlyOpen) || match(TokenType::BracketOpen)) {
                    auto binding_pattern = parse_binding_pattern(allow_duplicates, allow_member_expressions);
                    if (!binding_pattern)
                        return {};
                    alias = binding_pattern.release_nonnull();
                } else if (match_identifier_name()) {
                    // NOTE: Consume first, see above.
                    auto alias_name = consume().DeprecatedFlyString_value();
                    alias = create_identifier_and_register_in_current_scope({ m_source_code, rule_start.position(), position() }, move(alias_name));
                } else {
                    expected("identifier or binding pattern");
                    return {};
                }
            } else if (needs_alias) {
                expected("alias for string or numeric literal name");
                return {};
            }
        } else {
            if (allow_member_expressions == AllowMemberExpressions::Yes) {
                parse_assignment_target_into(alias);
            } else if (match(TokenType::BracketOpen) || match(TokenType::CurlyOpen)) {
                auto pattern = parse_binding_pattern(allow_duplicates, allow_member_expressions);
                if (!pattern) {
                    expected("binding pattern");
                    return {};
                }
                alias = pattern.release_nonnull();
            } else if (match_identifier_name()) {
                // BindingElement must always have an Empty name field
                auto identifier_name = consume_identifier().DeprecatedFlyString_value();
                alias = create_identifier_and_register_in_current_scope({ m_source_code, rule_start.position(), position() }, identifier_name);
            } else {
                expected("identifier or binding pattern");
                return {};
            }
        }

        if (match(TokenType::Equals)) {
            if (is_rest) {
                syntax_error("Unexpected initializer after rest element");
                return {};
            }

            consume();

            initializer = parse_expression(2);
            if (!initializer) {
                expected("initialization expression");
                return {};
            }
        }

        entries.append(BindingPattern::BindingEntry { move(name), move(alias), move(initializer), is_rest });

        if (match(TokenType::Comma)) {
            if (is_rest) {
                syntax_error("Rest element may not be followed by a comma");
                return {};
            }
            consume();
        } else if (is_object && !match(TokenType::CurlyClose)) {
            // Object entries must be separated by commas; this reports the missing one.
            consume(TokenType::Comma);
        }
    }

    while (!is_object && match(TokenType::Comma))
        consume();

    consume(closing_token);

    auto kind = is_object ? BindingPattern::Kind::Object : BindingPattern::Kind::Array;
    auto pattern = adopt_ref(*new BindingPattern);
    pattern->entries = move(entries);
    pattern->kind = kind;

    // Validate every bound name, and reject repeats if the caller asked for that.
    Vector<StringView> bound_names;
    // NOTE: Nothing in the callback throws an exception.
    MUST(pattern->for_each_bound_identifier([&](auto& identifier) {
        auto const& bound_name = identifier.string();
        if (allow_duplicates == AllowDuplicates::No) {
            if (bound_names.contains_slow(bound_name))
                syntax_error("Duplicate parameter names in bindings");
            bound_names.append(bound_name);
        }
        check_identifier_name_for_assignment_validity(bound_name);
    }));

    return pattern;
}
2023-02-19 21:07:52 +00:00
// Parses a single binding name and registers it in the current scope.
//
// Accepts whatever match_identifier() accepts, plus `yield` when not in a generator function context
// and `async` when await expressions are not valid. Returns a null RefPtr, consuming nothing, if the
// current token is none of those.
RefPtr<Identifier const> Parser::parse_lexical_binding()
{
    auto binding_start = push_start();

    // NOTE: In every branch the token is consumed into a local before the source range is built.
    //       The order in which sibling function arguments are evaluated is unspecified, so calling
    //       position() and consume() in the same argument list made the range's end position depend
    //       on the compiler.

    if (match_identifier()) {
        auto name = consume_identifier().DeprecatedFlyString_value();
        return create_identifier_and_register_in_current_scope({ m_source_code, binding_start.position(), position() }, move(name));
    }

    if (!m_state.in_generator_function_context && match(TokenType::Yield)) {
        if (m_state.strict_mode)
            syntax_error("Identifier must not be a reserved word in strict mode ('yield')");

        auto name = consume().DeprecatedFlyString_value();
        return create_identifier_and_register_in_current_scope({ m_source_code, binding_start.position(), position() }, move(name));
    }

    if (!m_state.await_expression_is_valid && match(TokenType::Async)) {
        if (m_program_type == Program::Type::Module)
            syntax_error("Identifier must not be a reserved word in modules ('async')");

        auto name = consume().DeprecatedFlyString_value();
        return create_identifier_and_register_in_current_scope({ m_source_code, binding_start.position(), position() }, move(name));
    }

    return {};
}
2023-02-19 21:07:52 +00:00
// Parses a `var`, `let` or `const` declaration with one or more comma-separated declarators.
// The current token must be one of those three keywords.
//
// When is_for_loop_variable_declaration is Yes, `in` is not allowed inside initializers, missing
// initializers are not reported, and no trailing semicolon is consumed or inserted.
NonnullRefPtr<VariableDeclaration const> Parser::parse_variable_declaration(IsForLoopVariableDeclaration is_for_loop_variable_declaration)
{
    auto rule_start = push_start();

    auto const declaration_kind = [&] {
        switch (m_state.current_token.type()) {
        case TokenType::Var:
            return DeclarationKind::Var;
        case TokenType::Let:
            return DeclarationKind::Let;
        case TokenType::Const:
            return DeclarationKind::Const;
        default:
            VERIFY_NOT_REACHED();
        }
    }();
    consume();

    bool const is_lexical_declaration = declaration_kind == DeclarationKind::Let || declaration_kind == DeclarationKind::Const;
    bool const is_in_for_loop = is_for_loop_variable_declaration == IsForLoopVariableDeclaration::Yes;

    auto consume_comma_if_present = [&] {
        if (!match(TokenType::Comma))
            return false;
        consume();
        return true;
    };

    Vector<NonnullRefPtr<VariableDeclarator const>> declarations;
    do {
        Variant<NonnullRefPtr<Identifier const>, NonnullRefPtr<BindingPattern const>, Empty> target {};

        // Only `var` may bind the same name more than once within a single pattern.
        auto const duplicates_policy = declaration_kind != DeclarationKind::Var ? AllowDuplicates::No : AllowDuplicates::Yes;
        if (auto pattern = parse_binding_pattern(duplicates_policy, AllowMemberExpressions::No)) {
            if (is_lexical_declaration) {
                // NOTE: Nothing in the callback throws an exception.
                MUST(pattern->for_each_bound_identifier([this](auto& identifier) {
                    if (identifier.string() == "let"sv)
                        syntax_error("Lexical binding may not be called 'let'");
                }));
            }

            target = pattern.release_nonnull();
        } else if (auto lexical_binding = parse_lexical_binding()) {
            check_identifier_name_for_assignment_validity(lexical_binding->string());
            if (is_lexical_declaration && lexical_binding->string() == "let"sv)
                syntax_error("Lexical binding may not be called 'let'");

            target = lexical_binding.release_nonnull();
        }

        if (target.has<Empty>()) {
            expected("identifier or a binding pattern");
            // Skip this declarator; the loop condition decides whether another one follows.
            continue;
        }

        RefPtr<Expression const> init;
        if (match(TokenType::Equals)) {
            consume();
            // In a for loop 'in' can be ambiguous so we do not allow it
            // 14.7.4 The for Statement, https://tc39.es/ecma262/#prod-ForStatement and 14.7.5 The for-in, for-of, and for-await-of Statements, https://tc39.es/ecma262/#prod-ForInOfStatement
            init = is_in_for_loop
                ? parse_expression(2, Associativity::Right, { TokenType::In })
                : parse_expression(2);
        } else if (!is_in_for_loop && declaration_kind == DeclarationKind::Const) {
            syntax_error("Missing initializer in 'const' variable declaration");
        } else if (!is_in_for_loop && target.has<NonnullRefPtr<BindingPattern const>>()) {
            syntax_error("Missing initializer in destructuring assignment");
        }

        declarations.append(create_ast_node<VariableDeclarator>(
            { m_source_code, rule_start.position(), position() },
            move(target).downcast<NonnullRefPtr<Identifier const>, NonnullRefPtr<BindingPattern const>>(),
            move(init)));
    } while (consume_comma_if_present());

    if (!is_in_for_loop)
        consume_or_insert_semicolon();

    declarations.shrink_to_fit();

    return create_ast_node<VariableDeclaration>({ m_source_code, rule_start.position(), position() }, declaration_kind, move(declarations));
}
2023-02-19 21:07:52 +00:00
// Parses a `using` declaration with one or more comma-separated bindings.
// The current token must be the identifier `using`, and the token after it must not be preceded by
// a line terminator; both are VERIFY()ed.
//
// Outside of a for loop every binding needs an initializer and the statement is terminated by a
// (possibly inserted) semicolon. Inside a for loop, `in` is not allowed within initializers.
NonnullRefPtr<UsingDeclaration const> Parser::parse_using_declaration(IsForLoopVariableDeclaration is_for_loop_variable_declaration)
{
    // using [no LineTerminator here] BindingList[?In, ?Yield, ?Await, +Using] ;
    auto rule_start = push_start();
    VERIFY(m_state.current_token.original_value() == "using"sv);
    consume(TokenType::Identifier);
    VERIFY(!m_state.current_token.trivia_contains_line_terminator());

    bool const is_in_for_loop = is_for_loop_variable_declaration == IsForLoopVariableDeclaration::Yes;

    auto consume_comma_if_present = [&] {
        if (!match(TokenType::Comma))
            return false;
        consume();
        return true;
    };

    Vector<NonnullRefPtr<VariableDeclarator const>> declarations;
    do {
        auto binding = parse_lexical_binding();
        if (!binding) {
            expected("lexical binding");
            break;
        }

        check_identifier_name_for_assignment_validity(binding->string());
        if (binding->string() == "let"sv)
            syntax_error("Lexical binding may not be called 'let'");

        RefPtr<Expression const> initializer;
        if (match(TokenType::Equals)) {
            consume();

            initializer = is_in_for_loop
                ? parse_expression(2, Associativity::Right, { TokenType::In })
                : parse_expression(2);
        } else if (!is_in_for_loop) {
            // There is no '=' here, so this reports the missing initializer.
            consume(TokenType::Equals);
        }

        declarations.append(create_ast_node<VariableDeclarator>(
            { m_source_code, rule_start.position(), position() },
            binding.release_nonnull(),
            move(initializer)));
    } while (consume_comma_if_present());

    if (!is_in_for_loop)
        consume_or_insert_semicolon();

    return create_ast_node<UsingDeclaration>({ m_source_code, rule_start.position(), position() }, move(declarations));
}
2023-02-19 21:07:52 +00:00
// Parses `throw <expression>` followed by a (possibly inserted) semicolon.
// If a line terminator precedes the token after `throw`, a syntax error is reported and the returned
// statement wraps an ErrorExpression instead of a parsed expression.
NonnullRefPtr<ThrowStatement const> Parser::parse_throw_statement()
{
    auto rule_start = push_start();
    consume(TokenType::Throw);

    // Automatic semicolon insertion: terminate statement when throw is followed by newline
    bool const expression_is_on_same_line = !m_state.current_token.trivia_contains_line_terminator();
    if (expression_is_on_same_line) {
        auto argument = parse_expression(0);
        consume_or_insert_semicolon();
        return create_ast_node<ThrowStatement>({ m_source_code, rule_start.position(), position() }, move(argument));
    }

    syntax_error("No line break is allowed between 'throw' and its expression");
    auto error_expression = create_ast_node<ErrorExpression>({ m_source_code, rule_start.position(), position() });
    return create_ast_node<ThrowStatement>({ m_source_code, rule_start.position(), position() }, move(error_expression));
}
2023-02-19 21:07:52 +00:00
// Parses `break`, optionally followed by a label, and a (possibly inserted) semicolon.
// A label is only taken if it is not preceded by a line terminator. Reports a syntax error for a
// label that is not in scope, and for an unlabeled `break` outside of a break context.
NonnullRefPtr<BreakStatement const> Parser::parse_break_statement()
{
    auto rule_start = push_start();
    consume(TokenType::Break);

    DeprecatedFlyString target_label;
    bool const has_explicit_semicolon = match(TokenType::Semicolon);
    if (has_explicit_semicolon) {
        consume();
    } else {
        bool const label_follows = !m_state.current_token.trivia_contains_line_terminator() && match_identifier();
        if (label_follows) {
            target_label = consume().value();

            if (m_state.labels_in_scope.find(target_label) == m_state.labels_in_scope.end())
                syntax_error(DeprecatedString::formatted("Label '{}' not found", target_label));
        }

        consume_or_insert_semicolon();
    }

    // Without a label, `break` is only valid where the parser state says we are in a break context.
    bool const is_unlabeled = target_label.is_null();
    if (is_unlabeled && !m_state.in_break_context)
        syntax_error("Unlabeled 'break' not allowed outside of a loop or switch statement");

    return create_ast_node<BreakStatement>({ m_source_code, rule_start.position(), position() }, target_label);
}
2023-02-19 21:07:52 +00:00
// Parses a `continue` statement with an optional target label.
// Reports a syntax error when the parser is not in a continue context, or when
// the referenced label is not in scope.
NonnullRefPtr<ContinueStatement const> Parser::parse_continue_statement()
{
    auto rule_start = push_start();
    if (!m_state.in_continue_context)
        syntax_error("'continue' not allowed outside of a loop");

    consume(TokenType::Continue);

    DeprecatedFlyString target_label;
    if (match(TokenType::Semicolon)) {
        // Explicit `continue;`
        consume();
    } else {
        // An identifier only counts as the label when no line terminator separates it from `continue`.
        if (!m_state.current_token.trivia_contains_line_terminator() && match_identifier()) {
            auto label_position = position();
            target_label = consume().value();

            auto label = m_state.labels_in_scope.find(target_label);
            if (label == m_state.labels_in_scope.end()) {
                syntax_error(DeprecatedString::formatted("Label '{}' not found or invalid", target_label));
            } else {
                // Store where this `continue` referenced the label in the label's map entry.
                // NOTE(review): presumably read back when the labelled statement is validated - confirm.
                label->value = label_position;
            }
        }
        consume_or_insert_semicolon();
    }

    return create_ast_node<ContinueStatement>({ m_source_code, rule_start.position(), position() }, target_label);
}
2023-02-19 21:07:52 +00:00
// Parses the `? consequent : alternate` tail of a conditional expression whose
// condition (`test`) has already been parsed by the caller.
// `forbidden` is passed on to the parse of the alternate branch only.
NonnullRefPtr<ConditionalExpression const> Parser::parse_conditional_expression(NonnullRefPtr<Expression const> test, ForbiddenTokens forbidden)
{
    auto rule_start = push_start();

    consume(TokenType::QuestionMark);
    auto when_true = parse_expression(2);

    consume(TokenType::Colon);
    auto when_false = parse_expression(2, Associativity::Right, forbidden);

    return create_ast_node<ConditionalExpression>(
        { m_source_code, rule_start.position(), position() },
        move(test),
        move(when_true),
        move(when_false));
}
2023-02-19 21:07:52 +00:00
// Parses the links that follow `base` in an optional chain: `?.name`, `?.[expr]`,
// `?.(args)`, `?.#priv` as well as the plain `.name`, `[expr]`, `(args)` and `.#priv`
// links. Parsing stops at the first token that cannot extend the chain, or at end of input.
// A template literal in either position is reported as a syntax error
// (13.3.1.1 - Static Semantics: Early Errors).
NonnullRefPtr<OptionalChain const> Parser::parse_optional_chain(NonnullRefPtr<Expression const> base)
{
    auto rule_start = push_start();
    Vector<OptionalChain::Reference> chain;

    // Consumes the current PrivateIdentifier token and appends it as a `#name` link.
    auto append_private_member_reference = [&](OptionalChain::Mode mode) {
        if (!is_private_identifier_valid())
            syntax_error(DeprecatedString::formatted("Reference to undeclared private field or method '{}'", m_state.current_token.value()));

        auto start = position();
        auto private_identifier = consume();
        chain.append(OptionalChain::PrivateMemberReference {
            create_ast_node<PrivateIdentifier>({ m_source_code, start, position() }, private_identifier.value()),
            mode,
        });
    };

    // Consumes the current identifier-name token and appends it as a `name` link.
    auto append_member_reference = [&](OptionalChain::Mode mode) {
        auto start = position();
        auto identifier = consume();
        chain.append(OptionalChain::MemberReference {
            create_ast_node<Identifier>({ m_source_code, start, position() }, identifier.DeprecatedFlyString_value()),
            mode,
        });
    };

    do {
        if (match(TokenType::QuestionMarkPeriod)) {
            consume(TokenType::QuestionMarkPeriod);
            // NOTE: Every `break` inside this switch only leaves the switch; the loop keeps going.
            switch (m_state.current_token.type()) {
            case TokenType::ParenOpen:
                chain.append(OptionalChain::Call { parse_arguments(), OptionalChain::Mode::Optional });
                break;
            case TokenType::BracketOpen:
                consume();
                chain.append(OptionalChain::ComputedReference { parse_expression(0), OptionalChain::Mode::Optional });
                consume(TokenType::BracketClose);
                break;
            case TokenType::PrivateIdentifier:
                append_private_member_reference(OptionalChain::Mode::Optional);
                break;
            case TokenType::TemplateLiteralStart:
                // 13.3.1.1 - Static Semantics: Early Errors
                //  OptionalChain :
                //      ?. TemplateLiteral
                //      OptionalChain TemplateLiteral
                // This is a hard error.
                syntax_error("Invalid tagged template literal after ?.", position());
                break;
            default:
                if (match_identifier_name())
                    append_member_reference(OptionalChain::Mode::Optional);
                else
                    syntax_error("Invalid optional chain reference after ?.", position());
                break;
            }
        } else if (match(TokenType::ParenOpen)) {
            chain.append(OptionalChain::Call { parse_arguments(), OptionalChain::Mode::NotOptional });
        } else if (match(TokenType::Period)) {
            consume();
            if (match(TokenType::PrivateIdentifier)) {
                append_private_member_reference(OptionalChain::Mode::NotOptional);
            } else if (match_identifier_name()) {
                append_member_reference(OptionalChain::Mode::NotOptional);
            } else {
                expected("an identifier");
                break;
            }
        } else if (match(TokenType::TemplateLiteralStart)) {
            // 13.3.1.1 - Static Semantics: Early Errors
            //  OptionalChain :
            //      ?. TemplateLiteral
            //      OptionalChain TemplateLiteral
            syntax_error("Invalid tagged template literal after optional chain", position());
            break;
        } else if (match(TokenType::BracketOpen)) {
            consume();
            // The subscript is a full Expression (comma operator included), so parse it with
            // minimum precedence 0, exactly like the `?.[ ... ]` link above.
            chain.append(OptionalChain::ComputedReference { parse_expression(0), OptionalChain::Mode::NotOptional });
            consume(TokenType::BracketClose);
        } else {
            break;
        }
    } while (!done());

    return create_ast_node<OptionalChain>(
        { m_source_code, rule_start.position(), position() },
        move(base),
        move(chain));
}
2023-02-19 21:07:52 +00:00
// Parses `try { ... }` followed by an optional `catch` clause and an optional
// `finally` block. Reports a syntax error when neither of the two is present.
NonnullRefPtr<TryStatement const> Parser::parse_try_statement()
{
    auto rule_start = push_start();

    consume(TokenType::Try);
    auto try_block = parse_block_statement();

    RefPtr<CatchClause const> catch_clause;
    if (match(TokenType::Catch))
        catch_clause = parse_catch_clause();

    RefPtr<BlockStatement const> finally_block;
    if (match(TokenType::Finally)) {
        consume();
        finally_block = parse_block_statement();
    }

    if (!(catch_clause || finally_block))
        syntax_error("try statement must have a 'catch' or 'finally' clause");

    return create_ast_node<TryStatement>(
        { m_source_code, rule_start.position(), position() },
        move(try_block),
        move(catch_clause),
        move(finally_block));
}
2023-02-19 21:07:52 +00:00
// Parses `do <statement> while ( <expression> )` with an optional trailing semicolon.
NonnullRefPtr<DoWhileStatement const> Parser::parse_do_while_statement()
{
    auto rule_start = push_start();
    consume(TokenType::Do);

    // The break/continue contexts are switched on only while the loop body is parsed;
    // both TemporaryChange objects go out of scope when the lambda returns.
    auto parse_loop_body = [this]() -> NonnullRefPtr<Statement const> {
        TemporaryChange allow_break { m_state.in_break_context, true };
        TemporaryChange allow_continue { m_state.in_continue_context, true };
        return parse_statement();
    };
    auto body = parse_loop_body();

    consume(TokenType::While);
    consume(TokenType::ParenOpen);
    auto test = parse_expression(0);
    consume(TokenType::ParenClose);

    // Since ES 2015 a missing semicolon is inserted here, despite the regular ASI rules not applying
    if (match(TokenType::Semicolon))
        consume();

    return create_ast_node<DoWhileStatement>(
        { m_source_code, rule_start.position(), position() },
        move(test),
        move(body));
}
2023-02-19 21:07:52 +00:00
// Parses `while ( <expression> ) <statement>`.
NonnullRefPtr<WhileStatement const> Parser::parse_while_statement()
{
    auto rule_start = push_start();

    consume(TokenType::While);
    consume(TokenType::ParenOpen);
    auto condition = parse_expression(0);
    consume(TokenType::ParenClose);

    // Both contexts are switched on from here until this function returns,
    // which covers the parse of the loop body.
    TemporaryChange allow_break { m_state.in_break_context, true };
    TemporaryChange allow_continue { m_state.in_continue_context, true };
    auto loop_body = parse_statement();

    return create_ast_node<WhileStatement>(
        { m_source_code, rule_start.position(), position() },
        move(condition),
        move(loop_body));
}
2023-02-19 21:07:52 +00:00
// Parses `switch ( <expression> ) { <case/default clauses> }`.
// Reports a syntax error when more than one `default` clause is present.
NonnullRefPtr<SwitchStatement const> Parser::parse_switch_statement()
{
    auto rule_start = push_start();
    consume(TokenType::Switch);
    consume(TokenType::ParenOpen);
    auto determinant = parse_expression(0);
    consume(TokenType::ParenClose);
    consume(TokenType::CurlyOpen);

    // The node is created before its clauses are parsed because the block scope below is
    // pushed for it. Its source range is therefore captured at this point, before the
    // clauses and the closing brace are consumed.
    auto switch_statement = create_ast_node<SwitchStatement>({ m_source_code, rule_start.position(), position() }, move(determinant));

    ScopePusher switch_scope = ScopePusher::block_scope(*this, switch_statement);

    auto has_default = false;
    while (match(TokenType::Case) || match(TokenType::Default)) {
        if (match(TokenType::Default)) {
            if (has_default)
                syntax_error("Multiple 'default' clauses in switch statement");
            has_default = true;
        }
        switch_statement->add_case(parse_switch_case());
    }

    consume(TokenType::CurlyClose);

    return switch_statement;
}
2023-02-19 21:07:52 +00:00
// Parses `with ( <expression> ) <statement>`.
NonnullRefPtr<WithStatement const> Parser::parse_with_statement()
{
    auto rule_start = push_start();

    consume(TokenType::With);
    consume(TokenType::ParenOpen);
    auto scope_object = parse_expression(0);
    consume(TokenType::ParenClose);

    // This BlockStatement is only handed to the ScopePusher that is active while the body
    // is parsed; it is not part of the returned WithStatement.
    auto scope_node = create_ast_node<BlockStatement>({ m_source_code, rule_start.position(), position() });
    ScopePusher with_scope = ScopePusher::with_scope(*this, scope_node);

    auto statement_body = parse_statement();

    return create_ast_node<WithStatement>(
        { m_source_code, rule_start.position(), position() },
        move(scope_object),
        move(statement_body));
}
2023-02-19 21:07:52 +00:00
// Parses a single `case <expression>:` or `default:` clause together with the
// statement list that follows it. For any leading token other than `case`
// the clause's test expression is left null.
NonnullRefPtr<SwitchCase const> Parser::parse_switch_case()
{
    auto rule_start = push_start();

    RefPtr<Expression const> test;
    // The leading token is consumed unconditionally; only `case` is followed by a test expression.
    if (consume().type() == TokenType::Case) {
        test = parse_expression(0);
    }

    consume(TokenType::Colon);

    // `break` is permitted while the clause's statements are parsed.
    TemporaryChange break_change(m_state.in_break_context, true);

    // The node is created first (its source range is captured here, right after the colon)
    // and the statements are then parsed directly into it.
    auto switch_case = create_ast_node<SwitchCase>({ m_source_code, rule_start.position(), position() }, move(test));
    parse_statement_list(switch_case);

    return switch_case;
}
2023-02-19 21:07:52 +00:00
NonnullRefPtr < CatchClause const > Parser : : parse_catch_clause ( )
2020-03-24 13:03:55 +00:00
{
2020-12-28 17:15:22 +00:00
auto rule_start = push_start ( ) ;
2020-03-24 13:03:55 +00:00
consume ( TokenType : : Catch ) ;
2023-01-09 00:23:00 +00:00
DeprecatedFlyString parameter ;
2023-02-19 21:07:52 +00:00
RefPtr < BindingPattern const > pattern_parameter ;
2021-07-11 10:45:38 +00:00
auto should_expect_parameter = false ;
2020-03-24 13:03:55 +00:00
if ( match ( TokenType : : ParenOpen ) ) {
2023-07-12 02:02:27 +00:00
TemporaryChange catch_parameter_context_change { m_state . in_catch_parameter_context , true } ;
2021-07-11 10:45:38 +00:00
should_expect_parameter = true ;
2020-03-24 13:03:55 +00:00
consume ( ) ;
2021-11-26 22:29:05 +00:00
if ( match_identifier_name ( )
& & ( ! match ( TokenType : : Yield ) | | ! m_state . in_generator_function_context )
2021-11-26 22:50:32 +00:00
& & ( ! match ( TokenType : : Async ) | | ! m_state . await_expression_is_valid )
2021-11-26 22:29:05 +00:00
& & ( ! match ( TokenType : : Await ) | | ! m_state . in_class_static_init_block ) )
2021-07-11 11:04:55 +00:00
parameter = consume ( ) . value ( ) ;
2021-07-11 10:45:38 +00:00
else
2021-09-17 23:11:32 +00:00
pattern_parameter = parse_binding_pattern ( AllowDuplicates : : No , AllowMemberExpressions : : No ) ;
2020-03-24 13:03:55 +00:00
consume ( TokenType : : ParenClose ) ;
}
2021-07-11 10:45:38 +00:00
if ( should_expect_parameter & & parameter . is_empty ( ) & & ! pattern_parameter )
2021-07-11 19:00:55 +00:00
expected ( " an identifier or a binding pattern " ) ;
2021-07-11 10:45:38 +00:00
2023-01-09 00:23:00 +00:00
HashTable < DeprecatedFlyString > bound_names ;
2021-07-11 11:04:55 +00:00
2021-09-22 10:44:56 +00:00
if ( pattern_parameter ) {
2023-02-27 22:13:37 +00:00
// NOTE: Nothing in the callback throws an exception.
2023-07-20 14:40:14 +00:00
MUST ( pattern_parameter - > for_each_bound_identifier (
[ & ] ( auto & identifier ) {
check_identifier_name_for_assignment_validity ( identifier . string ( ) ) ;
bound_names . set ( identifier . string ( ) ) ;
2023-02-27 22:13:37 +00:00
} ) ) ;
2021-09-22 10:44:56 +00:00
}
if ( ! parameter . is_empty ( ) ) {
2021-07-11 11:04:55 +00:00
check_identifier_name_for_assignment_validity ( parameter ) ;
2021-09-22 10:44:56 +00:00
bound_names . set ( parameter ) ;
}
2021-07-11 11:04:55 +00:00
2021-10-08 10:04:13 +00:00
ScopePusher catch_scope = ScopePusher : : catch_scope ( * this , pattern_parameter , parameter ) ;
2020-03-24 13:03:55 +00:00
auto body = parse_block_statement ( ) ;
2021-09-22 10:44:56 +00:00
2023-02-27 22:13:37 +00:00
// NOTE: Nothing in the callback throws an exception.
2023-07-20 14:54:20 +00:00
MUST ( body - > for_each_lexically_declared_identifier ( [ & ] ( auto const & identifier ) {
if ( bound_names . contains ( identifier . string ( ) ) )
syntax_error ( DeprecatedString : : formatted ( " Identifier '{}' already declared as catch parameter " , identifier . string ( ) ) ) ;
2023-02-27 22:13:37 +00:00
} ) ) ;
2021-09-22 10:44:56 +00:00
2021-07-11 10:45:38 +00:00
if ( pattern_parameter ) {
return create_ast_node < CatchClause > (
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
{ m_source_code , rule_start . position ( ) , position ( ) } ,
2021-07-11 10:45:38 +00:00
pattern_parameter . release_nonnull ( ) ,
move ( body ) ) ;
}
return create_ast_node < CatchClause > (
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
{ m_source_code , rule_start . position ( ) , position ( ) } ,
2021-07-11 10:45:38 +00:00
move ( parameter ) ,
move ( body ) ) ;
2020-03-24 13:03:55 +00:00
}
2023-02-19 21:07:52 +00:00
// Parses `if ( Expression ) Statement [ else Statement ]`.
// Outside strict mode, either clause may also be a bare function declaration, which is
// wrapped in a synthetic block statement (see the helper below).
NonnullRefPtr<IfStatement const> Parser::parse_if_statement()
{
    auto rule_start = push_start();

    // https://tc39.es/ecma262/#sec-functiondeclarations-in-ifstatement-statement-clauses
    // This production only applies when parsing non-strict code. Source text matched by it is
    // processed as if each matching FunctionDeclaration[?Yield, ?Await, ~Default] was the sole
    // StatementListItem of a BlockStatement occupying that position in the source text.
    // The semantics of such a synthetic BlockStatement includes the web legacy compatibility
    // semantics specified in B.3.2.
    auto wrap_function_declaration_in_block = [&] {
        VERIFY(match(TokenType::Function));

        auto synthetic_block = create_ast_node<BlockStatement>({ m_source_code, rule_start.position(), position() });
        ScopePusher synthetic_scope = ScopePusher::block_scope(*this, *synthetic_block);

        auto parsed_declaration = parse_declaration();
        VERIFY(m_state.current_scope_pusher);
        synthetic_scope.add_declaration(parsed_declaration);

        VERIFY(is<FunctionDeclaration>(*parsed_declaration));
        auto const function_kind = static_cast<FunctionDeclaration const&>(*parsed_declaration).kind();
        if (function_kind == FunctionKind::Generator)
            syntax_error("Generator functions can only be declared in top-level or within a block");
        if (function_kind == FunctionKind::Async)
            syntax_error("Async functions can only be declared in top-level or within a block");

        synthetic_block->append(move(parsed_declaration));
        return synthetic_block;
    };

    // Both clauses follow the same rule: a `function` token in non-strict code takes the
    // synthetic-block path, everything else is parsed as an ordinary statement.
    auto parse_clause = [&]() -> NonnullRefPtr<Statement const> {
        if (!m_state.strict_mode && match(TokenType::Function))
            return wrap_function_declaration_in_block();
        return parse_statement();
    };

    consume(TokenType::If);
    consume(TokenType::ParenOpen);
    auto predicate = parse_expression(0);
    consume(TokenType::ParenClose);

    RefPtr<Statement const> consequent = parse_clause();

    // The else clause is optional; `alternate` stays null when it is absent.
    RefPtr<Statement const> alternate;
    if (match(TokenType::Else)) {
        consume();
        alternate = parse_clause();
    }

    return create_ast_node<IfStatement>({ m_source_code, rule_start.position(), position() }, move(predicate), move(*consequent), move(alternate));
}
2023-02-19 21:07:52 +00:00
// Parses a statement starting with `for`.
//
// A classic `for (init; test; update) body` loop is built here and returned as a ForStatement.
// As soon as the head turns out to be a for-in / for-of / for-await-of head, parsing is handed
// off to parse_for_in_of_statement() and its result is returned instead.
NonnullRefPtr<Statement const> Parser::parse_for_statement()
{
    auto rule_start = push_start();
    auto is_await_loop = IsForAwaitLoop::No;

    // Scope pusher for the loop; as a local it lives until this function returns.
    auto loop_scope_node = create_ast_node<BlockStatement>({ m_source_code, rule_start.position(), position() });
    ScopePusher for_loop_scope = ScopePusher::for_loop_scope(*this, *loop_scope_node);

    // `of` is matched as an Identifier token whose original text is "of".
    auto match_of = [&](Token const& token) {
        return token.type() == TokenType::Identifier && token.original_value() == "of"sv;
    };

    // True when the current token starts a for-in/of tail. For `for await` loops this always
    // returns true (after reporting any error), so the caller takes the for-in/of path.
    auto match_for_in_of = [&]() {
        bool is_of = match_of(m_state.current_token);
        if (is_await_loop == IsForAwaitLoop::Yes) {
            if (!is_of)
                syntax_error("for await loop is only valid with 'of'");
            else if (!m_state.await_expression_is_valid)
                syntax_error("for await loop is only valid in async function or generator");
            return true;
        }

        return match(TokenType::In) || is_of;
    };

    consume(TokenType::For);

    if (match(TokenType::Await)) {
        consume();
        if (!m_state.await_expression_is_valid)
            syntax_error("for-await-of is only allowed in async function context");
        is_await_loop = IsForAwaitLoop::Yes;
    }

    consume(TokenType::ParenOpen);

    RefPtr<ASTNode const> init;
    if (!match(TokenType::Semicolon)) {
        // `using` only starts a declaration when the next token is on the same line,
        // is not `of`, and is an identifier.
        auto match_for_using_declaration = [&] {
            if (!match(TokenType::Identifier) || m_state.current_token.original_value() != "using"sv)
                return false;

            auto lookahead = next_token();
            if (lookahead.trivia_contains_line_terminator())
                return false;

            if (lookahead.original_value() == "of"sv)
                return false;

            return token_is_identifier(lookahead);
        };

        if (match_for_using_declaration()) {
            auto declaration = parse_using_declaration(IsForLoopVariableDeclaration::Yes);

            if (match_of(m_state.current_token)) {
                if (declaration->declarations().size() != 1)
                    syntax_error("Must have exactly one declaration in for using of");
                else if (declaration->declarations().first()->init())
                    syntax_error("Using declaration cannot have initializer");

                return parse_for_in_of_statement(move(declaration), is_await_loop);
            }

            if (match(TokenType::In))
                syntax_error("Using declaration not allowed in for-in loop");

            init = move(declaration);
        } else if (match_variable_declaration()) {
            auto declaration = parse_variable_declaration(IsForLoopVariableDeclaration::Yes);
            m_state.current_scope_pusher->add_declaration(declaration);

            if (match_for_in_of()) {
                if (declaration->declarations().size() > 1)
                    syntax_error("Multiple declarations not allowed in for..in/of");
                else if (declaration->declarations().size() < 1)
                    syntax_error("Need exactly one variable declaration in for..in/of");

                return parse_for_in_of_statement(move(declaration), is_await_loop);
            }

            // Only reached for a classic for loop: every `const` declarator needs an initializer.
            if (declaration->declaration_kind() == DeclarationKind::Const) {
                for (auto const& variable : declaration->declarations()) {
                    if (!variable->init())
                        syntax_error("Missing initializer in 'const' variable declaration");
                }
            }

            init = move(declaration);
        } else if (match_expression()) {
            // Remember whether the head begins with `async of` before the expression is consumed.
            auto lookahead_token = next_token();
            bool starts_with_async_of = match(TokenType::Async) && match_of(lookahead_token);

            init = parse_expression(0, Associativity::Right, { TokenType::In });
            if (match_for_in_of()) {
                if (is_await_loop != IsForAwaitLoop::Yes
                    && starts_with_async_of && match_of(m_state.current_token))
                    syntax_error("for-of loop may not start with async of");
                return parse_for_in_of_statement(*init, is_await_loop);
            }
        } else {
            syntax_error("Unexpected token in for loop");
        }
    }
    consume(TokenType::Semicolon);

    // Both the test and the update expression are optional and stay null when omitted.
    RefPtr<Expression const> test;
    if (!match(TokenType::Semicolon))
        test = parse_expression(0);

    consume(TokenType::Semicolon);

    RefPtr<Expression const> update;
    if (!match(TokenType::ParenClose))
        update = parse_expression(0);

    consume(TokenType::ParenClose);

    // Set the break/continue context flags while the loop body is parsed.
    TemporaryChange break_change(m_state.in_break_context, true);
    TemporaryChange continue_change(m_state.in_continue_context, true);

    auto body = parse_statement();

    return create_ast_node<ForStatement>({ m_source_code, rule_start.position(), position() }, move(init), move(test), move(update), move(body));
}
2023-02-19 21:07:52 +00:00
// Parses the remainder of a for-in / for-of / for-await-of loop once its left-hand side has
// been parsed: validates `lhs`, consumes the `in`/`of` token, then parses the right-hand side
// and the body. Returns a ForInStatement, ForAwaitOfStatement or ForOfStatement.
NonnullRefPtr<Statement const> Parser::parse_for_in_of_statement(NonnullRefPtr<ASTNode const> lhs, IsForAwaitLoop is_for_await_loop)
{
    Variant<NonnullRefPtr<ASTNode const>, NonnullRefPtr<BindingPattern const>> for_declaration = lhs;
    auto rule_start = push_start();

    bool saw_annexB_initializer = false;

    if (is<VariableDeclaration>(*lhs)) {
        auto const& variable_declaration = static_cast<VariableDeclaration const&>(*lhs);
        // Syntax errors for wrong amounts of declaration should have already been hit.
        if (!variable_declaration.declarations().is_empty()) {
            // AnnexB extension B.3.5 Initializers in ForIn Statement Heads, https://tc39.es/ecma262/#sec-initializers-in-forin-statement-heads
            auto& declarator = variable_declaration.declarations().first();
            if (declarator->init()) {
                // An initializer is tolerated only for a non-strict `var` binding a plain identifier.
                bool const initializer_tolerated = !m_state.strict_mode
                    && variable_declaration.declaration_kind() == DeclarationKind::Var
                    && declarator->target().has<NonnullRefPtr<Identifier const>>();
                if (initializer_tolerated)
                    saw_annexB_initializer = true;
                else
                    syntax_error("Variable initializer not allowed in for..in/of");
            }
        }
    } else if (!lhs->is_identifier() && !is<MemberExpression>(*lhs) && !is<CallExpression>(*lhs) && !is<UsingDeclaration>(*lhs)) {
        // Object and array expressions may be reinterpreted as a binding pattern;
        // any other kind of node here is an invalid loop target.
        RefPtr<BindingPattern const> pattern;
        if (is<ObjectExpression>(*lhs) || is<ArrayExpression>(*lhs))
            pattern = synthesize_binding_pattern(static_cast<Expression const&>(*lhs));

        if (pattern)
            for_declaration = pattern.release_nonnull();
        else
            syntax_error(DeprecatedString::formatted("Invalid left-hand side in for-loop ('{}')", lhs->class_name()));
    }

    auto const loop_keyword = consume();
    bool const is_for_in = loop_keyword.type() == TokenType::In;

    if (!is_for_in) {
        if (is<MemberExpression>(*lhs)) {
            auto const& object = static_cast<MemberExpression const&>(*lhs).object();
            if (object.is_identifier() && static_cast<Identifier const&>(object).string() == "let"sv)
                syntax_error("For of statement may not start with let.");
        }
        // The initializer extension is for-in only, so it is rejected for for-of.
        if (saw_annexB_initializer)
            syntax_error("Variable initializer not allowed in for..of", rule_start.position());
    }

    // The right-hand side is parsed with minimum precedence 0 for `in` and 2 for `of`.
    auto rhs = parse_expression(is_for_in ? 0 : 2);
    consume(TokenType::ParenClose);

    // Set the break/continue context flags while the loop body is parsed.
    TemporaryChange break_change(m_state.in_break_context, true);
    TemporaryChange continue_change(m_state.in_continue_context, true);

    auto body = parse_statement();

    if (is_for_in)
        return create_ast_node<ForInStatement>({ m_source_code, rule_start.position(), position() }, move(for_declaration), move(rhs), move(body));

    if (is_for_await_loop == IsForAwaitLoop::Yes)
        return create_ast_node<ForAwaitOfStatement>({ m_source_code, rule_start.position(), position() }, move(for_declaration), move(rhs), move(body));

    return create_ast_node<ForOfStatement>({ m_source_code, rule_start.position(), position() }, move(for_declaration), move(rhs), move(body));
}
2023-02-19 21:07:52 +00:00
// Parses `debugger` followed by an explicit or automatically inserted semicolon.
NonnullRefPtr<DebuggerStatement const> Parser::parse_debugger_statement()
{
    auto rule_start = push_start();

    consume(TokenType::Debugger);
    consume_or_insert_semicolon();

    // The end position is read only after the semicolon has been handled.
    auto debugger_statement = create_ast_node<DebuggerStatement>({ m_source_code, rule_start.position(), position() });
    return debugger_statement;
}
2020-03-11 18:27:43 +00:00
bool Parser : : match ( TokenType type ) const
{
2021-06-19 12:43:09 +00:00
return m_state . current_token . type ( ) = = type ;
2020-03-11 18:27:43 +00:00
}
bool Parser : : match_expression ( ) const
{
2021-06-19 12:43:09 +00:00
auto type = m_state . current_token . type ( ) ;
2021-11-26 22:45:10 +00:00
if ( type = = TokenType : : Import ) {
auto lookahead_token = next_token ( ) ;
return lookahead_token . type ( ) = = TokenType : : Period | | lookahead_token . type ( ) = = TokenType : : ParenOpen ;
}
2020-03-11 18:27:43 +00:00
return type = = TokenType : : BoolLiteral
| | type = = TokenType : : NumericLiteral
2020-06-06 00:14:10 +00:00
| | type = = TokenType : : BigIntLiteral
2020-03-11 18:27:43 +00:00
| | type = = TokenType : : StringLiteral
LibJS: Add template literals
Adds fully functioning template literals. Because template literals
contain expressions, most of the work has to be done in the Lexer rather
than the Parser. And because of the complexity of template literals
(expressions, nesting, escapes, etc), the Lexer needs to have some
template-related state.
When entering a new template literal, a TemplateLiteralStart token is
emitted. When inside a literal, all text will be parsed up until a '${'
or '`' (or EOF, but that's a syntax error) is seen, and then a
TemplateLiteralExprStart token is emitted. At this point, the Lexer
proceeds as normal, however it keeps track of the number of opening
and closing curly braces it has seen in order to determine the close
of the expression. Once it finds a matching curly brace for the '${',
a TemplateLiteralExprEnd token is emitted and the state is updated
accordingly.
When the Lexer is inside of a template literal, but not an expression,
and sees a '`', this must be the closing grave: a TemplateLiteralEnd
token is emitted.
The state required to correctly parse template strings consists of a
vector (for nesting) of two pieces of information: whether or not we
are in a template expression (as opposed to a template string); and
the count of the number of unmatched open curly braces we have seen
(only applicable if the Lexer is currently in a template expression).
TODO: Add support for template literal newlines in the JS REPL (this will
cause a syntax error currently):
> `foo
> bar`
'foo
bar'
2020-05-03 22:41:14 +00:00
| | type = = TokenType : : TemplateLiteralStart
2020-03-11 18:27:43 +00:00
| | type = = TokenType : : NullLiteral
2021-07-11 11:04:55 +00:00
| | match_identifier ( )
2021-11-26 23:16:01 +00:00
| | type = = TokenType : : PrivateIdentifier
2021-09-18 21:02:50 +00:00
| | type = = TokenType : : Await
2020-03-11 18:27:43 +00:00
| | type = = TokenType : : New
2021-08-28 15:04:37 +00:00
| | type = = TokenType : : Class
2020-03-11 18:27:43 +00:00
| | type = = TokenType : : CurlyOpen
| | type = = TokenType : : BracketOpen
2020-03-14 18:45:51 +00:00
| | type = = TokenType : : ParenOpen
2020-03-19 10:52:56 +00:00
| | type = = TokenType : : Function
2021-11-09 18:39:22 +00:00
| | type = = TokenType : : Async
2020-04-12 22:42:14 +00:00
| | type = = TokenType : : This
2020-06-08 18:31:21 +00:00
| | type = = TokenType : : Super
2020-06-03 23:05:49 +00:00
| | type = = TokenType : : RegexLiteral
2021-07-29 21:28:28 +00:00
| | type = = TokenType : : Slash // Wrongly recognized regex by lexer
| | type = = TokenType : : SlashEquals // Wrongly recognized regex by lexer (/=a/ is a valid regex)
2021-06-10 21:08:30 +00:00
| | type = = TokenType : : Yield
2020-03-14 18:45:51 +00:00
| | match_unary_prefixed_expression ( ) ;
}
bool Parser : : match_unary_prefixed_expression ( ) const
{
2021-06-19 12:43:09 +00:00
auto type = m_state . current_token . type ( ) ;
2020-03-14 18:45:51 +00:00
return type = = TokenType : : PlusPlus
| | type = = TokenType : : MinusMinus
| | type = = TokenType : : ExclamationMark
2020-03-17 19:33:32 +00:00
| | type = = TokenType : : Tilde
2020-04-02 16:58:39 +00:00
| | type = = TokenType : : Plus
| | type = = TokenType : : Minus
2020-04-15 16:55:03 +00:00
| | type = = TokenType : : Typeof
2020-04-26 11:53:40 +00:00
| | type = = TokenType : : Void
| | type = = TokenType : : Delete ;
2020-03-11 18:27:43 +00:00
}
2022-02-16 06:34:59 +00:00
bool Parser : : match_secondary_expression ( ForbiddenTokens forbidden ) const
2020-03-11 18:27:43 +00:00
{
2021-06-19 12:43:09 +00:00
auto type = m_state . current_token . type ( ) ;
2022-02-16 06:34:59 +00:00
if ( ! forbidden . allows ( type ) )
2020-04-21 18:21:26 +00:00
return false ;
2020-03-11 18:27:43 +00:00
return type = = TokenType : : Plus
2020-03-12 12:09:15 +00:00
| | type = = TokenType : : PlusEquals
2020-03-11 18:27:43 +00:00
| | type = = TokenType : : Minus
2020-03-12 12:09:15 +00:00
| | type = = TokenType : : MinusEquals
2020-03-11 18:27:43 +00:00
| | type = = TokenType : : Asterisk
2020-03-12 12:09:15 +00:00
| | type = = TokenType : : AsteriskEquals
2020-03-11 18:27:43 +00:00
| | type = = TokenType : : Slash
2020-03-12 12:09:15 +00:00
| | type = = TokenType : : SlashEquals
2020-04-04 19:17:34 +00:00
| | type = = TokenType : : Percent
2020-05-04 22:07:05 +00:00
| | type = = TokenType : : PercentEquals
2020-04-05 12:40:00 +00:00
| | type = = TokenType : : DoubleAsterisk
2020-05-04 22:03:35 +00:00
| | type = = TokenType : : DoubleAsteriskEquals
2020-03-11 18:27:43 +00:00
| | type = = TokenType : : Equals
2020-03-12 12:11:33 +00:00
| | type = = TokenType : : EqualsEqualsEquals
| | type = = TokenType : : ExclamationMarkEqualsEquals
2020-03-15 22:23:38 +00:00
| | type = = TokenType : : EqualsEquals
| | type = = TokenType : : ExclamationMarkEquals
2020-03-12 12:10:27 +00:00
| | type = = TokenType : : GreaterThan
2020-03-12 12:07:08 +00:00
| | type = = TokenType : : GreaterThanEquals
2020-03-12 12:10:27 +00:00
| | type = = TokenType : : LessThan
2020-03-12 12:07:08 +00:00
| | type = = TokenType : : LessThanEquals
2020-03-12 12:05:57 +00:00
| | type = = TokenType : : ParenOpen
2020-03-12 11:45:45 +00:00
| | type = = TokenType : : Period
2020-03-20 19:51:03 +00:00
| | type = = TokenType : : BracketOpen
2021-08-14 15:02:15 +00:00
| | ( type = = TokenType : : PlusPlus & & ! m_state . current_token . trivia_contains_line_terminator ( ) )
| | ( type = = TokenType : : MinusMinus & & ! m_state . current_token . trivia_contains_line_terminator ( ) )
2020-04-23 15:06:01 +00:00
| | type = = TokenType : : In
2020-04-03 10:14:28 +00:00
| | type = = TokenType : : Instanceof
2020-04-03 12:02:31 +00:00
| | type = = TokenType : : QuestionMark
| | type = = TokenType : : Ampersand
2020-05-04 21:34:45 +00:00
| | type = = TokenType : : AmpersandEquals
2020-04-03 12:02:31 +00:00
| | type = = TokenType : : Pipe
2020-05-04 21:34:45 +00:00
| | type = = TokenType : : PipeEquals
2020-04-03 13:33:28 +00:00
| | type = = TokenType : : Caret
2020-05-04 21:34:45 +00:00
| | type = = TokenType : : CaretEquals
2020-04-23 12:36:14 +00:00
| | type = = TokenType : : ShiftLeft
| | type = = TokenType : : ShiftLeftEquals
2020-04-23 12:45:19 +00:00
| | type = = TokenType : : ShiftRight
| | type = = TokenType : : ShiftRightEquals
2020-04-23 14:43:10 +00:00
| | type = = TokenType : : UnsignedShiftRight
| | type = = TokenType : : UnsignedShiftRightEquals
2020-04-03 13:33:28 +00:00
| | type = = TokenType : : DoubleAmpersand
2020-10-05 15:49:43 +00:00
| | type = = TokenType : : DoubleAmpersandEquals
2020-04-17 23:49:11 +00:00
| | type = = TokenType : : DoublePipe
2020-10-05 15:49:43 +00:00
| | type = = TokenType : : DoublePipeEquals
| | type = = TokenType : : DoubleQuestionMark
2021-09-14 02:26:31 +00:00
| | type = = TokenType : : DoubleQuestionMarkEquals
| | type = = TokenType : : QuestionMarkPeriod ;
2020-03-11 18:27:43 +00:00
}
bool Parser : : match_statement ( ) const
{
2021-06-19 12:43:09 +00:00
auto type = m_state . current_token . type ( ) ;
2020-03-11 18:27:43 +00:00
return match_expression ( )
| | type = = TokenType : : Return
2021-06-10 21:08:30 +00:00
| | type = = TokenType : : Yield
2020-03-11 18:27:43 +00:00
| | type = = TokenType : : Do
| | type = = TokenType : : If
2020-03-24 21:03:50 +00:00
| | type = = TokenType : : Throw
2020-03-11 18:27:43 +00:00
| | type = = TokenType : : Try
| | type = = TokenType : : While
2020-11-28 14:05:57 +00:00
| | type = = TokenType : : With
2020-03-12 12:12:12 +00:00
| | type = = TokenType : : For
2020-03-11 18:27:43 +00:00
| | type = = TokenType : : CurlyOpen
2020-03-29 11:09:54 +00:00
| | type = = TokenType : : Switch
| | type = = TokenType : : Break
2020-04-04 22:22:42 +00:00
| | type = = TokenType : : Continue
2020-04-30 16:26:27 +00:00
| | type = = TokenType : : Var
2020-05-03 09:59:00 +00:00
| | type = = TokenType : : Debugger
| | type = = TokenType : : Semicolon ;
2020-03-11 18:27:43 +00:00
}
2021-08-14 15:42:30 +00:00
bool Parser : : match_export_or_import ( ) const
{
auto type = m_state . current_token . type ( ) ;
return type = = TokenType : : Export
| | type = = TokenType : : Import ;
}
2022-12-20 21:09:57 +00:00
// Reports whether the current token starts a declaration.
//
// Besides the plain `function`, `class`, `const` and `let` tokens, three cases need extra work:
// - `let` outside strict mode defers to try_match_let_declaration();
// - `async` matches only when the following token is `function` and that token's trivia
//   contains no line terminator;
// - an Identifier token spelled `using` defers to try_match_using_declaration(), but only
//   when allow_using is AllowUsingDeclaration::Yes.
bool Parser : : match_declaration ( AllowUsingDeclaration allow_using ) const
2020-10-22 22:30:07 +00:00
{
2021-06-19 12:43:09 +00:00
auto type = m_state . current_token . type ( ) ;
2021-07-29 00:03:16 +00:00
if ( type = = TokenType : : Let & & ! m_state . strict_mode ) {
return try_match_let_declaration ( ) ;
}
2021-11-30 14:52:51 +00:00
if ( type = = TokenType : : Async ) {
// Peek one token ahead; next_token() works on a copy of the lexer, so nothing is consumed.
auto lookahead_token = next_token ( ) ;
return lookahead_token . type ( ) = = TokenType : : Function & & ! lookahead_token . trivia_contains_line_terminator ( ) ;
}
2022-12-20 21:09:57 +00:00
if ( allow_using = = AllowUsingDeclaration : : Yes & & type = = TokenType : : Identifier & & m_state . current_token . original_value ( ) = = " using " sv )
return try_match_using_declaration ( ) ;
2020-10-22 22:30:07 +00:00
// A `let` token reaching this point implies strict mode; the non-strict case returned above.
return type = = TokenType : : Function
| | type = = TokenType : : Class
| | type = = TokenType : : Const
2021-11-30 14:52:51 +00:00
| | type = = TokenType : : Let ;
2020-10-22 22:30:07 +00:00
}
2022-01-16 22:51:28 +00:00
// Looks ahead `steps` tokens past the current one without consuming anything.
// The lookahead runs on a private copy of the lexer, so parser state is untouched.
// When `steps` is zero, a default-constructed Token is returned.
Token Parser::next_token(size_t steps) const
{
    Lexer scratch_lexer = m_state.lexer;
    Token peeked;

    for (size_t remaining = steps; remaining > 0; --remaining)
        peeked = scratch_lexer.next();

    return peeked;
}
2021-07-29 00:03:16 +00:00
2021-10-07 22:38:24 +00:00
// Decides, by one token of lookahead, whether a `let` token starts a declaration.
// Precondition (asserted): the current token is `let`.
// Returns true when the next token is an identifier name other than `in`, or an opening
// curly brace or bracket; false otherwise.
bool Parser : : try_match_let_declaration ( ) const
2021-08-28 15:04:37 +00:00
{
VERIFY ( m_state . current_token . type ( ) = = TokenType : : Let ) ;
// Peek at the token following `let`; nothing is consumed.
auto token_after = next_token ( ) ;
2021-07-29 00:03:16 +00:00
2021-08-28 15:04:37 +00:00
if ( token_after . is_identifier_name ( ) & & token_after . value ( ) ! = " in " sv )
2021-07-29 00:03:16 +00:00
return true ;
2021-08-28 15:04:37 +00:00
if ( token_after . type ( ) = = TokenType : : CurlyOpen | | token_after . type ( ) = = TokenType : : BracketOpen )
2021-07-29 00:03:16 +00:00
return true ;
return false ;
}
2022-12-20 21:09:57 +00:00
// Decides, by one token of lookahead, whether an identifier spelled `using` starts a declaration.
// Preconditions (asserted): the current token is an Identifier whose original value is `using`.
// Returns true only when the next token is an identifier name whose trivia contains no
// line terminator.
bool Parser : : try_match_using_declaration ( ) const
{
VERIFY ( m_state . current_token . type ( ) = = TokenType : : Identifier ) ;
VERIFY ( m_state . current_token . original_value ( ) = = " using " sv ) ;
// Peek at the token following `using`; nothing is consumed.
auto token_after = next_token ( ) ;
if ( token_after . trivia_contains_line_terminator ( ) )
return false ;
return token_after . is_identifier_name ( ) ;
}
2021-10-07 22:38:24 +00:00
bool Parser : : match_variable_declaration ( ) const
2020-10-22 22:30:07 +00:00
{
2021-06-19 12:43:09 +00:00
auto type = m_state . current_token . type ( ) ;
2021-07-29 00:03:16 +00:00
if ( type = = TokenType : : Let & & ! m_state . strict_mode ) {
return try_match_let_declaration ( ) ;
}
2020-10-22 22:30:07 +00:00
return type = = TokenType : : Var
| | type = = TokenType : : Let
| | type = = TokenType : : Const ;
}
2021-07-11 11:04:55 +00:00
bool Parser : : match_identifier ( ) const
{
2022-12-23 00:45:29 +00:00
return token_is_identifier ( m_state . current_token ) ;
}
// Reports whether `token` may be used as an identifier given the current parser state.
//
// EscapedKeyword tokens are judged by their value:
// - `let`   only outside strict mode;
// - `yield` only outside strict mode and outside a generator function context;
// - `await` only when the program is not a module, await expressions are not currently
//   valid, and we are not inside a class static init block;
// - any other escaped keyword is accepted.
// For all other tokens, Identifier and Async are always accepted, and the Let, Await and
// Yield token types are accepted under the same conditions as listed above.
bool Parser : : token_is_identifier ( Token const & token ) const
{
if ( token . type ( ) = = TokenType : : EscapedKeyword ) {
if ( token . value ( ) = = " let " sv )
2021-08-21 09:27:20 +00:00
return ! m_state . strict_mode ;
2022-12-23 00:45:29 +00:00
if ( token . value ( ) = = " yield " sv )
2021-08-21 09:27:20 +00:00
return ! m_state . strict_mode & & ! m_state . in_generator_function_context ;
2022-12-23 00:45:29 +00:00
if ( token . value ( ) = = " await " sv )
2021-11-26 22:50:32 +00:00
return m_program_type ! = Program : : Type : : Module & & ! m_state . await_expression_is_valid & & ! m_state . in_class_static_init_block ;
2021-08-21 09:27:20 +00:00
return true ;
}
2022-12-23 00:45:29 +00:00
return token . type ( ) = = TokenType : : Identifier
| | token . type ( ) = = TokenType : : Async
| | ( token . type ( ) = = TokenType : : Let & & ! m_state . strict_mode )
| | ( token . type ( ) = = TokenType : : Await & & m_program_type ! = Program : : Type : : Module & & ! m_state . await_expression_is_valid & & ! m_state . in_class_static_init_block )
| | ( token . type ( ) = = TokenType : : Yield & & ! m_state . in_generator_function_context & & ! m_state . strict_mode ) ; // See note in Parser::parse_identifier().
2021-07-11 11:04:55 +00:00
}
2020-04-18 18:31:27 +00:00
bool Parser : : match_identifier_name ( ) const
{
2021-06-19 12:43:09 +00:00
return m_state . current_token . is_identifier_name ( ) ;
2020-04-18 18:31:27 +00:00
}
2020-06-08 18:31:21 +00:00
bool Parser : : match_property_key ( ) const
{
2021-06-19 12:43:09 +00:00
auto type = m_state . current_token . type ( ) ;
2020-06-08 18:31:21 +00:00
return match_identifier_name ( )
| | type = = TokenType : : BracketOpen
| | type = = TokenType : : StringLiteral
| | type = = TokenType : : NumericLiteral
| | type = = TokenType : : BigIntLiteral ;
}
2020-03-11 18:27:43 +00:00
bool Parser : : done ( ) const
{
return match ( TokenType : : Eof ) ;
}
// Advances to the next token and returns the token that was current before the call.
//
// If the consumed token is an IdentifierName and the newly lexed token is a slash or
// slash-equals, the lexer is asked to re-read that slash as the start of a regex: when an
// IdentifierName was not parsed as an Identifier, a slash after it should not be a division.
Token Parser::consume()
{
    auto consumed = m_state.current_token;
    m_state.current_token = m_state.lexer.next();

    if (!consumed.is_identifier_name())
        return consumed;

    auto const upcoming_type = m_state.current_token.type();
    if (upcoming_type == TokenType::Slash || upcoming_type == TokenType::SlashEquals)
        m_state.current_token = m_state.lexer.force_slash_as_regex();

    return consumed;
}
// Advances to the next token and returns the token that was current before the call.
// Unlike Parser::consume(), the newly lexed token is kept as-is: a following slash is not
// re-lexed as a regex.
// Side effect: consuming an Identifier whose value is `arguments` or `eval` sets
// m_state.function_might_need_arguments_object.
Token Parser : : consume_and_allow_division ( )
2020-03-11 18:27:43 +00:00
{
2021-06-19 12:43:09 +00:00
auto old_token = m_state . current_token ;
m_state . current_token = m_state . lexer . next ( ) ;
LibJS: Add an optimization to avoid needless arguments object creation
This gives FunctionNode a "might need arguments object" boolean flag and
sets it based on the simplest possible heuristic for this: if we
encounter an identifier called "arguments" or "eval" up to the next
(nested) function declaration or expression, we won't need an arguments
object. Otherwise, we *might* need one - the final decision is made in
the FunctionDeclarationInstantiation AO.
Now, this is obviously not perfect. Even if you avoid eval, something
like `foo.arguments` will still trigger a false positive - but it's a
start and already massively cuts down on needlessly allocated objects,
especially in real-world code that is often minified, and so a full
"arguments" identifier will be an actual arguments object more often
than not.
To illustrate the actual impact of this change, here's the number of
allocated arguments objects during a full test-js run:
Before:
- Unmapped arguments objects: 78765
- Mapped arguments objects: 2455
After:
- Unmapped arguments objects: 18
- Mapped arguments objects: 37
This results in a ~5% speedup of test-js on my Linux host machine, and
about 3.5% on i686 Serenity in QEMU (warm runs, average of 5).
The following microbenchmark (calling an empty function 1M times) runs
25% faster on Linux and 45% on Serenity:
function foo() {}
for (var i = 0; i < 1_000_000; ++i)
foo();
test262 reports no changes in either direction, apart from a speedup :^)
2021-10-05 07:44:58 +00:00
// NOTE: This is the bare minimum needed to decide whether we might need an arguments object
// in a function expression or declaration. ("might" because the AST implements some further
// conditions from the spec that rule out the need for allocating one)
if ( old_token . type ( ) = = TokenType : : Identifier & & old_token . value ( ) . is_one_of ( " arguments " sv , " eval " sv ) )
m_state . function_might_need_arguments_object = true ;
2020-03-12 22:02:41 +00:00
return old_token ;
2020-03-11 18:27:43 +00:00
}
2020-04-17 13:05:58 +00:00
// Ends a statement: consumes an explicit semicolon if one is present, otherwise accepts an
// implicit one when the current token has a line terminator in its trivia, is a closing
// curly brace, or is end-of-file. In any other situation a syntax error is recorded via
// expected(); no token is consumed in that case.
void Parser : : consume_or_insert_semicolon ( )
2020-03-11 18:27:43 +00:00
{
2020-04-17 13:05:58 +00:00
// Semicolon was found and will be consumed
if ( match ( TokenType : : Semicolon ) ) {
consume ( ) ;
return ;
}
// Insert semicolon if...
2020-10-02 13:59:28 +00:00
// ...token is preceded by one or more newlines
2021-06-19 12:43:09 +00:00
if ( m_state . current_token . trivia_contains_line_terminator ( ) )
2020-04-17 13:05:58 +00:00
return ;
2020-04-17 13:27:51 +00:00
// ...token is a closing curly brace
if ( match ( TokenType : : CurlyClose ) )
2020-04-17 13:05:58 +00:00
return ;
// ...token is eof
if ( match ( TokenType : : Eof ) )
return ;
// No rule for semicolon insertion applies -> syntax error
expected ( " Semicolon " ) ;
}
2021-07-11 11:04:55 +00:00
// Consumes the current token as an identifier and returns it.
//
// Identifier and EscapedKeyword tokens go through consume(TokenType). The keyword-like
// tokens `let`, `yield`, `await` and `async` are accepted as identifiers too; for the first
// three a syntax error is recorded when the current parser state forbids that use, but the
// token is consumed and returned regardless. Any other token records an "expected
// Identifier" error and is still consumed.
Token Parser : : consume_identifier ( )
{
if ( match ( TokenType : : Identifier ) )
return consume ( TokenType : : Identifier ) ;
2021-08-21 09:27:20 +00:00
if ( match ( TokenType : : EscapedKeyword ) )
return consume ( TokenType : : EscapedKeyword ) ;
2021-07-11 11:04:55 +00:00
// Note that 'let' is not a reserved keyword, but our lexer considers it such
// As it's pretty nice to have that (for syntax highlighting and such), we'll
// special-case it here instead.
if ( match ( TokenType : : Let ) ) {
if ( m_state . strict_mode )
syntax_error ( " 'let' is not allowed as an identifier in strict mode " ) ;
2023-05-27 22:08:52 +00:00
return consume_and_allow_division ( ) ;
2021-07-11 11:04:55 +00:00
}
2021-07-28 23:45:55 +00:00
if ( match ( TokenType : : Yield ) ) {
// NOTE(review): this error is also raised inside a generator function context in
// non-strict code, although the message only mentions strict mode.
if ( m_state . strict_mode | | m_state . in_generator_function_context )
syntax_error ( " Identifier must not be a reserved word in strict mode ('yield') " ) ;
2023-05-27 22:08:52 +00:00
return consume_and_allow_division ( ) ;
2021-07-28 23:45:55 +00:00
}
2021-11-09 18:39:22 +00:00
if ( match ( TokenType : : Await ) ) {
// NOTE(review): besides modules, this also fires where await expressions are valid and
// inside class static init blocks, although the message only mentions modules.
2021-11-26 22:50:32 +00:00
if ( m_program_type = = Program : : Type : : Module | | m_state . await_expression_is_valid | | m_state . in_class_static_init_block )
2021-11-09 18:39:22 +00:00
syntax_error ( " Identifier must not be a reserved word in modules ('await') " ) ;
2023-05-27 22:08:52 +00:00
return consume_and_allow_division ( ) ;
2021-11-09 18:39:22 +00:00
}
if ( match ( TokenType : : Async ) )
2023-05-27 22:08:52 +00:00
return consume_and_allow_division ( ) ;
2021-11-09 18:39:22 +00:00
2021-07-11 11:04:55 +00:00
// Not usable as an identifier: record the error, then consume the token anyway.
expected ( " Identifier " ) ;
2023-05-27 22:08:52 +00:00
return consume_and_allow_division ( ) ;
2021-07-11 11:04:55 +00:00
}
// https://tc39.es/ecma262/#prod-IdentifierReference
// Consumes the current token as an IdentifierReference and returns it.
//
// Identifier tokens go through consume(TokenType). EscapedKeyword, `let`, `yield`, `await`
// and `async` tokens are accepted as well; a syntax error is recorded when `let`/`yield` is
// used in strict mode or `await` is used in a module, but the token is consumed and
// returned regardless. Any other token records an "expected Identifier" error and is still
// consumed.
Token Parser : : consume_identifier_reference ( )
{
if ( match ( TokenType : : Identifier ) )
return consume ( TokenType : : Identifier ) ;
2021-08-21 09:27:20 +00:00
if ( match ( TokenType : : EscapedKeyword ) ) {
// Escaped keywords are checked by value, mirroring the token-type checks further down.
auto name = m_state . current_token . value ( ) ;
2021-09-18 21:02:50 +00:00
if ( m_state . strict_mode & & ( name = = " let " sv | | name = = " yield " sv ) )
2022-12-04 18:02:33 +00:00
syntax_error ( DeprecatedString : : formatted ( " '{}' is not allowed as an identifier in strict mode " , name ) ) ;
2021-09-18 21:02:50 +00:00
if ( m_program_type = = Program : : Type : : Module & & name = = " await " sv )
syntax_error ( " 'await' is not allowed as an identifier in module " ) ;
2021-08-21 09:27:20 +00:00
2023-05-27 22:08:52 +00:00
return consume_and_allow_division ( ) ;
2021-08-21 09:27:20 +00:00
}
2021-07-11 11:04:55 +00:00
// See note in Parser::parse_identifier().
if ( match ( TokenType : : Let ) ) {
if ( m_state . strict_mode )
syntax_error ( " 'let' is not allowed as an identifier in strict mode " ) ;
2023-05-27 22:08:52 +00:00
return consume_and_allow_division ( ) ;
2021-07-11 11:04:55 +00:00
}
if ( match ( TokenType : : Yield ) ) {
if ( m_state . strict_mode )
syntax_error ( " Identifier reference may not be 'yield' in strict mode " ) ;
2023-05-27 22:08:52 +00:00
return consume_and_allow_division ( ) ;
2021-07-11 11:04:55 +00:00
}
if ( match ( TokenType : : Await ) ) {
2021-09-18 21:02:50 +00:00
if ( m_program_type = = Program : : Type : : Module )
syntax_error ( " 'await' is not allowed as an identifier in module " ) ;
2023-05-27 22:08:52 +00:00
return consume_and_allow_division ( ) ;
2021-07-11 11:04:55 +00:00
}
2021-11-09 18:39:22 +00:00
if ( match ( TokenType : : Async ) )
2023-05-27 22:08:52 +00:00
return consume_and_allow_division ( ) ;
2021-11-09 18:39:22 +00:00
2021-07-11 11:04:55 +00:00
// Not usable as an identifier reference: record the error, then consume the token anyway.
expected ( Token : : name ( TokenType : : Identifier ) ) ;
2023-05-27 22:08:52 +00:00
return consume_and_allow_division ( ) ;
2021-07-11 11:04:55 +00:00
}
2020-04-17 13:05:58 +00:00
// Consumes the current token, which is expected to be of `expected_type`, and returns it.
//
// A type mismatch records a syntax error via expected(), but the current token is consumed
// and returned regardless. Identifier tokens are consumed with consume_and_allow_division()
// and additionally rejected (as a syntax error) when they are strict-mode reserved words
// while parsing strict code; every other type goes through the plain consume().
Token Parser : : consume ( TokenType expected_type )
{
2020-04-21 18:21:26 +00:00
if ( ! match ( expected_type ) ) {
2020-04-17 13:05:58 +00:00
expected ( Token : : name ( expected_type ) ) ;
2020-03-11 18:27:43 +00:00
}
2023-05-27 22:08:52 +00:00
auto token = expected_type = = TokenType : : Identifier ? consume_and_allow_division ( ) : consume ( ) ;
2021-06-16 22:57:01 +00:00
if ( expected_type = = TokenType : : Identifier ) {
2021-07-11 23:27:35 +00:00
if ( m_state . strict_mode & & is_strict_reserved_word ( token . value ( ) ) )
2022-12-04 18:02:33 +00:00
syntax_error ( DeprecatedString : : formatted ( " Identifier must not be a reserved word in strict mode ('{}') " , token . value ( ) ) ) ;
2021-06-16 22:57:01 +00:00
}
return token ;
2020-03-11 18:27:43 +00:00
}
2020-10-19 17:01:28 +00:00
// Consumes a NumericLiteral token, records syntax errors for two invalid forms, and returns
// the token:
// - in strict mode, a literal that starts with `0` followed by another ASCII digit
//   (an unprefixed octal number); the error is reported at the literal's own position;
// - a literal directly followed by an identifier name with no trivia in between.
Token Parser : : consume_and_validate_numeric_literal ( )
{
2021-11-10 23:55:02 +00:00
auto is_unprefixed_octal_number = [ ] ( StringView value ) {
2021-06-13 08:47:09 +00:00
return value . length ( ) > 1 & & value [ 0 ] = = ' 0 ' & & is_ascii_digit ( value [ 1 ] ) ;
2020-10-19 17:01:28 +00:00
} ;
2020-11-02 21:03:19 +00:00
// Remember where the literal starts before consuming it; position() tracks the current token.
auto literal_start = position ( ) ;
2020-10-19 17:01:28 +00:00
auto token = consume ( TokenType : : NumericLiteral ) ;
2021-06-19 12:43:09 +00:00
if ( m_state . strict_mode & & is_unprefixed_octal_number ( token . value ( ) ) )
2020-11-02 21:03:19 +00:00
syntax_error ( " Unprefixed octal number not allowed in strict mode " , literal_start ) ;
2021-06-19 12:43:09 +00:00
// The current token is now the one following the literal.
if ( match_identifier_name ( ) & & m_state . current_token . trivia ( ) . is_empty ( ) )
2020-10-22 21:18:31 +00:00
syntax_error ( " Numeric literal must not be immediately followed by identifier " ) ;
2020-10-19 17:01:28 +00:00
return token ;
}
2022-04-01 17:58:27 +00:00
// Records a syntax error for an unexpected current token.
// If the current token carries its own message, that message is reported; otherwise a
// generic "Unexpected token ... Expected ..." message is built from the token's name and `what`.
void Parser : : expected ( char const * what )
2020-04-29 20:37:51 +00:00
{
2023-01-26 13:38:09 +00:00
auto message = m_state . current_token . message ( ) . to_deprecated_string ( ) ;
2020-10-26 20:19:36 +00:00
if ( message . is_empty ( ) )
2022-12-04 18:02:33 +00:00
message = DeprecatedString : : formatted ( " Unexpected token {}. Expected {} " , m_state . current_token . name ( ) , what ) ;
2020-10-26 20:19:36 +00:00
syntax_error ( message ) ;
2020-04-29 20:37:51 +00:00
}
2020-12-28 17:15:22 +00:00
// Returns the source position (line number, column, offset) of the current token.
Position Parser::position() const
{
    auto const& current = m_state.current_token;
    return { current.line_number(), current.line_column(), current.offset() };
}
2022-04-01 17:58:27 +00:00
bool Parser : : try_parse_arrow_function_expression_failed_at_position ( Position const & position ) const
2021-04-11 20:41:51 +00:00
{
auto it = m_token_memoizations . find ( position ) ;
if ( it = = m_token_memoizations . end ( ) )
return false ;
return ( * it ) . value . try_parse_arrow_function_expression_failed ;
}
2022-04-01 17:58:27 +00:00
void Parser : : set_try_parse_arrow_function_expression_failed_at_position ( Position const & position , bool failed )
2021-04-11 20:41:51 +00:00
{
m_token_memoizations . set ( position , { failed } ) ;
}
2022-12-04 18:02:33 +00:00
void Parser : : syntax_error ( DeprecatedString const & message , Optional < Position > position )
2020-11-02 21:03:19 +00:00
{
if ( ! position . has_value ( ) )
position = this - > position ( ) ;
2021-06-19 12:43:09 +00:00
m_state . errors . append ( { message , position } ) ;
2020-03-11 18:27:43 +00:00
}
2020-03-30 13:24:43 +00:00
void Parser : : save_state ( )
{
2021-06-19 12:43:09 +00:00
m_saved_state . append ( m_state ) ;
2020-03-30 13:24:43 +00:00
}
void Parser : : load_state ( )
{
2021-02-23 19:42:32 +00:00
VERIFY ( ! m_saved_state . is_empty ( ) ) ;
2021-06-19 12:43:09 +00:00
m_state = m_saved_state . take_last ( ) ;
2020-03-30 13:24:43 +00:00
}
2020-04-29 20:37:51 +00:00
2020-12-29 13:17:39 +00:00
void Parser : : discard_saved_state ( )
{
m_saved_state . take_last ( ) ;
}
2023-01-09 00:23:00 +00:00
// Records a syntax error when `name` may not be used as a binding/assignment target:
// - always, if it is one of s_reserved_words;
// - in strict mode (or when force_strict is set), if it is `arguments` or `eval`, or a
//   strict-mode reserved word.
// Nothing is returned; at most one error is recorded per call.
void Parser : : check_identifier_name_for_assignment_validity ( DeprecatedFlyString const & name , bool force_strict )
2021-07-11 11:04:55 +00:00
{
2021-07-11 23:27:35 +00:00
// FIXME: this is now called from multiple places maybe the error message should be dynamic?
2021-07-25 20:21:27 +00:00
if ( any_of ( s_reserved_words , [ & ] ( auto & value ) { return name = = value ; } ) ) {
2021-07-11 11:04:55 +00:00
syntax_error ( " Binding pattern target may not be a reserved word " ) ;
2021-07-11 23:27:35 +00:00
} else if ( m_state . strict_mode | | force_strict ) {
2021-07-11 11:04:55 +00:00
if ( name . is_one_of ( " arguments " sv , " eval " sv ) )
syntax_error ( " Binding pattern target may not be called 'arguments' or 'eval' in strict mode " ) ;
2021-07-11 23:27:35 +00:00
else if ( is_strict_reserved_word ( name ) )
2022-12-04 18:02:33 +00:00
syntax_error ( DeprecatedString : : formatted ( " Binding pattern target may not be called '{}' in strict mode " , name ) ) ;
2021-07-11 11:04:55 +00:00
}
}
2021-12-20 14:29:25 +00:00
// Returns true when the current token is spelled `assert` and its trivia contains no line
// terminator. The token type itself is not checked here.
bool Parser : : match_assert_clause ( ) const
{
return ! m_state . current_token . trivia_contains_line_terminator ( ) & & m_state . current_token . original_value ( ) = = " assert " sv ;
}
2023-01-09 00:23:00 +00:00
// Consumes the current StringLiteral token (asserted) and returns its parsed string value.
// For values of at least three bytes, the last three bytes are additionally decoded as
// UTF-8 and a syntax error is recorded if the first decoded code point is a high
// surrogate. The value is returned in either case.
DeprecatedFlyString Parser : : consume_string_value ( )
2022-01-16 22:51:28 +00:00
{
VERIFY ( match ( TokenType : : StringLiteral ) ) ;
auto string_token = consume ( ) ;
2023-01-09 00:23:00 +00:00
DeprecatedFlyString value = parse_string_literal ( string_token ) - > value ( ) ;
2022-01-16 22:51:28 +00:00
// This also checks IsStringWellFormedUnicode which makes sure there is no unpaired surrogate
// Surrogates are at least 3 bytes
if ( value . length ( ) < 3 )
return value ;
// Only the trailing three bytes are inspected.
Utf8View view { value . view ( ) . substring_view ( value . length ( ) - 3 ) } ;
VERIFY ( view . length ( ) < = 3 ) ;
auto codepoint = * view . begin ( ) ;
if ( Utf16View : : is_high_surrogate ( codepoint ) ) {
syntax_error ( " StringValue ending with unpaired high surrogate " ) ;
// NOTE(review): presumably a surrogate occupies all three bytes, leaving exactly one
// code point in the view - confirm against Utf8View::length() semantics.
VERIFY ( view . length ( ) = = 1 ) ;
}
return value ;
}
2021-12-20 14:29:25 +00:00
// AssertClause, https://tc39.es/proposal-import-assertions/#prod-AssertClause
// Parses a module specifier string followed by an optional `assert { key: "value", ... }`
// clause and returns the resulting ModuleRequest.
//
// If the current token is not a string literal, a syntax error is recorded and a
// placeholder request is returned without consuming anything. Within the assert clause,
// keys may be string literals or identifier names, values must be string literals, and a
// key that repeats an earlier one records a syntax error (the assertion is still added).
2022-01-16 22:51:28 +00:00
ModuleRequest Parser : : parse_module_request ( )
2021-12-20 14:29:25 +00:00
{
2022-01-16 22:51:28 +00:00
// Does not include the 'from' since that is not always required.
if ( ! match ( TokenType : : StringLiteral ) ) {
expected ( " ModuleSpecifier (string) " ) ;
return ModuleRequest { " !!invalid!! " } ;
}
ModuleRequest request { consume_string_value ( ) } ;
if ( ! match_assert_clause ( ) )
return request ;
2021-12-20 14:29:25 +00:00
VERIFY ( m_state . current_token . original_value ( ) = = " assert " sv ) ;
// Consume the `assert` word itself, then the opening brace of the clause.
consume ( TokenType : : Identifier ) ;
consume ( TokenType : : CurlyOpen ) ;
while ( ! done ( ) & & ! match ( TokenType : : CurlyClose ) ) {
2022-12-04 18:02:33 +00:00
DeprecatedString key ;
2021-12-20 14:29:25 +00:00
if ( match ( TokenType : : StringLiteral ) ) {
2022-12-06 01:12:49 +00:00
key = parse_string_literal ( m_state . current_token ) - > value ( ) . to_deprecated_string ( ) ;
2021-12-20 14:29:25 +00:00
consume ( ) ;
} else if ( match_identifier_name ( ) ) {
key = consume ( ) . value ( ) ;
} else {
// Invalid key: report it and skip the token; `key` stays empty.
expected ( " IdentifierName or StringValue as AssertionKey " ) ;
consume ( ) ;
}
consume ( TokenType : : Colon ) ;
if ( match ( TokenType : : StringLiteral ) ) {
for ( auto & entries : request . assertions ) {
if ( entries . key = = key )
2022-12-04 18:02:33 +00:00
syntax_error ( DeprecatedString : : formatted ( " Duplicate assertion clauses with name: {} " , key ) ) ;
2021-12-20 14:29:25 +00:00
}
2022-12-06 01:12:49 +00:00
request . add_assertion ( move ( key ) , parse_string_literal ( m_state . current_token ) - > value ( ) . to_deprecated_string ( ) ) ;
2021-12-20 14:29:25 +00:00
}
// Consumes the value token; records an error here if it was not a string literal.
consume ( TokenType : : StringLiteral ) ;
// Entries are comma-separated; anything else ends the list.
if ( match ( TokenType : : Comma ) )
consume ( TokenType : : Comma ) ;
else
break ;
}
consume ( TokenType : : CurlyClose ) ;
2022-01-16 22:51:28 +00:00
return request ;
2021-12-20 14:29:25 +00:00
}
2023-01-09 00:23:00 +00:00
// File-local string shared by the module parsing code; parse_import_statement() passes it
// as the first field of the ImportEntry it creates for an ImportedDefaultBinding.
static DeprecatedFlyString default_string_value = " default " ;
2022-01-16 22:51:28 +00:00
2023-02-19 21:07:52 +00:00
NonnullRefPtr < ImportStatement const > Parser : : parse_import_statement ( Program & program )
2021-08-14 15:42:30 +00:00
{
2021-12-20 14:29:25 +00:00
// We use the extended syntax which adds:
// ImportDeclaration:
// import ImportClause FromClause [no LineTerminator here] AssertClause;
// import ModuleSpecifier [no LineTerminator here] AssertClause;
// From: https://tc39.es/proposal-import-assertions/#prod-ImportDeclaration
2021-08-14 15:42:30 +00:00
auto rule_start = push_start ( ) ;
if ( program . type ( ) ! = Program : : Type : : Module )
syntax_error ( " Cannot use import statement outside a module " ) ;
consume ( TokenType : : Import ) ;
if ( match ( TokenType : : StringLiteral ) ) {
2022-01-16 22:51:28 +00:00
// import ModuleSpecifier ;
auto module_request = parse_module_request ( ) ;
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < ImportStatement > ( { m_source_code , rule_start . position ( ) , position ( ) } , move ( module_request ) ) ;
2021-08-14 15:42:30 +00:00
}
auto match_imported_binding = [ & ] {
return match_identifier ( ) | | match ( TokenType : : Yield ) | | match ( TokenType : : Await ) ;
} ;
auto match_as = [ & ] {
2021-08-18 20:34:25 +00:00
return match ( TokenType : : Identifier ) & & m_state . current_token . original_value ( ) = = " as " sv ;
2021-08-14 15:42:30 +00:00
} ;
bool continue_parsing = true ;
struct ImportWithLocation {
2022-11-23 11:16:51 +00:00
ImportEntry entry ;
2021-08-14 15:42:30 +00:00
Position position ;
} ;
Vector < ImportWithLocation > entries_with_location ;
2022-01-16 22:51:28 +00:00
// import ImportClause FromClause ;
// ImportClause :
// ImportedDefaultBinding
// NameSpaceImport
// NamedImports
// ImportedDefaultBinding , NameSpaceImport
// ImportedDefaultBinding , NamedImports
2021-08-14 15:42:30 +00:00
if ( match_imported_binding ( ) ) {
2022-01-16 22:51:28 +00:00
// ImportedDefaultBinding : ImportedBinding
2021-08-14 15:42:30 +00:00
auto id_position = position ( ) ;
auto bound_name = consume ( ) . value ( ) ;
2022-01-16 22:51:28 +00:00
entries_with_location . append ( { { default_string_value , bound_name } , id_position } ) ;
2021-08-14 15:42:30 +00:00
if ( match ( TokenType : : Comma ) ) {
consume ( TokenType : : Comma ) ;
} else {
continue_parsing = false ;
}
}
if ( ! continue_parsing ) {
// skip the rest
} else if ( match ( TokenType : : Asterisk ) ) {
2022-01-16 22:51:28 +00:00
// NameSpaceImport : * as ImportedBinding
2021-08-14 15:42:30 +00:00
consume ( TokenType : : Asterisk ) ;
if ( ! match_as ( ) )
2022-12-04 18:02:33 +00:00
syntax_error ( DeprecatedString : : formatted ( " Unexpected token: {} " , m_state . current_token . name ( ) ) ) ;
2021-08-14 15:42:30 +00:00
consume ( TokenType : : Identifier ) ;
if ( match_imported_binding ( ) ) {
auto namespace_position = position ( ) ;
auto namespace_name = consume ( ) . value ( ) ;
2022-11-23 11:16:51 +00:00
entries_with_location . append ( { ImportEntry ( { } , namespace_name , true ) , namespace_position } ) ;
2021-08-14 15:42:30 +00:00
} else {
2022-12-04 18:02:33 +00:00
syntax_error ( DeprecatedString : : formatted ( " Unexpected token: {} " , m_state . current_token . name ( ) ) ) ;
2021-08-14 15:42:30 +00:00
}
} else if ( match ( TokenType : : CurlyOpen ) ) {
2022-01-16 22:51:28 +00:00
// NamedImports :
// { ImportSpecifier ,_opt } (repeated any amount of times)
2021-08-14 15:42:30 +00:00
2022-01-16 22:51:28 +00:00
consume ( TokenType : : CurlyOpen ) ;
2021-08-14 15:42:30 +00:00
while ( ! done ( ) & & ! match ( TokenType : : CurlyClose ) ) {
if ( match_identifier_name ( ) ) {
2022-01-16 22:51:28 +00:00
// ImportSpecifier : ImportedBinding
2021-08-14 15:42:30 +00:00
auto require_as = ! match_imported_binding ( ) ;
auto name_position = position ( ) ;
2023-01-09 00:23:00 +00:00
auto name = consume ( ) . DeprecatedFlyString_value ( ) ;
2021-08-14 15:42:30 +00:00
if ( match_as ( ) ) {
consume ( TokenType : : Identifier ) ;
auto alias_position = position ( ) ;
2023-01-09 00:23:00 +00:00
auto alias = consume_identifier ( ) . DeprecatedFlyString_value ( ) ;
2021-08-14 15:42:30 +00:00
check_identifier_name_for_assignment_validity ( alias ) ;
entries_with_location . append ( { { name , alias } , alias_position } ) ;
} else if ( require_as ) {
2022-12-04 18:02:33 +00:00
syntax_error ( DeprecatedString : : formatted ( " Unexpected reserved word '{}' " , name ) ) ;
2021-08-14 15:42:30 +00:00
} else {
check_identifier_name_for_assignment_validity ( name ) ;
entries_with_location . append ( { { name , name } , name_position } ) ;
}
2022-01-16 22:51:28 +00:00
} else if ( match ( TokenType : : StringLiteral ) ) {
// ImportSpecifier : ModuleExportName as ImportedBinding
auto name = consume_string_value ( ) ;
if ( ! match_as ( ) )
expected ( " as " ) ;
consume ( TokenType : : Identifier ) ;
auto alias_position = position ( ) ;
2023-01-09 00:23:00 +00:00
auto alias = consume_identifier ( ) . DeprecatedFlyString_value ( ) ;
2022-01-16 22:51:28 +00:00
check_identifier_name_for_assignment_validity ( alias ) ;
entries_with_location . append ( { { move ( name ) , alias } , alias_position } ) ;
2021-08-14 15:42:30 +00:00
} else {
expected ( " identifier " ) ;
break ;
}
if ( ! match ( TokenType : : Comma ) )
break ;
consume ( TokenType : : Comma ) ;
}
consume ( TokenType : : CurlyClose ) ;
} else {
expected ( " import clauses " ) ;
}
2021-09-18 21:02:50 +00:00
auto from_statement = consume ( TokenType : : Identifier ) . original_value ( ) ;
2021-08-14 15:42:30 +00:00
if ( from_statement ! = " from " sv )
2022-12-04 18:02:33 +00:00
syntax_error ( DeprecatedString : : formatted ( " Expected 'from' got {} " , from_statement ) ) ;
2021-08-14 15:42:30 +00:00
2022-01-16 22:51:28 +00:00
auto module_request = parse_module_request ( ) ;
2021-08-14 15:42:30 +00:00
2022-11-23 11:16:51 +00:00
Vector < ImportEntry > entries ;
2021-08-14 15:42:30 +00:00
entries . ensure_capacity ( entries_with_location . size ( ) ) ;
for ( auto & entry : entries_with_location ) {
for ( auto & import_statement : program . imports ( ) ) {
2023-03-06 13:17:01 +00:00
if ( import_statement - > has_bound_name ( entry . entry . local_name ) )
2022-12-04 18:02:33 +00:00
syntax_error ( DeprecatedString : : formatted ( " Identifier '{}' already declared " , entry . entry . local_name ) , entry . position ) ;
2021-08-14 15:42:30 +00:00
}
for ( auto & new_entry : entries ) {
if ( new_entry . local_name = = entry . entry . local_name )
2022-12-04 18:02:33 +00:00
syntax_error ( DeprecatedString : : formatted ( " Identifier '{}' already declared " , entry . entry . local_name ) , entry . position ) ;
2021-08-14 15:42:30 +00:00
}
entries . append ( move ( entry . entry ) ) ;
}
LibJS: Reduce AST memory usage by shrink-wrapping source range info
Before this change, each AST node had a 64-byte SourceRange member.
This SourceRange had the following layout:
filename: StringView (16 bytes)
start: Position (24 bytes)
end: Position (24 bytes)
The Position structs have { line, column, offset }, all members size_t.
To reduce memory consumption, AST nodes now only store the following:
source_code: NonnullRefPtr<SourceCode> (8 bytes)
start_offset: u32 (4 bytes)
end_offset: u32 (4 bytes)
SourceCode is a new ref-counted data structure that keeps the filename
and original parsed source code in a single location, and all AST nodes
have a pointer to it.
The start_offset and end_offset can be turned into (line, column) when
necessary by calling SourceCode::range_from_offsets(). This will walk
the source code string and compute line/column numbers on the fly, so
it's not necessarily fast, but it should be rare since this information
is primarily used for diagnostics and exception stack traces.
With this, ASTNode shrinks from 80 bytes to 32 bytes. This gives us a
~23% reduction in memory usage when loading twitter.com/awesomekling
(330 MiB before, 253 MiB after!) :^)
2022-11-21 16:37:38 +00:00
return create_ast_node < ImportStatement > ( { m_source_code , rule_start . position ( ) , position ( ) } , move ( module_request ) , move ( entries ) ) ;
2021-08-14 15:42:30 +00:00
}
2023-02-19 21:07:52 +00:00
// Parses an `export` statement and returns the resulting ExportStatement node.
//
// A syntax error is recorded when `program` is not a module. Each export entry
// collected while parsing is checked against the exports already present on
// `program` and against the earlier entries of this same statement; a clash is
// recorded as a syntax error at the position remembered for that entry.
NonnullRefPtr<ExportStatement const> Parser::parse_export_statement(Program& program)
{
    // We use the extended syntax which adds:
    //  ExportDeclaration:
    //      export ExportFromClause FromClause [no LineTerminator here] AssertClause ;
    // From:  https://tc39.es/proposal-import-assertions/#prod-ExportDeclaration

    auto rule_start = push_start();
    if (program.type() != Program::Type::Module)
        syntax_error("Cannot use export statement outside a module");

    // `as` and `from` are matched as Identifier tokens with a specific spelling;
    // `default` is matched as a Default token whose original text is exactly "default".
    auto at_as_keyword = [&] {
        return match(TokenType::Identifier) && m_state.current_token.original_value() == "as"sv;
    };
    auto at_from_keyword = [&] {
        return match(TokenType::Identifier) && m_state.current_token.original_value() == "from"sv;
    };
    auto at_default_keyword = [&] {
        return match(TokenType::Default) && m_state.current_token.original_value() == "default"sv;
    };

    consume(TokenType::Export);

    // An export entry together with the position used when reporting it as a duplicate.
    struct EntryAndLocation {
        ExportEntry entry;
        Position position;
    };

    Vector<EntryAndLocation> entries_with_location;

    RefPtr<ASTNode const> expression = {};
    bool is_default = false;
    ModuleRequest from_specifier;

    if (at_default_keyword()) {
        is_default = true;
        auto default_position = position();
        consume(TokenType::Default);

        DeprecatedFlyString local_name;
        auto lookahead_token = next_token();

        enum class MatchesFunctionDeclaration {
            Yes,
            No,
            WithoutName,
        };

        // Note: For some reason the spec here has declaration which can have no name
        //       and the rest of the parser is just not setup for that. With these
        //       hacks below we get through most things but we should probably figure
        //       out a better solution. I have attempted to explain why/what these "hacks" do below.
        //       The summary is treat named declarations just as declarations and hack around unnamed
        //       declarations with expression see also SourceTextModule::initialize_environment.
        //       As far as I'm aware the only problem (which is a tricky one) is:
        //          `export default function() {}()`
        //       Since we parse this as an expression you are immediately allowed to call it
        //       which is incorrect and this should give a SyntaxError.

        // The token after `function` / `function *` is either the name or, for an
        // unnamed function, the opening parenthesis of the parameter list.
        auto classify_by_name = [&](Token const& token) {
            return token.type() == TokenType::ParenOpen
                ? MatchesFunctionDeclaration::WithoutName
                : MatchesFunctionDeclaration::Yes;
        };

        auto match_function_declaration = [&] {
            // Hack part 1.
            // Match a function declaration with a name, since we have async and generator
            // and asyncgenerator variants this is quite complicated.
            auto current_type = m_state.current_token.type();
            Lexer lookahead_lexer = m_state.lexer;
            lookahead_lexer.next();

            if (current_type == TokenType::Function) {
                if (lookahead_token.type() == TokenType::Asterisk)
                    return classify_by_name(lookahead_lexer.next()); // function * [name]
                return classify_by_name(lookahead_token);            // function [name]
            }

            if (current_type != TokenType::Async)
                return MatchesFunctionDeclaration::No;

            if (lookahead_token.type() != TokenType::Function || lookahead_token.trivia_contains_line_terminator())
                return MatchesFunctionDeclaration::No;

            auto lookahead_two_token = lookahead_lexer.next();
            if (lookahead_two_token.type() == TokenType::Asterisk)
                return classify_by_name(lookahead_lexer.next()); // async function * [name]
            return classify_by_name(lookahead_two_token);        // async function [name]
        };

        auto const matches_function = match_function_declaration();
        if (matches_function != MatchesFunctionDeclaration::No) {
            auto const is_unnamed = matches_function == MatchesFunctionDeclaration::WithoutName;
            auto function_declaration = parse_function_node<FunctionDeclaration>(
                (is_unnamed ? FunctionNodeParseOptions::HasDefaultExportName : 0)
                | FunctionNodeParseOptions::CheckForFunctionAndName);
            m_state.current_scope_pusher->add_declaration(function_declaration);
            if (is_unnamed)
                local_name = ExportStatement::local_name_for_default;
            else
                local_name = function_declaration->name();
            expression = move(function_declaration);
        } else if (match(TokenType::Class) && lookahead_token.type() != TokenType::CurlyOpen && lookahead_token.type() != TokenType::Extends) {
            // Hack part 2.
            // Attempt to detect classes with names only as those are declarations,
            // this actually seems to cover all cases already.
            auto class_expression = parse_class_declaration();
            m_state.current_scope_pusher->add_declaration(class_expression);
            local_name = class_expression->name();
            expression = move(class_expression);
        } else if (match_expression()) {
            // Hack part 3.
            // Even though the unnamed declarations look like expression we should
            // not treat them as such and thus not consume a semicolon after them.
            bool special_case_declaration_without_name = match(TokenType::Class)
                || match(TokenType::Function)
                || (match(TokenType::Async) && lookahead_token.type() == TokenType::Function && !lookahead_token.trivia_contains_line_terminator());

            expression = parse_expression(2);

            if (!special_case_declaration_without_name)
                consume_or_insert_semicolon();

            if (is<ClassExpression>(*expression)) {
                auto const& class_expression = static_cast<ClassExpression const&>(*expression);
                if (class_expression.has_name())
                    local_name = class_expression.name();
            }
        } else {
            expected("Declaration or assignment expression");
            local_name = "!!invalid!!";
        }

        // Nothing above supplied a name, so fall back to the default-export local name.
        if (local_name.is_null())
            local_name = ExportStatement::local_name_for_default;

        entries_with_location.append({ ExportEntry::named_export(default_string_value, move(local_name)), default_position });
    } else {
        enum class FromSpecifier {
            NotAllowed,
            Optional,
            Required
        };
        auto check_for_from = FromSpecifier::NotAllowed;

        auto parse_module_export_name = [&](bool lhs) -> DeprecatedFlyString {
            // https://tc39.es/ecma262/#prod-ModuleExportName
            //  ModuleExportName :
            //      IdentifierName
            //      StringLiteral
            if (match_identifier_name())
                return consume().value();

            if (match(TokenType::StringLiteral)) {
                // It is a Syntax Error if ReferencedBindings of NamedExports contains any StringLiterals.
                // Only for export { "a" as "b" }; // <-- no from
                if (lhs)
                    check_for_from = FromSpecifier::Required;
                return consume_string_value();
            }

            expected("ExportSpecifier (string or identifier)");
            return {};
        };

        // Adds one named export per name bound by `declaration`. Plain identifiers are
        // located at their own source range start; names coming out of a binding
        // pattern are all located at `pattern_position`.
        auto append_declared_names = [&](VariableDeclaration const& declaration, Position const& pattern_position) {
            for (auto& declarator : declaration.declarations()) {
                declarator->target().visit(
                    [&](NonnullRefPtr<Identifier const> const& identifier) {
                        entries_with_location.append({ ExportEntry::named_export(identifier->string(), identifier->string()), identifier->source_range().start });
                    },
                    [&](NonnullRefPtr<BindingPattern const> const& binding) {
                        // NOTE: Nothing in the callback throws an exception.
                        MUST(binding->for_each_bound_identifier([&](auto& identifier) {
                            auto const& name = identifier.string();
                            entries_with_location.append({ ExportEntry::named_export(name, name), pattern_position });
                        }));
                    });
            }
        };

        if (match(TokenType::Asterisk)) {
            auto asterisk_position = position();
            consume(TokenType::Asterisk);

            if (at_as_keyword()) {
                // * as ModuleExportName
                consume(TokenType::Identifier);
                auto namespace_position = position();
                auto exported_name = parse_module_export_name(false);
                entries_with_location.append({ ExportEntry::all_module_request(exported_name), namespace_position });
            } else {
                entries_with_location.append({ ExportEntry::all_but_default_entry(), asterisk_position });
            }
            check_for_from = FromSpecifier::Required;
        } else if (match_declaration()) {
            auto decl_position = position();
            auto declaration = parse_declaration();
            m_state.current_scope_pusher->add_declaration(declaration);

            if (is<FunctionDeclaration>(*declaration)) {
                auto& func = static_cast<FunctionDeclaration const&>(*declaration);
                entries_with_location.append({ ExportEntry::named_export(func.name(), func.name()), func.source_range().start });
            } else if (is<ClassDeclaration>(*declaration)) {
                auto& class_declaration = static_cast<ClassDeclaration const&>(*declaration);
                entries_with_location.append({ ExportEntry::named_export(class_declaration.name(), class_declaration.name()), class_declaration.source_range().start });
            } else {
                VERIFY(is<VariableDeclaration>(*declaration));
                auto& variables = static_cast<VariableDeclaration const&>(*declaration);
                VERIFY(variables.is_lexical_declaration());
                append_declared_names(variables, decl_position);
            }
            expression = declaration;
        } else if (match(TokenType::Var)) {
            auto variable_position = position();
            auto variable_declaration = parse_variable_declaration();
            m_state.current_scope_pusher->add_declaration(variable_declaration);
            append_declared_names(*variable_declaration, variable_position);
            expression = variable_declaration;
        } else if (match(TokenType::CurlyOpen)) {
            consume(TokenType::CurlyOpen);
            check_for_from = FromSpecifier::Optional;

            // FIXME: Even when empty should add module to requiredModules!
            while (!done() && !match(TokenType::CurlyClose)) {
                auto identifier_position = position();
                auto identifier = parse_module_export_name(true);

                if (at_as_keyword()) {
                    consume(TokenType::Identifier);
                    auto export_name = parse_module_export_name(false);
                    entries_with_location.append({ ExportEntry::named_export(move(export_name), move(identifier)), identifier_position });
                } else {
                    entries_with_location.append({ ExportEntry::named_export(identifier, identifier), identifier_position });
                }

                if (!match(TokenType::Comma))
                    break;
                consume(TokenType::Comma);
            }

            if (entries_with_location.is_empty()) {
                // export {} from "module"; Since this will never be a
                // duplicate we can give a slightly wrong location.
                entries_with_location.append({ ExportEntry::empty_named_export(), position() });
            }

            consume(TokenType::CurlyClose);
        } else {
            syntax_error("Unexpected token 'export'", rule_start.position());
        }

        if (check_for_from != FromSpecifier::NotAllowed && at_from_keyword()) {
            consume(TokenType::Identifier);
            from_specifier = parse_module_request();
        } else if (check_for_from == FromSpecifier::Required) {
            expected("from");
        }

        if (check_for_from != FromSpecifier::NotAllowed)
            consume_or_insert_semicolon();
    }

    // Report duplicates, then strip the positions off the collected entries.
    Vector<ExportEntry> entries;
    entries.ensure_capacity(entries_with_location.size());

    for (auto& located : entries_with_location) {
        auto const& export_name = located.entry.export_name;

        for (auto& export_statement : program.exports()) {
            if (export_statement->has_export(export_name))
                syntax_error(DeprecatedString::formatted("Duplicate export with name: '{}'", export_name), located.position);
        }

        for (auto& accepted : entries) {
            if (accepted.kind != ExportEntry::Kind::EmptyNamedExport && accepted.export_name == export_name)
                syntax_error(DeprecatedString::formatted("Duplicate export with name: '{}'", export_name), located.position);
        }

        entries.append(move(located.entry));
    }

    return create_ast_node<ExportStatement>({ m_source_code, rule_start.position(), position() }, move(expression), move(entries), is_default, move(from_specifier));
}
2022-02-16 06:34:59 +00:00
// Constructs the set and marks every token type listed in `forbidden` as forbidden.
Parser::ForbiddenTokens::ForbiddenTokens(std::initializer_list<TokenType> const& forbidden)
{
    this->forbid_tokens(forbidden);
}
void Parser : : ForbiddenTokens : : forbid_tokens ( std : : initializer_list < TokenType > const & forbidden )
{
for ( auto token : forbidden ) {
switch ( token ) {
case TokenType : : In :
m_forbid_in_token = true ;
break ;
case TokenType : : DoubleAmpersand :
case TokenType : : DoublePipe :
m_forbid_logical_tokens = true ;
break ;
case TokenType : : DoubleQuestionMark :
m_forbid_coalesce_token = true ;
break ;
case TokenType : : QuestionMarkPeriod :
m_forbid_question_mark_period = true ;
break ;
case TokenType : : ParenOpen :
m_forbid_paren_open = true ;
break ;
case TokenType : : Equals :
m_forbid_equals = true ;
break ;
default :
VERIFY_NOT_REACHED ( ) ;
}
}
}
bool Parser : : ForbiddenTokens : : allows ( TokenType token ) const
{
switch ( token ) {
case TokenType : : In :
return ! m_forbid_in_token ;
case TokenType : : DoubleAmpersand :
case TokenType : : DoublePipe :
return ! m_forbid_logical_tokens ;
case TokenType : : DoubleQuestionMark :
return ! m_forbid_coalesce_token ;
case TokenType : : QuestionMarkPeriod :
return ! m_forbid_question_mark_period ;
case TokenType : : ParenOpen :
return ! m_forbid_paren_open ;
case TokenType : : Equals :
return ! m_forbid_equals ;
default :
return true ;
}
}
// Returns a copy of this set in which each flag is also set if it is set in `other`.
// Neither operand is modified.
Parser::ForbiddenTokens Parser::ForbiddenTokens::merge(ForbiddenTokens other) const
{
    auto combined = *this;
    combined.m_forbid_in_token = m_forbid_in_token || other.m_forbid_in_token;
    combined.m_forbid_logical_tokens = m_forbid_logical_tokens || other.m_forbid_logical_tokens;
    combined.m_forbid_coalesce_token = m_forbid_coalesce_token || other.m_forbid_coalesce_token;
    combined.m_forbid_paren_open = m_forbid_paren_open || other.m_forbid_paren_open;
    combined.m_forbid_question_mark_period = m_forbid_question_mark_period || other.m_forbid_question_mark_period;
    combined.m_forbid_equals = m_forbid_equals || other.m_forbid_equals;
    return combined;
}
// Returns a copy of this set with the token types in `forbidden` additionally forbidden.
// The receiver itself is left unchanged.
Parser::ForbiddenTokens Parser::ForbiddenTokens::forbid(std::initializer_list<TokenType> const& forbidden) const
{
    auto extended = *this;
    extended.forbid_tokens(forbidden);
    return extended;
}
2022-09-01 22:46:37 +00:00
// Explicit instantiations of Parser::parse_function_node for the two node types
// named in the return types below (FunctionExpression and FunctionDeclaration).
template NonnullRefPtr < FunctionExpression > Parser : : parse_function_node ( u16 , Optional < Position > const & ) ;
template NonnullRefPtr < FunctionDeclaration > Parser : : parse_function_node ( u16 , Optional < Position > const & ) ;
2023-07-04 22:14:41 +00:00
// Creates an Identifier node for `string` covering `range` and, when a scope pusher
// is currently active, registers the new node with it. Returns the created node.
NonnullRefPtr<Identifier const> Parser::create_identifier_and_register_in_current_scope(SourceRange range, DeprecatedFlyString string)
{
    // Both parameters are by-value sinks that are not used again, so hand them
    // over instead of copying them a second time.
    auto id = create_ast_node<Identifier const>(move(range), move(string));

    // register_identifier() takes a mutable reference, while the node is only
    // held as const here; hence the const_cast.
    if (m_state.current_scope_pusher)
        m_state.current_scope_pusher->register_identifier(const_cast<Identifier&>(*id));

    return id;
}
2023-07-07 21:14:03 +00:00
// Parses `body_string` as a function body using a fresh Parser and returns that
// parser; the parsed body node itself is not handed back to the caller.
//
// `parse_options` is inspected for the IsAsyncFunction and IsGeneratorFunction bits.
// `parameters`, `kind` and `contains_direct_call_to_eval` are forwarded unchanged to
// parse_function_body().
Parser Parser::parse_function_body_from_string(DeprecatedString const& body_string, u16 parse_options, Vector<FunctionParameter> const& parameters, FunctionKind kind, bool& contains_direct_call_to_eval)
{
    // Declared outside the inner block, so it is destroyed only after `function_scope`.
    // NOTE(review): presumably the scope pusher's destructor still touches nodes of the
    // parsed body, which would make this ordering load-bearing - confirm before removing.
    RefPtr<FunctionBody const> function_body;

    auto body_parser = Parser { Lexer { body_string } };
    {
        auto const is_async = (parse_options & FunctionNodeParseOptions::IsAsyncFunction) != 0;
        auto const is_generator = (parse_options & FunctionNodeParseOptions::IsGeneratorFunction) != 0;

        // Set up some parser state to accept things like return await, and yield in the plain function body.
        body_parser.m_state.in_function_context = true;
        auto function_scope = ScopePusher::function_scope(body_parser);
        if (is_async)
            body_parser.m_state.await_expression_is_valid = true;
        if (is_generator)
            body_parser.m_state.in_generator_function_context = true;

        function_body = body_parser.parse_function_body(parameters, kind, contains_direct_call_to_eval);
    }

    return body_parser;
}
2020-03-11 18:27:43 +00:00
}