mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-25 09:00:22 +00:00
beeb58bd93
This starts by switching all AST members to Strings, and dealing with the fallout.
328 lines
10 KiB
C++
328 lines
10 KiB
C++
/*
|
|
* Copyright (c) 2020, the SerenityOS developers.
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include "AST.h"
|
|
#include <AK/Function.h>
|
|
#include <AK/RefPtr.h>
|
|
#include <AK/String.h>
|
|
#include <AK/StringBuilder.h>
|
|
#include <AK/Vector.h>
|
|
|
|
namespace Shell {
|
|
|
|
class Parser {
|
|
public:
|
|
Parser(StringView input, bool interactive = false)
|
|
: m_input(move(input))
|
|
, m_in_interactive_mode(interactive)
|
|
{
|
|
}
|
|
|
|
RefPtr<AST::Node> parse();
|
|
/// Parse the given string *as* an expression
|
|
/// that is to forcefully enclose it in double-quotes.
|
|
RefPtr<AST::Node> parse_as_single_expression();
|
|
NonnullRefPtrVector<AST::Node> parse_as_multiple_expressions();
|
|
|
|
struct SavedOffset {
|
|
size_t offset;
|
|
AST::Position::Line line;
|
|
};
|
|
SavedOffset save_offset() const;
|
|
|
|
private:
|
|
enum class ShouldReadMoreSequences {
|
|
Yes,
|
|
No,
|
|
};
|
|
|
|
enum class StringEndCondition {
|
|
DoubleQuote,
|
|
Heredoc,
|
|
};
|
|
|
|
struct SequenceParseResult {
|
|
NonnullRefPtrVector<AST::Node> entries;
|
|
Vector<AST::Position, 1> separator_positions;
|
|
ShouldReadMoreSequences decision;
|
|
};
|
|
|
|
struct HeredocInitiationRecord {
|
|
String end;
|
|
RefPtr<AST::Heredoc> node;
|
|
bool interpolate { false };
|
|
bool deindent { false };
|
|
};
|
|
|
|
constexpr static size_t max_allowed_nested_rule_depth = 2048;
|
|
RefPtr<AST::Node> parse_toplevel();
|
|
SequenceParseResult parse_sequence();
|
|
RefPtr<AST::Node> parse_function_decl();
|
|
RefPtr<AST::Node> parse_and_logical_sequence();
|
|
RefPtr<AST::Node> parse_or_logical_sequence();
|
|
RefPtr<AST::Node> parse_variable_decls();
|
|
RefPtr<AST::Node> parse_pipe_sequence();
|
|
RefPtr<AST::Node> parse_command();
|
|
RefPtr<AST::Node> parse_control_structure();
|
|
RefPtr<AST::Node> parse_continuation_control();
|
|
RefPtr<AST::Node> parse_for_loop();
|
|
RefPtr<AST::Node> parse_loop_loop();
|
|
RefPtr<AST::Node> parse_if_expr();
|
|
RefPtr<AST::Node> parse_subshell();
|
|
RefPtr<AST::Node> parse_match_expr();
|
|
AST::MatchEntry parse_match_entry();
|
|
RefPtr<AST::Node> parse_match_pattern();
|
|
Optional<Regex<ECMA262>> parse_regex_pattern();
|
|
RefPtr<AST::Node> parse_redirection();
|
|
RefPtr<AST::Node> parse_list_expression();
|
|
RefPtr<AST::Node> parse_expression();
|
|
RefPtr<AST::Node> parse_string_composite();
|
|
RefPtr<AST::Node> parse_string();
|
|
RefPtr<AST::Node> parse_string_inner(StringEndCondition);
|
|
RefPtr<AST::Node> parse_variable();
|
|
RefPtr<AST::Node> parse_variable_ref();
|
|
RefPtr<AST::Slice> parse_slice();
|
|
RefPtr<AST::Node> parse_evaluate();
|
|
RefPtr<AST::Node> parse_history_designator();
|
|
RefPtr<AST::Node> parse_comment();
|
|
RefPtr<AST::Node> parse_bareword();
|
|
RefPtr<AST::Node> parse_glob();
|
|
RefPtr<AST::Node> parse_brace_expansion();
|
|
RefPtr<AST::Node> parse_brace_expansion_spec();
|
|
RefPtr<AST::Node> parse_immediate_expression();
|
|
RefPtr<AST::Node> parse_heredoc_initiation_record();
|
|
bool parse_heredoc_entries();
|
|
|
|
template<typename A, typename... Args>
|
|
NonnullRefPtr<A> create(Args&&... args);
|
|
|
|
void set_end_condition(OwnPtr<Function<bool()>> condition) { m_end_condition = move(condition); }
|
|
bool at_end() const
|
|
{
|
|
if (m_end_condition && (*m_end_condition)())
|
|
return true;
|
|
return m_input.length() <= m_offset;
|
|
}
|
|
char peek();
|
|
char consume();
|
|
bool expect(char);
|
|
bool expect(StringView);
|
|
bool next_is(StringView);
|
|
|
|
void restore_to(size_t offset, AST::Position::Line line)
|
|
{
|
|
m_offset = offset;
|
|
m_line = move(line);
|
|
}
|
|
|
|
AST::Position::Line line() const { return m_line; }
|
|
|
|
StringView consume_while(Function<bool(char)>);
|
|
|
|
struct Offset {
|
|
size_t offset;
|
|
AST::Position::Line line;
|
|
};
|
|
struct ScopedOffset {
|
|
ScopedOffset(Vector<size_t>& offsets, Vector<AST::Position::Line>& lines, size_t offset, size_t lineno, size_t linecol)
|
|
: offsets(offsets)
|
|
, lines(lines)
|
|
, offset(offset)
|
|
, line({ lineno, linecol })
|
|
{
|
|
offsets.append(offset);
|
|
lines.append(line);
|
|
}
|
|
~ScopedOffset()
|
|
{
|
|
auto last = offsets.take_last();
|
|
VERIFY(last == offset);
|
|
|
|
auto last_line = lines.take_last();
|
|
VERIFY(last_line == line);
|
|
}
|
|
|
|
Vector<size_t>& offsets;
|
|
Vector<AST::Position::Line>& lines;
|
|
size_t offset;
|
|
AST::Position::Line line;
|
|
};
|
|
|
|
void restore_to(ScopedOffset const& offset) { restore_to(offset.offset, offset.line); }
|
|
|
|
OwnPtr<ScopedOffset> push_start();
|
|
Offset current_position();
|
|
|
|
StringView m_input;
|
|
size_t m_offset { 0 };
|
|
|
|
AST::Position::Line m_line { 0, 0 };
|
|
|
|
Vector<size_t> m_rule_start_offsets;
|
|
Vector<AST::Position::Line> m_rule_start_lines;
|
|
|
|
OwnPtr<Function<bool()>> m_end_condition;
|
|
Vector<HeredocInitiationRecord> m_heredoc_initiations;
|
|
Vector<char> m_extra_chars_not_allowed_in_barewords;
|
|
bool m_is_in_brace_expansion_spec { false };
|
|
bool m_continuation_controls_allowed { false };
|
|
bool m_in_interactive_mode { false };
|
|
};
|
|
|
|
#if 0
|
|
constexpr auto the_grammar = R"(
|
|
toplevel :: sequence?
|
|
|
|
sequence :: variable_decls? or_logical_sequence terminator sequence
|
|
| variable_decls? or_logical_sequence '&' sequence
|
|
| variable_decls? or_logical_sequence
|
|
| variable_decls? function_decl (terminator sequence)?
|
|
| variable_decls? terminator sequence
|
|
|
|
function_decl :: identifier '(' (ws* identifier)* ')' ws* '{' [!c] toplevel '}'
|
|
|
|
or_logical_sequence :: and_logical_sequence '|' '|' and_logical_sequence
|
|
| and_logical_sequence
|
|
|
|
and_logical_sequence :: pipe_sequence '&' '&' and_logical_sequence
|
|
| pipe_sequence
|
|
|
|
terminator :: ';'
|
|
| '\n' [?!heredoc_stack.is_empty] heredoc_entries
|
|
|
|
heredoc_entries :: { .*? (heredoc_entry) '\n' } [each heredoc_entries]
|
|
|
|
variable_decls :: identifier '=' expression (' '+ variable_decls)? ' '*
|
|
| identifier '=' '(' pipe_sequence ')' (' '+ variable_decls)? ' '*
|
|
|
|
pipe_sequence :: command '|' '&'? pipe_sequence
|
|
| command
|
|
| control_structure '|' '&'? pipe_sequence
|
|
| control_structure
|
|
|
|
control_structure[c] :: for_expr
|
|
| loop_expr
|
|
| if_expr
|
|
| subshell
|
|
| match_expr
|
|
| ?c: continuation_control
|
|
|
|
continuation_control :: 'break'
|
|
| 'continue'
|
|
|
|
for_expr :: 'for' ws+ (('index' ' '+ identifier ' '+)? identifier ' '+ 'in' ws*)? expression ws+ '{' [c] toplevel '}'
|
|
|
|
loop_expr :: 'loop' ws* '{' [c] toplevel '}'
|
|
|
|
if_expr :: 'if' ws+ or_logical_sequence ws+ '{' toplevel '}' else_clause?
|
|
|
|
else_clause :: else '{' toplevel '}'
|
|
| else if_expr
|
|
|
|
subshell :: '{' toplevel '}'
|
|
|
|
match_expr :: 'match' ws+ expression ws* ('as' ws+ identifier)? '{' match_entry* '}'
|
|
|
|
match_entry :: match_pattern ws* (as identifier_list)? '{' toplevel '}'
|
|
| regex_pattern ws* '{' toplevel '}'
|
|
|
|
identifier_list :: '(' (identifier ws*)* ')'
|
|
|
|
regex_pattern :: regex_pattern (ws* '|' ws* regex_pattern)*
|
|
|
|
match_pattern :: expression (ws* '|' ws* expression)*
|
|
|
|
regex_pattern :: '(?:' .* ')' { enclosed string must contain balanced parentheses }
|
|
|
|
command :: redirection command
|
|
| list_expression command?
|
|
|
|
redirection :: number? '>'{1,2} ' '* string_composite
|
|
| number? '<' ' '* string_composite
|
|
| number? '>' '&' number
|
|
| number? '>' '&' '-'
|
|
|
|
list_expression :: ' '* expression (' '+ list_expression)?
|
|
|
|
expression :: evaluate expression?
|
|
| string_composite expression?
|
|
| comment expression?
|
|
| immediate_expression expression?
|
|
| history_designator expression?
|
|
| '(' list_expression ')' expression?
|
|
|
|
evaluate :: '$' '(' pipe_sequence ')'
|
|
| '$' [lookahead != '('] expression {eval / dynamic resolve}
|
|
|
|
string_composite :: string string_composite?
|
|
| variable string_composite?
|
|
| bareword string_composite?
|
|
| glob string_composite?
|
|
| brace_expansion string_composite?
|
|
| heredoc_initiator string_composite? {append to heredoc_entries}
|
|
|
|
heredoc_initiator :: '<' '<' '-' bareword {*bareword, interpolate, no deindent}
|
|
| '<' '<' '-' "'" [^']* "'" {*string, no interpolate, no deindent}
|
|
| '<' '<' '~' bareword {*bareword, interpolate, deindent}
|
|
| '<' '<' '~' "'" [^']* "'" {*bareword, no interpolate, deindent}
|
|
|
|
string :: '"' dquoted_string_inner '"'
|
|
| "'" [^']* "'"
|
|
|
|
dquoted_string_inner :: '\' . dquoted_string_inner? {concat}
|
|
| variable dquoted_string_inner? {compose}
|
|
| . dquoted_string_inner?
|
|
| '\' 'x' xdigit*2 dquoted_string_inner?
|
|
| '\' 'u' xdigit*8 dquoted_string_inner?
|
|
| '\' [abefrnt] dquoted_string_inner?
|
|
|
|
variable :: variable_ref slice?
|
|
|
|
variable_ref :: '$' identifier
|
|
| '$' '$'
|
|
| '$' '?'
|
|
| '$' '*'
|
|
| '$' '#'
|
|
| ...
|
|
|
|
slice :: '[' brace_expansion_spec ']'
|
|
|
|
comment :: '#' [^\n]*
|
|
|
|
immediate_expression :: '$' '{' immediate_function expression* '}'
|
|
|
|
immediate_function :: identifier { predetermined list of names, see Shell.h:ENUMERATE_SHELL_IMMEDIATE_FUNCTIONS }
|
|
|
|
history_designator :: '!' event_selector (':' word_selector_composite)?
|
|
|
|
event_selector :: '!' {== '-0'}
|
|
| '?' bareword '?'
|
|
| bareword {number: index, otherwise: lookup}
|
|
|
|
word_selector_composite :: word_selector ('-' word_selector)?
|
|
|
|
word_selector :: number
|
|
| '^' {== 0}
|
|
| '$' {== end}
|
|
|
|
bareword :: [^"'*$&#|()[\]{} ?;<>] bareword?
|
|
| '\' [^"'*$&#|()[\]{} ?;<>] bareword?
|
|
|
|
bareword_with_tilde_expansion :: '~' bareword?
|
|
|
|
glob :: [*?] bareword?
|
|
| bareword [*?]
|
|
|
|
brace_expansion :: '{' brace_expansion_spec '}'
|
|
|
|
brace_expansion_spec :: expression? (',' expression?)*
|
|
| expression '..' expression
|
|
)";
|
|
#endif
|
|
|
|
}
|