123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322 |
- /*
- * Copyright (c) 2020, the SerenityOS developers.
- *
- * SPDX-License-Identifier: BSD-2-Clause
- */
- #pragma once
- #include "AST.h"
- #include <AK/Function.h>
- #include <AK/RefPtr.h>
- #include <AK/String.h>
- #include <AK/StringBuilder.h>
- #include <AK/Vector.h>
- namespace Shell {
- class Parser {
- public:
- Parser(StringView input, bool interactive = false)
- : m_input(move(input))
- , m_in_interactive_mode(interactive)
- {
- }
- RefPtr<AST::Node> parse();
- /// Parse the given string *as* an expression
- /// that is to forefully enclose it in double-quotes.
- RefPtr<AST::Node> parse_as_single_expression();
- NonnullRefPtrVector<AST::Node> parse_as_multiple_expressions();
- struct SavedOffset {
- size_t offset;
- AST::Position::Line line;
- };
- SavedOffset save_offset() const;
- private:
- enum class ShouldReadMoreSequences {
- Yes,
- No,
- };
- enum class StringEndCondition {
- DoubleQuote,
- Heredoc,
- };
- struct SequenceParseResult {
- NonnullRefPtrVector<AST::Node> entries;
- Vector<AST::Position, 1> separator_positions;
- ShouldReadMoreSequences decision;
- };
- struct HeredocInitiationRecord {
- String end;
- RefPtr<AST::Heredoc> node;
- bool interpolate { false };
- bool deindent { false };
- };
- constexpr static size_t max_allowed_nested_rule_depth = 2048;
- RefPtr<AST::Node> parse_toplevel();
- SequenceParseResult parse_sequence();
- RefPtr<AST::Node> parse_function_decl();
- RefPtr<AST::Node> parse_and_logical_sequence();
- RefPtr<AST::Node> parse_or_logical_sequence();
- RefPtr<AST::Node> parse_variable_decls();
- RefPtr<AST::Node> parse_pipe_sequence();
- RefPtr<AST::Node> parse_command();
- RefPtr<AST::Node> parse_control_structure();
- RefPtr<AST::Node> parse_continuation_control();
- RefPtr<AST::Node> parse_for_loop();
- RefPtr<AST::Node> parse_loop_loop();
- RefPtr<AST::Node> parse_if_expr();
- RefPtr<AST::Node> parse_subshell();
- RefPtr<AST::Node> parse_match_expr();
- AST::MatchEntry parse_match_entry();
- RefPtr<AST::Node> parse_match_pattern();
- RefPtr<AST::Node> parse_redirection();
- RefPtr<AST::Node> parse_list_expression();
- RefPtr<AST::Node> parse_expression();
- RefPtr<AST::Node> parse_string_composite();
- RefPtr<AST::Node> parse_string();
- RefPtr<AST::Node> parse_string_inner(StringEndCondition);
- RefPtr<AST::Node> parse_variable();
- RefPtr<AST::Node> parse_variable_ref();
- RefPtr<AST::Slice> parse_slice();
- RefPtr<AST::Node> parse_evaluate();
- RefPtr<AST::Node> parse_history_designator();
- RefPtr<AST::Node> parse_comment();
- RefPtr<AST::Node> parse_bareword();
- RefPtr<AST::Node> parse_glob();
- RefPtr<AST::Node> parse_brace_expansion();
- RefPtr<AST::Node> parse_brace_expansion_spec();
- RefPtr<AST::Node> parse_immediate_expression();
- RefPtr<AST::Node> parse_heredoc_initiation_record();
- bool parse_heredoc_entries();
- template<typename A, typename... Args>
- NonnullRefPtr<A> create(Args... args);
- void set_end_condition(OwnPtr<Function<bool()>> condition) { m_end_condition = move(condition); }
- bool at_end() const
- {
- if (m_end_condition && (*m_end_condition)())
- return true;
- return m_input.length() <= m_offset;
- }
- char peek();
- char consume();
- bool expect(char);
- bool expect(StringView);
- bool next_is(StringView);
- void restore_to(size_t offset, AST::Position::Line line)
- {
- m_offset = offset;
- m_line = move(line);
- }
- AST::Position::Line line() const { return m_line; }
- StringView consume_while(Function<bool(char)>);
- struct Offset {
- size_t offset;
- AST::Position::Line line;
- };
- struct ScopedOffset {
- ScopedOffset(Vector<size_t>& offsets, Vector<AST::Position::Line>& lines, size_t offset, size_t lineno, size_t linecol)
- : offsets(offsets)
- , lines(lines)
- , offset(offset)
- , line({ lineno, linecol })
- {
- offsets.append(offset);
- lines.append(line);
- }
- ~ScopedOffset()
- {
- auto last = offsets.take_last();
- VERIFY(last == offset);
- auto last_line = lines.take_last();
- VERIFY(last_line == line);
- }
- Vector<size_t>& offsets;
- Vector<AST::Position::Line>& lines;
- size_t offset;
- AST::Position::Line line;
- };
- void restore_to(const ScopedOffset& offset) { restore_to(offset.offset, offset.line); }
- OwnPtr<ScopedOffset> push_start();
- Offset current_position();
- StringView m_input;
- size_t m_offset { 0 };
- AST::Position::Line m_line { 0, 0 };
- Vector<size_t> m_rule_start_offsets;
- Vector<AST::Position::Line> m_rule_start_lines;
- OwnPtr<Function<bool()>> m_end_condition;
- Vector<HeredocInitiationRecord> m_heredoc_initiations;
- Vector<char> m_extra_chars_not_allowed_in_barewords;
- bool m_is_in_brace_expansion_spec { false };
- bool m_continuation_controls_allowed { false };
- bool m_in_interactive_mode { false };
- };
// Reference grammar for the shell language.
//
// This block is compiled out (`#if 0`) and exists purely as documentation for the
// `parse_*` functions of Parser. Notation: quoted items are terminals, bare names are
// rules, `[...]` after a rule name or inside an alternative is a flag/condition, and
// `{...}` is an informal annotation describing what the alternative produces.
#if 0
constexpr auto the_grammar = R"(
toplevel :: sequence?

sequence :: variable_decls? or_logical_sequence terminator sequence
          | variable_decls? or_logical_sequence '&' sequence
          | variable_decls? or_logical_sequence
          | variable_decls? function_decl (terminator sequence)?
          | variable_decls? terminator sequence

function_decl :: identifier '(' (ws* identifier)* ')' ws* '{' [!c] toplevel '}'

or_logical_sequence :: and_logical_sequence '|' '|' and_logical_sequence
                     | and_logical_sequence

and_logical_sequence :: pipe_sequence '&' '&' and_logical_sequence
                      | pipe_sequence

terminator :: ';'
            | '\n' [?!heredoc_stack.is_empty] heredoc_entries

heredoc_entries :: { .*? (heredoc_entry) '\n' } [each heredoc_entries]

variable_decls :: identifier '=' expression (' '+ variable_decls)? ' '*
                | identifier '=' '(' pipe_sequence ')' (' '+ variable_decls)? ' '*

pipe_sequence :: command '|' pipe_sequence
               | command
               | control_structure '|' pipe_sequence
               | control_structure

control_structure[c] :: for_expr
                      | loop_expr
                      | if_expr
                      | subshell
                      | match_expr
                      | ?c: continuation_control

continuation_control :: 'break'
                      | 'continue'

for_expr :: 'for' ws+ (('index' ' '+ identifier ' '+)? identifier ' '+ 'in' ws*)? expression ws+ '{' [c] toplevel '}'

loop_expr :: 'loop' ws* '{' [c] toplevel '}'

if_expr :: 'if' ws+ or_logical_sequence ws+ '{' toplevel '}' else_clause?

else_clause :: 'else' '{' toplevel '}'
             | 'else' if_expr

subshell :: '{' toplevel '}'

match_expr :: 'match' ws+ expression ws* ('as' ws+ identifier)? '{' match_entry* '}'

match_entry :: match_pattern ws* ('as' identifier_list)? '{' toplevel '}'

identifier_list :: '(' (identifier ws*)* ')'

match_pattern :: expression (ws* '|' ws* expression)*

command :: redirection command
         | list_expression command?

redirection :: number? '>'{1,2} ' '* string_composite
             | number? '<' ' '* string_composite
             | number? '>' '&' number
             | number? '>' '&' '-'

list_expression :: ' '* expression (' '+ list_expression)?

expression :: evaluate expression?
            | string_composite expression?
            | comment expression?
            | immediate_expression expression?
            | history_designator expression?
            | '(' list_expression ')' expression?

evaluate :: '$' '(' pipe_sequence ')'
          | '$' [lookahead != '('] expression {eval / dynamic resolve}

string_composite :: string string_composite?
                  | variable string_composite?
                  | bareword string_composite?
                  | glob string_composite?
                  | brace_expansion string_composite?
                  | heredoc_initiator string_composite? {append to heredoc_entries}

heredoc_initiator :: '<' '<' '-' bareword {*bareword, interpolate, no deindent}
                   | '<' '<' '-' "'" [^']* "'" {*string, no interpolate, no deindent}
                   | '<' '<' '~' bareword {*bareword, interpolate, deindent}
                   | '<' '<' '~' "'" [^']* "'" {*string, no interpolate, deindent}

string :: '"' dquoted_string_inner '"'
        | "'" [^']* "'"

dquoted_string_inner :: '\' . dquoted_string_inner? {concat}
                      | variable dquoted_string_inner? {compose}
                      | . dquoted_string_inner?
                      | '\' 'x' xdigit*2 dquoted_string_inner?
                      | '\' 'u' xdigit*8 dquoted_string_inner?
                      | '\' [abefrnt] dquoted_string_inner?

variable :: variable_ref slice?

variable_ref :: '$' identifier
              | '$' '$'
              | '$' '?'
              | '$' '*'
              | '$' '#'
              | ...

slice :: '[' brace_expansion_spec ']'

comment :: '#' [^\n]*

immediate_expression :: '$' '{' immediate_function expression* '}'

immediate_function :: identifier { predetermined list of names, see Shell.h:ENUMERATE_SHELL_IMMEDIATE_FUNCTIONS }

history_designator :: '!' event_selector (':' word_selector_composite)?

event_selector :: '!' {== '-0'}
                | '?' bareword '?'
                | bareword {number: index, otherwise: lookup}

word_selector_composite :: word_selector ('-' word_selector)?

word_selector :: number
               | '^' {== 0}
               | '$' {== end}

bareword :: [^"'*$&#|()[\]{} ?;<>] bareword?
          | '\' [^"'*$&#|()[\]{} ?;<>] bareword?

bareword_with_tilde_expansion :: '~' bareword?

glob :: [*?] bareword?
      | bareword [*?]

brace_expansion :: '{' brace_expansion_spec '}'

brace_expansion_spec :: expression? (',' expression?)*
                      | expression '..' expression
)";
#endif
- }
|