Parser.h 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322
  1. /*
  2. * Copyright (c) 2020, the SerenityOS developers.
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #pragma once
  7. #include "AST.h"
  8. #include <AK/Function.h>
  9. #include <AK/RefPtr.h>
  10. #include <AK/String.h>
  11. #include <AK/StringBuilder.h>
  12. #include <AK/Vector.h>
  13. namespace Shell {
  14. class Parser {
  15. public:
  16. Parser(StringView input, bool interactive = false)
  17. : m_input(move(input))
  18. , m_in_interactive_mode(interactive)
  19. {
  20. }
  21. RefPtr<AST::Node> parse();
  22. /// Parse the given string *as* an expression
  23. /// that is to forefully enclose it in double-quotes.
  24. RefPtr<AST::Node> parse_as_single_expression();
  25. NonnullRefPtrVector<AST::Node> parse_as_multiple_expressions();
  26. struct SavedOffset {
  27. size_t offset;
  28. AST::Position::Line line;
  29. };
  30. SavedOffset save_offset() const;
  31. private:
  32. enum class ShouldReadMoreSequences {
  33. Yes,
  34. No,
  35. };
  36. enum class StringEndCondition {
  37. DoubleQuote,
  38. Heredoc,
  39. };
  40. struct SequenceParseResult {
  41. NonnullRefPtrVector<AST::Node> entries;
  42. Vector<AST::Position, 1> separator_positions;
  43. ShouldReadMoreSequences decision;
  44. };
  45. struct HeredocInitiationRecord {
  46. String end;
  47. RefPtr<AST::Heredoc> node;
  48. bool interpolate { false };
  49. bool deindent { false };
  50. };
  51. constexpr static size_t max_allowed_nested_rule_depth = 2048;
  52. RefPtr<AST::Node> parse_toplevel();
  53. SequenceParseResult parse_sequence();
  54. RefPtr<AST::Node> parse_function_decl();
  55. RefPtr<AST::Node> parse_and_logical_sequence();
  56. RefPtr<AST::Node> parse_or_logical_sequence();
  57. RefPtr<AST::Node> parse_variable_decls();
  58. RefPtr<AST::Node> parse_pipe_sequence();
  59. RefPtr<AST::Node> parse_command();
  60. RefPtr<AST::Node> parse_control_structure();
  61. RefPtr<AST::Node> parse_continuation_control();
  62. RefPtr<AST::Node> parse_for_loop();
  63. RefPtr<AST::Node> parse_loop_loop();
  64. RefPtr<AST::Node> parse_if_expr();
  65. RefPtr<AST::Node> parse_subshell();
  66. RefPtr<AST::Node> parse_match_expr();
  67. AST::MatchEntry parse_match_entry();
  68. RefPtr<AST::Node> parse_match_pattern();
  69. RefPtr<AST::Node> parse_redirection();
  70. RefPtr<AST::Node> parse_list_expression();
  71. RefPtr<AST::Node> parse_expression();
  72. RefPtr<AST::Node> parse_string_composite();
  73. RefPtr<AST::Node> parse_string();
  74. RefPtr<AST::Node> parse_string_inner(StringEndCondition);
  75. RefPtr<AST::Node> parse_variable();
  76. RefPtr<AST::Node> parse_variable_ref();
  77. RefPtr<AST::Slice> parse_slice();
  78. RefPtr<AST::Node> parse_evaluate();
  79. RefPtr<AST::Node> parse_history_designator();
  80. RefPtr<AST::Node> parse_comment();
  81. RefPtr<AST::Node> parse_bareword();
  82. RefPtr<AST::Node> parse_glob();
  83. RefPtr<AST::Node> parse_brace_expansion();
  84. RefPtr<AST::Node> parse_brace_expansion_spec();
  85. RefPtr<AST::Node> parse_immediate_expression();
  86. RefPtr<AST::Node> parse_heredoc_initiation_record();
  87. bool parse_heredoc_entries();
  88. template<typename A, typename... Args>
  89. NonnullRefPtr<A> create(Args... args);
  90. void set_end_condition(OwnPtr<Function<bool()>> condition) { m_end_condition = move(condition); }
  91. bool at_end() const
  92. {
  93. if (m_end_condition && (*m_end_condition)())
  94. return true;
  95. return m_input.length() <= m_offset;
  96. }
  97. char peek();
  98. char consume();
  99. bool expect(char);
  100. bool expect(StringView);
  101. bool next_is(StringView);
  102. void restore_to(size_t offset, AST::Position::Line line)
  103. {
  104. m_offset = offset;
  105. m_line = move(line);
  106. }
  107. AST::Position::Line line() const { return m_line; }
  108. StringView consume_while(Function<bool(char)>);
  109. struct Offset {
  110. size_t offset;
  111. AST::Position::Line line;
  112. };
  113. struct ScopedOffset {
  114. ScopedOffset(Vector<size_t>& offsets, Vector<AST::Position::Line>& lines, size_t offset, size_t lineno, size_t linecol)
  115. : offsets(offsets)
  116. , lines(lines)
  117. , offset(offset)
  118. , line({ lineno, linecol })
  119. {
  120. offsets.append(offset);
  121. lines.append(line);
  122. }
  123. ~ScopedOffset()
  124. {
  125. auto last = offsets.take_last();
  126. VERIFY(last == offset);
  127. auto last_line = lines.take_last();
  128. VERIFY(last_line == line);
  129. }
  130. Vector<size_t>& offsets;
  131. Vector<AST::Position::Line>& lines;
  132. size_t offset;
  133. AST::Position::Line line;
  134. };
  135. void restore_to(const ScopedOffset& offset) { restore_to(offset.offset, offset.line); }
  136. OwnPtr<ScopedOffset> push_start();
  137. Offset current_position();
  138. StringView m_input;
  139. size_t m_offset { 0 };
  140. AST::Position::Line m_line { 0, 0 };
  141. Vector<size_t> m_rule_start_offsets;
  142. Vector<AST::Position::Line> m_rule_start_lines;
  143. OwnPtr<Function<bool()>> m_end_condition;
  144. Vector<HeredocInitiationRecord> m_heredoc_initiations;
  145. Vector<char> m_extra_chars_not_allowed_in_barewords;
  146. bool m_is_in_brace_expansion_spec { false };
  147. bool m_continuation_controls_allowed { false };
  148. bool m_in_interactive_mode { false };
  149. };
  // Reference grammar for the Shell language.
  //
  // The literal below sits inside `#if 0`, so it is never compiled; it is kept
  // purely as documentation for the parse_<rule>() declarations of Parser.
  // Notation: quoted text is a literal terminal, `[...]` is either a character
  // class or a parser-state annotation, and `{...}` is an informal note.
  #if 0
  constexpr auto the_grammar = R"(
  toplevel :: sequence?

  sequence :: variable_decls? or_logical_sequence terminator sequence
            | variable_decls? or_logical_sequence '&' sequence
            | variable_decls? or_logical_sequence
            | variable_decls? function_decl (terminator sequence)?
            | variable_decls? terminator sequence

  function_decl :: identifier '(' (ws* identifier)* ')' ws* '{' [!c] toplevel '}'

  or_logical_sequence :: and_logical_sequence '|' '|' and_logical_sequence
                       | and_logical_sequence

  and_logical_sequence :: pipe_sequence '&' '&' and_logical_sequence
                        | pipe_sequence

  terminator :: ';'
              | '\n' [?!heredoc_stack.is_empty] heredoc_entries

  heredoc_entries :: { .*? (heredoc_entry) '\n' } [each heredoc_entries]

  variable_decls :: identifier '=' expression (' '+ variable_decls)? ' '*
                  | identifier '=' '(' pipe_sequence ')' (' '+ variable_decls)? ' '*

  pipe_sequence :: command '|' pipe_sequence
                 | command
                 | control_structure '|' pipe_sequence
                 | control_structure

  control_structure[c] :: for_expr
                        | loop_expr
                        | if_expr
                        | subshell
                        | match_expr
                        | ?c: continuation_control

  continuation_control :: 'break'
                        | 'continue'

  for_expr :: 'for' ws+ (('index' ' '+ identifier ' '+)? identifier ' '+ 'in' ws*)? expression ws+ '{' [c] toplevel '}'

  loop_expr :: 'loop' ws* '{' [c] toplevel '}'

  if_expr :: 'if' ws+ or_logical_sequence ws+ '{' toplevel '}' else_clause?

  else_clause :: 'else' '{' toplevel '}'
               | 'else' if_expr

  subshell :: '{' toplevel '}'

  match_expr :: 'match' ws+ expression ws* ('as' ws+ identifier)? '{' match_entry* '}'

  match_entry :: match_pattern ws* ('as' identifier_list)? '{' toplevel '}'

  identifier_list :: '(' (identifier ws*)* ')'

  match_pattern :: expression (ws* '|' ws* expression)*

  command :: redirection command
           | list_expression command?

  redirection :: number? '>'{1,2} ' '* string_composite
               | number? '<' ' '* string_composite
               | number? '>' '&' number
               | number? '>' '&' '-'

  list_expression :: ' '* expression (' '+ list_expression)?

  expression :: evaluate expression?
              | string_composite expression?
              | comment expression?
              | immediate_expression expression?
              | history_designator expression?
              | '(' list_expression ')' expression?

  evaluate :: '$' '(' pipe_sequence ')'
            | '$' [lookahead != '('] expression          {eval / dynamic resolve}

  string_composite :: string string_composite?
                    | variable string_composite?
                    | bareword string_composite?
                    | glob string_composite?
                    | brace_expansion string_composite?
                    | heredoc_initiator string_composite?    {append to heredoc_entries}

  heredoc_initiator :: '<' '<' '-' bareword         {*bareword, interpolate, no deindent}
                     | '<' '<' '-' "'" [^']* "'"    {*string, no interpolate, no deindent}
                     | '<' '<' '~' bareword         {*bareword, interpolate, deindent}
                     | '<' '<' '~' "'" [^']* "'"    {*string, no interpolate, deindent}

  string :: '"' dquoted_string_inner '"'
          | "'" [^']* "'"

  dquoted_string_inner :: '\' . dquoted_string_inner?       {concat}
                        | variable dquoted_string_inner?    {compose}
                        | . dquoted_string_inner?
                        | '\' 'x' xdigit*2 dquoted_string_inner?
                        | '\' 'u' xdigit*8 dquoted_string_inner?
                        | '\' [abefrnt] dquoted_string_inner?

  variable :: variable_ref slice?

  variable_ref :: '$' identifier
                | '$' '$'
                | '$' '?'
                | '$' '*'
                | '$' '#'
                | ...

  slice :: '[' brace_expansion_spec ']'

  comment :: '#' [^\n]*

  immediate_expression :: '$' '{' immediate_function expression* '}'

  immediate_function :: identifier       { predetermined list of names, see Shell.h:ENUMERATE_SHELL_IMMEDIATE_FUNCTIONS }

  history_designator :: '!' event_selector (':' word_selector_composite)?

  event_selector :: '!'                  {== '-0'}
                  | '?' bareword '?'
                  | bareword             {number: index, otherwise: lookup}

  word_selector_composite :: word_selector ('-' word_selector)?

  word_selector :: number
                 | '^'                   {== 0}
                 | '$'                   {== end}

  bareword :: [^"'*$&#|()[\]{} ?;<>] bareword?
            | '\' [^"'*$&#|()[\]{} ?;<>] bareword?

  bareword_with_tilde_expansion :: '~' bareword?

  glob :: [*?] bareword?
        | bareword [*?]

  brace_expansion :: '{' brace_expansion_spec '}'

  brace_expansion_spec :: expression? (',' expression?)*
                        | expression '..' expression
  )";
  #endif
  252. }