Parser.h 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296
  1. /*
  2. * Copyright (c) 2020, the SerenityOS developers.
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. *
  8. * 1. Redistributions of source code must retain the above copyright notice, this
  9. * list of conditions and the following disclaimer.
  10. *
  11. * 2. Redistributions in binary form must reproduce the above copyright notice,
  12. * this list of conditions and the following disclaimer in the documentation
  13. * and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  18. * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  19. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  21. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  22. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  23. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #pragma once
  27. #include "AST.h"
  28. #include <AK/Function.h>
  29. #include <AK/RefPtr.h>
  30. #include <AK/String.h>
  31. #include <AK/StringBuilder.h>
  32. #include <AK/Vector.h>
  33. namespace Shell {
  34. class Parser {
  35. public:
  36. Parser(StringView input)
  37. : m_input(move(input))
  38. {
  39. }
  40. RefPtr<AST::Node> parse();
  41. /// Parse the given string *as* an expression
  42. /// that is to forefully enclose it in double-quotes.
  43. RefPtr<AST::Node> parse_as_single_expression();
  44. NonnullRefPtrVector<AST::Node> parse_as_multiple_expressions();
  45. struct SavedOffset {
  46. size_t offset;
  47. AST::Position::Line line;
  48. };
  49. SavedOffset save_offset() const;
  50. private:
  51. enum class ShouldReadMoreSequences {
  52. Yes,
  53. No,
  54. };
  55. struct SequenceParseResult {
  56. NonnullRefPtrVector<AST::Node> entries;
  57. Vector<AST::Position, 1> separator_positions;
  58. ShouldReadMoreSequences decision;
  59. };
  60. constexpr static size_t max_allowed_nested_rule_depth = 2048;
  61. RefPtr<AST::Node> parse_toplevel();
  62. SequenceParseResult parse_sequence();
  63. RefPtr<AST::Node> parse_function_decl();
  64. RefPtr<AST::Node> parse_and_logical_sequence();
  65. RefPtr<AST::Node> parse_or_logical_sequence();
  66. RefPtr<AST::Node> parse_variable_decls();
  67. RefPtr<AST::Node> parse_pipe_sequence();
  68. RefPtr<AST::Node> parse_command();
  69. RefPtr<AST::Node> parse_control_structure();
  70. RefPtr<AST::Node> parse_continuation_control();
  71. RefPtr<AST::Node> parse_for_loop();
  72. RefPtr<AST::Node> parse_loop_loop();
  73. RefPtr<AST::Node> parse_if_expr();
  74. RefPtr<AST::Node> parse_subshell();
  75. RefPtr<AST::Node> parse_match_expr();
  76. AST::MatchEntry parse_match_entry();
  77. RefPtr<AST::Node> parse_match_pattern();
  78. RefPtr<AST::Node> parse_redirection();
  79. RefPtr<AST::Node> parse_list_expression();
  80. RefPtr<AST::Node> parse_expression();
  81. RefPtr<AST::Node> parse_string_composite();
  82. RefPtr<AST::Node> parse_string();
  83. RefPtr<AST::Node> parse_doublequoted_string_inner();
  84. RefPtr<AST::Node> parse_variable();
  85. RefPtr<AST::Node> parse_evaluate();
  86. RefPtr<AST::Node> parse_history_designator();
  87. RefPtr<AST::Node> parse_comment();
  88. RefPtr<AST::Node> parse_bareword();
  89. RefPtr<AST::Node> parse_glob();
  90. RefPtr<AST::Node> parse_brace_expansion();
  91. RefPtr<AST::Node> parse_brace_expansion_spec();
  92. template<typename A, typename... Args>
  93. NonnullRefPtr<A> create(Args... args);
  94. bool at_end() const { return m_input.length() <= m_offset; }
  95. char peek();
  96. char consume();
  97. bool expect(char);
  98. bool expect(const StringView&);
  99. bool next_is(const StringView&);
  100. void restore_to(size_t offset, AST::Position::Line line)
  101. {
  102. m_offset = offset;
  103. m_line = move(line);
  104. }
  105. AST::Position::Line line() const { return m_line; }
  106. StringView consume_while(Function<bool(char)>);
  107. struct Offset {
  108. size_t offset;
  109. AST::Position::Line line;
  110. };
  111. struct ScopedOffset {
  112. ScopedOffset(Vector<size_t>& offsets, Vector<AST::Position::Line>& lines, size_t offset, size_t lineno, size_t linecol)
  113. : offsets(offsets)
  114. , lines(lines)
  115. , offset(offset)
  116. , line({ lineno, linecol })
  117. {
  118. offsets.append(offset);
  119. lines.append(line);
  120. }
  121. ~ScopedOffset()
  122. {
  123. auto last = offsets.take_last();
  124. ASSERT(last == offset);
  125. auto last_line = lines.take_last();
  126. ASSERT(last_line == line);
  127. }
  128. Vector<size_t>& offsets;
  129. Vector<AST::Position::Line>& lines;
  130. size_t offset;
  131. AST::Position::Line line;
  132. };
  133. void restore_to(const ScopedOffset& offset) { restore_to(offset.offset, offset.line); }
  134. OwnPtr<ScopedOffset> push_start();
  135. Offset current_position();
  136. StringView m_input;
  137. size_t m_offset { 0 };
  138. AST::Position::Line m_line { 0, 0 };
  139. Vector<size_t> m_rule_start_offsets;
  140. Vector<AST::Position::Line> m_rule_start_lines;
  141. Vector<char> m_extra_chars_not_allowed_in_barewords;
  142. bool m_is_in_brace_expansion_spec { false };
  143. bool m_continuation_controls_allowed { false };
  144. };
  145. #if 0
  146. constexpr auto the_grammar = R"(
  147. toplevel :: sequence?
  148. sequence :: variable_decls? or_logical_sequence terminator sequence
  149. | variable_decls? or_logical_sequence '&' sequence
  150. | variable_decls? or_logical_sequence
  151. | variable_decls? function_decl (terminator sequence)?
  152. | variable_decls? terminator sequence
  153. function_decl :: identifier '(' (ws* identifier)* ')' ws* '{' [!c] toplevel '}'
  154. or_logical_sequence :: and_logical_sequence '|' '|' and_logical_sequence
  155. | and_logical_sequence
  156. and_logical_sequence :: pipe_sequence '&' '&' and_logical_sequence
  157. | pipe_sequence
  158. terminator :: ';'
  159. | '\n'
  160. variable_decls :: identifier '=' expression (' '+ variable_decls)? ' '*
  161. | identifier '=' '(' pipe_sequence ')' (' '+ variable_decls)? ' '*
  162. pipe_sequence :: command '|' pipe_sequence
  163. | command
  164. | control_structure '|' pipe_sequence
  165. | control_structure
  166. control_structure[c] :: for_expr
  167. | loop_expr
  168. | if_expr
  169. | subshell
  170. | match_expr
  171. | ?c: continuation_control
  172. continuation_control :: 'break'
  173. | 'continue'
  174. for_expr :: 'for' ws+ (identifier ' '+ 'in' ws*)? expression ws+ '{' [c] toplevel '}'
  175. loop_expr :: 'loop' ws* '{' [c] toplevel '}'
  176. if_expr :: 'if' ws+ or_logical_sequence ws+ '{' toplevel '}' else_clause?
  177. else_clause :: else '{' toplevel '}'
  178. | else if_expr
  179. subshell :: '{' toplevel '}'
  180. match_expr :: 'match' ws+ expression ws* ('as' ws+ identifier)? '{' match_entry* '}'
  181. match_entry :: match_pattern ws* (as identifier_list)? '{' toplevel '}'
  182. identifier_list :: '(' (identifier ws*)* ')'
  183. match_pattern :: expression (ws* '|' ws* expression)*
  184. command :: redirection command
  185. | list_expression command?
  186. redirection :: number? '>'{1,2} ' '* string_composite
  187. | number? '<' ' '* string_composite
  188. | number? '>' '&' number
  189. | number? '>' '&' '-'
  190. list_expression :: ' '* expression (' '+ list_expression)?
  191. expression :: evaluate expression?
  192. | string_composite expression?
  193. | comment expression?
  194. | history_designator expression?
  195. | '(' list_expression ')' expression?
  196. evaluate :: '$' '(' pipe_sequence ')'
  197. | '$' expression {eval / dynamic resolve}
  198. string_composite :: string string_composite?
  199. | variable string_composite?
  200. | bareword string_composite?
  201. | glob string_composite?
  202. | brace_expansion string_composite?
  203. string :: '"' dquoted_string_inner '"'
  204. | "'" [^']* "'"
  205. dquoted_string_inner :: '\' . dquoted_string_inner? {concat}
  206. | variable dquoted_string_inner? {compose}
  207. | . dquoted_string_inner?
  208. | '\' 'x' digit digit dquoted_string_inner?
  209. | '\' [abefrn] dquoted_string_inner?
  210. variable :: '$' identifier
  211. | '$' '$'
  212. | '$' '?'
  213. | '$' '*'
  214. | '$' '#'
  215. | ...
  216. comment :: '#' [^\n]*
  217. history_designator :: '!' event_selector (':' word_selector_composite)?
  218. event_selector :: '!' {== '-0'}
  219. | '?' bareword '?'
  220. | bareword {number: index, otherwise: lookup}
  221. word_selector_composite :: word_selector ('-' word_selector)?
  222. word_selector :: number
  223. | '^' {== 0}
  224. | '$' {== end}
  225. bareword :: [^"'*$&#|()[\]{} ?;<>] bareword?
  226. | '\' [^"'*$&#|()[\]{} ?;<>] bareword?
  227. bareword_with_tilde_expansion :: '~' bareword?
  228. glob :: [*?] bareword?
  229. | bareword [*?]
  230. brace_expansion :: '{' brace_expansion_spec '}'
  231. brace_expansion_spec :: expression? (',' expression?)*
  232. | expression '..' expression
  233. )";
  234. #endif
  235. }