// Parser.cpp
/*
 * Copyright (c) 2020, the SerenityOS developers.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
  26. #include "Parser.h"
  27. #include <ctype.h>
  28. #include <stdio.h>
  29. #include <unistd.h>
  30. char Parser::peek()
  31. {
  32. if (m_offset == m_input.length())
  33. return 0;
  34. ASSERT(m_offset < m_input.length());
  35. auto ch = m_input[m_offset];
  36. if (ch == '\\' && m_input.length() > m_offset + 1 && m_input[m_offset + 1] == '\n') {
  37. m_offset += 2;
  38. return peek();
  39. }
  40. return ch;
  41. }
  42. char Parser::consume()
  43. {
  44. auto ch = peek();
  45. ++m_offset;
  46. return ch;
  47. }
  48. void Parser::putback()
  49. {
  50. ASSERT(m_offset > 0);
  51. --m_offset;
  52. }
  53. bool Parser::expect(char ch)
  54. {
  55. return expect(StringView { &ch, 1 });
  56. }
  57. bool Parser::expect(const StringView& expected)
  58. {
  59. auto offset_at_start = m_offset;
  60. if (expected.length() + m_offset > m_input.length())
  61. return false;
  62. for (size_t i = 0; i < expected.length(); ++i) {
  63. if (peek() != expected[i]) {
  64. m_offset = offset_at_start;
  65. return false;
  66. }
  67. consume();
  68. }
  69. return true;
  70. }
  71. template<typename A, typename... Args>
  72. NonnullRefPtr<A> Parser::create(Args... args)
  73. {
  74. return adopt(*new A(AST::Position { m_rule_start_offsets.last(), m_offset }, args...));
  75. }
  76. [[nodiscard]] OwnPtr<Parser::ScopedOffset> Parser::push_start()
  77. {
  78. return make<ScopedOffset>(m_rule_start_offsets, m_offset);
  79. }
  // True for a space or a horizontal tab; newlines are NOT whitespace here.
  static constexpr bool is_whitespace(char c)
  {
      switch (c) {
      case ' ':
      case '\t':
          return true;
      default:
          return false;
      }
  }
  // True for ASCII letters, decimal digits and the underscore.
  static constexpr bool is_word_character(char c)
  {
      if (c == '_')
          return true;

      const bool is_decimal = '0' <= c && c <= '9';
      const bool is_upper = 'A' <= c && c <= 'Z';
      const bool is_lower = 'a' <= c && c <= 'z';
      return is_decimal || is_upper || is_lower;
  }
  // True for the ASCII decimal digits '0' through '9'.
  static constexpr bool is_digit(char c)
  {
      return !(c < '0' || c > '9');
  }
  // Builds a predicate that accepts every character except `c`.
  static constexpr auto is_not(char c)
  {
      auto differs_from_c = [c](char ch) { return !(ch == c); };
      return differs_from_c;
  }
  96. static constexpr auto is_any_of(StringView s)
  97. {
  98. return [s](char ch) { return s.contains(ch); };
  99. }
  // Interprets the two characters `a` and `b` as a base-16 number (via strtol)
  // and returns the result narrowed to a char.
  static inline char to_byte(char a, char b)
  {
      const char hex_digits[3] = { a, b, '\0' };
      const long value = strtol(hex_digits, nullptr, 16);
      return static_cast<char>(value);
  }
  105. RefPtr<AST::Node> Parser::parse()
  106. {
  107. m_offset = 0;
  108. auto toplevel = parse_toplevel();
  109. if (m_offset < m_input.length()) {
  110. // Parsing stopped midway, this is a syntax error.
  111. auto error_start = push_start();
  112. m_offset = m_input.length();
  113. auto syntax_error_node = create<AST::SyntaxError>("Unexpected tokens past the end");
  114. if (!toplevel)
  115. toplevel = move(syntax_error_node);
  116. else
  117. toplevel->set_is_syntax_error(*syntax_error_node);
  118. }
  119. return toplevel;
  120. }
  121. RefPtr<AST::Node> Parser::parse_toplevel()
  122. {
  123. auto rule_start = push_start();
  124. if (auto sequence = parse_sequence())
  125. return create<AST::Execute>(sequence.release_nonnull());
  126. return nullptr;
  127. }
  128. RefPtr<AST::Node> Parser::parse_sequence()
  129. {
  130. consume_while(is_any_of(" \t\n;")); // ignore whitespaces or terminators without effect.
  131. auto rule_start = push_start();
  132. auto var_decls = parse_variable_decls();
  133. switch (peek()) {
  134. case '}':
  135. return var_decls;
  136. case ';':
  137. case '\n': {
  138. if (!var_decls)
  139. break;
  140. consume_while(is_any_of("\n;"));
  141. auto rest = parse_sequence();
  142. if (rest)
  143. return create<AST::Sequence>(var_decls.release_nonnull(), rest.release_nonnull());
  144. return var_decls;
  145. }
  146. default:
  147. break;
  148. }
  149. auto first = parse_function_decl();
  150. if (!first)
  151. first = parse_or_logical_sequence();
  152. if (!first)
  153. return var_decls;
  154. if (var_decls)
  155. first = create<AST::Sequence>(var_decls.release_nonnull(), first.release_nonnull());
  156. consume_while(is_whitespace);
  157. switch (peek()) {
  158. case ';':
  159. case '\n':
  160. consume_while(is_any_of("\n;"));
  161. if (auto expr = parse_sequence()) {
  162. return create<AST::Sequence>(first.release_nonnull(), expr.release_nonnull()); // Sequence
  163. }
  164. return first;
  165. case '&': {
  166. auto execute_pipe_seq = first->would_execute() ? first.release_nonnull() : static_cast<NonnullRefPtr<AST::Node>>(create<AST::Execute>(first.release_nonnull()));
  167. consume();
  168. auto bg = create<AST::Background>(execute_pipe_seq); // Execute Background
  169. if (auto rest = parse_sequence())
  170. return create<AST::Sequence>(move(bg), rest.release_nonnull()); // Sequence Background Sequence
  171. return bg;
  172. }
  173. default:
  174. return first;
  175. }
  176. }
  177. RefPtr<AST::Node> Parser::parse_variable_decls()
  178. {
  179. auto rule_start = push_start();
  180. consume_while(is_whitespace);
  181. auto offset_before_name = m_offset;
  182. auto var_name = consume_while(is_word_character);
  183. if (var_name.is_empty())
  184. return nullptr;
  185. if (!expect('=')) {
  186. m_offset = offset_before_name;
  187. return nullptr;
  188. }
  189. auto name_expr = create<AST::BarewordLiteral>(move(var_name));
  190. auto start = push_start();
  191. auto expression = parse_expression();
  192. if (!expression || expression->is_syntax_error()) {
  193. m_offset = start->offset;
  194. if (peek() == '(') {
  195. consume();
  196. auto command = parse_pipe_sequence();
  197. if (!command)
  198. m_offset = start->offset;
  199. else if (!expect(')'))
  200. command->set_is_syntax_error(*create<AST::SyntaxError>("Expected a terminating close paren"));
  201. expression = command;
  202. }
  203. }
  204. if (!expression) {
  205. if (is_whitespace(peek())) {
  206. auto string_start = push_start();
  207. expression = create<AST::StringLiteral>("");
  208. } else {
  209. m_offset = offset_before_name;
  210. return nullptr;
  211. }
  212. }
  213. Vector<AST::VariableDeclarations::Variable> variables;
  214. variables.append({ move(name_expr), expression.release_nonnull() });
  215. if (consume_while(is_whitespace).is_empty())
  216. return create<AST::VariableDeclarations>(move(variables));
  217. auto rest = parse_variable_decls();
  218. if (!rest)
  219. return create<AST::VariableDeclarations>(move(variables));
  220. ASSERT(rest->is_variable_decls());
  221. auto* rest_decl = static_cast<AST::VariableDeclarations*>(rest.ptr());
  222. variables.append(rest_decl->variables());
  223. return create<AST::VariableDeclarations>(move(variables));
  224. }
  225. RefPtr<AST::Node> Parser::parse_function_decl()
  226. {
  227. auto rule_start = push_start();
  228. auto restore = [&] {
  229. m_offset = rule_start->offset;
  230. return nullptr;
  231. };
  232. consume_while(is_whitespace);
  233. auto offset_before_name = m_offset;
  234. auto function_name = consume_while(is_word_character);
  235. auto offset_after_name = m_offset;
  236. if (function_name.is_empty())
  237. return restore();
  238. if (!expect('('))
  239. return restore();
  240. Vector<AST::FunctionDeclaration::NameWithPosition> arguments;
  241. for (;;) {
  242. consume_while(is_whitespace);
  243. if (expect(')'))
  244. break;
  245. auto name_offset = m_offset;
  246. auto arg_name = consume_while(is_word_character);
  247. if (arg_name.is_empty()) {
  248. // FIXME: Should this be a syntax error, or just return?
  249. return restore();
  250. }
  251. arguments.append({ arg_name, { name_offset, m_offset } });
  252. }
  253. consume_while(is_whitespace);
  254. {
  255. RefPtr<AST::Node> syntax_error;
  256. {
  257. auto obrace_error_start = push_start();
  258. syntax_error = create<AST::SyntaxError>("Expected an open brace '{' to start a function body");
  259. }
  260. if (!expect('{')) {
  261. return create<AST::FunctionDeclaration>(
  262. AST::FunctionDeclaration::NameWithPosition {
  263. move(function_name),
  264. { offset_before_name, offset_after_name } },
  265. move(arguments),
  266. move(syntax_error));
  267. }
  268. }
  269. auto body = parse_toplevel();
  270. {
  271. RefPtr<AST::SyntaxError> syntax_error;
  272. {
  273. auto cbrace_error_start = push_start();
  274. syntax_error = create<AST::SyntaxError>("Expected a close brace '}' to end a function body");
  275. }
  276. if (!expect('}')) {
  277. if (body)
  278. body->set_is_syntax_error(*syntax_error);
  279. else
  280. body = move(syntax_error);
  281. return create<AST::FunctionDeclaration>(
  282. AST::FunctionDeclaration::NameWithPosition {
  283. move(function_name),
  284. { offset_before_name, offset_after_name } },
  285. move(arguments),
  286. move(body));
  287. }
  288. }
  289. return create<AST::FunctionDeclaration>(
  290. AST::FunctionDeclaration::NameWithPosition {
  291. move(function_name),
  292. { offset_before_name, offset_after_name } },
  293. move(arguments),
  294. move(body));
  295. }
  296. RefPtr<AST::Node> Parser::parse_or_logical_sequence()
  297. {
  298. consume_while(is_whitespace);
  299. auto rule_start = push_start();
  300. auto and_sequence = parse_and_logical_sequence();
  301. if (!and_sequence)
  302. return nullptr;
  303. consume_while(is_whitespace);
  304. auto saved_offset = m_offset;
  305. if (!expect("||")) {
  306. m_offset = saved_offset;
  307. return and_sequence;
  308. }
  309. auto right_and_sequence = parse_and_logical_sequence();
  310. if (!right_and_sequence)
  311. right_and_sequence = create<AST::SyntaxError>("Expected an expression after '||'");
  312. return create<AST::Or>(and_sequence.release_nonnull(), right_and_sequence.release_nonnull());
  313. }
  314. RefPtr<AST::Node> Parser::parse_and_logical_sequence()
  315. {
  316. consume_while(is_whitespace);
  317. auto rule_start = push_start();
  318. auto pipe_sequence = parse_pipe_sequence();
  319. if (!pipe_sequence)
  320. return nullptr;
  321. consume_while(is_whitespace);
  322. auto saved_offset = m_offset;
  323. if (!expect("&&")) {
  324. m_offset = saved_offset;
  325. return pipe_sequence;
  326. }
  327. auto right_and_sequence = parse_and_logical_sequence();
  328. if (!right_and_sequence)
  329. right_and_sequence = create<AST::SyntaxError>("Expected an expression after '&&'");
  330. return create<AST::And>(pipe_sequence.release_nonnull(), right_and_sequence.release_nonnull());
  331. }
  332. RefPtr<AST::Node> Parser::parse_pipe_sequence()
  333. {
  334. auto rule_start = push_start();
  335. auto left = parse_control_structure();
  336. if (!left) {
  337. if (auto cmd = parse_command())
  338. left = cmd;
  339. else
  340. return nullptr;
  341. }
  342. consume_while(is_whitespace);
  343. if (peek() != '|')
  344. return left;
  345. consume();
  346. if (auto pipe_seq = parse_pipe_sequence()) {
  347. return create<AST::Pipe>(left.release_nonnull(), pipe_seq.release_nonnull()); // Pipe
  348. }
  349. putback();
  350. return left;
  351. }
  352. RefPtr<AST::Node> Parser::parse_command()
  353. {
  354. auto rule_start = push_start();
  355. consume_while(is_whitespace);
  356. auto redir = parse_redirection();
  357. if (!redir) {
  358. auto list_expr = parse_list_expression();
  359. if (!list_expr)
  360. return nullptr;
  361. auto cast = create<AST::CastToCommand>(list_expr.release_nonnull()); // Cast List Command
  362. auto next_command = parse_command();
  363. if (!next_command)
  364. return cast;
  365. return create<AST::Join>(move(cast), next_command.release_nonnull()); // Join List Command
  366. }
  367. auto command = parse_command();
  368. if (!command)
  369. return redir;
  370. return create<AST::Join>(redir.release_nonnull(), command.release_nonnull()); // Join Command Command
  371. }
  372. RefPtr<AST::Node> Parser::parse_control_structure()
  373. {
  374. auto rule_start = push_start();
  375. consume_while(is_whitespace);
  376. if (auto for_loop = parse_for_loop())
  377. return for_loop;
  378. if (auto if_expr = parse_if_expr())
  379. return if_expr;
  380. if (auto subshell = parse_subshell())
  381. return subshell;
  382. if (auto match = parse_match_expr())
  383. return match;
  384. return nullptr;
  385. }
  386. RefPtr<AST::Node> Parser::parse_for_loop()
  387. {
  388. auto rule_start = push_start();
  389. if (!expect("for")) {
  390. m_offset = rule_start->offset;
  391. return nullptr;
  392. }
  393. if (consume_while(is_any_of(" \t\n")).is_empty()) {
  394. m_offset = rule_start->offset;
  395. return nullptr;
  396. }
  397. auto variable_name = consume_while(is_word_character);
  398. Optional<size_t> in_start_position;
  399. if (variable_name.is_empty()) {
  400. variable_name = "it";
  401. } else {
  402. consume_while(is_whitespace);
  403. auto in_error_start = push_start();
  404. in_start_position = in_error_start->offset;
  405. if (!expect("in")) {
  406. auto syntax_error = create<AST::SyntaxError>("Expected 'in' after a variable name in a 'for' loop");
  407. return create<AST::ForLoop>(move(variable_name), move(syntax_error), nullptr); // ForLoop Var Iterated Block
  408. }
  409. }
  410. consume_while(is_whitespace);
  411. RefPtr<AST::Node> iterated_expression;
  412. {
  413. auto iter_error_start = push_start();
  414. iterated_expression = parse_expression();
  415. if (!iterated_expression) {
  416. auto syntax_error = create<AST::SyntaxError>("Expected an expression in 'for' loop");
  417. return create<AST::ForLoop>(move(variable_name), move(syntax_error), nullptr, move(in_start_position)); // ForLoop Var Iterated Block
  418. }
  419. }
  420. consume_while(is_any_of(" \t\n"));
  421. {
  422. auto obrace_error_start = push_start();
  423. if (!expect('{')) {
  424. auto syntax_error = create<AST::SyntaxError>("Expected an open brace '{' to start a 'for' loop body");
  425. return create<AST::ForLoop>(move(variable_name), iterated_expression.release_nonnull(), move(syntax_error), move(in_start_position)); // ForLoop Var Iterated Block
  426. }
  427. }
  428. auto body = parse_toplevel();
  429. {
  430. auto cbrace_error_start = push_start();
  431. if (!expect('}')) {
  432. auto error_start = push_start();
  433. auto syntax_error = create<AST::SyntaxError>("Expected a close brace '}' to end a 'for' loop body");
  434. if (body)
  435. body->set_is_syntax_error(*syntax_error);
  436. else
  437. body = syntax_error;
  438. }
  439. }
  440. return create<AST::ForLoop>(move(variable_name), iterated_expression.release_nonnull(), move(body), move(in_start_position)); // ForLoop Var Iterated Block
  441. }
  442. RefPtr<AST::Node> Parser::parse_if_expr()
  443. {
  444. auto rule_start = push_start();
  445. if (!expect("if")) {
  446. m_offset = rule_start->offset;
  447. return nullptr;
  448. }
  449. if (consume_while(is_any_of(" \t\n")).is_empty()) {
  450. m_offset = rule_start->offset;
  451. return nullptr;
  452. }
  453. RefPtr<AST::Node> condition;
  454. {
  455. auto cond_error_start = push_start();
  456. condition = parse_or_logical_sequence();
  457. if (!condition)
  458. condition = create<AST::SyntaxError>("Expected a logical sequence after 'if'");
  459. }
  460. auto parse_braced_toplevel = [&]() -> RefPtr<AST::Node> {
  461. RefPtr<AST::Node> body;
  462. {
  463. auto obrace_error_start = push_start();
  464. if (!expect('{')) {
  465. body = create<AST::SyntaxError>("Expected an open brace '{' to start an 'if' true branch");
  466. }
  467. }
  468. if (!body)
  469. body = parse_toplevel();
  470. {
  471. auto cbrace_error_start = push_start();
  472. if (!expect('}')) {
  473. auto error_start = push_start();
  474. RefPtr<AST::SyntaxError> syntax_error = create<AST::SyntaxError>("Expected a close brace '}' to end an 'if' true branch");
  475. if (body)
  476. body->set_is_syntax_error(*syntax_error);
  477. else
  478. body = syntax_error;
  479. }
  480. }
  481. return body;
  482. };
  483. consume_while(is_whitespace);
  484. auto true_branch = parse_braced_toplevel();
  485. consume_while(is_whitespace);
  486. Optional<AST::Position> else_position;
  487. {
  488. auto else_start = push_start();
  489. if (expect("else"))
  490. else_position = AST::Position { else_start->offset, m_offset };
  491. }
  492. if (else_position.has_value()) {
  493. consume_while(is_whitespace);
  494. if (peek() == '{') {
  495. auto false_branch = parse_braced_toplevel();
  496. return create<AST::IfCond>(else_position, condition.release_nonnull(), move(true_branch), move(false_branch)); // If expr true_branch Else false_branch
  497. }
  498. auto else_if_branch = parse_if_expr();
  499. return create<AST::IfCond>(else_position, condition.release_nonnull(), move(true_branch), move(else_if_branch)); // If expr true_branch Else If ...
  500. }
  501. return create<AST::IfCond>(else_position, condition.release_nonnull(), move(true_branch), nullptr); // If expr true_branch
  502. }
  503. RefPtr<AST::Node> Parser::parse_subshell()
  504. {
  505. auto rule_start = push_start();
  506. if (!expect('{'))
  507. return nullptr;
  508. auto body = parse_toplevel();
  509. {
  510. auto cbrace_error_start = push_start();
  511. if (!expect('}')) {
  512. auto error_start = push_start();
  513. RefPtr<AST::SyntaxError> syntax_error = create<AST::SyntaxError>("Expected a close brace '}' to end a subshell");
  514. if (body)
  515. body->set_is_syntax_error(*syntax_error);
  516. else
  517. body = syntax_error;
  518. }
  519. }
  520. return create<AST::Subshell>(move(body));
  521. }
  522. RefPtr<AST::Node> Parser::parse_match_expr()
  523. {
  524. auto rule_start = push_start();
  525. if (!expect("match"))
  526. return nullptr;
  527. if (consume_while(is_whitespace).is_empty()) {
  528. m_offset = rule_start->offset;
  529. return nullptr;
  530. }
  531. auto match_expression = parse_expression();
  532. if (!match_expression) {
  533. return create<AST::MatchExpr>(
  534. create<AST::SyntaxError>("Expected an expression after 'match'"),
  535. String {}, Optional<AST::Position> {}, Vector<AST::MatchEntry> {});
  536. }
  537. consume_while(is_any_of(" \t\n"));
  538. String match_name;
  539. Optional<AST::Position> as_position;
  540. auto as_start = m_offset;
  541. if (expect("as")) {
  542. as_position = AST::Position { as_start, m_offset };
  543. if (consume_while(is_any_of(" \t\n")).is_empty()) {
  544. auto node = create<AST::MatchExpr>(
  545. match_expression.release_nonnull(),
  546. String {}, move(as_position), Vector<AST::MatchEntry> {});
  547. node->set_is_syntax_error(create<AST::SyntaxError>("Expected whitespace after 'as' in 'match'"));
  548. return node;
  549. }
  550. match_name = consume_while(is_word_character);
  551. if (match_name.is_empty()) {
  552. auto node = create<AST::MatchExpr>(
  553. match_expression.release_nonnull(),
  554. String {}, move(as_position), Vector<AST::MatchEntry> {});
  555. node->set_is_syntax_error(create<AST::SyntaxError>("Expected an identifier after 'as' in 'match'"));
  556. return node;
  557. }
  558. }
  559. consume_while(is_any_of(" \t\n"));
  560. if (!expect('{')) {
  561. auto node = create<AST::MatchExpr>(
  562. match_expression.release_nonnull(),
  563. move(match_name), move(as_position), Vector<AST::MatchEntry> {});
  564. node->set_is_syntax_error(create<AST::SyntaxError>("Expected an open brace '{' to start a 'match' entry list"));
  565. return node;
  566. }
  567. consume_while(is_any_of(" \t\n"));
  568. Vector<AST::MatchEntry> entries;
  569. for (;;) {
  570. auto entry = parse_match_entry();
  571. consume_while(is_any_of(" \t\n"));
  572. if (entry.options.is_empty())
  573. break;
  574. entries.append(entry);
  575. }
  576. consume_while(is_any_of(" \t\n"));
  577. if (!expect('}')) {
  578. auto node = create<AST::MatchExpr>(
  579. match_expression.release_nonnull(),
  580. move(match_name), move(as_position), move(entries));
  581. node->set_is_syntax_error(create<AST::SyntaxError>("Expected a close brace '}' to end a 'match' entry list"));
  582. return node;
  583. }
  584. return create<AST::MatchExpr>(match_expression.release_nonnull(), move(match_name), move(as_position), move(entries));
  585. }
  586. AST::MatchEntry Parser::parse_match_entry()
  587. {
  588. auto rule_start = push_start();
  589. NonnullRefPtrVector<AST::Node> patterns;
  590. Vector<AST::Position> pipe_positions;
  591. auto pattern = parse_match_pattern();
  592. if (!pattern)
  593. return { {}, {}, create<AST::SyntaxError>("Expected a pattern in 'match' body") };
  594. patterns.append(pattern.release_nonnull());
  595. consume_while(is_any_of(" \t\n"));
  596. auto previous_pipe_start_position = m_offset;
  597. RefPtr<AST::SyntaxError> error;
  598. while (expect('|')) {
  599. pipe_positions.append({ previous_pipe_start_position, m_offset });
  600. consume_while(is_any_of(" \t\n"));
  601. auto pattern = parse_match_pattern();
  602. if (!pattern) {
  603. error = create<AST::SyntaxError>("Expected a pattern to follow '|' in 'match' body");
  604. break;
  605. }
  606. consume_while(is_any_of(" \t\n"));
  607. patterns.append(pattern.release_nonnull());
  608. }
  609. consume_while(is_any_of(" \t\n"));
  610. if (!expect('{')) {
  611. if (!error)
  612. error = create<AST::SyntaxError>("Expected an open brace '{' to start a match entry body");
  613. }
  614. auto body = parse_toplevel();
  615. if (!expect('}')) {
  616. if (!error)
  617. error = create<AST::SyntaxError>("Expected a close brace '}' to end a match entry body");
  618. }
  619. if (body && error)
  620. body->set_is_syntax_error(*error);
  621. else if (error)
  622. body = error;
  623. return { move(patterns), move(pipe_positions), move(body) };
  624. }
  625. RefPtr<AST::Node> Parser::parse_match_pattern()
  626. {
  627. return parse_expression();
  628. }
  629. RefPtr<AST::Node> Parser::parse_redirection()
  630. {
  631. auto rule_start = push_start();
  632. auto pipe_fd = 0;
  633. auto number = consume_while(is_digit);
  634. if (number.is_empty()) {
  635. pipe_fd = -1;
  636. } else {
  637. auto fd = number.to_int();
  638. ASSERT(fd.has_value());
  639. pipe_fd = fd.value();
  640. }
  641. switch (peek()) {
  642. case '>': {
  643. consume();
  644. if (peek() == '>') {
  645. consume();
  646. consume_while(is_whitespace);
  647. pipe_fd = pipe_fd >= 0 ? pipe_fd : STDOUT_FILENO;
  648. auto path = parse_expression();
  649. if (!path) {
  650. if (!at_end()) {
  651. // Eat a character and hope the problem goes away
  652. consume();
  653. }
  654. path = create<AST::SyntaxError>("Expected a path after redirection");
  655. }
  656. return create<AST::WriteAppendRedirection>(pipe_fd, path.release_nonnull()); // Redirection WriteAppend
  657. }
  658. if (peek() == '&') {
  659. consume();
  660. // FIXME: 'fd>&-' Syntax not the best. needs discussion.
  661. if (peek() == '-') {
  662. consume();
  663. pipe_fd = pipe_fd >= 0 ? pipe_fd : STDOUT_FILENO;
  664. return create<AST::CloseFdRedirection>(pipe_fd); // Redirection CloseFd
  665. }
  666. int dest_pipe_fd = 0;
  667. auto number = consume_while(is_digit);
  668. pipe_fd = pipe_fd >= 0 ? pipe_fd : STDOUT_FILENO;
  669. if (number.is_empty()) {
  670. dest_pipe_fd = -1;
  671. } else {
  672. auto fd = number.to_int();
  673. ASSERT(fd.has_value());
  674. dest_pipe_fd = fd.value();
  675. }
  676. auto redir = create<AST::Fd2FdRedirection>(pipe_fd, dest_pipe_fd); // Redirection Fd2Fd
  677. if (dest_pipe_fd == -1)
  678. redir->set_is_syntax_error(*create<AST::SyntaxError>("Expected a file descriptor"));
  679. return redir;
  680. }
  681. consume_while(is_whitespace);
  682. pipe_fd = pipe_fd >= 0 ? pipe_fd : STDOUT_FILENO;
  683. auto path = parse_expression();
  684. if (!path) {
  685. if (!at_end()) {
  686. // Eat a character and hope the problem goes away
  687. consume();
  688. }
  689. path = create<AST::SyntaxError>("Expected a path after redirection");
  690. }
  691. return create<AST::WriteRedirection>(pipe_fd, path.release_nonnull()); // Redirection Write
  692. }
  693. case '<': {
  694. consume();
  695. enum {
  696. Read,
  697. ReadWrite,
  698. } mode { Read };
  699. if (peek() == '>') {
  700. mode = ReadWrite;
  701. consume();
  702. }
  703. consume_while(is_whitespace);
  704. pipe_fd = pipe_fd >= 0 ? pipe_fd : STDIN_FILENO;
  705. auto path = parse_expression();
  706. if (!path) {
  707. if (!at_end()) {
  708. // Eat a character and hope the problem goes away
  709. consume();
  710. }
  711. path = create<AST::SyntaxError>("Expected a path after redirection");
  712. }
  713. if (mode == Read)
  714. return create<AST::ReadRedirection>(pipe_fd, path.release_nonnull()); // Redirection Read
  715. return create<AST::ReadWriteRedirection>(pipe_fd, path.release_nonnull()); // Redirection ReadWrite
  716. }
  717. default:
  718. m_offset = rule_start->offset;
  719. return nullptr;
  720. }
  721. }
  722. RefPtr<AST::Node> Parser::parse_list_expression()
  723. {
  724. consume_while(is_whitespace);
  725. auto rule_start = push_start();
  726. Vector<NonnullRefPtr<AST::Node>> nodes;
  727. do {
  728. auto expr = parse_expression();
  729. if (!expr)
  730. break;
  731. nodes.append(expr.release_nonnull());
  732. } while (!consume_while(is_whitespace).is_empty());
  733. if (nodes.is_empty())
  734. return nullptr;
  735. return create<AST::ListConcatenate>(move(nodes)); // Concatenate List
  736. }
  737. RefPtr<AST::Node> Parser::parse_expression()
  738. {
  739. auto rule_start = push_start();
  740. auto starting_char = peek();
  741. auto read_concat = [&](auto&& expr) -> NonnullRefPtr<AST::Node> {
  742. if (is_whitespace(peek()))
  743. return move(expr);
  744. if (auto next_expr = parse_expression())
  745. return create<AST::Juxtaposition>(move(expr), next_expr.release_nonnull());
  746. return move(expr);
  747. };
  748. if (strchr("&|){} ;<>\n", starting_char) != nullptr)
  749. return nullptr;
  750. if (isdigit(starting_char)) {
  751. ScopedValueRollback offset_rollback { m_offset };
  752. auto redir = parse_redirection();
  753. if (redir)
  754. return nullptr;
  755. }
  756. if (starting_char == '$') {
  757. if (auto variable = parse_variable())
  758. return read_concat(variable.release_nonnull());
  759. if (auto inline_exec = parse_evaluate())
  760. return read_concat(inline_exec.release_nonnull());
  761. }
  762. if (starting_char == '#')
  763. return parse_comment();
  764. if (starting_char == '(') {
  765. consume();
  766. auto list = parse_list_expression();
  767. if (!expect(')')) {
  768. m_offset = rule_start->offset;
  769. return nullptr;
  770. }
  771. return read_concat(create<AST::CastToList>(move(list))); // Cast To List
  772. }
  773. if (auto composite = parse_string_composite())
  774. return read_concat(composite.release_nonnull());
  775. return nullptr;
  776. }
  777. RefPtr<AST::Node> Parser::parse_string_composite()
  778. {
  779. auto rule_start = push_start();
  780. if (auto string = parse_string()) {
  781. if (auto next_part = parse_string_composite())
  782. return create<AST::Juxtaposition>(string.release_nonnull(), next_part.release_nonnull()); // Concatenate String StringComposite
  783. return string;
  784. }
  785. if (auto variable = parse_variable()) {
  786. if (auto next_part = parse_string_composite())
  787. return create<AST::Juxtaposition>(variable.release_nonnull(), next_part.release_nonnull()); // Concatenate Variable StringComposite
  788. return variable;
  789. }
  790. if (auto glob = parse_glob()) {
  791. if (auto next_part = parse_string_composite())
  792. return create<AST::Juxtaposition>(glob.release_nonnull(), next_part.release_nonnull()); // Concatenate Glob StringComposite
  793. return glob;
  794. }
  795. if (auto bareword = parse_bareword()) {
  796. if (auto next_part = parse_string_composite())
  797. return create<AST::Juxtaposition>(bareword.release_nonnull(), next_part.release_nonnull()); // Concatenate Bareword StringComposite
  798. return bareword;
  799. }
  800. if (auto inline_command = parse_evaluate()) {
  801. if (auto next_part = parse_string_composite())
  802. return create<AST::Juxtaposition>(inline_command.release_nonnull(), next_part.release_nonnull()); // Concatenate Execute StringComposite
  803. return inline_command;
  804. }
  805. return nullptr;
  806. }
  807. RefPtr<AST::Node> Parser::parse_string()
  808. {
  809. auto rule_start = push_start();
  810. if (at_end())
  811. return nullptr;
  812. if (peek() == '"') {
  813. consume();
  814. auto inner = parse_doublequoted_string_inner();
  815. if (!inner)
  816. inner = create<AST::SyntaxError>("Unexpected EOF in string");
  817. if (!expect('"')) {
  818. inner = create<AST::DoubleQuotedString>(move(inner));
  819. inner->set_is_syntax_error(*create<AST::SyntaxError>("Expected a terminating double quote"));
  820. return inner;
  821. }
  822. return create<AST::DoubleQuotedString>(move(inner)); // Double Quoted String
  823. }
  824. if (peek() == '\'') {
  825. consume();
  826. auto text = consume_while(is_not('\''));
  827. bool is_error = false;
  828. if (!expect('\''))
  829. is_error = true;
  830. auto result = create<AST::StringLiteral>(move(text)); // String Literal
  831. if (is_error)
  832. result->set_is_syntax_error(*create<AST::SyntaxError>("Expected a terminating single quote"));
  833. return move(result);
  834. }
  835. return nullptr;
  836. }
  837. RefPtr<AST::Node> Parser::parse_doublequoted_string_inner()
  838. {
  839. auto rule_start = push_start();
  840. if (at_end())
  841. return nullptr;
  842. StringBuilder builder;
  843. while (!at_end() && peek() != '"') {
  844. if (peek() == '\\') {
  845. consume();
  846. if (at_end()) {
  847. break;
  848. }
  849. auto ch = consume();
  850. switch (ch) {
  851. case '\\':
  852. default:
  853. builder.append(ch);
  854. break;
  855. case 'x': {
  856. if (m_input.length() <= m_offset + 2)
  857. break;
  858. auto first_nibble = tolower(consume());
  859. auto second_nibble = tolower(consume());
  860. if (!isxdigit(first_nibble) || !isxdigit(second_nibble)) {
  861. builder.append(first_nibble);
  862. builder.append(second_nibble);
  863. break;
  864. }
  865. builder.append(to_byte(first_nibble, second_nibble));
  866. break;
  867. }
  868. case 'a':
  869. builder.append('\a');
  870. break;
  871. case 'b':
  872. builder.append('\b');
  873. break;
  874. case 'e':
  875. builder.append('\x1b');
  876. break;
  877. case 'f':
  878. builder.append('\f');
  879. break;
  880. case 'r':
  881. builder.append('\r');
  882. break;
  883. case 'n':
  884. builder.append('\n');
  885. break;
  886. }
  887. continue;
  888. }
  889. if (peek() == '$') {
  890. auto string_literal = create<AST::StringLiteral>(builder.to_string()); // String Literal
  891. if (auto variable = parse_variable()) {
  892. auto inner = create<AST::StringPartCompose>(
  893. move(string_literal),
  894. variable.release_nonnull()); // Compose String Variable
  895. if (auto string = parse_doublequoted_string_inner()) {
  896. return create<AST::StringPartCompose>(move(inner), string.release_nonnull()); // Compose Composition Composition
  897. }
  898. return inner;
  899. }
  900. if (auto evaluate = parse_evaluate()) {
  901. auto composition = create<AST::StringPartCompose>(
  902. move(string_literal),
  903. evaluate.release_nonnull()); // Compose String Sequence
  904. if (auto string = parse_doublequoted_string_inner()) {
  905. return create<AST::StringPartCompose>(move(composition), string.release_nonnull()); // Compose Composition Composition
  906. }
  907. return composition;
  908. }
  909. }
  910. builder.append(consume());
  911. }
  912. return create<AST::StringLiteral>(builder.to_string()); // String Literal
  913. }
  914. RefPtr<AST::Node> Parser::parse_variable()
  915. {
  916. auto rule_start = push_start();
  917. if (at_end())
  918. return nullptr;
  919. if (peek() != '$')
  920. return nullptr;
  921. consume();
  922. switch (peek()) {
  923. case '$':
  924. case '?':
  925. case '*':
  926. case '#':
  927. return create<AST::SpecialVariable>(consume()); // Variable Special
  928. default:
  929. break;
  930. }
  931. auto name = consume_while(is_word_character);
  932. if (name.length() == 0) {
  933. putback();
  934. return nullptr;
  935. }
  936. return create<AST::SimpleVariable>(move(name)); // Variable Simple
  937. }
  938. RefPtr<AST::Node> Parser::parse_evaluate()
  939. {
  940. auto rule_start = push_start();
  941. if (at_end())
  942. return nullptr;
  943. if (peek() != '$')
  944. return nullptr;
  945. consume();
  946. if (peek() == '(') {
  947. consume();
  948. auto inner = parse_pipe_sequence();
  949. if (!inner)
  950. inner = create<AST::SyntaxError>("Unexpected EOF in list");
  951. if (!expect(')'))
  952. inner->set_is_syntax_error(*create<AST::SyntaxError>("Expected a terminating close paren"));
  953. return create<AST::Execute>(inner.release_nonnull(), true);
  954. }
  955. auto inner = parse_expression();
  956. if (!inner) {
  957. inner = create<AST::SyntaxError>("Expected a command");
  958. } else {
  959. if (inner->is_list()) {
  960. auto execute_inner = create<AST::Execute>(inner.release_nonnull(), true);
  961. inner = move(execute_inner);
  962. } else {
  963. auto dyn_inner = create<AST::DynamicEvaluate>(inner.release_nonnull());
  964. inner = move(dyn_inner);
  965. }
  966. }
  967. return inner;
  968. }
  969. RefPtr<AST::Node> Parser::parse_comment()
  970. {
  971. if (at_end())
  972. return nullptr;
  973. if (peek() != '#')
  974. return nullptr;
  975. consume();
  976. auto text = consume_while(is_not('\n'));
  977. return create<AST::Comment>(move(text)); // Comment
  978. }
  979. RefPtr<AST::Node> Parser::parse_bareword()
  980. {
  981. auto rule_start = push_start();
  982. StringBuilder builder;
  983. auto is_acceptable_bareword_character = [](char c) {
  984. return strchr("\\\"'*$&#|(){} ?;<>\n", c) == nullptr;
  985. };
  986. while (!at_end()) {
  987. char ch = peek();
  988. if (ch == '\\') {
  989. consume();
  990. if (!at_end()) {
  991. ch = consume();
  992. if (is_acceptable_bareword_character(ch))
  993. builder.append('\\');
  994. }
  995. builder.append(ch);
  996. continue;
  997. }
  998. if (is_acceptable_bareword_character(ch)) {
  999. builder.append(consume());
  1000. continue;
  1001. }
  1002. break;
  1003. }
  1004. if (builder.is_empty())
  1005. return nullptr;
  1006. auto current_end = m_offset;
  1007. auto string = builder.to_string();
  1008. if (string.starts_with('~')) {
  1009. String username;
  1010. RefPtr<AST::Node> tilde, text;
  1011. auto first_slash_index = string.index_of("/");
  1012. if (first_slash_index.has_value()) {
  1013. username = string.substring_view(1, first_slash_index.value() - 1);
  1014. string = string.substring_view(first_slash_index.value(), string.length() - first_slash_index.value());
  1015. } else {
  1016. username = string.substring_view(1, string.length() - 1);
  1017. string = "";
  1018. }
  1019. // Synthesize a Tilde Node with the correct positioning information.
  1020. {
  1021. m_offset -= string.length();
  1022. tilde = create<AST::Tilde>(move(username));
  1023. }
  1024. if (string.is_empty())
  1025. return tilde;
  1026. // Synthesize a BarewordLiteral Node with the correct positioning information.
  1027. {
  1028. m_offset = tilde->position().end_offset;
  1029. auto text_start = push_start();
  1030. m_offset = current_end;
  1031. text = create<AST::BarewordLiteral>(move(string));
  1032. }
  1033. return create<AST::Juxtaposition>(tilde.release_nonnull(), text.release_nonnull()); // Juxtaposition Varible Bareword
  1034. }
  1035. if (string.starts_with("\\~")) {
  1036. // Un-escape the tilde, but only at the start (where it would be an expansion)
  1037. string = string.substring(1, string.length() - 1);
  1038. }
  1039. return create<AST::BarewordLiteral>(move(string)); // Bareword Literal
  1040. }
  1041. RefPtr<AST::Node> Parser::parse_glob()
  1042. {
  1043. auto rule_start = push_start();
  1044. auto bareword_part = parse_bareword();
  1045. if (at_end())
  1046. return bareword_part;
  1047. char ch = peek();
  1048. if (ch == '*' || ch == '?') {
  1049. consume();
  1050. StringBuilder textbuilder;
  1051. if (bareword_part) {
  1052. StringView text;
  1053. if (bareword_part->is_bareword()) {
  1054. auto bareword = static_cast<AST::BarewordLiteral*>(bareword_part.ptr());
  1055. text = bareword->text();
  1056. } else {
  1057. // FIXME: Allow composition of tilde+bareword with globs: '~/foo/bar/baz*'
  1058. putback();
  1059. bareword_part->set_is_syntax_error(*create<AST::SyntaxError>(String::format("Unexpected %s inside a glob", bareword_part->class_name().characters())));
  1060. return bareword_part;
  1061. }
  1062. textbuilder.append(text);
  1063. }
  1064. textbuilder.append(ch);
  1065. auto glob_after = parse_glob();
  1066. if (glob_after) {
  1067. if (glob_after->is_glob()) {
  1068. auto glob = static_cast<AST::BarewordLiteral*>(glob_after.ptr());
  1069. textbuilder.append(glob->text());
  1070. } else if (glob_after->is_bareword()) {
  1071. auto bareword = static_cast<AST::BarewordLiteral*>(glob_after.ptr());
  1072. textbuilder.append(bareword->text());
  1073. } else {
  1074. ASSERT_NOT_REACHED();
  1075. }
  1076. }
  1077. return create<AST::Glob>(textbuilder.to_string()); // Glob
  1078. }
  1079. return bareword_part;
  1080. }
  1081. StringView Parser::consume_while(Function<bool(char)> condition)
  1082. {
  1083. auto start_offset = m_offset;
  1084. while (!at_end() && condition(peek()))
  1085. consume();
  1086. return m_input.substring_view(start_offset, m_offset - start_offset);
  1087. }