Parser.cpp 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076
  1. /*
  2. * Copyright (c) 2020, the SerenityOS developers.
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. *
  8. * 1. Redistributions of source code must retain the above copyright notice, this
  9. * list of conditions and the following disclaimer.
  10. *
  11. * 2. Redistributions in binary form must reproduce the above copyright notice,
  12. * this list of conditions and the following disclaimer in the documentation
  13. * and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  18. * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  19. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  21. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  22. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  23. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #include "Parser.h"
  27. #include <ctype.h>
  28. #include <stdio.h>
  29. #include <unistd.h>
  30. char Parser::peek()
  31. {
  32. if (m_offset == m_input.length())
  33. return 0;
  34. ASSERT(m_offset < m_input.length());
  35. return m_input[m_offset];
  36. }
  37. char Parser::consume()
  38. {
  39. auto ch = peek();
  40. ++m_offset;
  41. return ch;
  42. }
  43. void Parser::putback()
  44. {
  45. ASSERT(m_offset > 0);
  46. --m_offset;
  47. }
  48. bool Parser::expect(char ch)
  49. {
  50. return expect(StringView { &ch, 1 });
  51. }
  52. bool Parser::expect(const StringView& expected)
  53. {
  54. if (expected.length() + m_offset > m_input.length())
  55. return false;
  56. for (size_t i = 0; i < expected.length(); ++i) {
  57. if (peek() != expected[i])
  58. return false;
  59. consume();
  60. }
  61. return true;
  62. }
  63. template<typename A, typename... Args>
  64. NonnullRefPtr<A> Parser::create(Args... args)
  65. {
  66. return adopt(*new A(AST::Position { m_rule_start_offsets.last(), m_offset }, args...));
  67. }
  68. [[nodiscard]] OwnPtr<Parser::ScopedOffset> Parser::push_start()
  69. {
  70. return make<ScopedOffset>(m_rule_start_offsets, m_offset);
  71. }
  // True for a space or a horizontal tab; newlines are not included.
  static constexpr bool is_whitespace(char c)
  {
      switch (c) {
      case ' ':
      case '\t':
          return true;
      default:
          return false;
      }
  }
  // True for ASCII digits, ASCII letters and the underscore.
  static constexpr bool is_word_character(char c)
  {
      const bool digit = '0' <= c && c <= '9';
      const bool upper = 'A' <= c && c <= 'Z';
      const bool lower = 'a' <= c && c <= 'z';
      return digit || upper || lower || c == '_';
  }
  // True for the ASCII digits '0' through '9'.
  static constexpr bool is_digit(char c)
  {
      return '0' <= c && c <= '9';
  }
  // Builds a predicate that accepts every character except `c`.
  static constexpr auto is_not(char c)
  {
      return [excluded = c](char candidate) { return !(candidate == excluded); };
  }
  88. static constexpr auto is_any_of(StringView s)
  89. {
  90. return [s](char ch) { return s.contains(ch); };
  91. }
  // Interprets the two characters `a` and `b` as a base-16 number via strtol
  // and returns the result converted to char.
  static inline char to_byte(char a, char b)
  {
      const char digits[3] = { a, b, '\0' };
      const long value = strtol(digits, nullptr, 16);
      return value;
  }
  97. RefPtr<AST::Node> Parser::parse()
  98. {
  99. m_offset = 0;
  100. auto toplevel = parse_toplevel();
  101. if (m_offset < m_input.length()) {
  102. // Parsing stopped midway, this is a syntax error.
  103. auto error_start = push_start();
  104. m_offset = m_input.length();
  105. auto syntax_error_node = create<AST::SyntaxError>("Unexpected tokens past the end");
  106. if (toplevel)
  107. return create<AST::Join>(move(toplevel), move(syntax_error_node));
  108. return syntax_error_node;
  109. }
  110. return toplevel;
  111. }
  112. RefPtr<AST::Node> Parser::parse_toplevel()
  113. {
  114. auto rule_start = push_start();
  115. if (auto sequence = parse_sequence())
  116. return create<AST::Execute>(sequence);
  117. return nullptr;
  118. }
  119. RefPtr<AST::Node> Parser::parse_sequence()
  120. {
  121. consume_while(is_any_of(" \t\n;")); // ignore whitespaces or terminators without effect.
  122. auto rule_start = push_start();
  123. auto var_decls = parse_variable_decls();
  124. switch (peek()) {
  125. case '}':
  126. return var_decls;
  127. case ';':
  128. case '\n': {
  129. if (!var_decls)
  130. break;
  131. consume_while(is_any_of("\n;"));
  132. auto rest = parse_sequence();
  133. if (rest)
  134. return create<AST::Sequence>(move(var_decls), move(rest));
  135. return var_decls;
  136. }
  137. default:
  138. break;
  139. }
  140. auto first = parse_or_logical_sequence();
  141. if (!first)
  142. return var_decls;
  143. if (var_decls)
  144. first = create<AST::Sequence>(move(var_decls), move(first));
  145. consume_while(is_whitespace);
  146. switch (peek()) {
  147. case ';':
  148. case '\n':
  149. consume_while(is_any_of("\n;"));
  150. if (auto expr = parse_sequence()) {
  151. return create<AST::Sequence>(move(first), move(expr)); // Sequence
  152. }
  153. return first;
  154. case '&': {
  155. auto execute_pipe_seq = first->would_execute() ? first : static_cast<RefPtr<AST::Node>>(create<AST::Execute>(first));
  156. consume();
  157. auto bg = create<AST::Background>(move(first)); // Execute Background
  158. if (auto rest = parse_sequence())
  159. return create<AST::Sequence>(move(bg), move(rest)); // Sequence Background Sequence
  160. return bg;
  161. }
  162. default:
  163. return first;
  164. }
  165. }
  166. RefPtr<AST::Node> Parser::parse_variable_decls()
  167. {
  168. auto rule_start = push_start();
  169. consume_while(is_whitespace);
  170. auto offset_before_name = m_offset;
  171. auto var_name = consume_while(is_word_character);
  172. if (var_name.is_empty())
  173. return nullptr;
  174. if (!expect('=')) {
  175. m_offset = offset_before_name;
  176. return nullptr;
  177. }
  178. auto name_expr = create<AST::BarewordLiteral>(move(var_name));
  179. auto start = push_start();
  180. auto expression = parse_expression();
  181. if (!expression || expression->is_syntax_error()) {
  182. m_offset = start->offset;
  183. if (peek() == '(') {
  184. consume();
  185. auto command = parse_pipe_sequence();
  186. if (!command)
  187. m_offset = start->offset;
  188. else if (!expect(')'))
  189. command->set_is_syntax_error(*create<AST::SyntaxError>("Expected a terminating close paren"));
  190. expression = command;
  191. }
  192. }
  193. if (!expression) {
  194. if (is_whitespace(peek())) {
  195. auto string_start = push_start();
  196. expression = create<AST::StringLiteral>("");
  197. } else {
  198. m_offset = offset_before_name;
  199. return nullptr;
  200. }
  201. }
  202. Vector<AST::VariableDeclarations::Variable> variables;
  203. variables.append({ move(name_expr), expression.release_nonnull() });
  204. if (consume_while(is_whitespace).is_empty())
  205. return create<AST::VariableDeclarations>(move(variables));
  206. auto rest = parse_variable_decls();
  207. if (!rest)
  208. return create<AST::VariableDeclarations>(move(variables));
  209. ASSERT(rest->is_variable_decls());
  210. auto* rest_decl = static_cast<AST::VariableDeclarations*>(rest.ptr());
  211. variables.append(rest_decl->variables());
  212. return create<AST::VariableDeclarations>(move(variables));
  213. }
  214. RefPtr<AST::Node> Parser::parse_or_logical_sequence()
  215. {
  216. consume_while(is_whitespace);
  217. auto rule_start = push_start();
  218. auto and_sequence = parse_and_logical_sequence();
  219. if (!and_sequence)
  220. return nullptr;
  221. consume_while(is_whitespace);
  222. auto saved_offset = m_offset;
  223. if (!expect("||")) {
  224. m_offset = saved_offset;
  225. return and_sequence;
  226. }
  227. auto right_and_sequence = parse_and_logical_sequence();
  228. if (!right_and_sequence)
  229. right_and_sequence = create<AST::SyntaxError>("Expected an expression after '||'");
  230. return create<AST::Or>(move(and_sequence), move(right_and_sequence));
  231. }
  232. RefPtr<AST::Node> Parser::parse_and_logical_sequence()
  233. {
  234. consume_while(is_whitespace);
  235. auto rule_start = push_start();
  236. auto pipe_sequence = parse_pipe_sequence();
  237. if (!pipe_sequence)
  238. return nullptr;
  239. consume_while(is_whitespace);
  240. auto saved_offset = m_offset;
  241. if (!expect("&&")) {
  242. m_offset = saved_offset;
  243. return pipe_sequence;
  244. }
  245. auto right_and_sequence = parse_and_logical_sequence();
  246. if (!right_and_sequence)
  247. right_and_sequence = create<AST::SyntaxError>("Expected an expression after '&&'");
  248. return create<AST::And>(move(pipe_sequence), move(right_and_sequence));
  249. }
  250. RefPtr<AST::Node> Parser::parse_pipe_sequence()
  251. {
  252. auto rule_start = push_start();
  253. auto left = parse_control_structure();
  254. if (!left) {
  255. if (auto cmd = parse_command())
  256. left = cmd;
  257. else
  258. return nullptr;
  259. }
  260. consume_while(is_whitespace);
  261. if (peek() != '|')
  262. return left;
  263. consume();
  264. if (auto pipe_seq = parse_pipe_sequence()) {
  265. return create<AST::Pipe>(move(left), move(pipe_seq)); // Pipe
  266. }
  267. putback();
  268. return left;
  269. }
  270. RefPtr<AST::Node> Parser::parse_command()
  271. {
  272. auto rule_start = push_start();
  273. consume_while(is_whitespace);
  274. auto redir = parse_redirection();
  275. if (!redir) {
  276. auto list_expr = parse_list_expression();
  277. if (!list_expr)
  278. return nullptr;
  279. auto cast = create<AST::CastToCommand>(move(list_expr)); // Cast List Command
  280. auto next_command = parse_command();
  281. if (!next_command)
  282. return cast;
  283. return create<AST::Join>(move(cast), move(next_command)); // Join List Command
  284. }
  285. auto command = parse_command();
  286. if (!command)
  287. return redir;
  288. return create<AST::Join>(move(redir), command); // Join Command Command
  289. }
  290. RefPtr<AST::Node> Parser::parse_control_structure()
  291. {
  292. auto rule_start = push_start();
  293. consume_while(is_whitespace);
  294. if (auto for_loop = parse_for_loop())
  295. return for_loop;
  296. if (auto if_expr = parse_if_expr())
  297. return if_expr;
  298. if (auto subshell = parse_subshell())
  299. return subshell;
  300. return nullptr;
  301. }
  302. RefPtr<AST::Node> Parser::parse_for_loop()
  303. {
  304. auto rule_start = push_start();
  305. if (!expect("for")) {
  306. m_offset = rule_start->offset;
  307. return nullptr;
  308. }
  309. if (consume_while(is_any_of(" \t\n")).is_empty()) {
  310. m_offset = rule_start->offset;
  311. return nullptr;
  312. }
  313. auto variable_name = consume_while(is_word_character);
  314. Optional<size_t> in_start_position;
  315. if (variable_name.is_empty()) {
  316. variable_name = "it";
  317. } else {
  318. consume_while(is_whitespace);
  319. auto in_error_start = push_start();
  320. in_start_position = in_error_start->offset;
  321. if (!expect("in")) {
  322. auto syntax_error = create<AST::SyntaxError>("Expected 'in' after a variable name in a 'for' loop");
  323. return create<AST::ForLoop>(move(variable_name), move(syntax_error), nullptr); // ForLoop Var Iterated Block
  324. }
  325. }
  326. consume_while(is_whitespace);
  327. RefPtr<AST::Node> iterated_expression;
  328. {
  329. auto iter_error_start = push_start();
  330. iterated_expression = parse_expression();
  331. if (!iterated_expression) {
  332. auto syntax_error = create<AST::SyntaxError>("Expected an expression in 'for' loop");
  333. return create<AST::ForLoop>(move(variable_name), move(syntax_error), nullptr, move(in_start_position)); // ForLoop Var Iterated Block
  334. }
  335. }
  336. consume_while(is_any_of(" \t\n"));
  337. {
  338. auto obrace_error_start = push_start();
  339. if (!expect('{')) {
  340. auto syntax_error = create<AST::SyntaxError>("Expected an open brace '{' to start a 'for' loop body");
  341. return create<AST::ForLoop>(move(variable_name), move(iterated_expression), move(syntax_error), move(in_start_position)); // ForLoop Var Iterated Block
  342. }
  343. }
  344. auto body = parse_toplevel();
  345. {
  346. auto cbrace_error_start = push_start();
  347. if (!expect('}')) {
  348. auto error_start = push_start();
  349. RefPtr<AST::SyntaxError> syntax_error = create<AST::SyntaxError>("Expected a close brace '}' to end a 'for' loop body");
  350. if (body)
  351. body->set_is_syntax_error(*syntax_error);
  352. else
  353. body = syntax_error;
  354. }
  355. }
  356. return create<AST::ForLoop>(move(variable_name), move(iterated_expression), move(body), move(in_start_position)); // ForLoop Var Iterated Block
  357. }
  358. RefPtr<AST::Node> Parser::parse_if_expr()
  359. {
  360. auto rule_start = push_start();
  361. if (!expect("if")) {
  362. m_offset = rule_start->offset;
  363. return nullptr;
  364. }
  365. if (consume_while(is_any_of(" \t\n")).is_empty()) {
  366. m_offset = rule_start->offset;
  367. return nullptr;
  368. }
  369. RefPtr<AST::Node> condition;
  370. {
  371. auto cond_error_start = push_start();
  372. condition = parse_or_logical_sequence();
  373. if (!condition) {
  374. auto syntax_error = create<AST::SyntaxError>("Expected a logical sequence after 'if'");
  375. return create<AST::IfCond>(Optional<AST::Position> {}, move(syntax_error), nullptr, nullptr);
  376. }
  377. }
  378. auto parse_braced_toplevel = [&]() -> RefPtr<AST::Node> {
  379. {
  380. auto obrace_error_start = push_start();
  381. if (!expect('{')) {
  382. auto syntax_error = create<AST::SyntaxError>("Expected an open brace '{' to start an 'if' true branch");
  383. return syntax_error;
  384. }
  385. }
  386. auto body = parse_toplevel();
  387. {
  388. auto cbrace_error_start = push_start();
  389. if (!expect('}')) {
  390. auto error_start = push_start();
  391. RefPtr<AST::SyntaxError> syntax_error = create<AST::SyntaxError>("Expected a close brace '}' to end an 'if' true branch");
  392. if (body)
  393. body->set_is_syntax_error(*syntax_error);
  394. else
  395. body = syntax_error;
  396. }
  397. }
  398. return body;
  399. };
  400. consume_while(is_whitespace);
  401. auto true_branch = parse_braced_toplevel();
  402. if (true_branch && true_branch->is_syntax_error())
  403. return create<AST::IfCond>(Optional<AST::Position> {}, move(condition), move(true_branch), nullptr); // If expr syntax_error
  404. consume_while(is_whitespace);
  405. Optional<AST::Position> else_position;
  406. {
  407. auto else_start = push_start();
  408. if (expect("else"))
  409. else_position = AST::Position { else_start->offset, m_offset };
  410. }
  411. if (else_position.has_value()) {
  412. consume_while(is_whitespace);
  413. if (peek() == '{') {
  414. auto false_branch = parse_braced_toplevel();
  415. return create<AST::IfCond>(else_position, move(condition), move(true_branch), move(false_branch)); // If expr true_branch Else false_branch
  416. }
  417. auto else_if_branch = parse_if_expr();
  418. return create<AST::IfCond>(else_position, move(condition), move(true_branch), move(else_if_branch)); // If expr true_branch Else If ...
  419. }
  420. return create<AST::IfCond>(else_position, move(condition), move(true_branch), nullptr); // If expr true_branch
  421. }
  422. RefPtr<AST::Node> Parser::parse_subshell()
  423. {
  424. auto rule_start = push_start();
  425. if (!expect('{'))
  426. return nullptr;
  427. auto body = parse_toplevel();
  428. {
  429. auto cbrace_error_start = push_start();
  430. if (!expect('}')) {
  431. auto error_start = push_start();
  432. RefPtr<AST::SyntaxError> syntax_error = create<AST::SyntaxError>("Expected a close brace '}' to end a subshell");
  433. if (body)
  434. body->set_is_syntax_error(*syntax_error);
  435. else
  436. body = syntax_error;
  437. }
  438. }
  439. return create<AST::Subshell>(move(body));
  440. }
  441. RefPtr<AST::Node> Parser::parse_redirection()
  442. {
  443. auto rule_start = push_start();
  444. auto pipe_fd = 0;
  445. auto number = consume_while(is_digit);
  446. if (number.is_empty()) {
  447. pipe_fd = -1;
  448. } else {
  449. auto fd = number.to_int();
  450. ASSERT(fd.has_value());
  451. pipe_fd = fd.value();
  452. }
  453. switch (peek()) {
  454. case '>': {
  455. consume();
  456. if (peek() == '>') {
  457. consume();
  458. consume_while(is_whitespace);
  459. pipe_fd = pipe_fd >= 0 ? pipe_fd : STDOUT_FILENO;
  460. auto path = parse_expression();
  461. if (!path) {
  462. if (!at_end()) {
  463. // Eat a character and hope the problem goes away
  464. consume();
  465. }
  466. return create<AST::SyntaxError>("Expected a path");
  467. }
  468. return create<AST::WriteAppendRedirection>(pipe_fd, move(path)); // Redirection WriteAppend
  469. }
  470. if (peek() == '&') {
  471. consume();
  472. // FIXME: 'fd>&-' Syntax not the best. needs discussion.
  473. if (peek() == '-') {
  474. consume();
  475. pipe_fd = pipe_fd >= 0 ? pipe_fd : STDOUT_FILENO;
  476. return create<AST::CloseFdRedirection>(pipe_fd); // Redirection CloseFd
  477. }
  478. int dest_pipe_fd = 0;
  479. auto number = consume_while(is_digit);
  480. pipe_fd = pipe_fd >= 0 ? pipe_fd : STDOUT_FILENO;
  481. if (number.is_empty()) {
  482. dest_pipe_fd = -1;
  483. } else {
  484. auto fd = number.to_int();
  485. ASSERT(fd.has_value());
  486. dest_pipe_fd = fd.value();
  487. }
  488. auto redir = create<AST::Fd2FdRedirection>(pipe_fd, dest_pipe_fd); // Redirection Fd2Fd
  489. if (dest_pipe_fd == -1)
  490. redir->set_is_syntax_error(*create<AST::SyntaxError>("Expected a file descriptor"));
  491. return redir;
  492. }
  493. consume_while(is_whitespace);
  494. pipe_fd = pipe_fd >= 0 ? pipe_fd : STDOUT_FILENO;
  495. auto path = parse_expression();
  496. if (!path) {
  497. if (!at_end()) {
  498. // Eat a character and hope the problem goes away
  499. consume();
  500. }
  501. return create<AST::SyntaxError>("Expected a path");
  502. }
  503. return create<AST::WriteRedirection>(pipe_fd, move(path)); // Redirection Write
  504. }
  505. case '<': {
  506. consume();
  507. enum {
  508. Read,
  509. ReadWrite,
  510. } mode { Read };
  511. if (peek() == '>') {
  512. mode = ReadWrite;
  513. consume();
  514. }
  515. consume_while(is_whitespace);
  516. pipe_fd = pipe_fd >= 0 ? pipe_fd : STDIN_FILENO;
  517. auto path = parse_expression();
  518. if (!path) {
  519. if (!at_end()) {
  520. // Eat a character and hope the problem goes away
  521. consume();
  522. }
  523. return create<AST::SyntaxError>("Expected a path");
  524. }
  525. if (mode == Read)
  526. return create<AST::ReadRedirection>(pipe_fd, move(path)); // Redirection Read
  527. return create<AST::ReadWriteRedirection>(pipe_fd, move(path)); // Redirection ReadWrite
  528. }
  529. default:
  530. m_offset = rule_start->offset;
  531. return nullptr;
  532. }
  533. }
  534. RefPtr<AST::Node> Parser::parse_list_expression()
  535. {
  536. consume_while(is_whitespace);
  537. auto rule_start = push_start();
  538. Vector<RefPtr<AST::Node>> nodes;
  539. do {
  540. auto expr = parse_expression();
  541. if (!expr)
  542. break;
  543. nodes.append(move(expr));
  544. } while (!consume_while(is_whitespace).is_empty());
  545. if (nodes.is_empty())
  546. return nullptr;
  547. return create<AST::ListConcatenate>(move(nodes)); // Concatenate List
  548. }
  549. RefPtr<AST::Node> Parser::parse_expression()
  550. {
  551. auto rule_start = push_start();
  552. auto starting_char = peek();
  553. auto read_concat = [&](auto expr) -> RefPtr<AST::Node> {
  554. if (is_whitespace(peek()))
  555. return expr;
  556. if (auto next_expr = parse_expression())
  557. return create<AST::Juxtaposition>(move(expr), move(next_expr));
  558. return expr;
  559. };
  560. if (strchr("&|){} ;<>\n", starting_char) != nullptr)
  561. return nullptr;
  562. if (isdigit(starting_char)) {
  563. ScopedValueRollback offset_rollback { m_offset };
  564. auto redir = parse_redirection();
  565. if (redir)
  566. return nullptr;
  567. }
  568. if (starting_char == '$') {
  569. if (auto variable = parse_variable())
  570. return read_concat(variable);
  571. if (auto inline_exec = parse_evaluate())
  572. return read_concat(inline_exec);
  573. }
  574. if (starting_char == '#')
  575. return parse_comment();
  576. if (starting_char == '(') {
  577. consume();
  578. auto list = parse_list_expression();
  579. if (!expect(')')) {
  580. m_offset = rule_start->offset;
  581. return nullptr;
  582. }
  583. return read_concat(create<AST::CastToList>(move(list))); // Cast To List
  584. }
  585. return read_concat(parse_string_composite());
  586. }
  587. RefPtr<AST::Node> Parser::parse_string_composite()
  588. {
  589. auto rule_start = push_start();
  590. if (auto string = parse_string()) {
  591. if (auto next_part = parse_string_composite())
  592. return create<AST::Juxtaposition>(move(string), move(next_part)); // Concatenate String StringComposite
  593. return string;
  594. }
  595. if (auto variable = parse_variable()) {
  596. if (auto next_part = parse_string_composite())
  597. return create<AST::Juxtaposition>(move(variable), move(next_part)); // Concatenate Variable StringComposite
  598. return variable;
  599. }
  600. if (auto glob = parse_glob()) {
  601. if (auto next_part = parse_string_composite())
  602. return create<AST::Juxtaposition>(move(glob), move(next_part)); // Concatenate Glob StringComposite
  603. return glob;
  604. }
  605. if (auto bareword = parse_bareword()) {
  606. if (auto next_part = parse_string_composite())
  607. return create<AST::Juxtaposition>(move(bareword), move(next_part)); // Concatenate Bareword StringComposite
  608. return bareword;
  609. }
  610. if (auto inline_command = parse_evaluate()) {
  611. if (auto next_part = parse_string_composite())
  612. return create<AST::Juxtaposition>(move(inline_command), move(next_part)); // Concatenate Execute StringComposite
  613. return inline_command;
  614. }
  615. return nullptr;
  616. }
  617. RefPtr<AST::Node> Parser::parse_string()
  618. {
  619. auto rule_start = push_start();
  620. if (at_end())
  621. return nullptr;
  622. if (peek() == '"') {
  623. consume();
  624. auto inner = parse_doublequoted_string_inner();
  625. if (!inner)
  626. inner = create<AST::SyntaxError>("Unexpected EOF in string");
  627. if (!expect('"')) {
  628. inner = create<AST::DoubleQuotedString>(move(inner));
  629. inner->set_is_syntax_error(*create<AST::SyntaxError>("Expected a terminating double quote"));
  630. return inner;
  631. }
  632. return create<AST::DoubleQuotedString>(move(inner)); // Double Quoted String
  633. }
  634. if (peek() == '\'') {
  635. consume();
  636. auto text = consume_while(is_not('\''));
  637. bool is_error = false;
  638. if (!expect('\''))
  639. is_error = true;
  640. auto result = create<AST::StringLiteral>(move(text)); // String Literal
  641. if (is_error)
  642. result->set_is_syntax_error(*create<AST::SyntaxError>("Expected a terminating single quote"));
  643. return move(result);
  644. }
  645. return nullptr;
  646. }
  647. RefPtr<AST::Node> Parser::parse_doublequoted_string_inner()
  648. {
  649. auto rule_start = push_start();
  650. if (at_end())
  651. return nullptr;
  652. StringBuilder builder;
  653. while (!at_end() && peek() != '"') {
  654. if (peek() == '\\') {
  655. consume();
  656. if (at_end()) {
  657. break;
  658. }
  659. auto ch = consume();
  660. switch (ch) {
  661. case '\\':
  662. default:
  663. builder.append(ch);
  664. break;
  665. case 'x': {
  666. if (m_input.length() <= m_offset + 2)
  667. break;
  668. auto first_nibble = tolower(consume());
  669. auto second_nibble = tolower(consume());
  670. if (!isxdigit(first_nibble) || !isxdigit(second_nibble)) {
  671. builder.append(first_nibble);
  672. builder.append(second_nibble);
  673. break;
  674. }
  675. builder.append(to_byte(first_nibble, second_nibble));
  676. break;
  677. }
  678. case 'a':
  679. builder.append('\a');
  680. break;
  681. case 'b':
  682. builder.append('\b');
  683. break;
  684. case 'e':
  685. builder.append('\x1b');
  686. break;
  687. case 'f':
  688. builder.append('\f');
  689. break;
  690. case 'r':
  691. builder.append('\r');
  692. break;
  693. case 'n':
  694. builder.append('\n');
  695. break;
  696. }
  697. continue;
  698. }
  699. if (peek() == '$') {
  700. auto string_literal = create<AST::StringLiteral>(builder.to_string()); // String Literal
  701. if (auto variable = parse_variable()) {
  702. auto inner = create<AST::StringPartCompose>(
  703. move(string_literal),
  704. move(variable)); // Compose String Variable
  705. if (auto string = parse_doublequoted_string_inner()) {
  706. return create<AST::StringPartCompose>(move(inner), move(string)); // Compose Composition Composition
  707. }
  708. return inner;
  709. }
  710. if (auto evaluate = parse_evaluate()) {
  711. auto composition = create<AST::StringPartCompose>(
  712. move(string_literal),
  713. move(evaluate)); // Compose String Sequence
  714. if (auto string = parse_doublequoted_string_inner()) {
  715. return create<AST::StringPartCompose>(move(composition), move(string)); // Compose Composition Composition
  716. }
  717. return composition;
  718. }
  719. }
  720. builder.append(consume());
  721. }
  722. return create<AST::StringLiteral>(builder.to_string()); // String Literal
  723. }
  724. RefPtr<AST::Node> Parser::parse_variable()
  725. {
  726. auto rule_start = push_start();
  727. if (at_end())
  728. return nullptr;
  729. if (peek() != '$')
  730. return nullptr;
  731. consume();
  732. switch (peek()) {
  733. case '$':
  734. case '?':
  735. case '*':
  736. case '#':
  737. return create<AST::SpecialVariable>(consume()); // Variable Special
  738. default:
  739. break;
  740. }
  741. auto name = consume_while(is_word_character);
  742. if (name.length() == 0) {
  743. putback();
  744. return nullptr;
  745. }
  746. return create<AST::SimpleVariable>(move(name)); // Variable Simple
  747. }
  748. RefPtr<AST::Node> Parser::parse_evaluate()
  749. {
  750. auto rule_start = push_start();
  751. if (at_end())
  752. return nullptr;
  753. if (peek() != '$')
  754. return nullptr;
  755. consume();
  756. if (peek() == '(') {
  757. consume();
  758. auto inner = parse_pipe_sequence();
  759. if (!inner)
  760. inner = create<AST::SyntaxError>("Unexpected EOF in list");
  761. if (!expect(')'))
  762. inner->set_is_syntax_error(*create<AST::SyntaxError>("Expected a terminating close paren"));
  763. return create<AST::Execute>(move(inner), true);
  764. }
  765. auto inner = parse_expression();
  766. if (!inner) {
  767. inner = create<AST::SyntaxError>("Expected a command");
  768. } else {
  769. if (inner->is_list()) {
  770. auto execute_inner = create<AST::Execute>(move(inner), true);
  771. inner = execute_inner;
  772. } else {
  773. auto dyn_inner = create<AST::DynamicEvaluate>(move(inner));
  774. inner = dyn_inner;
  775. }
  776. }
  777. return inner;
  778. }
  779. RefPtr<AST::Node> Parser::parse_comment()
  780. {
  781. if (at_end())
  782. return nullptr;
  783. if (peek() != '#')
  784. return nullptr;
  785. consume();
  786. auto text = consume_while(is_not('\n'));
  787. return create<AST::Comment>(move(text)); // Comment
  788. }
  789. RefPtr<AST::Node> Parser::parse_bareword()
  790. {
  791. auto rule_start = push_start();
  792. StringBuilder builder;
  793. auto is_acceptable_bareword_character = [](char c) {
  794. return strchr("\\\"'*$&#|(){} ?;<>\n", c) == nullptr;
  795. };
  796. while (!at_end()) {
  797. char ch = peek();
  798. if (ch == '\\') {
  799. consume();
  800. if (!at_end()) {
  801. ch = consume();
  802. if (is_acceptable_bareword_character(ch))
  803. builder.append('\\');
  804. }
  805. builder.append(ch);
  806. continue;
  807. }
  808. if (is_acceptable_bareword_character(ch)) {
  809. builder.append(consume());
  810. continue;
  811. }
  812. break;
  813. }
  814. if (builder.is_empty())
  815. return nullptr;
  816. auto current_end = m_offset;
  817. auto string = builder.to_string();
  818. if (string.starts_with('~')) {
  819. String username;
  820. RefPtr<AST::Node> tilde, text;
  821. auto first_slash_index = string.index_of("/");
  822. if (first_slash_index.has_value()) {
  823. username = string.substring_view(1, first_slash_index.value() - 1);
  824. string = string.substring_view(first_slash_index.value(), string.length() - first_slash_index.value());
  825. } else {
  826. username = string.substring_view(1, string.length() - 1);
  827. string = "";
  828. }
  829. // Synthesize a Tilde Node with the correct positioning information.
  830. {
  831. m_offset -= string.length();
  832. tilde = create<AST::Tilde>(move(username));
  833. }
  834. if (string.is_empty())
  835. return tilde;
  836. // Synthesize a BarewordLiteral Node with the correct positioning information.
  837. {
  838. m_offset = tilde->position().end_offset;
  839. auto text_start = push_start();
  840. m_offset = current_end;
  841. text = create<AST::BarewordLiteral>(move(string));
  842. }
  843. return create<AST::Juxtaposition>(move(tilde), move(text)); // Juxtaposition Varible Bareword
  844. }
  845. if (string.starts_with("\\~")) {
  846. // Un-escape the tilde, but only at the start (where it would be an expansion)
  847. string = string.substring(1, string.length() - 1);
  848. }
  849. return create<AST::BarewordLiteral>(move(string)); // Bareword Literal
  850. }
  851. RefPtr<AST::Node> Parser::parse_glob()
  852. {
  853. auto rule_start = push_start();
  854. auto bareword_part = parse_bareword();
  855. if (at_end())
  856. return bareword_part;
  857. char ch = peek();
  858. if (ch == '*' || ch == '?') {
  859. consume();
  860. StringBuilder textbuilder;
  861. if (bareword_part) {
  862. StringView text;
  863. if (bareword_part->is_bareword()) {
  864. auto bareword = static_cast<AST::BarewordLiteral*>(bareword_part.ptr());
  865. text = bareword->text();
  866. } else {
  867. // FIXME: Allow composition of tilde+bareword with globs: '~/foo/bar/baz*'
  868. putback();
  869. bareword_part->set_is_syntax_error(*create<AST::SyntaxError>(String::format("Unexpected %s inside a glob", bareword_part->class_name().characters())));
  870. return bareword_part;
  871. }
  872. textbuilder.append(text);
  873. }
  874. textbuilder.append(ch);
  875. auto glob_after = parse_glob();
  876. if (glob_after) {
  877. if (glob_after->is_glob()) {
  878. auto glob = static_cast<AST::BarewordLiteral*>(glob_after.ptr());
  879. textbuilder.append(glob->text());
  880. } else if (glob_after->is_bareword()) {
  881. auto bareword = static_cast<AST::BarewordLiteral*>(glob_after.ptr());
  882. textbuilder.append(bareword->text());
  883. } else {
  884. ASSERT_NOT_REACHED();
  885. }
  886. }
  887. return create<AST::Glob>(textbuilder.to_string()); // Glob
  888. }
  889. return bareword_part;
  890. }
  891. StringView Parser::consume_while(Function<bool(char)> condition)
  892. {
  893. auto start_offset = m_offset;
  894. while (!at_end() && condition(peek()))
  895. consume();
  896. return m_input.substring_view(start_offset, m_offset - start_offset);
  897. }