// PosixLexer.cpp
  1. /*
  2. * Copyright (c) 2022, Ali Mohammad Pur <mpfard@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/CharacterTypes.h>
  7. #include <Shell/PosixLexer.h>
  8. static bool is_operator(StringView text)
  9. {
  10. return Shell::Posix::Token::operator_from_name(text).has_value();
  11. }
  12. static bool is_part_of_operator(StringView text, char ch)
  13. {
  14. StringBuilder builder;
  15. builder.append(text);
  16. builder.append(ch);
  17. return Shell::Posix::Token::operator_from_name(builder.string_view()).has_value();
  18. }
  19. namespace Shell::Posix {
  20. Vector<Token> Lexer::batch_next()
  21. {
  22. for (; m_next_reduction != Reduction::None;) {
  23. auto result = reduce(m_next_reduction);
  24. m_next_reduction = result.next_reduction;
  25. if (!result.tokens.is_empty())
  26. return result.tokens;
  27. }
  28. return {};
  29. }
  30. ExpansionRange Lexer::range(ssize_t offset) const
  31. {
  32. return {
  33. m_state.position.end_offset - m_state.position.start_offset + offset - 1,
  34. 0,
  35. };
  36. }
  37. char Lexer::consume()
  38. {
  39. auto ch = m_lexer.consume();
  40. if (ch == '\n') {
  41. m_state.position.end_line.line_number++;
  42. m_state.position.end_line.line_column = 0;
  43. }
  44. m_state.position.end_offset++;
  45. return ch;
  46. }
  47. bool Lexer::consume_specific(char ch)
  48. {
  49. if (m_lexer.peek() == ch) {
  50. consume();
  51. return true;
  52. }
  53. return false;
  54. }
  55. Lexer::ReductionResult Lexer::reduce(Reduction reduction)
  56. {
  57. switch (reduction) {
  58. case Reduction::None:
  59. return { {}, Reduction::None };
  60. case Reduction::End:
  61. return reduce_end();
  62. case Reduction::Operator:
  63. return reduce_operator();
  64. case Reduction::Comment:
  65. return reduce_comment();
  66. case Reduction::SingleQuotedString:
  67. return reduce_single_quoted_string();
  68. case Reduction::DoubleQuotedString:
  69. return reduce_double_quoted_string();
  70. case Reduction::Expansion:
  71. return reduce_expansion();
  72. case Reduction::CommandExpansion:
  73. return reduce_command_expansion();
  74. case Reduction::Start:
  75. return reduce_start();
  76. case Reduction::ArithmeticExpansion:
  77. return reduce_arithmetic_expansion();
  78. case Reduction::SpecialParameterExpansion:
  79. return reduce_special_parameter_expansion();
  80. case Reduction::ParameterExpansion:
  81. return reduce_parameter_expansion();
  82. case Reduction::CommandOrArithmeticSubstitutionExpansion:
  83. return reduce_command_or_arithmetic_substitution_expansion();
  84. case Reduction::ExtendedParameterExpansion:
  85. return reduce_extended_parameter_expansion();
  86. }
  87. VERIFY_NOT_REACHED();
  88. }
  89. Lexer::ReductionResult Lexer::reduce_end()
  90. {
  91. return {
  92. .tokens = { Token::eof() },
  93. .next_reduction = Reduction::None,
  94. };
  95. }
  96. Lexer::ReductionResult Lexer::reduce_operator()
  97. {
  98. if (m_lexer.is_eof()) {
  99. if (is_operator(m_state.buffer.string_view())) {
  100. auto tokens = Token::operators_from(m_state);
  101. m_state.buffer.clear();
  102. m_state.position.start_offset = m_state.position.end_offset;
  103. m_state.position.start_line = m_state.position.end_line;
  104. return {
  105. .tokens = move(tokens),
  106. .next_reduction = Reduction::End,
  107. };
  108. }
  109. return reduce(Reduction::Start);
  110. }
  111. if (is_part_of_operator(m_state.buffer.string_view(), m_lexer.peek())) {
  112. m_state.buffer.append(consume());
  113. return {
  114. .tokens = {},
  115. .next_reduction = Reduction::Operator,
  116. };
  117. }
  118. auto tokens = Vector<Token> {};
  119. if (is_operator(m_state.buffer.string_view())) {
  120. tokens.extend(Token::operators_from(m_state));
  121. m_state.buffer.clear();
  122. m_state.position.start_offset = m_state.position.end_offset;
  123. m_state.position.start_line = m_state.position.end_line;
  124. }
  125. auto result = reduce(Reduction::Start);
  126. tokens.extend(move(result.tokens));
  127. return {
  128. .tokens = move(tokens),
  129. .next_reduction = result.next_reduction,
  130. };
  131. }
  132. Lexer::ReductionResult Lexer::reduce_comment()
  133. {
  134. if (m_lexer.is_eof()) {
  135. return {
  136. .tokens = {},
  137. .next_reduction = Reduction::End,
  138. };
  139. }
  140. if (consume() == '\n') {
  141. return {
  142. .tokens = { Token::newline() },
  143. .next_reduction = Reduction::Start,
  144. };
  145. }
  146. return {
  147. .tokens = {},
  148. .next_reduction = Reduction::Comment,
  149. };
  150. }
  151. Lexer::ReductionResult Lexer::reduce_single_quoted_string()
  152. {
  153. if (m_lexer.is_eof()) {
  154. auto tokens = Token::maybe_from_state(m_state);
  155. tokens.append(Token::continuation('\''));
  156. return {
  157. .tokens = move(tokens),
  158. .next_reduction = Reduction::End,
  159. };
  160. }
  161. auto ch = consume();
  162. m_state.buffer.append(ch);
  163. if (ch == '\'') {
  164. return {
  165. .tokens = {},
  166. .next_reduction = Reduction::Start,
  167. };
  168. }
  169. return {
  170. .tokens = {},
  171. .next_reduction = Reduction::SingleQuotedString,
  172. };
  173. }
  174. Lexer::ReductionResult Lexer::reduce_double_quoted_string()
  175. {
  176. m_state.previous_reduction = Reduction::DoubleQuotedString;
  177. if (m_lexer.is_eof()) {
  178. auto tokens = Token::maybe_from_state(m_state);
  179. tokens.append(Token::continuation('"'));
  180. return {
  181. .tokens = move(tokens),
  182. .next_reduction = Reduction::End,
  183. };
  184. }
  185. auto ch = consume();
  186. m_state.buffer.append(ch);
  187. if (m_state.escaping) {
  188. m_state.escaping = false;
  189. return {
  190. .tokens = {},
  191. .next_reduction = Reduction::DoubleQuotedString,
  192. };
  193. }
  194. switch (ch) {
  195. case '\\':
  196. m_state.escaping = true;
  197. return {
  198. .tokens = {},
  199. .next_reduction = Reduction::DoubleQuotedString,
  200. };
  201. case '"':
  202. m_state.previous_reduction = Reduction::Start;
  203. return {
  204. .tokens = {},
  205. .next_reduction = Reduction::Start,
  206. };
  207. case '$':
  208. if (m_lexer.next_is("("))
  209. m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range() });
  210. else
  211. m_state.expansions.empend(ParameterExpansion { .parameter = StringBuilder {}, .range = range() });
  212. return {
  213. .tokens = {},
  214. .next_reduction = Reduction::Expansion,
  215. };
  216. case '`':
  217. m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range() });
  218. return {
  219. .tokens = {},
  220. .next_reduction = Reduction::CommandExpansion,
  221. };
  222. default:
  223. return {
  224. .tokens = {},
  225. .next_reduction = Reduction::DoubleQuotedString,
  226. };
  227. }
  228. }
  229. Lexer::ReductionResult Lexer::reduce_expansion()
  230. {
  231. if (m_lexer.is_eof())
  232. return reduce(m_state.previous_reduction);
  233. auto ch = m_lexer.peek();
  234. switch (ch) {
  235. case '{':
  236. consume();
  237. m_state.buffer.append(ch);
  238. return {
  239. .tokens = {},
  240. .next_reduction = Reduction::ExtendedParameterExpansion,
  241. };
  242. case '(':
  243. consume();
  244. m_state.buffer.append(ch);
  245. return {
  246. .tokens = {},
  247. .next_reduction = Reduction::CommandOrArithmeticSubstitutionExpansion,
  248. };
  249. case 'a' ... 'z':
  250. case 'A' ... 'Z':
  251. case '_': {
  252. consume();
  253. m_state.buffer.append(ch);
  254. auto& expansion = m_state.expansions.last().get<ParameterExpansion>();
  255. expansion.parameter.append(ch);
  256. expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
  257. return {
  258. .tokens = {},
  259. .next_reduction = Reduction::ParameterExpansion,
  260. };
  261. }
  262. case '0' ... '9':
  263. case '-':
  264. case '!':
  265. case '@':
  266. case '#':
  267. case '?':
  268. case '*':
  269. case '$':
  270. return reduce(Reduction::SpecialParameterExpansion);
  271. default:
  272. m_state.buffer.append(ch);
  273. return reduce(m_state.previous_reduction);
  274. }
  275. }
  276. Lexer::ReductionResult Lexer::reduce_command_expansion()
  277. {
  278. if (m_lexer.is_eof()) {
  279. auto& expansion = m_state.expansions.last().get<CommandExpansion>();
  280. expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
  281. return {
  282. .tokens = { Token::continuation('`') },
  283. .next_reduction = m_state.previous_reduction,
  284. };
  285. }
  286. auto ch = consume();
  287. if (!m_state.escaping && ch == '`') {
  288. m_state.buffer.append(ch);
  289. auto& expansion = m_state.expansions.last().get<CommandExpansion>();
  290. expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
  291. return {
  292. .tokens = {},
  293. .next_reduction = m_state.previous_reduction,
  294. };
  295. }
  296. if (!m_state.escaping && ch == '\\') {
  297. m_state.escaping = true;
  298. return {
  299. .tokens = {},
  300. .next_reduction = Reduction::CommandExpansion,
  301. };
  302. }
  303. m_state.escaping = false;
  304. m_state.buffer.append(ch);
  305. m_state.expansions.last().get<CommandExpansion>().command.append(ch);
  306. return {
  307. .tokens = {},
  308. .next_reduction = Reduction::CommandExpansion,
  309. };
  310. }
  311. Lexer::ReductionResult Lexer::reduce_start()
  312. {
  313. if (m_lexer.is_eof()) {
  314. auto tokens = Token::maybe_from_state(m_state);
  315. m_state.buffer.clear();
  316. m_state.position.start_offset = m_state.position.end_offset;
  317. m_state.position.start_line = m_state.position.end_line;
  318. return {
  319. .tokens = move(tokens),
  320. .next_reduction = Reduction::End,
  321. };
  322. }
  323. if (m_state.escaping && consume_specific('\n')) {
  324. m_state.escaping = false;
  325. auto buffer = m_state.buffer.to_deprecated_string().substring(0, m_state.buffer.length() - 1);
  326. m_state.buffer.clear();
  327. m_state.buffer.append(buffer);
  328. return {
  329. .tokens = {},
  330. .next_reduction = Reduction::Start,
  331. };
  332. }
  333. if (!m_state.escaping && m_lexer.peek() == '#' && m_state.buffer.is_empty()) {
  334. consume();
  335. return {
  336. .tokens = {},
  337. .next_reduction = Reduction::Comment,
  338. };
  339. }
  340. if (!m_state.escaping && consume_specific('\n')) {
  341. auto tokens = Token::maybe_from_state(m_state);
  342. tokens.append(Token::newline());
  343. m_state.buffer.clear();
  344. m_state.position.start_offset = m_state.position.end_offset;
  345. m_state.position.start_line = m_state.position.end_line;
  346. return {
  347. .tokens = move(tokens),
  348. .next_reduction = Reduction::Start,
  349. };
  350. }
  351. if (!m_state.escaping && consume_specific('\\')) {
  352. m_state.escaping = true;
  353. m_state.buffer.append('\\');
  354. return {
  355. .tokens = {},
  356. .next_reduction = Reduction::Start,
  357. };
  358. }
  359. if (!m_state.escaping && is_part_of_operator(""sv, m_lexer.peek())) {
  360. auto tokens = Token::maybe_from_state(m_state);
  361. m_state.buffer.clear();
  362. m_state.buffer.append(consume());
  363. m_state.position.start_offset = m_state.position.end_offset;
  364. m_state.position.start_line = m_state.position.end_line;
  365. return {
  366. .tokens = move(tokens),
  367. .next_reduction = Reduction::Operator,
  368. };
  369. }
  370. if (!m_state.escaping && consume_specific('\'')) {
  371. m_state.buffer.append('\'');
  372. return {
  373. .tokens = {},
  374. .next_reduction = Reduction::SingleQuotedString,
  375. };
  376. }
  377. if (!m_state.escaping && consume_specific('"')) {
  378. m_state.buffer.append('"');
  379. return {
  380. .tokens = {},
  381. .next_reduction = Reduction::DoubleQuotedString,
  382. };
  383. }
  384. if (!m_state.escaping && is_ascii_space(m_lexer.peek())) {
  385. consume();
  386. auto tokens = Token::maybe_from_state(m_state);
  387. m_state.buffer.clear();
  388. m_state.expansions.clear();
  389. m_state.position.start_offset = m_state.position.end_offset;
  390. m_state.position.start_line = m_state.position.end_line;
  391. return {
  392. .tokens = move(tokens),
  393. .next_reduction = Reduction::Start,
  394. };
  395. }
  396. if (!m_state.escaping && consume_specific('$')) {
  397. m_state.buffer.append('$');
  398. if (m_lexer.next_is("("))
  399. m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range() });
  400. else
  401. m_state.expansions.empend(ParameterExpansion { .parameter = StringBuilder {}, .range = range() });
  402. return {
  403. .tokens = {},
  404. .next_reduction = Reduction::Expansion,
  405. };
  406. }
  407. if (!m_state.escaping && consume_specific('`')) {
  408. m_state.buffer.append('`');
  409. m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range() });
  410. return {
  411. .tokens = {},
  412. .next_reduction = Reduction::CommandExpansion,
  413. };
  414. }
  415. m_state.escaping = false;
  416. m_state.buffer.append(consume());
  417. return {
  418. .tokens = {},
  419. .next_reduction = Reduction::Start,
  420. };
  421. }
  422. Lexer::ReductionResult Lexer::reduce_arithmetic_expansion()
  423. {
  424. if (m_lexer.is_eof()) {
  425. auto& expansion = m_state.expansions.last().get<ArithmeticExpansion>();
  426. expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
  427. return {
  428. .tokens = { Token::continuation("$((") },
  429. .next_reduction = m_state.previous_reduction,
  430. };
  431. }
  432. if (m_lexer.peek() == ')' && m_state.buffer.string_view().ends_with(')')) {
  433. m_state.buffer.append(consume());
  434. auto& expansion = m_state.expansions.last().get<ArithmeticExpansion>();
  435. expansion.expression = expansion.value.to_deprecated_string().substring(0, expansion.value.length() - 1);
  436. expansion.value.clear();
  437. expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
  438. return {
  439. .tokens = {},
  440. .next_reduction = m_state.previous_reduction,
  441. };
  442. }
  443. auto ch = consume();
  444. m_state.buffer.append(ch);
  445. m_state.expansions.last().get<ArithmeticExpansion>().value.append(ch);
  446. return {
  447. .tokens = {},
  448. .next_reduction = Reduction::ArithmeticExpansion,
  449. };
  450. }
  451. Lexer::ReductionResult Lexer::reduce_special_parameter_expansion()
  452. {
  453. auto ch = consume();
  454. m_state.buffer.append(ch);
  455. m_state.expansions.last() = ParameterExpansion {
  456. .parameter = StringBuilder {},
  457. .range = range(-1),
  458. };
  459. m_state.expansions.last().get<ParameterExpansion>().parameter.append(ch);
  460. return {
  461. .tokens = {},
  462. .next_reduction = m_state.previous_reduction,
  463. };
  464. }
  465. Lexer::ReductionResult Lexer::reduce_parameter_expansion()
  466. {
  467. auto& expansion = m_state.expansions.last().get<ParameterExpansion>();
  468. if (m_lexer.is_eof()) {
  469. return {
  470. .tokens = {},
  471. .next_reduction = Reduction::Start,
  472. };
  473. }
  474. auto next = m_lexer.peek();
  475. if (is_ascii_alphanumeric(next)) {
  476. m_state.buffer.append(consume());
  477. expansion.parameter.append(next);
  478. expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
  479. return {
  480. .tokens = {},
  481. .next_reduction = Reduction::ParameterExpansion,
  482. };
  483. }
  484. return reduce(m_state.previous_reduction);
  485. }
  486. Lexer::ReductionResult Lexer::reduce_command_or_arithmetic_substitution_expansion()
  487. {
  488. if (m_lexer.is_eof()) {
  489. return {
  490. .tokens = { Token::continuation("$(") },
  491. .next_reduction = m_state.previous_reduction,
  492. };
  493. }
  494. auto ch = m_lexer.peek();
  495. if (ch == '(' && m_state.buffer.string_view().ends_with("$("sv)) {
  496. m_state.buffer.append(consume());
  497. m_state.expansions.last() = ArithmeticExpansion {
  498. .expression = "",
  499. .value = StringBuilder {},
  500. .range = range(-2)
  501. };
  502. return {
  503. .tokens = {},
  504. .next_reduction = Reduction::ArithmeticExpansion,
  505. };
  506. }
  507. if (ch == ')') {
  508. m_state.buffer.append(consume());
  509. m_state.expansions.last().visit([&](auto& expansion) {
  510. expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
  511. });
  512. return {
  513. .tokens = {},
  514. .next_reduction = m_state.previous_reduction,
  515. };
  516. }
  517. m_state.buffer.append(consume());
  518. m_state.expansions.last().get<CommandExpansion>().command.append(ch);
  519. return {
  520. .tokens = {},
  521. .next_reduction = Reduction::CommandOrArithmeticSubstitutionExpansion,
  522. };
  523. }
  524. Lexer::ReductionResult Lexer::reduce_extended_parameter_expansion()
  525. {
  526. auto& expansion = m_state.expansions.last().get<ParameterExpansion>();
  527. if (m_lexer.is_eof()) {
  528. return {
  529. .tokens = { Token::continuation("${") },
  530. .next_reduction = m_state.previous_reduction,
  531. };
  532. }
  533. auto ch = m_lexer.peek();
  534. if (ch == '}') {
  535. m_state.buffer.append(consume());
  536. expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
  537. return {
  538. .tokens = {},
  539. .next_reduction = m_state.previous_reduction,
  540. };
  541. }
  542. m_state.buffer.append(consume());
  543. expansion.parameter.append(ch);
  544. expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
  545. return {
  546. .tokens = {},
  547. .next_reduction = Reduction::ExtendedParameterExpansion,
  548. };
  549. }
  550. StringView Token::type_name() const
  551. {
  552. switch (type) {
  553. case Type::Eof:
  554. return "Eof"sv;
  555. case Type::Newline:
  556. return "Newline"sv;
  557. case Type::Continuation:
  558. return "Continuation"sv;
  559. case Type::Token:
  560. return "Token"sv;
  561. case Type::And:
  562. return "And"sv;
  563. case Type::Pipe:
  564. return "Pipe"sv;
  565. case Type::OpenParen:
  566. return "OpenParen"sv;
  567. case Type::CloseParen:
  568. return "CloseParen"sv;
  569. case Type::Great:
  570. return "Great"sv;
  571. case Type::Less:
  572. return "Less"sv;
  573. case Type::AndIf:
  574. return "AndIf"sv;
  575. case Type::OrIf:
  576. return "OrIf"sv;
  577. case Type::DoubleSemicolon:
  578. return "DoubleSemicolon"sv;
  579. case Type::DoubleLess:
  580. return "DoubleLess"sv;
  581. case Type::DoubleGreat:
  582. return "DoubleGreat"sv;
  583. case Type::LessAnd:
  584. return "LessAnd"sv;
  585. case Type::GreatAnd:
  586. return "GreatAnd"sv;
  587. case Type::LessGreat:
  588. return "LessGreat"sv;
  589. case Type::DoubleLessDash:
  590. return "DoubleLessDash"sv;
  591. case Type::Clobber:
  592. return "Clobber"sv;
  593. case Type::Semicolon:
  594. return "Semicolon"sv;
  595. case Type::AssignmentWord:
  596. return "AssignmentWord"sv;
  597. case Type::Bang:
  598. return "Bang"sv;
  599. case Type::Case:
  600. return "Case"sv;
  601. case Type::CloseBrace:
  602. return "CloseBrace"sv;
  603. case Type::Do:
  604. return "Do"sv;
  605. case Type::Done:
  606. return "Done"sv;
  607. case Type::Elif:
  608. return "Elif"sv;
  609. case Type::Else:
  610. return "Else"sv;
  611. case Type::Esac:
  612. return "Esac"sv;
  613. case Type::Fi:
  614. return "Fi"sv;
  615. case Type::For:
  616. return "For"sv;
  617. case Type::If:
  618. return "If"sv;
  619. case Type::In:
  620. return "In"sv;
  621. case Type::IoNumber:
  622. return "IoNumber"sv;
  623. case Type::OpenBrace:
  624. return "OpenBrace"sv;
  625. case Type::Then:
  626. return "Then"sv;
  627. case Type::Until:
  628. return "Until"sv;
  629. case Type::VariableName:
  630. return "VariableName"sv;
  631. case Type::While:
  632. return "While"sv;
  633. case Type::Word:
  634. return "Word"sv;
  635. }
  636. return "Idk"sv;
  637. }
  638. }