PosixLexer.cpp 30 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006
  1. /*
  2. * Copyright (c) 2022, Ali Mohammad Pur <mpfard@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/CharacterTypes.h>
  7. #include <Shell/PosixLexer.h>
  8. static bool is_operator(StringView text)
  9. {
  10. return Shell::Posix::Token::operator_from_name(text).has_value();
  11. }
  12. static bool is_part_of_operator(StringView text, char ch)
  13. {
  14. StringBuilder builder;
  15. builder.append(text);
  16. builder.append(ch);
  17. return Shell::Posix::Token::operator_from_name(builder.string_view()).has_value();
  18. }
  19. namespace Shell::Posix {
  20. ErrorOr<Vector<Token>> Lexer::batch_next(Optional<Reduction> starting_reduction)
  21. {
  22. if (starting_reduction.has_value())
  23. m_next_reduction = *starting_reduction;
  24. for (; m_next_reduction != Reduction::None;) {
  25. auto result = TRY(reduce(m_next_reduction));
  26. m_next_reduction = result.next_reduction;
  27. if (!result.tokens.is_empty())
  28. return result.tokens;
  29. }
  30. return Vector<Token> {};
  31. }
  32. ExpansionRange Lexer::range(ssize_t offset) const
  33. {
  34. return {
  35. m_state.position.end_offset - m_state.position.start_offset + offset,
  36. 0,
  37. };
  38. }
  39. char Lexer::consume()
  40. {
  41. auto ch = m_lexer.consume();
  42. if (ch == '\n') {
  43. m_state.position.end_line.line_number++;
  44. m_state.position.end_line.line_column = 0;
  45. }
  46. m_state.position.end_offset++;
  47. return ch;
  48. }
  49. void Lexer::reconsume(StringView string)
  50. {
  51. for (auto byte : string.bytes()) {
  52. if (byte == '\n') {
  53. m_state.position.end_line.line_number++;
  54. m_state.position.end_line.line_column = 0;
  55. }
  56. m_state.position.end_offset++;
  57. }
  58. }
  59. bool Lexer::consume_specific(char ch)
  60. {
  61. if (m_lexer.peek() == ch) {
  62. consume();
  63. return true;
  64. }
  65. return false;
  66. }
  67. ErrorOr<Lexer::ReductionResult> Lexer::reduce(Reduction reduction)
  68. {
  69. switch (reduction) {
  70. case Reduction::None:
  71. return ReductionResult { {}, Reduction::None };
  72. case Reduction::End:
  73. return reduce_end();
  74. case Reduction::Operator:
  75. return reduce_operator();
  76. case Reduction::Comment:
  77. return reduce_comment();
  78. case Reduction::SingleQuotedString:
  79. return reduce_single_quoted_string();
  80. case Reduction::DoubleQuotedString:
  81. return reduce_double_quoted_string();
  82. case Reduction::Expansion:
  83. return reduce_expansion();
  84. case Reduction::CommandExpansion:
  85. return reduce_command_expansion();
  86. case Reduction::Start:
  87. return reduce_start();
  88. case Reduction::ArithmeticExpansion:
  89. return reduce_arithmetic_expansion();
  90. case Reduction::SpecialParameterExpansion:
  91. return reduce_special_parameter_expansion();
  92. case Reduction::ParameterExpansion:
  93. return reduce_parameter_expansion();
  94. case Reduction::CommandOrArithmeticSubstitutionExpansion:
  95. return reduce_command_or_arithmetic_substitution_expansion();
  96. case Reduction::ExtendedParameterExpansion:
  97. return reduce_extended_parameter_expansion();
  98. case Reduction::HeredocContents:
  99. return reduce_heredoc_contents();
  100. }
  101. VERIFY_NOT_REACHED();
  102. }
  103. ErrorOr<Lexer::ReductionResult> Lexer::reduce_end()
  104. {
  105. return ReductionResult {
  106. .tokens = { Token::eof() },
  107. .next_reduction = Reduction::None,
  108. };
  109. }
  110. Lexer::HeredocKeyResult Lexer::process_heredoc_key(Token const& token)
  111. {
  112. StringBuilder builder;
  113. enum ParseState {
  114. Free,
  115. InDoubleQuotes,
  116. InSingleQuotes,
  117. };
  118. Vector<ParseState, 4> parse_state;
  119. parse_state.append(Free);
  120. bool escaped = false;
  121. bool had_a_single_quote_segment = false;
  122. for (auto byte : token.value.bytes()) {
  123. switch (parse_state.last()) {
  124. case Free:
  125. switch (byte) {
  126. case '"':
  127. if (escaped) {
  128. builder.append(byte);
  129. escaped = false;
  130. } else {
  131. parse_state.append(InDoubleQuotes);
  132. }
  133. break;
  134. case '\'':
  135. if (escaped) {
  136. builder.append(byte);
  137. escaped = false;
  138. } else {
  139. had_a_single_quote_segment = true;
  140. parse_state.append(InSingleQuotes);
  141. }
  142. break;
  143. case '\\':
  144. if (escaped) {
  145. builder.append(byte);
  146. escaped = false;
  147. } else {
  148. escaped = true;
  149. }
  150. break;
  151. default:
  152. // NOTE: bash eats the backslash outside quotes :shrug:
  153. if (escaped && parse_state.last() != Free) {
  154. builder.append('\\');
  155. escaped = false;
  156. }
  157. builder.append(byte);
  158. break;
  159. }
  160. break;
  161. case InDoubleQuotes:
  162. if (!escaped && byte == '"') {
  163. parse_state.take_last();
  164. break;
  165. }
  166. if (escaped) {
  167. if (byte != '"')
  168. builder.append('\\');
  169. builder.append(byte);
  170. break;
  171. }
  172. if (byte == '\\')
  173. escaped = true;
  174. else
  175. builder.append(byte);
  176. break;
  177. case InSingleQuotes:
  178. if (byte == '\'') {
  179. parse_state.take_last();
  180. break;
  181. }
  182. builder.append(byte);
  183. break;
  184. }
  185. }
  186. // NOTE: Not checking the final state as any garbage that even partially parses is allowed to be used as a key :/
  187. return {
  188. .key = builder.to_string().release_value_but_fixme_should_propagate_errors(),
  189. .allow_interpolation = !had_a_single_quote_segment,
  190. };
  191. }
  192. ErrorOr<Lexer::ReductionResult> Lexer::reduce_operator()
  193. {
  194. if (m_lexer.is_eof()) {
  195. if (is_operator(m_state.buffer.string_view())) {
  196. auto tokens = TRY(Token::operators_from(m_state));
  197. m_state.buffer.clear();
  198. m_state.position.start_offset = m_state.position.end_offset;
  199. m_state.position.start_line = m_state.position.end_line;
  200. return ReductionResult {
  201. .tokens = move(tokens),
  202. .next_reduction = Reduction::End,
  203. };
  204. }
  205. return reduce(Reduction::Start);
  206. }
  207. if (is_part_of_operator(m_state.buffer.string_view(), m_lexer.peek())) {
  208. m_state.buffer.append(consume());
  209. return ReductionResult {
  210. .tokens = {},
  211. .next_reduction = Reduction::Operator,
  212. };
  213. }
  214. auto tokens = Vector<Token> {};
  215. if (is_operator(m_state.buffer.string_view())) {
  216. tokens.extend(TRY(Token::operators_from(m_state)));
  217. m_state.buffer.clear();
  218. m_state.position.start_offset = m_state.position.end_offset;
  219. m_state.position.start_line = m_state.position.end_line;
  220. }
  221. auto expect_heredoc_entry = !tokens.is_empty() && (tokens.last().type == Token::Type::DoubleLessDash || tokens.last().type == Token::Type::DoubleLess);
  222. auto result = TRY(reduce(Reduction::Start));
  223. tokens.extend(move(result.tokens));
  224. while (expect_heredoc_entry && tokens.size() == 1) {
  225. result = TRY(reduce(result.next_reduction));
  226. tokens.extend(move(result.tokens));
  227. }
  228. if (expect_heredoc_entry && tokens.size() > 1) {
  229. auto [key, interpolation] = process_heredoc_key(tokens[1]);
  230. m_state.heredoc_entries.append(HeredocEntry {
  231. .key = key,
  232. .allow_interpolation = interpolation,
  233. .dedent = tokens[0].type == Token::Type::DoubleLessDash,
  234. });
  235. }
  236. return ReductionResult {
  237. .tokens = move(tokens),
  238. .next_reduction = result.next_reduction,
  239. };
  240. }
  241. ErrorOr<Lexer::ReductionResult> Lexer::reduce_comment()
  242. {
  243. if (m_lexer.is_eof()) {
  244. return ReductionResult {
  245. .tokens = {},
  246. .next_reduction = Reduction::End,
  247. };
  248. }
  249. if (consume() == '\n') {
  250. m_state.on_new_line = true;
  251. return ReductionResult {
  252. .tokens = { Token::newline() },
  253. .next_reduction = Reduction::Start,
  254. };
  255. }
  256. return ReductionResult {
  257. .tokens = {},
  258. .next_reduction = Reduction::Comment,
  259. };
  260. }
  261. ErrorOr<Lexer::ReductionResult> Lexer::reduce_single_quoted_string()
  262. {
  263. if (m_lexer.is_eof()) {
  264. auto tokens = TRY(Token::maybe_from_state(m_state));
  265. tokens.append(Token::continuation('\''));
  266. return ReductionResult {
  267. .tokens = move(tokens),
  268. .next_reduction = Reduction::End,
  269. };
  270. }
  271. auto ch = consume();
  272. m_state.buffer.append(ch);
  273. if (ch == '\'') {
  274. return ReductionResult {
  275. .tokens = {},
  276. .next_reduction = Reduction::Start,
  277. };
  278. }
  279. return ReductionResult {
  280. .tokens = {},
  281. .next_reduction = Reduction::SingleQuotedString,
  282. };
  283. }
  284. ErrorOr<Lexer::ReductionResult> Lexer::reduce_double_quoted_string()
  285. {
  286. m_state.previous_reduction = Reduction::DoubleQuotedString;
  287. if (m_lexer.is_eof()) {
  288. auto tokens = TRY(Token::maybe_from_state(m_state));
  289. tokens.append(Token::continuation('"'));
  290. return ReductionResult {
  291. .tokens = move(tokens),
  292. .next_reduction = Reduction::End,
  293. };
  294. }
  295. auto ch = consume();
  296. m_state.buffer.append(ch);
  297. if (m_state.escaping) {
  298. m_state.escaping = false;
  299. return ReductionResult {
  300. .tokens = {},
  301. .next_reduction = Reduction::DoubleQuotedString,
  302. };
  303. }
  304. switch (ch) {
  305. case '\\':
  306. m_state.escaping = true;
  307. return ReductionResult {
  308. .tokens = {},
  309. .next_reduction = Reduction::DoubleQuotedString,
  310. };
  311. case '"':
  312. m_state.previous_reduction = Reduction::Start;
  313. return ReductionResult {
  314. .tokens = {},
  315. .next_reduction = Reduction::Start,
  316. };
  317. case '$':
  318. if (m_lexer.next_is("("))
  319. m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range(-1) });
  320. else
  321. m_state.expansions.empend(ParameterExpansion { .parameter = StringBuilder {}, .range = range(-1) });
  322. return ReductionResult {
  323. .tokens = {},
  324. .next_reduction = Reduction::Expansion,
  325. };
  326. case '`':
  327. m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range(-1) });
  328. return ReductionResult {
  329. .tokens = {},
  330. .next_reduction = Reduction::CommandExpansion,
  331. };
  332. default:
  333. return ReductionResult {
  334. .tokens = {},
  335. .next_reduction = Reduction::DoubleQuotedString,
  336. };
  337. }
  338. }
  339. ErrorOr<Lexer::ReductionResult> Lexer::reduce_expansion()
  340. {
  341. if (m_lexer.is_eof())
  342. return reduce(m_state.previous_reduction);
  343. auto ch = m_lexer.peek();
  344. switch (ch) {
  345. case '{':
  346. consume();
  347. m_state.buffer.append(ch);
  348. return ReductionResult {
  349. .tokens = {},
  350. .next_reduction = Reduction::ExtendedParameterExpansion,
  351. };
  352. case '(':
  353. consume();
  354. m_state.buffer.append(ch);
  355. return ReductionResult {
  356. .tokens = {},
  357. .next_reduction = Reduction::CommandOrArithmeticSubstitutionExpansion,
  358. };
  359. case 'a' ... 'z':
  360. case 'A' ... 'Z':
  361. case '_': {
  362. consume();
  363. m_state.buffer.append(ch);
  364. auto& expansion = m_state.expansions.last().get<ParameterExpansion>();
  365. expansion.parameter.append(ch);
  366. expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
  367. return ReductionResult {
  368. .tokens = {},
  369. .next_reduction = Reduction::ParameterExpansion,
  370. };
  371. }
  372. case '0' ... '9':
  373. case '-':
  374. case '!':
  375. case '@':
  376. case '#':
  377. case '?':
  378. case '*':
  379. case '$':
  380. return reduce(Reduction::SpecialParameterExpansion);
  381. default:
  382. m_state.buffer.append(ch);
  383. return reduce(m_state.previous_reduction);
  384. }
  385. }
  386. ErrorOr<Lexer::ReductionResult> Lexer::reduce_command_expansion()
  387. {
  388. if (m_lexer.is_eof()) {
  389. auto& expansion = m_state.expansions.last().get<CommandExpansion>();
  390. expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
  391. return ReductionResult {
  392. .tokens = { Token::continuation('`') },
  393. .next_reduction = m_state.previous_reduction,
  394. };
  395. }
  396. auto ch = consume();
  397. if (!m_state.escaping && ch == '`') {
  398. m_state.buffer.append(ch);
  399. auto& expansion = m_state.expansions.last().get<CommandExpansion>();
  400. expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
  401. return ReductionResult {
  402. .tokens = {},
  403. .next_reduction = m_state.previous_reduction,
  404. };
  405. }
  406. if (!m_state.escaping && ch == '\\') {
  407. m_state.escaping = true;
  408. return ReductionResult {
  409. .tokens = {},
  410. .next_reduction = Reduction::CommandExpansion,
  411. };
  412. }
  413. m_state.escaping = false;
  414. m_state.buffer.append(ch);
  415. m_state.expansions.last().get<CommandExpansion>().command.append(ch);
  416. return ReductionResult {
  417. .tokens = {},
  418. .next_reduction = Reduction::CommandExpansion,
  419. };
  420. }
  421. ErrorOr<Lexer::ReductionResult> Lexer::reduce_heredoc_contents()
  422. {
  423. if (m_lexer.is_eof()) {
  424. auto tokens = TRY(Token::maybe_from_state(m_state));
  425. m_state.buffer.clear();
  426. m_state.position.start_offset = m_state.position.end_offset;
  427. m_state.position.start_line = m_state.position.end_line;
  428. return ReductionResult {
  429. .tokens = move(tokens),
  430. .next_reduction = Reduction::End,
  431. };
  432. }
  433. if (!m_state.escaping && consume_specific('\\')) {
  434. m_state.escaping = true;
  435. m_state.buffer.append('\\');
  436. return ReductionResult {
  437. .tokens = {},
  438. .next_reduction = Reduction::HeredocContents,
  439. };
  440. }
  441. if (!m_state.escaping && consume_specific('$')) {
  442. m_state.buffer.append('$');
  443. if (m_lexer.next_is("("))
  444. m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range(-1) });
  445. else
  446. m_state.expansions.empend(ParameterExpansion { .parameter = StringBuilder {}, .range = range(-1) });
  447. return ReductionResult {
  448. .tokens = {},
  449. .next_reduction = Reduction::Expansion,
  450. };
  451. }
  452. if (!m_state.escaping && consume_specific('`')) {
  453. m_state.buffer.append('`');
  454. m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range(-1) });
  455. return ReductionResult {
  456. .tokens = {},
  457. .next_reduction = Reduction::CommandExpansion,
  458. };
  459. }
  460. m_state.escaping = false;
  461. m_state.buffer.append(consume());
  462. return ReductionResult {
  463. .tokens = {},
  464. .next_reduction = Reduction::HeredocContents,
  465. };
  466. }
  467. ErrorOr<Lexer::ReductionResult> Lexer::reduce_start()
  468. {
  469. auto was_on_new_line = m_state.on_new_line;
  470. m_state.on_new_line = false;
  471. if (m_lexer.is_eof()) {
  472. auto tokens = TRY(Token::maybe_from_state(m_state));
  473. m_state.buffer.clear();
  474. m_state.expansions.clear();
  475. m_state.position.start_offset = m_state.position.end_offset;
  476. m_state.position.start_line = m_state.position.end_line;
  477. return ReductionResult {
  478. .tokens = move(tokens),
  479. .next_reduction = Reduction::End,
  480. };
  481. }
  482. if (was_on_new_line && !m_state.heredoc_entries.is_empty()) {
  483. auto const& entry = m_state.heredoc_entries.first();
  484. auto start_index = m_lexer.tell();
  485. Optional<size_t> end_index;
  486. for (; !m_lexer.is_eof();) {
  487. auto index = m_lexer.tell();
  488. auto possible_end_index = m_lexer.tell();
  489. if (m_lexer.consume_specific('\n')) {
  490. if (entry.dedent)
  491. m_lexer.ignore_while(is_any_of("\t"sv));
  492. if (m_lexer.consume_specific(entry.key.bytes_as_string_view())) {
  493. if (m_lexer.consume_specific('\n') || m_lexer.is_eof()) {
  494. end_index = possible_end_index;
  495. break;
  496. }
  497. }
  498. }
  499. if (m_lexer.tell() == index)
  500. m_lexer.ignore();
  501. }
  502. auto contents = m_lexer.input().substring_view(start_index, end_index.value_or(m_lexer.tell()) - start_index);
  503. reconsume(contents);
  504. if (end_index.has_value())
  505. reconsume(m_lexer.input().substring_view_starting_after_substring(contents).substring_view(0, m_lexer.tell() - *end_index));
  506. m_state.buffer.clear();
  507. m_state.buffer.append(contents);
  508. auto token = TRY(Token::maybe_from_state(m_state)).first();
  509. token.relevant_heredoc_key = entry.key;
  510. token.type = Token::Type::HeredocContents;
  511. m_state.heredoc_entries.take_first();
  512. m_state.on_new_line = true;
  513. m_state.buffer.clear();
  514. m_state.position.start_offset = m_state.position.end_offset;
  515. m_state.position.start_line = m_state.position.end_line;
  516. Vector<Token> tokens { move(token), Token::newline() };
  517. return ReductionResult {
  518. .tokens = move(tokens),
  519. .next_reduction = Reduction::Start,
  520. };
  521. }
  522. if (m_state.escaping && consume_specific('\n')) {
  523. m_state.escaping = false;
  524. auto buffer = m_state.buffer.to_deprecated_string().substring(0, m_state.buffer.length() - 1);
  525. m_state.buffer.clear();
  526. m_state.buffer.append(buffer);
  527. return ReductionResult {
  528. .tokens = {},
  529. .next_reduction = Reduction::Start,
  530. };
  531. }
  532. if (!m_state.escaping && m_lexer.peek() == '#' && m_state.buffer.is_empty()) {
  533. consume();
  534. return ReductionResult {
  535. .tokens = {},
  536. .next_reduction = Reduction::Comment,
  537. };
  538. }
  539. if (!m_state.escaping && consume_specific('\n')) {
  540. auto tokens = TRY(Token::maybe_from_state(m_state));
  541. tokens.append(Token::newline());
  542. m_state.on_new_line = true;
  543. m_state.buffer.clear();
  544. m_state.expansions.clear();
  545. m_state.position.start_offset = m_state.position.end_offset;
  546. m_state.position.start_line = m_state.position.end_line;
  547. return ReductionResult {
  548. .tokens = move(tokens),
  549. .next_reduction = Reduction::Start,
  550. };
  551. }
  552. if (!m_state.escaping && consume_specific('\\')) {
  553. m_state.escaping = true;
  554. m_state.buffer.append('\\');
  555. return ReductionResult {
  556. .tokens = {},
  557. .next_reduction = Reduction::Start,
  558. };
  559. }
  560. if (!m_state.escaping && consume_specific('\'')) {
  561. m_state.buffer.append('\'');
  562. return ReductionResult {
  563. .tokens = {},
  564. .next_reduction = Reduction::SingleQuotedString,
  565. };
  566. }
  567. if (!m_state.escaping && consume_specific('"')) {
  568. m_state.buffer.append('"');
  569. return ReductionResult {
  570. .tokens = {},
  571. .next_reduction = Reduction::DoubleQuotedString,
  572. };
  573. }
  574. if (!m_state.escaping && is_ascii_space(m_lexer.peek())) {
  575. consume();
  576. auto tokens = TRY(Token::maybe_from_state(m_state));
  577. m_state.buffer.clear();
  578. m_state.expansions.clear();
  579. m_state.position.start_offset = m_state.position.end_offset;
  580. m_state.position.start_line = m_state.position.end_line;
  581. return ReductionResult {
  582. .tokens = move(tokens),
  583. .next_reduction = Reduction::Start,
  584. };
  585. }
  586. if (!m_state.escaping && consume_specific('$')) {
  587. m_state.buffer.append('$');
  588. if (m_lexer.next_is("("))
  589. m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range(-1) });
  590. else
  591. m_state.expansions.empend(ParameterExpansion { .parameter = StringBuilder {}, .range = range(-1) });
  592. return ReductionResult {
  593. .tokens = {},
  594. .next_reduction = Reduction::Expansion,
  595. };
  596. }
  597. if (!m_state.escaping && consume_specific('`')) {
  598. m_state.buffer.append('`');
  599. m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range(-1) });
  600. return ReductionResult {
  601. .tokens = {},
  602. .next_reduction = Reduction::CommandExpansion,
  603. };
  604. }
  605. if (!m_state.escaping && m_state.in_skip_mode && is_any_of("})"sv)(m_lexer.peek())) {
  606. // That's an eof for us.
  607. return ReductionResult {
  608. .tokens = {},
  609. .next_reduction = Reduction::None,
  610. };
  611. }
  612. if (!m_state.escaping && is_part_of_operator(""sv, m_lexer.peek())) {
  613. auto tokens = TRY(Token::maybe_from_state(m_state));
  614. m_state.buffer.clear();
  615. m_state.buffer.append(consume());
  616. m_state.expansions.clear();
  617. m_state.position.start_offset = m_state.position.end_offset;
  618. m_state.position.start_line = m_state.position.end_line;
  619. return ReductionResult {
  620. .tokens = move(tokens),
  621. .next_reduction = Reduction::Operator,
  622. };
  623. }
  624. m_state.escaping = false;
  625. m_state.buffer.append(consume());
  626. return ReductionResult {
  627. .tokens = {},
  628. .next_reduction = Reduction::Start,
  629. };
  630. }
  631. ErrorOr<Lexer::ReductionResult> Lexer::reduce_arithmetic_expansion()
  632. {
  633. if (m_lexer.is_eof()) {
  634. auto& expansion = m_state.expansions.last().get<ArithmeticExpansion>();
  635. expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
  636. return ReductionResult {
  637. .tokens = { Token::continuation("$(("_string) },
  638. .next_reduction = m_state.previous_reduction,
  639. };
  640. }
  641. if (m_lexer.peek() == ')' && m_state.buffer.string_view().ends_with(')')) {
  642. m_state.buffer.append(consume());
  643. auto& expansion = m_state.expansions.last().get<ArithmeticExpansion>();
  644. expansion.expression = TRY(String::from_utf8(expansion.value.string_view().substring_view(0, expansion.value.length() - 1)));
  645. expansion.value.clear();
  646. expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
  647. return ReductionResult {
  648. .tokens = {},
  649. .next_reduction = m_state.previous_reduction,
  650. };
  651. }
  652. auto ch = consume();
  653. m_state.buffer.append(ch);
  654. m_state.expansions.last().get<ArithmeticExpansion>().value.append(ch);
  655. return ReductionResult {
  656. .tokens = {},
  657. .next_reduction = Reduction::ArithmeticExpansion,
  658. };
  659. }
  660. ErrorOr<Lexer::ReductionResult> Lexer::reduce_special_parameter_expansion()
  661. {
  662. auto ch = consume();
  663. m_state.buffer.append(ch);
  664. m_state.expansions.last() = ParameterExpansion {
  665. .parameter = StringBuilder {},
  666. .range = range(-2),
  667. };
  668. auto& expansion = m_state.expansions.last().get<ParameterExpansion>();
  669. expansion.parameter.append(ch);
  670. expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
  671. return ReductionResult {
  672. .tokens = {},
  673. .next_reduction = m_state.previous_reduction,
  674. };
  675. }
  676. ErrorOr<Lexer::ReductionResult> Lexer::reduce_parameter_expansion()
  677. {
  678. auto& expansion = m_state.expansions.last().get<ParameterExpansion>();
  679. if (m_lexer.is_eof()) {
  680. return ReductionResult {
  681. .tokens = {},
  682. .next_reduction = Reduction::Start,
  683. };
  684. }
  685. auto next = m_lexer.peek();
  686. if (is_ascii_alphanumeric(next) || next == '_') {
  687. m_state.buffer.append(consume());
  688. expansion.parameter.append(next);
  689. expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
  690. return ReductionResult {
  691. .tokens = {},
  692. .next_reduction = Reduction::ParameterExpansion,
  693. };
  694. }
  695. return reduce(m_state.previous_reduction);
  696. }
  697. ErrorOr<Lexer::ReductionResult> Lexer::reduce_command_or_arithmetic_substitution_expansion()
  698. {
  699. auto ch = m_lexer.peek();
  700. if (ch == '(' && m_state.buffer.string_view().ends_with("$("sv)) {
  701. m_state.buffer.append(consume());
  702. m_state.expansions.last() = ArithmeticExpansion {
  703. .expression = {},
  704. .value = StringBuilder {},
  705. .range = range(-2)
  706. };
  707. return ReductionResult {
  708. .tokens = {},
  709. .next_reduction = Reduction::ArithmeticExpansion,
  710. };
  711. }
  712. auto saved_position = m_state.position;
  713. {
  714. auto skip_mode = switch_to_skip_mode();
  715. auto next_reduction = Reduction::Start;
  716. do {
  717. auto result = TRY(reduce(next_reduction));
  718. next_reduction = result.next_reduction;
  719. } while (next_reduction != Reduction::None);
  720. saved_position = m_state.position;
  721. }
  722. auto const skipped_text = m_lexer.input().substring_view(m_state.position.end_offset, saved_position.end_offset - m_state.position.end_offset);
  723. m_state.position.end_offset = saved_position.end_offset;
  724. m_state.position.end_line = saved_position.end_line;
  725. m_state.buffer.append(skipped_text);
  726. m_state.expansions.last().get<CommandExpansion>().command.append(skipped_text);
  727. m_state.expansions.last().visit([&](auto& expansion) {
  728. expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
  729. });
  730. if (m_lexer.is_eof()) {
  731. return ReductionResult {
  732. .tokens = { Token::continuation("$("_string) },
  733. .next_reduction = m_state.previous_reduction,
  734. };
  735. }
  736. ch = m_lexer.peek();
  737. if (ch == '(' && m_state.buffer.string_view().ends_with("$("sv)) {
  738. m_state.buffer.append(consume());
  739. m_state.expansions.last() = ArithmeticExpansion {
  740. .expression = {},
  741. .value = m_state.expansions.last().get<CommandExpansion>().command,
  742. .range = range(-2)
  743. };
  744. return ReductionResult {
  745. .tokens = {},
  746. .next_reduction = Reduction::ArithmeticExpansion,
  747. };
  748. }
  749. if (ch == ')') {
  750. m_state.buffer.append(consume());
  751. m_state.expansions.last().visit([&](auto& expansion) {
  752. expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
  753. });
  754. return ReductionResult {
  755. .tokens = {},
  756. .next_reduction = m_state.previous_reduction,
  757. };
  758. }
  759. m_state.buffer.append(consume());
  760. m_state.expansions.last().get<CommandExpansion>().command.append(ch);
  761. return ReductionResult {
  762. .tokens = {},
  763. .next_reduction = Reduction::CommandOrArithmeticSubstitutionExpansion,
  764. };
  765. }
  766. ErrorOr<Lexer::ReductionResult> Lexer::reduce_extended_parameter_expansion()
  767. {
  768. auto& expansion = m_state.expansions.last().get<ParameterExpansion>();
  769. if (m_lexer.is_eof()) {
  770. return ReductionResult {
  771. .tokens = { Token::continuation("${"_string) },
  772. .next_reduction = m_state.previous_reduction,
  773. };
  774. }
  775. auto ch = m_lexer.peek();
  776. if (ch == '}') {
  777. m_state.buffer.append(consume());
  778. expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
  779. return ReductionResult {
  780. .tokens = {},
  781. .next_reduction = m_state.previous_reduction,
  782. };
  783. }
  784. m_state.buffer.append(consume());
  785. expansion.parameter.append(ch);
  786. expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
  787. return ReductionResult {
  788. .tokens = {},
  789. .next_reduction = Reduction::ExtendedParameterExpansion,
  790. };
  791. }
  792. StringView Token::type_name() const
  793. {
  794. switch (type) {
  795. case Type::Eof:
  796. return "Eof"sv;
  797. case Type::Newline:
  798. return "Newline"sv;
  799. case Type::Continuation:
  800. return "Continuation"sv;
  801. case Type::Token:
  802. return "Token"sv;
  803. case Type::And:
  804. return "And"sv;
  805. case Type::Pipe:
  806. return "Pipe"sv;
  807. case Type::OpenParen:
  808. return "OpenParen"sv;
  809. case Type::CloseParen:
  810. return "CloseParen"sv;
  811. case Type::Great:
  812. return "Great"sv;
  813. case Type::Less:
  814. return "Less"sv;
  815. case Type::AndIf:
  816. return "AndIf"sv;
  817. case Type::OrIf:
  818. return "OrIf"sv;
  819. case Type::DoubleSemicolon:
  820. return "DoubleSemicolon"sv;
  821. case Type::DoubleLess:
  822. return "DoubleLess"sv;
  823. case Type::DoubleGreat:
  824. return "DoubleGreat"sv;
  825. case Type::LessAnd:
  826. return "LessAnd"sv;
  827. case Type::GreatAnd:
  828. return "GreatAnd"sv;
  829. case Type::LessGreat:
  830. return "LessGreat"sv;
  831. case Type::DoubleLessDash:
  832. return "DoubleLessDash"sv;
  833. case Type::Clobber:
  834. return "Clobber"sv;
  835. case Type::Semicolon:
  836. return "Semicolon"sv;
  837. case Type::HeredocContents:
  838. return "HeredocContents"sv;
  839. case Type::AssignmentWord:
  840. return "AssignmentWord"sv;
  841. case Type::Bang:
  842. return "Bang"sv;
  843. case Type::Case:
  844. return "Case"sv;
  845. case Type::CloseBrace:
  846. return "CloseBrace"sv;
  847. case Type::Do:
  848. return "Do"sv;
  849. case Type::Done:
  850. return "Done"sv;
  851. case Type::Elif:
  852. return "Elif"sv;
  853. case Type::Else:
  854. return "Else"sv;
  855. case Type::Esac:
  856. return "Esac"sv;
  857. case Type::Fi:
  858. return "Fi"sv;
  859. case Type::For:
  860. return "For"sv;
  861. case Type::If:
  862. return "If"sv;
  863. case Type::In:
  864. return "In"sv;
  865. case Type::IoNumber:
  866. return "IoNumber"sv;
  867. case Type::OpenBrace:
  868. return "OpenBrace"sv;
  869. case Type::Then:
  870. return "Then"sv;
  871. case Type::Until:
  872. return "Until"sv;
  873. case Type::VariableName:
  874. return "VariableName"sv;
  875. case Type::While:
  876. return "While"sv;
  877. case Type::Word:
  878. return "Word"sv;
  879. }
  880. return "Idk"sv;
  881. }
  882. }