|
@@ -23,8 +23,11 @@ static bool is_part_of_operator(StringView text, char ch)
|
|
|
|
|
|
namespace Shell::Posix {
|
|
|
|
|
|
-Vector<Token> Lexer::batch_next()
|
|
|
+Vector<Token> Lexer::batch_next(Optional<Reduction> starting_reduction)
|
|
|
{
|
|
|
+ if (starting_reduction.has_value())
|
|
|
+ m_next_reduction = *starting_reduction;
|
|
|
+
|
|
|
for (; m_next_reduction != Reduction::None;) {
|
|
|
auto result = reduce(m_next_reduction);
|
|
|
m_next_reduction = result.next_reduction;
|
|
@@ -55,6 +58,18 @@ char Lexer::consume()
|
|
|
return ch;
|
|
|
}
|
|
|
|
|
|
+void Lexer::reconsume(StringView string)
|
|
|
+{
|
|
|
+ for (auto byte : string.bytes()) {
|
|
|
+ if (byte == '\n') {
|
|
|
+ m_state.position.end_line.line_number++;
|
|
|
+ m_state.position.end_line.line_column = 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ m_state.position.end_offset++;
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
bool Lexer::consume_specific(char ch)
|
|
|
{
|
|
|
if (m_lexer.peek() == ch) {
|
|
@@ -95,6 +110,8 @@ Lexer::ReductionResult Lexer::reduce(Reduction reduction)
|
|
|
return reduce_command_or_arithmetic_substitution_expansion();
|
|
|
case Reduction::ExtendedParameterExpansion:
|
|
|
return reduce_extended_parameter_expansion();
|
|
|
+ case Reduction::HeredocContents:
|
|
|
+ return reduce_heredoc_contents();
|
|
|
}
|
|
|
|
|
|
VERIFY_NOT_REACHED();
|
|
@@ -108,6 +125,91 @@ Lexer::ReductionResult Lexer::reduce_end()
|
|
|
};
|
|
|
}
|
|
|
|
|
|
+Lexer::HeredocKeyResult Lexer::process_heredoc_key(Token const& token)
|
|
|
+{
|
|
|
+ StringBuilder builder;
|
|
|
+ enum ParseState {
|
|
|
+ Free,
|
|
|
+ InDoubleQuotes,
|
|
|
+ InSingleQuotes,
|
|
|
+ };
|
|
|
+ Vector<ParseState, 4> parse_state;
|
|
|
+ parse_state.append(Free);
|
|
|
+ bool escaped = false;
|
|
|
+ bool had_a_single_quote_segment = false;
|
|
|
+
|
|
|
+ for (auto byte : token.value.bytes()) {
|
|
|
+ switch (parse_state.last()) {
|
|
|
+ case Free:
|
|
|
+ switch (byte) {
|
|
|
+ case '"':
|
|
|
+ if (escaped) {
|
|
|
+ builder.append(byte);
|
|
|
+ escaped = false;
|
|
|
+ } else {
|
|
|
+ parse_state.append(InDoubleQuotes);
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ case '\'':
|
|
|
+ if (escaped) {
|
|
|
+ builder.append(byte);
|
|
|
+ escaped = false;
|
|
|
+ } else {
|
|
|
+ had_a_single_quote_segment = true;
|
|
|
+ parse_state.append(InSingleQuotes);
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ case '\\':
|
|
|
+ if (escaped) {
|
|
|
+ builder.append(byte);
|
|
|
+ escaped = false;
|
|
|
+ } else {
|
|
|
+ escaped = true;
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ default:
|
|
|
+ if (escaped) {
|
|
|
+ builder.append('\\');
|
|
|
+ escaped = false;
|
|
|
+ }
|
|
|
+ builder.append(byte);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ case InDoubleQuotes:
|
|
|
+ if (!escaped && byte == '"') {
|
|
|
+ parse_state.take_last();
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ if (escaped) {
|
|
|
+ if (byte != '"')
|
|
|
+ builder.append('\\');
|
|
|
+ builder.append(byte);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ if (byte == '\\')
|
|
|
+ escaped = true;
|
|
|
+ else
|
|
|
+ builder.append(byte);
|
|
|
+ break;
|
|
|
+ case InSingleQuotes:
|
|
|
+ if (byte == '\'') {
|
|
|
+ parse_state.take_last();
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ builder.append(byte);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // NOTE: Not checking the final state as any garbage that even partially parses is allowed to be used as a key :/
|
|
|
+
|
|
|
+ return {
|
|
|
+ .key = builder.to_deprecated_string(),
|
|
|
+ .allow_interpolation = !had_a_single_quote_segment,
|
|
|
+ };
|
|
|
+}
|
|
|
+
|
|
|
Lexer::ReductionResult Lexer::reduce_operator()
|
|
|
{
|
|
|
if (m_lexer.is_eof()) {
|
|
@@ -142,8 +244,25 @@ Lexer::ReductionResult Lexer::reduce_operator()
|
|
|
m_state.position.start_line = m_state.position.end_line;
|
|
|
}
|
|
|
|
|
|
+ auto expect_heredoc_entry = !tokens.is_empty() && (tokens.last().type == Token::Type::DoubleLessDash || tokens.last().type == Token::Type::DoubleLess);
|
|
|
+
|
|
|
auto result = reduce(Reduction::Start);
|
|
|
tokens.extend(move(result.tokens));
|
|
|
+
|
|
|
+ while (expect_heredoc_entry && tokens.size() == 1) {
|
|
|
+ result = reduce(result.next_reduction);
|
|
|
+ tokens.extend(move(result.tokens));
|
|
|
+ }
|
|
|
+
|
|
|
+ if (expect_heredoc_entry && tokens.size() > 1) {
|
|
|
+ auto [key, interpolation] = process_heredoc_key(tokens[1]);
|
|
|
+ m_state.heredoc_entries.enqueue(HeredocEntry {
|
|
|
+ .key = key,
|
|
|
+ .allow_interpolation = interpolation,
|
|
|
+ .dedent = tokens[0].type == Token::Type::DoubleLessDash,
|
|
|
+ });
|
|
|
+ }
|
|
|
+
|
|
|
return {
|
|
|
.tokens = move(tokens),
|
|
|
.next_reduction = result.next_reduction,
|
|
@@ -160,6 +279,7 @@ Lexer::ReductionResult Lexer::reduce_comment()
|
|
|
}
|
|
|
|
|
|
if (consume() == '\n') {
|
|
|
+ m_state.on_new_line = true;
|
|
|
return {
|
|
|
.tokens = { Token::newline() },
|
|
|
.next_reduction = Reduction::Start,
|
|
@@ -352,8 +472,64 @@ Lexer::ReductionResult Lexer::reduce_command_expansion()
|
|
|
};
|
|
|
}
|
|
|
|
|
|
+Lexer::ReductionResult Lexer::reduce_heredoc_contents()
|
|
|
+{
|
|
|
+ if (m_lexer.is_eof()) {
|
|
|
+ auto tokens = Token::maybe_from_state(m_state);
|
|
|
+ m_state.buffer.clear();
|
|
|
+ m_state.position.start_offset = m_state.position.end_offset;
|
|
|
+ m_state.position.start_line = m_state.position.end_line;
|
|
|
+
|
|
|
+ return {
|
|
|
+ .tokens = move(tokens),
|
|
|
+ .next_reduction = Reduction::End,
|
|
|
+ };
|
|
|
+ }
|
|
|
+
|
|
|
+ if (!m_state.escaping && consume_specific('\\')) {
|
|
|
+ m_state.escaping = true;
|
|
|
+ m_state.buffer.append('\\');
|
|
|
+ return {
|
|
|
+ .tokens = {},
|
|
|
+ .next_reduction = Reduction::HeredocContents,
|
|
|
+ };
|
|
|
+ }
|
|
|
+
|
|
|
+ if (!m_state.escaping && consume_specific('$')) {
|
|
|
+ m_state.buffer.append('$');
|
|
|
+ if (m_lexer.next_is("("))
|
|
|
+ m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range() });
|
|
|
+ else
|
|
|
+ m_state.expansions.empend(ParameterExpansion { .parameter = StringBuilder {}, .range = range() });
|
|
|
+
|
|
|
+ return {
|
|
|
+ .tokens = {},
|
|
|
+ .next_reduction = Reduction::Expansion,
|
|
|
+ };
|
|
|
+ }
|
|
|
+
|
|
|
+ if (!m_state.escaping && consume_specific('`')) {
|
|
|
+ m_state.buffer.append('`');
|
|
|
+ m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range() });
|
|
|
+ return {
|
|
|
+ .tokens = {},
|
|
|
+ .next_reduction = Reduction::CommandExpansion,
|
|
|
+ };
|
|
|
+ }
|
|
|
+
|
|
|
+ m_state.escaping = false;
|
|
|
+ m_state.buffer.append(consume());
|
|
|
+ return {
|
|
|
+ .tokens = {},
|
|
|
+ .next_reduction = Reduction::HeredocContents,
|
|
|
+ };
|
|
|
+}
|
|
|
+
|
|
|
Lexer::ReductionResult Lexer::reduce_start()
|
|
|
{
|
|
|
+ auto was_on_new_line = m_state.on_new_line;
|
|
|
+ m_state.on_new_line = false;
|
|
|
+
|
|
|
if (m_lexer.is_eof()) {
|
|
|
auto tokens = Token::maybe_from_state(m_state);
|
|
|
m_state.buffer.clear();
|
|
@@ -366,6 +542,51 @@ Lexer::ReductionResult Lexer::reduce_start()
|
|
|
};
|
|
|
}
|
|
|
|
|
|
+ if (was_on_new_line && !m_state.heredoc_entries.is_empty()) {
|
|
|
+ auto const& entry = m_state.heredoc_entries.head();
|
|
|
+
|
|
|
+ auto start_index = m_lexer.tell();
|
|
|
+ Optional<size_t> end_index;
|
|
|
+
|
|
|
+ for (; !m_lexer.is_eof();) {
|
|
|
+ auto index = m_lexer.tell();
|
|
|
+ auto possible_end_index = m_lexer.tell();
|
|
|
+ if (m_lexer.consume_specific('\n')) {
|
|
|
+ if (entry.dedent)
|
|
|
+ m_lexer.ignore_while(is_any_of("\t"sv));
|
|
|
+ if (m_lexer.consume_specific(entry.key.view())) {
|
|
|
+ if (m_lexer.consume_specific('\n') || m_lexer.is_eof()) {
|
|
|
+ end_index = possible_end_index;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (m_lexer.tell() == index)
|
|
|
+ m_lexer.ignore();
|
|
|
+ }
|
|
|
+
|
|
|
+ auto contents = m_lexer.input().substring_view(start_index, end_index.value_or(m_lexer.tell()) - start_index);
|
|
|
+ reconsume(contents);
|
|
|
+
|
|
|
+ m_state.buffer.clear();
|
|
|
+ m_state.buffer.append(contents);
|
|
|
+
|
|
|
+ auto token = Token::maybe_from_state(m_state).first();
|
|
|
+ token.relevant_heredoc_key = entry.key;
|
|
|
+ token.type = Token::Type::HeredocContents;
|
|
|
+
|
|
|
+ m_state.heredoc_entries.dequeue();
|
|
|
+
|
|
|
+ m_state.on_new_line = true;
|
|
|
+
|
|
|
+ m_state.buffer.clear();
|
|
|
+
|
|
|
+ return {
|
|
|
+ .tokens = { move(token) },
|
|
|
+ .next_reduction = Reduction::Start,
|
|
|
+ };
|
|
|
+ }
|
|
|
+
|
|
|
if (m_state.escaping && consume_specific('\n')) {
|
|
|
m_state.escaping = false;
|
|
|
|
|
@@ -391,6 +612,8 @@ Lexer::ReductionResult Lexer::reduce_start()
|
|
|
auto tokens = Token::maybe_from_state(m_state);
|
|
|
tokens.append(Token::newline());
|
|
|
|
|
|
+ m_state.on_new_line = true;
|
|
|
+
|
|
|
m_state.buffer.clear();
|
|
|
m_state.position.start_offset = m_state.position.end_offset;
|
|
|
m_state.position.start_line = m_state.position.end_line;
|
|
@@ -678,6 +901,8 @@ StringView Token::type_name() const
|
|
|
return "Clobber"sv;
|
|
|
case Type::Semicolon:
|
|
|
return "Semicolon"sv;
|
|
|
+ case Type::HeredocContents:
|
|
|
+ return "HeredocContents"sv;
|
|
|
case Type::AssignmentWord:
|
|
|
return "AssignmentWord"sv;
|
|
|
case Type::Bang:
|