Shell: Convert the POSIX parser/lexer to ErrorOr

This commit is contained in:
Ali Mohammad Pur 2023-02-19 19:31:17 +03:30 committed by Ali Mohammad Pur
parent 0c28fd41ed
commit b2b851b361
Notes: sideshowbarker 2024-07-17 07:31:31 +09:00
4 changed files with 358 additions and 332 deletions

View file

@ -23,19 +23,19 @@ static bool is_part_of_operator(StringView text, char ch)
namespace Shell::Posix { namespace Shell::Posix {
Vector<Token> Lexer::batch_next(Optional<Reduction> starting_reduction) ErrorOr<Vector<Token>> Lexer::batch_next(Optional<Reduction> starting_reduction)
{ {
if (starting_reduction.has_value()) if (starting_reduction.has_value())
m_next_reduction = *starting_reduction; m_next_reduction = *starting_reduction;
for (; m_next_reduction != Reduction::None;) { for (; m_next_reduction != Reduction::None;) {
auto result = reduce(m_next_reduction); auto result = TRY(reduce(m_next_reduction));
m_next_reduction = result.next_reduction; m_next_reduction = result.next_reduction;
if (!result.tokens.is_empty()) if (!result.tokens.is_empty())
return result.tokens; return result.tokens;
} }
return {}; return Vector<Token> {};
} }
ExpansionRange Lexer::range(ssize_t offset) const ExpansionRange Lexer::range(ssize_t offset) const
@ -79,11 +79,11 @@ bool Lexer::consume_specific(char ch)
return false; return false;
} }
Lexer::ReductionResult Lexer::reduce(Reduction reduction) ErrorOr<Lexer::ReductionResult> Lexer::reduce(Reduction reduction)
{ {
switch (reduction) { switch (reduction) {
case Reduction::None: case Reduction::None:
return { {}, Reduction::None }; return ReductionResult { {}, Reduction::None };
case Reduction::End: case Reduction::End:
return reduce_end(); return reduce_end();
case Reduction::Operator: case Reduction::Operator:
@ -117,9 +117,9 @@ Lexer::ReductionResult Lexer::reduce(Reduction reduction)
VERIFY_NOT_REACHED(); VERIFY_NOT_REACHED();
} }
Lexer::ReductionResult Lexer::reduce_end() ErrorOr<Lexer::ReductionResult> Lexer::reduce_end()
{ {
return { return ReductionResult {
.tokens = { Token::eof() }, .tokens = { Token::eof() },
.next_reduction = Reduction::None, .next_reduction = Reduction::None,
}; };
@ -205,21 +205,21 @@ Lexer::HeredocKeyResult Lexer::process_heredoc_key(Token const& token)
// NOTE: Not checking the final state as any garbage that even partially parses is allowed to be used as a key :/ // NOTE: Not checking the final state as any garbage that even partially parses is allowed to be used as a key :/
return { return {
.key = builder.to_deprecated_string(), .key = builder.to_string().release_value_but_fixme_should_propagate_errors(),
.allow_interpolation = !had_a_single_quote_segment, .allow_interpolation = !had_a_single_quote_segment,
}; };
} }
Lexer::ReductionResult Lexer::reduce_operator() ErrorOr<Lexer::ReductionResult> Lexer::reduce_operator()
{ {
if (m_lexer.is_eof()) { if (m_lexer.is_eof()) {
if (is_operator(m_state.buffer.string_view())) { if (is_operator(m_state.buffer.string_view())) {
auto tokens = Token::operators_from(m_state); auto tokens = TRY(Token::operators_from(m_state));
m_state.buffer.clear(); m_state.buffer.clear();
m_state.position.start_offset = m_state.position.end_offset; m_state.position.start_offset = m_state.position.end_offset;
m_state.position.start_line = m_state.position.end_line; m_state.position.start_line = m_state.position.end_line;
return { return ReductionResult {
.tokens = move(tokens), .tokens = move(tokens),
.next_reduction = Reduction::End, .next_reduction = Reduction::End,
}; };
@ -230,7 +230,7 @@ Lexer::ReductionResult Lexer::reduce_operator()
if (is_part_of_operator(m_state.buffer.string_view(), m_lexer.peek())) { if (is_part_of_operator(m_state.buffer.string_view(), m_lexer.peek())) {
m_state.buffer.append(consume()); m_state.buffer.append(consume());
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::Operator, .next_reduction = Reduction::Operator,
}; };
@ -238,7 +238,7 @@ Lexer::ReductionResult Lexer::reduce_operator()
auto tokens = Vector<Token> {}; auto tokens = Vector<Token> {};
if (is_operator(m_state.buffer.string_view())) { if (is_operator(m_state.buffer.string_view())) {
tokens.extend(Token::operators_from(m_state)); tokens.extend(TRY(Token::operators_from(m_state)));
m_state.buffer.clear(); m_state.buffer.clear();
m_state.position.start_offset = m_state.position.end_offset; m_state.position.start_offset = m_state.position.end_offset;
m_state.position.start_line = m_state.position.end_line; m_state.position.start_line = m_state.position.end_line;
@ -246,11 +246,11 @@ Lexer::ReductionResult Lexer::reduce_operator()
auto expect_heredoc_entry = !tokens.is_empty() && (tokens.last().type == Token::Type::DoubleLessDash || tokens.last().type == Token::Type::DoubleLess); auto expect_heredoc_entry = !tokens.is_empty() && (tokens.last().type == Token::Type::DoubleLessDash || tokens.last().type == Token::Type::DoubleLess);
auto result = reduce(Reduction::Start); auto result = TRY(reduce(Reduction::Start));
tokens.extend(move(result.tokens)); tokens.extend(move(result.tokens));
while (expect_heredoc_entry && tokens.size() == 1) { while (expect_heredoc_entry && tokens.size() == 1) {
result = reduce(result.next_reduction); result = TRY(reduce(result.next_reduction));
tokens.extend(move(result.tokens)); tokens.extend(move(result.tokens));
} }
@ -263,16 +263,16 @@ Lexer::ReductionResult Lexer::reduce_operator()
}); });
} }
return { return ReductionResult {
.tokens = move(tokens), .tokens = move(tokens),
.next_reduction = result.next_reduction, .next_reduction = result.next_reduction,
}; };
} }
Lexer::ReductionResult Lexer::reduce_comment() ErrorOr<Lexer::ReductionResult> Lexer::reduce_comment()
{ {
if (m_lexer.is_eof()) { if (m_lexer.is_eof()) {
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::End, .next_reduction = Reduction::End,
}; };
@ -280,24 +280,24 @@ Lexer::ReductionResult Lexer::reduce_comment()
if (consume() == '\n') { if (consume() == '\n') {
m_state.on_new_line = true; m_state.on_new_line = true;
return { return ReductionResult {
.tokens = { Token::newline() }, .tokens = { Token::newline() },
.next_reduction = Reduction::Start, .next_reduction = Reduction::Start,
}; };
} }
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::Comment, .next_reduction = Reduction::Comment,
}; };
} }
Lexer::ReductionResult Lexer::reduce_single_quoted_string() ErrorOr<Lexer::ReductionResult> Lexer::reduce_single_quoted_string()
{ {
if (m_lexer.is_eof()) { if (m_lexer.is_eof()) {
auto tokens = Token::maybe_from_state(m_state); auto tokens = TRY(Token::maybe_from_state(m_state));
tokens.append(Token::continuation('\'')); tokens.append(Token::continuation('\''));
return { return ReductionResult {
.tokens = move(tokens), .tokens = move(tokens),
.next_reduction = Reduction::End, .next_reduction = Reduction::End,
}; };
@ -307,25 +307,25 @@ Lexer::ReductionResult Lexer::reduce_single_quoted_string()
m_state.buffer.append(ch); m_state.buffer.append(ch);
if (ch == '\'') { if (ch == '\'') {
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::Start, .next_reduction = Reduction::Start,
}; };
} }
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::SingleQuotedString, .next_reduction = Reduction::SingleQuotedString,
}; };
} }
Lexer::ReductionResult Lexer::reduce_double_quoted_string() ErrorOr<Lexer::ReductionResult> Lexer::reduce_double_quoted_string()
{ {
m_state.previous_reduction = Reduction::DoubleQuotedString; m_state.previous_reduction = Reduction::DoubleQuotedString;
if (m_lexer.is_eof()) { if (m_lexer.is_eof()) {
auto tokens = Token::maybe_from_state(m_state); auto tokens = TRY(Token::maybe_from_state(m_state));
tokens.append(Token::continuation('"')); tokens.append(Token::continuation('"'));
return { return ReductionResult {
.tokens = move(tokens), .tokens = move(tokens),
.next_reduction = Reduction::End, .next_reduction = Reduction::End,
}; };
@ -337,7 +337,7 @@ Lexer::ReductionResult Lexer::reduce_double_quoted_string()
if (m_state.escaping) { if (m_state.escaping) {
m_state.escaping = false; m_state.escaping = false;
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::DoubleQuotedString, .next_reduction = Reduction::DoubleQuotedString,
}; };
@ -346,13 +346,13 @@ Lexer::ReductionResult Lexer::reduce_double_quoted_string()
switch (ch) { switch (ch) {
case '\\': case '\\':
m_state.escaping = true; m_state.escaping = true;
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::DoubleQuotedString, .next_reduction = Reduction::DoubleQuotedString,
}; };
case '"': case '"':
m_state.previous_reduction = Reduction::Start; m_state.previous_reduction = Reduction::Start;
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::Start, .next_reduction = Reduction::Start,
}; };
@ -361,25 +361,25 @@ Lexer::ReductionResult Lexer::reduce_double_quoted_string()
m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range() }); m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range() });
else else
m_state.expansions.empend(ParameterExpansion { .parameter = StringBuilder {}, .range = range() }); m_state.expansions.empend(ParameterExpansion { .parameter = StringBuilder {}, .range = range() });
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::Expansion, .next_reduction = Reduction::Expansion,
}; };
case '`': case '`':
m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range() }); m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range() });
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::CommandExpansion, .next_reduction = Reduction::CommandExpansion,
}; };
default: default:
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::DoubleQuotedString, .next_reduction = Reduction::DoubleQuotedString,
}; };
} }
} }
Lexer::ReductionResult Lexer::reduce_expansion() ErrorOr<Lexer::ReductionResult> Lexer::reduce_expansion()
{ {
if (m_lexer.is_eof()) if (m_lexer.is_eof())
return reduce(m_state.previous_reduction); return reduce(m_state.previous_reduction);
@ -390,14 +390,14 @@ Lexer::ReductionResult Lexer::reduce_expansion()
case '{': case '{':
consume(); consume();
m_state.buffer.append(ch); m_state.buffer.append(ch);
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::ExtendedParameterExpansion, .next_reduction = Reduction::ExtendedParameterExpansion,
}; };
case '(': case '(':
consume(); consume();
m_state.buffer.append(ch); m_state.buffer.append(ch);
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::CommandOrArithmeticSubstitutionExpansion, .next_reduction = Reduction::CommandOrArithmeticSubstitutionExpansion,
}; };
@ -410,7 +410,7 @@ Lexer::ReductionResult Lexer::reduce_expansion()
expansion.parameter.append(ch); expansion.parameter.append(ch);
expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset; expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::ParameterExpansion, .next_reduction = Reduction::ParameterExpansion,
}; };
@ -430,13 +430,13 @@ Lexer::ReductionResult Lexer::reduce_expansion()
} }
} }
Lexer::ReductionResult Lexer::reduce_command_expansion() ErrorOr<Lexer::ReductionResult> Lexer::reduce_command_expansion()
{ {
if (m_lexer.is_eof()) { if (m_lexer.is_eof()) {
auto& expansion = m_state.expansions.last().get<CommandExpansion>(); auto& expansion = m_state.expansions.last().get<CommandExpansion>();
expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset; expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
return { return ReductionResult {
.tokens = { Token::continuation('`') }, .tokens = { Token::continuation('`') },
.next_reduction = m_state.previous_reduction, .next_reduction = m_state.previous_reduction,
}; };
@ -449,7 +449,7 @@ Lexer::ReductionResult Lexer::reduce_command_expansion()
auto& expansion = m_state.expansions.last().get<CommandExpansion>(); auto& expansion = m_state.expansions.last().get<CommandExpansion>();
expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset; expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = m_state.previous_reduction, .next_reduction = m_state.previous_reduction,
}; };
@ -457,7 +457,7 @@ Lexer::ReductionResult Lexer::reduce_command_expansion()
if (!m_state.escaping && ch == '\\') { if (!m_state.escaping && ch == '\\') {
m_state.escaping = true; m_state.escaping = true;
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::CommandExpansion, .next_reduction = Reduction::CommandExpansion,
}; };
@ -466,21 +466,21 @@ Lexer::ReductionResult Lexer::reduce_command_expansion()
m_state.escaping = false; m_state.escaping = false;
m_state.buffer.append(ch); m_state.buffer.append(ch);
m_state.expansions.last().get<CommandExpansion>().command.append(ch); m_state.expansions.last().get<CommandExpansion>().command.append(ch);
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::CommandExpansion, .next_reduction = Reduction::CommandExpansion,
}; };
} }
Lexer::ReductionResult Lexer::reduce_heredoc_contents() ErrorOr<Lexer::ReductionResult> Lexer::reduce_heredoc_contents()
{ {
if (m_lexer.is_eof()) { if (m_lexer.is_eof()) {
auto tokens = Token::maybe_from_state(m_state); auto tokens = TRY(Token::maybe_from_state(m_state));
m_state.buffer.clear(); m_state.buffer.clear();
m_state.position.start_offset = m_state.position.end_offset; m_state.position.start_offset = m_state.position.end_offset;
m_state.position.start_line = m_state.position.end_line; m_state.position.start_line = m_state.position.end_line;
return { return ReductionResult {
.tokens = move(tokens), .tokens = move(tokens),
.next_reduction = Reduction::End, .next_reduction = Reduction::End,
}; };
@ -489,7 +489,7 @@ Lexer::ReductionResult Lexer::reduce_heredoc_contents()
if (!m_state.escaping && consume_specific('\\')) { if (!m_state.escaping && consume_specific('\\')) {
m_state.escaping = true; m_state.escaping = true;
m_state.buffer.append('\\'); m_state.buffer.append('\\');
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::HeredocContents, .next_reduction = Reduction::HeredocContents,
}; };
@ -502,7 +502,7 @@ Lexer::ReductionResult Lexer::reduce_heredoc_contents()
else else
m_state.expansions.empend(ParameterExpansion { .parameter = StringBuilder {}, .range = range() }); m_state.expansions.empend(ParameterExpansion { .parameter = StringBuilder {}, .range = range() });
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::Expansion, .next_reduction = Reduction::Expansion,
}; };
@ -511,7 +511,7 @@ Lexer::ReductionResult Lexer::reduce_heredoc_contents()
if (!m_state.escaping && consume_specific('`')) { if (!m_state.escaping && consume_specific('`')) {
m_state.buffer.append('`'); m_state.buffer.append('`');
m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range() }); m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range() });
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::CommandExpansion, .next_reduction = Reduction::CommandExpansion,
}; };
@ -519,25 +519,25 @@ Lexer::ReductionResult Lexer::reduce_heredoc_contents()
m_state.escaping = false; m_state.escaping = false;
m_state.buffer.append(consume()); m_state.buffer.append(consume());
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::HeredocContents, .next_reduction = Reduction::HeredocContents,
}; };
} }
Lexer::ReductionResult Lexer::reduce_start() ErrorOr<Lexer::ReductionResult> Lexer::reduce_start()
{ {
auto was_on_new_line = m_state.on_new_line; auto was_on_new_line = m_state.on_new_line;
m_state.on_new_line = false; m_state.on_new_line = false;
if (m_lexer.is_eof()) { if (m_lexer.is_eof()) {
auto tokens = Token::maybe_from_state(m_state); auto tokens = TRY(Token::maybe_from_state(m_state));
m_state.buffer.clear(); m_state.buffer.clear();
m_state.expansions.clear(); m_state.expansions.clear();
m_state.position.start_offset = m_state.position.end_offset; m_state.position.start_offset = m_state.position.end_offset;
m_state.position.start_line = m_state.position.end_line; m_state.position.start_line = m_state.position.end_line;
return { return ReductionResult {
.tokens = move(tokens), .tokens = move(tokens),
.next_reduction = Reduction::End, .next_reduction = Reduction::End,
}; };
@ -555,7 +555,7 @@ Lexer::ReductionResult Lexer::reduce_start()
if (m_lexer.consume_specific('\n')) { if (m_lexer.consume_specific('\n')) {
if (entry.dedent) if (entry.dedent)
m_lexer.ignore_while(is_any_of("\t"sv)); m_lexer.ignore_while(is_any_of("\t"sv));
if (m_lexer.consume_specific(entry.key.view())) { if (m_lexer.consume_specific(entry.key.bytes_as_string_view())) {
if (m_lexer.consume_specific('\n') || m_lexer.is_eof()) { if (m_lexer.consume_specific('\n') || m_lexer.is_eof()) {
end_index = possible_end_index; end_index = possible_end_index;
break; break;
@ -572,7 +572,7 @@ Lexer::ReductionResult Lexer::reduce_start()
m_state.buffer.clear(); m_state.buffer.clear();
m_state.buffer.append(contents); m_state.buffer.append(contents);
auto token = Token::maybe_from_state(m_state).first(); auto token = TRY(Token::maybe_from_state(m_state)).first();
token.relevant_heredoc_key = entry.key; token.relevant_heredoc_key = entry.key;
token.type = Token::Type::HeredocContents; token.type = Token::Type::HeredocContents;
@ -582,7 +582,7 @@ Lexer::ReductionResult Lexer::reduce_start()
m_state.buffer.clear(); m_state.buffer.clear();
return { return ReductionResult {
.tokens = { move(token) }, .tokens = { move(token) },
.next_reduction = Reduction::Start, .next_reduction = Reduction::Start,
}; };
@ -595,7 +595,7 @@ Lexer::ReductionResult Lexer::reduce_start()
m_state.buffer.clear(); m_state.buffer.clear();
m_state.buffer.append(buffer); m_state.buffer.append(buffer);
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::Start, .next_reduction = Reduction::Start,
}; };
@ -603,14 +603,14 @@ Lexer::ReductionResult Lexer::reduce_start()
if (!m_state.escaping && m_lexer.peek() == '#' && m_state.buffer.is_empty()) { if (!m_state.escaping && m_lexer.peek() == '#' && m_state.buffer.is_empty()) {
consume(); consume();
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::Comment, .next_reduction = Reduction::Comment,
}; };
} }
if (!m_state.escaping && consume_specific('\n')) { if (!m_state.escaping && consume_specific('\n')) {
auto tokens = Token::maybe_from_state(m_state); auto tokens = TRY(Token::maybe_from_state(m_state));
tokens.append(Token::newline()); tokens.append(Token::newline());
m_state.on_new_line = true; m_state.on_new_line = true;
@ -620,7 +620,7 @@ Lexer::ReductionResult Lexer::reduce_start()
m_state.position.start_offset = m_state.position.end_offset; m_state.position.start_offset = m_state.position.end_offset;
m_state.position.start_line = m_state.position.end_line; m_state.position.start_line = m_state.position.end_line;
return { return ReductionResult {
.tokens = move(tokens), .tokens = move(tokens),
.next_reduction = Reduction::Start, .next_reduction = Reduction::Start,
}; };
@ -629,21 +629,21 @@ Lexer::ReductionResult Lexer::reduce_start()
if (!m_state.escaping && consume_specific('\\')) { if (!m_state.escaping && consume_specific('\\')) {
m_state.escaping = true; m_state.escaping = true;
m_state.buffer.append('\\'); m_state.buffer.append('\\');
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::Start, .next_reduction = Reduction::Start,
}; };
} }
if (!m_state.escaping && is_part_of_operator(""sv, m_lexer.peek())) { if (!m_state.escaping && is_part_of_operator(""sv, m_lexer.peek())) {
auto tokens = Token::maybe_from_state(m_state); auto tokens = TRY(Token::maybe_from_state(m_state));
m_state.buffer.clear(); m_state.buffer.clear();
m_state.buffer.append(consume()); m_state.buffer.append(consume());
m_state.expansions.clear(); m_state.expansions.clear();
m_state.position.start_offset = m_state.position.end_offset; m_state.position.start_offset = m_state.position.end_offset;
m_state.position.start_line = m_state.position.end_line; m_state.position.start_line = m_state.position.end_line;
return { return ReductionResult {
.tokens = move(tokens), .tokens = move(tokens),
.next_reduction = Reduction::Operator, .next_reduction = Reduction::Operator,
}; };
@ -651,7 +651,7 @@ Lexer::ReductionResult Lexer::reduce_start()
if (!m_state.escaping && consume_specific('\'')) { if (!m_state.escaping && consume_specific('\'')) {
m_state.buffer.append('\''); m_state.buffer.append('\'');
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::SingleQuotedString, .next_reduction = Reduction::SingleQuotedString,
}; };
@ -659,7 +659,7 @@ Lexer::ReductionResult Lexer::reduce_start()
if (!m_state.escaping && consume_specific('"')) { if (!m_state.escaping && consume_specific('"')) {
m_state.buffer.append('"'); m_state.buffer.append('"');
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::DoubleQuotedString, .next_reduction = Reduction::DoubleQuotedString,
}; };
@ -667,13 +667,13 @@ Lexer::ReductionResult Lexer::reduce_start()
if (!m_state.escaping && is_ascii_space(m_lexer.peek())) { if (!m_state.escaping && is_ascii_space(m_lexer.peek())) {
consume(); consume();
auto tokens = Token::maybe_from_state(m_state); auto tokens = TRY(Token::maybe_from_state(m_state));
m_state.buffer.clear(); m_state.buffer.clear();
m_state.expansions.clear(); m_state.expansions.clear();
m_state.position.start_offset = m_state.position.end_offset; m_state.position.start_offset = m_state.position.end_offset;
m_state.position.start_line = m_state.position.end_line; m_state.position.start_line = m_state.position.end_line;
return { return ReductionResult {
.tokens = move(tokens), .tokens = move(tokens),
.next_reduction = Reduction::Start, .next_reduction = Reduction::Start,
}; };
@ -686,7 +686,7 @@ Lexer::ReductionResult Lexer::reduce_start()
else else
m_state.expansions.empend(ParameterExpansion { .parameter = StringBuilder {}, .range = range() }); m_state.expansions.empend(ParameterExpansion { .parameter = StringBuilder {}, .range = range() });
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::Expansion, .next_reduction = Reduction::Expansion,
}; };
@ -695,7 +695,7 @@ Lexer::ReductionResult Lexer::reduce_start()
if (!m_state.escaping && consume_specific('`')) { if (!m_state.escaping && consume_specific('`')) {
m_state.buffer.append('`'); m_state.buffer.append('`');
m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range() }); m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range() });
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::CommandExpansion, .next_reduction = Reduction::CommandExpansion,
}; };
@ -703,20 +703,20 @@ Lexer::ReductionResult Lexer::reduce_start()
m_state.escaping = false; m_state.escaping = false;
m_state.buffer.append(consume()); m_state.buffer.append(consume());
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::Start, .next_reduction = Reduction::Start,
}; };
} }
Lexer::ReductionResult Lexer::reduce_arithmetic_expansion() ErrorOr<Lexer::ReductionResult> Lexer::reduce_arithmetic_expansion()
{ {
if (m_lexer.is_eof()) { if (m_lexer.is_eof()) {
auto& expansion = m_state.expansions.last().get<ArithmeticExpansion>(); auto& expansion = m_state.expansions.last().get<ArithmeticExpansion>();
expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset; expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
return { return ReductionResult {
.tokens = { Token::continuation("$((") }, .tokens = { Token::continuation(String::from_utf8_short_string("$(("sv)) },
.next_reduction = m_state.previous_reduction, .next_reduction = m_state.previous_reduction,
}; };
} }
@ -724,11 +724,11 @@ Lexer::ReductionResult Lexer::reduce_arithmetic_expansion()
if (m_lexer.peek() == ')' && m_state.buffer.string_view().ends_with(')')) { if (m_lexer.peek() == ')' && m_state.buffer.string_view().ends_with(')')) {
m_state.buffer.append(consume()); m_state.buffer.append(consume());
auto& expansion = m_state.expansions.last().get<ArithmeticExpansion>(); auto& expansion = m_state.expansions.last().get<ArithmeticExpansion>();
expansion.expression = expansion.value.to_deprecated_string().substring(0, expansion.value.length() - 1); expansion.expression = TRY(String::from_utf8(expansion.value.string_view().substring_view(0, expansion.value.length() - 1)));
expansion.value.clear(); expansion.value.clear();
expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset; expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = m_state.previous_reduction, .next_reduction = m_state.previous_reduction,
}; };
@ -737,13 +737,13 @@ Lexer::ReductionResult Lexer::reduce_arithmetic_expansion()
auto ch = consume(); auto ch = consume();
m_state.buffer.append(ch); m_state.buffer.append(ch);
m_state.expansions.last().get<ArithmeticExpansion>().value.append(ch); m_state.expansions.last().get<ArithmeticExpansion>().value.append(ch);
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::ArithmeticExpansion, .next_reduction = Reduction::ArithmeticExpansion,
}; };
} }
Lexer::ReductionResult Lexer::reduce_special_parameter_expansion() ErrorOr<Lexer::ReductionResult> Lexer::reduce_special_parameter_expansion()
{ {
auto ch = consume(); auto ch = consume();
m_state.buffer.append(ch); m_state.buffer.append(ch);
@ -755,18 +755,18 @@ Lexer::ReductionResult Lexer::reduce_special_parameter_expansion()
expansion.parameter.append(ch); expansion.parameter.append(ch);
expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset; expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = m_state.previous_reduction, .next_reduction = m_state.previous_reduction,
}; };
} }
Lexer::ReductionResult Lexer::reduce_parameter_expansion() ErrorOr<Lexer::ReductionResult> Lexer::reduce_parameter_expansion()
{ {
auto& expansion = m_state.expansions.last().get<ParameterExpansion>(); auto& expansion = m_state.expansions.last().get<ParameterExpansion>();
if (m_lexer.is_eof()) { if (m_lexer.is_eof()) {
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::Start, .next_reduction = Reduction::Start,
}; };
@ -778,7 +778,7 @@ Lexer::ReductionResult Lexer::reduce_parameter_expansion()
expansion.parameter.append(next); expansion.parameter.append(next);
expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset; expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::ParameterExpansion, .next_reduction = Reduction::ParameterExpansion,
}; };
@ -787,11 +787,11 @@ Lexer::ReductionResult Lexer::reduce_parameter_expansion()
return reduce(m_state.previous_reduction); return reduce(m_state.previous_reduction);
} }
Lexer::ReductionResult Lexer::reduce_command_or_arithmetic_substitution_expansion() ErrorOr<Lexer::ReductionResult> Lexer::reduce_command_or_arithmetic_substitution_expansion()
{ {
if (m_lexer.is_eof()) { if (m_lexer.is_eof()) {
return { return ReductionResult {
.tokens = { Token::continuation("$(") }, .tokens = { Token::continuation(String::from_utf8_short_string("$("sv)) },
.next_reduction = m_state.previous_reduction, .next_reduction = m_state.previous_reduction,
}; };
} }
@ -800,11 +800,11 @@ Lexer::ReductionResult Lexer::reduce_command_or_arithmetic_substitution_expansio
if (ch == '(' && m_state.buffer.string_view().ends_with("$("sv)) { if (ch == '(' && m_state.buffer.string_view().ends_with("$("sv)) {
m_state.buffer.append(consume()); m_state.buffer.append(consume());
m_state.expansions.last() = ArithmeticExpansion { m_state.expansions.last() = ArithmeticExpansion {
.expression = "", .expression = {},
.value = StringBuilder {}, .value = StringBuilder {},
.range = range(-2) .range = range(-2)
}; };
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::ArithmeticExpansion, .next_reduction = Reduction::ArithmeticExpansion,
}; };
@ -815,7 +815,7 @@ Lexer::ReductionResult Lexer::reduce_command_or_arithmetic_substitution_expansio
m_state.expansions.last().visit([&](auto& expansion) { m_state.expansions.last().visit([&](auto& expansion) {
expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset; expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
}); });
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = m_state.previous_reduction, .next_reduction = m_state.previous_reduction,
}; };
@ -823,19 +823,19 @@ Lexer::ReductionResult Lexer::reduce_command_or_arithmetic_substitution_expansio
m_state.buffer.append(consume()); m_state.buffer.append(consume());
m_state.expansions.last().get<CommandExpansion>().command.append(ch); m_state.expansions.last().get<CommandExpansion>().command.append(ch);
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::CommandOrArithmeticSubstitutionExpansion, .next_reduction = Reduction::CommandOrArithmeticSubstitutionExpansion,
}; };
} }
Lexer::ReductionResult Lexer::reduce_extended_parameter_expansion() ErrorOr<Lexer::ReductionResult> Lexer::reduce_extended_parameter_expansion()
{ {
auto& expansion = m_state.expansions.last().get<ParameterExpansion>(); auto& expansion = m_state.expansions.last().get<ParameterExpansion>();
if (m_lexer.is_eof()) { if (m_lexer.is_eof()) {
return { return ReductionResult {
.tokens = { Token::continuation("${") }, .tokens = { Token::continuation(String::from_utf8_short_string("${"sv)) },
.next_reduction = m_state.previous_reduction, .next_reduction = m_state.previous_reduction,
}; };
} }
@ -845,7 +845,7 @@ Lexer::ReductionResult Lexer::reduce_extended_parameter_expansion()
m_state.buffer.append(consume()); m_state.buffer.append(consume());
expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset; expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = m_state.previous_reduction, .next_reduction = m_state.previous_reduction,
}; };
@ -855,7 +855,7 @@ Lexer::ReductionResult Lexer::reduce_extended_parameter_expansion()
expansion.parameter.append(ch); expansion.parameter.append(ch);
expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset; expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
return { return ReductionResult {
.tokens = {}, .tokens = {},
.next_reduction = Reduction::ExtendedParameterExpansion, .next_reduction = Reduction::ExtendedParameterExpansion,
}; };

View file

@ -6,9 +6,9 @@
#pragma once #pragma once
#include <AK/DeprecatedString.h>
#include <AK/GenericLexer.h> #include <AK/GenericLexer.h>
#include <AK/Queue.h> #include <AK/Queue.h>
#include <AK/String.h>
#include <AK/Variant.h> #include <AK/Variant.h>
#include <AK/Vector.h> #include <AK/Vector.h>
#include <Shell/AST.h> #include <Shell/AST.h>
@ -51,7 +51,7 @@ struct CommandExpansion {
}; };
struct ArithmeticExpansion { struct ArithmeticExpansion {
DeprecatedString expression; String expression;
StringBuilder value; StringBuilder value;
ExpansionRange range; ExpansionRange range;
}; };
@ -59,8 +59,8 @@ struct ArithmeticExpansion {
using Expansion = Variant<ParameterExpansion, CommandExpansion, ArithmeticExpansion>; using Expansion = Variant<ParameterExpansion, CommandExpansion, ArithmeticExpansion>;
struct ResolvedParameterExpansion { struct ResolvedParameterExpansion {
DeprecatedString parameter; String parameter;
DeprecatedString argument; String argument;
ExpansionRange range; ExpansionRange range;
enum class Op { enum class Op {
UseDefaultValue, // ${parameter:-word} UseDefaultValue, // ${parameter:-word}
@ -182,7 +182,7 @@ struct ResolvedCommandExpansion {
using ResolvedExpansion = Variant<ResolvedParameterExpansion, ResolvedCommandExpansion>; using ResolvedExpansion = Variant<ResolvedParameterExpansion, ResolvedCommandExpansion>;
struct HeredocEntry { struct HeredocEntry {
DeprecatedString key; String key;
bool allow_interpolation; bool allow_interpolation;
bool dedent; bool dedent;
}; };
@ -257,27 +257,27 @@ struct Token {
}; };
Type type; Type type;
DeprecatedString value; String value;
Optional<AST::Position> position; Optional<AST::Position> position;
Vector<Expansion> expansions; Vector<Expansion> expansions;
Vector<ResolvedExpansion> resolved_expansions {}; Vector<ResolvedExpansion> resolved_expansions {};
StringView original_text; StringView original_text;
Optional<DeprecatedString> relevant_heredoc_key {}; Optional<String> relevant_heredoc_key {};
bool could_be_start_of_a_simple_command { false }; bool could_be_start_of_a_simple_command { false };
static Vector<Token> maybe_from_state(State const& state) static ErrorOr<Vector<Token>> maybe_from_state(State const& state)
{ {
if (state.buffer.is_empty() || state.buffer.string_view().trim_whitespace().is_empty()) if (state.buffer.is_empty() || state.buffer.string_view().trim_whitespace().is_empty())
return {}; return Vector<Token> {};
auto token = Token { auto token = Token {
.type = Type::Token, .type = Type::Token,
.value = state.buffer.to_deprecated_string(), .value = TRY(state.buffer.to_string()),
.position = state.position, .position = state.position,
.expansions = state.expansions, .expansions = state.expansions,
.original_text = {}, .original_text = {},
}; };
return { move(token) }; return Vector<Token> { move(token) };
} }
static Optional<Token::Type> operator_from_name(StringView name) static Optional<Token::Type> operator_from_name(StringView name)
@ -320,14 +320,14 @@ struct Token {
return {}; return {};
} }
static Vector<Token> operators_from(State const& state) static ErrorOr<Vector<Token>> operators_from(State const& state)
{ {
auto name = state.buffer.string_view(); auto name = TRY(state.buffer.to_string());
auto type = operator_from_name(name); auto type = operator_from_name(name);
if (!type.has_value()) if (!type.has_value())
return {}; return Vector<Token> {};
return { return Vector {
Token { Token {
.type = *type, .type = *type,
.value = name, .value = name,
@ -353,7 +353,7 @@ struct Token {
{ {
return { return {
.type = Type::Newline, .type = Type::Newline,
.value = "\n", .value = String::from_utf8_short_string("\n"sv),
.position = {}, .position = {},
.expansions = {}, .expansions = {},
.original_text = {}, .original_text = {},
@ -364,14 +364,14 @@ struct Token {
{ {
return { return {
.type = Type::Continuation, .type = Type::Continuation,
.value = DeprecatedString::formatted("{:c}", expected), .value = String::from_code_point(expected),
.position = {}, .position = {},
.expansions = {}, .expansions = {},
.original_text = {}, .original_text = {},
}; };
} }
static Token continuation(DeprecatedString expected) static Token continuation(String expected)
{ {
return { return {
.type = Type::Continuation, .type = Type::Continuation,
@ -392,10 +392,10 @@ public:
{ {
} }
Vector<Token> batch_next(Optional<Reduction> starting_reduction = {}); ErrorOr<Vector<Token>> batch_next(Optional<Reduction> starting_reduction = {});
struct HeredocKeyResult { struct HeredocKeyResult {
DeprecatedString key; String key;
bool allow_interpolation; bool allow_interpolation;
}; };
@ -407,21 +407,21 @@ private:
Reduction next_reduction { Reduction::None }; Reduction next_reduction { Reduction::None };
}; };
ReductionResult reduce(Reduction); ErrorOr<ReductionResult> reduce(Reduction);
ReductionResult reduce_end(); ErrorOr<ReductionResult> reduce_end();
ReductionResult reduce_operator(); ErrorOr<ReductionResult> reduce_operator();
ReductionResult reduce_comment(); ErrorOr<ReductionResult> reduce_comment();
ReductionResult reduce_single_quoted_string(); ErrorOr<ReductionResult> reduce_single_quoted_string();
ReductionResult reduce_double_quoted_string(); ErrorOr<ReductionResult> reduce_double_quoted_string();
ReductionResult reduce_expansion(); ErrorOr<ReductionResult> reduce_expansion();
ReductionResult reduce_command_expansion(); ErrorOr<ReductionResult> reduce_command_expansion();
ReductionResult reduce_start(); ErrorOr<ReductionResult> reduce_start();
ReductionResult reduce_arithmetic_expansion(); ErrorOr<ReductionResult> reduce_arithmetic_expansion();
ReductionResult reduce_special_parameter_expansion(); ErrorOr<ReductionResult> reduce_special_parameter_expansion();
ReductionResult reduce_parameter_expansion(); ErrorOr<ReductionResult> reduce_parameter_expansion();
ReductionResult reduce_command_or_arithmetic_substitution_expansion(); ErrorOr<ReductionResult> reduce_command_or_arithmetic_substitution_expansion();
ReductionResult reduce_extended_parameter_expansion(); ErrorOr<ReductionResult> reduce_extended_parameter_expansion();
ReductionResult reduce_heredoc_contents(); ErrorOr<ReductionResult> reduce_heredoc_contents();
char consume(); char consume();
bool consume_specific(char); bool consume_specific(char);

File diff suppressed because it is too large Load diff

View file

@ -18,7 +18,7 @@ public:
, m_in_interactive_mode(interactive) , m_in_interactive_mode(interactive)
, m_eof_token(Token::eof()) , m_eof_token(Token::eof())
{ {
fill_token_buffer(starting_reduction); (void)fill_token_buffer(starting_reduction);
} }
RefPtr<AST::Node> parse(); RefPtr<AST::Node> parse();
@ -31,9 +31,9 @@ public:
auto& errors() const { return m_errors; } auto& errors() const { return m_errors; }
private: private:
Optional<Token> next_expanded_token(Optional<Reduction> starting_reduction = {}); ErrorOr<Optional<Token>> next_expanded_token(Optional<Reduction> starting_reduction = {});
Vector<Token> perform_expansions(Vector<Token> tokens); Vector<Token> perform_expansions(Vector<Token> tokens);
void fill_token_buffer(Optional<Reduction> starting_reduction = {}); ErrorOr<void> fill_token_buffer(Optional<Reduction> starting_reduction = {});
void handle_heredoc_contents(); void handle_heredoc_contents();
Token const& peek() Token const& peek()
@ -66,34 +66,34 @@ private:
NonnullRefPtrVector<AST::Node> nodes; NonnullRefPtrVector<AST::Node> nodes;
}; };
RefPtr<AST::Node> parse_complete_command(); ErrorOr<RefPtr<AST::Node>> parse_complete_command();
RefPtr<AST::Node> parse_list(); ErrorOr<RefPtr<AST::Node>> parse_list();
RefPtr<AST::Node> parse_and_or(); ErrorOr<RefPtr<AST::Node>> parse_and_or();
RefPtr<AST::Node> parse_pipeline(); ErrorOr<RefPtr<AST::Node>> parse_pipeline();
RefPtr<AST::Node> parse_pipe_sequence(); ErrorOr<RefPtr<AST::Node>> parse_pipe_sequence();
RefPtr<AST::Node> parse_command(); ErrorOr<RefPtr<AST::Node>> parse_command();
RefPtr<AST::Node> parse_compound_command(); ErrorOr<RefPtr<AST::Node>> parse_compound_command();
RefPtr<AST::Node> parse_subshell(); ErrorOr<RefPtr<AST::Node>> parse_subshell();
RefPtr<AST::Node> parse_compound_list(); ErrorOr<RefPtr<AST::Node>> parse_compound_list();
RefPtr<AST::Node> parse_term(); ErrorOr<RefPtr<AST::Node>> parse_term();
RefPtr<AST::Node> parse_for_clause(); ErrorOr<RefPtr<AST::Node>> parse_for_clause();
RefPtr<AST::Node> parse_case_clause(); ErrorOr<RefPtr<AST::Node>> parse_case_clause();
CaseItemsResult parse_case_list(); ErrorOr<RefPtr<AST::Node>> parse_if_clause();
RefPtr<AST::Node> parse_if_clause(); ErrorOr<RefPtr<AST::Node>> parse_while_clause();
RefPtr<AST::Node> parse_while_clause(); ErrorOr<RefPtr<AST::Node>> parse_until_clause();
RefPtr<AST::Node> parse_until_clause(); ErrorOr<RefPtr<AST::Node>> parse_function_definition();
RefPtr<AST::Node> parse_function_definition(); ErrorOr<RefPtr<AST::Node>> parse_function_body();
RefPtr<AST::Node> parse_function_body(); ErrorOr<RefPtr<AST::Node>> parse_brace_group();
RefPtr<AST::Node> parse_brace_group(); ErrorOr<RefPtr<AST::Node>> parse_do_group();
RefPtr<AST::Node> parse_do_group(); ErrorOr<RefPtr<AST::Node>> parse_simple_command();
RefPtr<AST::Node> parse_simple_command(); ErrorOr<RefPtr<AST::Node>> parse_prefix();
RefPtr<AST::Node> parse_prefix(); ErrorOr<RefPtr<AST::Node>> parse_suffix();
RefPtr<AST::Node> parse_suffix(); ErrorOr<RefPtr<AST::Node>> parse_io_redirect();
RefPtr<AST::Node> parse_io_redirect(); ErrorOr<RefPtr<AST::Node>> parse_redirect_list();
RefPtr<AST::Node> parse_redirect_list(); ErrorOr<RefPtr<AST::Node>> parse_io_file(AST::Position, Optional<int> fd);
RefPtr<AST::Node> parse_io_file(AST::Position, Optional<int> fd); ErrorOr<RefPtr<AST::Node>> parse_io_here(AST::Position, Optional<int> fd);
RefPtr<AST::Node> parse_io_here(AST::Position, Optional<int> fd); ErrorOr<RefPtr<AST::Node>> parse_word();
RefPtr<AST::Node> parse_word(); ErrorOr<CaseItemsResult> parse_case_list();
template<typename... Ts> template<typename... Ts>
void error(Token const& token, CheckedFormatString<Ts...> fmt, Ts&&... args) void error(Token const& token, CheckedFormatString<Ts...> fmt, Ts&&... args)
@ -111,7 +111,7 @@ private:
Vector<Token> m_previous_token_buffer; Vector<Token> m_previous_token_buffer;
Vector<Error> m_errors; Vector<Error> m_errors;
HashMap<DeprecatedString, NonnullRefPtr<AST::Heredoc>> m_unprocessed_heredoc_entries; HashMap<String, NonnullRefPtr<AST::Heredoc>> m_unprocessed_heredoc_entries;
Token m_eof_token; Token m_eof_token;