瀏覽代碼

Shell: Add support for heredocs

Closes #4283.
Heredocs are implemented in a way that makes them feel more like a
string (and not a weird redirection, a la bash).
There are two tunables, whether the string is dedented (`<<-` vs `<<~`)
and whether it allows interpolation (quoted key vs not).
For those familiar with it, this is how Ruby handles them, and I feel it is the
most elegant heredoc syntax.
Unlike the oddjob that is bash, heredocs are treated exactly as normal
strings, and can be used _anywhere_ where a string can be used.
Heredoc contents are *required* to appear, in the same order the heredocs were
used, after the next newline seen while parsing the sequence they are used in.
For instance:
```sh
echo <<-doc1 <<-doc2 | blah blah
contents for doc1
doc1
contents for doc2
doc2
```
The typical nice errors are also implemented :^)
Ali Mohammad Pur 4 年之前
父節點
當前提交
3048274f5e

+ 72 - 0
Userland/Shell/AST.cpp

@@ -1307,6 +1307,78 @@ Glob::~Glob()
 {
 }
 
+// Prints this node followed by its end key, its interpolation flag and its contents
+// (or "(null)" when no contents are attached), each under its own label.
+void Heredoc::dump(int level) const
+{
+    Node::dump(level);
+
+    const int label_level = level + 1;
+    const int value_level = level + 2;
+
+    print_indented("(End Key)", label_level);
+    print_indented(m_end, value_level);
+
+    print_indented("(Allows Interpolation)", label_level);
+    print_indented(String::formatted("{}", m_allows_interpolation), value_level);
+
+    print_indented("(Contents)", label_level);
+    if (!m_contents) {
+        print_indented("(null)", value_level);
+        return;
+    }
+    m_contents->dump(value_level);
+}
+
+// Evaluates the heredoc by running its contents node.
+// Without deindentation the contents' value is returned as-is; with deindentation the result is a
+// StringValue in which every line is left-trimmed and terminated by '\n'.
+// Returns null when no contents are attached or when evaluating the contents yields null.
+RefPtr<Value> Heredoc::run(RefPtr<Shell> shell)
+{
+    // m_contents is nullable (dump() and highlight_in_editor() both handle that case), so a heredoc
+    // that never received its contents must not be dereferenced here.
+    if (!m_contents)
+        return nullptr;
+
+    auto value = m_contents->run(shell);
+    if (!m_deindent || !value)
+        return value;
+
+    // To deindent, first split to lines...
+    auto list = value->resolve_as_list(shell);
+    // The list better have one entry, otherwise we've put the wrong kind of node inside this heredoc
+    VERIFY(list.size() == 1);
+    auto lines = list.first().split_view('\n');
+
+    // Now just trim each line and put them back in a string
+    StringBuilder builder { list.first().length() };
+    for (auto& line : lines) {
+        builder.append(line.trim_whitespace(TrimMode::Left));
+        builder.append('\n');
+    }
+
+    return create<StringValue>(builder.to_string());
+}
+
+// Styles the heredoc's own span (yellow foreground, bold when first in a list, with a red
+// foreground unified in when no contents are attached) and then forwards to the contents, if any.
+void Heredoc::highlight_in_editor(Line::Editor& editor, Shell& shell, HighlightMetadata metadata)
+{
+    const bool has_contents = m_contents ? true : false;
+
+    Line::Style style { Line::Style::Foreground(Line::Style::XtermColor::Yellow) };
+    if (metadata.is_first_in_list)
+        style.unify_with({ Line::Style::Bold });
+    if (!has_contents)
+        style.unify_with({ Line::Style::Foreground(Line::Style::XtermColor::Red) }, true);
+
+    editor.stylize({ m_position.start_offset, m_position.end_offset }, style);
+
+    if (!has_contents)
+        return;
+    m_contents->highlight_in_editor(editor, shell, metadata);
+}
+
+// Delegates hit testing to the contents; yields an empty result when no contents are attached.
+HitTestResult Heredoc::hit_test_position(size_t offset) const
+{
+    if (m_contents)
+        return m_contents->hit_test_position(offset);
+
+    return {};
+}
+
+// Stores the position, the end key and the two flags; m_contents is not set here.
+Heredoc::Heredoc(Position position, String end, bool allow_interpolation, bool deindent)
+    : Node(move(position))
+    , m_end(move(end))
+    , m_allows_interpolation(allow_interpolation)
+    , m_deindent(deindent)
+{
+}
+
+// Nothing to release beyond what the members clean up themselves.
+Heredoc::~Heredoc() = default;
+
 void HistoryEvent::dump(int level) const
 {
     Node::dump(level);

+ 34 - 0
Userland/Shell/AST.h

@@ -474,6 +474,7 @@ public:
         ForLoop,
         FunctionDeclaration,
         Glob,
+        Heredoc,
         HistoryEvent,
         IfCond,
         ImmediateExpression,
@@ -1313,6 +1314,39 @@ private:
     NonnullRefPtr<Node> m_right;
 };
 
+// AST node for a heredoc.
+// The constructor only receives the end key and the two flags; the contents node is attached
+// afterwards through set_contents() and may therefore be null.
+class Heredoc final : public Node {
+public:
+    Heredoc(Position, String end, bool allow_interpolation, bool deindent);
+    virtual ~Heredoc();
+    virtual void visit(NodeVisitor& visitor) override { visitor.visit(this); }
+
+    const String& end() const { return m_end; }
+    bool allow_interpolation() const { return m_allows_interpolation; }
+    bool deindent() const { return m_deindent; }
+    const RefPtr<AST::Node>& contents() const { return m_contents; }
+
+    // Attaches the contents and mirrors their syntax-error state onto this node.
+    // A null `contents` is stored as-is and leaves this node's syntax-error state untouched,
+    // instead of being dereferenced.
+    void set_contents(RefPtr<AST::Node> contents)
+    {
+        m_contents = move(contents);
+        if (!m_contents)
+            return;
+
+        if (m_contents->is_syntax_error())
+            set_is_syntax_error(m_contents->syntax_error_node());
+        else
+            clear_syntax_error();
+    }
+
+private:
+    NODE(Heredoc);
+    virtual void dump(int level) const override;
+    virtual RefPtr<Value> run(RefPtr<Shell>) override;
+    virtual void highlight_in_editor(Line::Editor&, Shell&, HighlightMetadata = {}) override;
+    virtual HitTestResult hit_test_position(size_t) const override;
+    virtual RefPtr<Node> leftmost_trivial_literal() const override { return this; }
+
+    String m_end;                        // The end key of the heredoc.
+    bool m_allows_interpolation { false };
+    bool m_deindent { false };           // When set, run() left-trims every line of the contents.
+    RefPtr<AST::Node> m_contents;        // Null until set_contents() is called with a node.
+};
+
 class StringLiteral final : public Node {
 public:
     StringLiteral(Position, String);

+ 1 - 0
Userland/Shell/Forward.h

@@ -34,6 +34,7 @@ class Fd2FdRedirection;
 class FunctionDeclaration;
 class ForLoop;
 class Glob;
+class Heredoc;
 class HistoryEvent;
 class Execute;
 class IfCond;

+ 6 - 0
Userland/Shell/NodeVisitor.cpp

@@ -101,6 +101,12 @@ void NodeVisitor::visit(const AST::Glob*)
 {
 }
 
+// Descends into the heredoc's contents; a heredoc without contents has nothing to visit.
+void NodeVisitor::visit(const AST::Heredoc* node)
+{
+    auto& contents = node->contents();
+    if (!contents)
+        return;
+
+    contents->visit(*this);
+}
+
 void NodeVisitor::visit(const AST::HistoryEvent*)
 {
 }

+ 1 - 0
Userland/Shell/NodeVisitor.h

@@ -30,6 +30,7 @@ public:
     virtual void visit(const AST::FunctionDeclaration*);
     virtual void visit(const AST::ForLoop*);
     virtual void visit(const AST::Glob*);
+    virtual void visit(const AST::Heredoc*);
     virtual void visit(const AST::HistoryEvent*);
     virtual void visit(const AST::Execute*);
     virtual void visit(const AST::IfCond*);

+ 223 - 8
Userland/Shell/Parser.cpp

@@ -7,6 +7,7 @@
 #include "Parser.h"
 #include "Shell.h"
 #include <AK/AllOf.h>
+#include <AK/ScopeGuard.h>
 #include <AK/ScopedValueRollback.h>
 #include <AK/TemporaryChange.h>
 #include <ctype.h>
@@ -187,9 +188,47 @@ RefPtr<AST::Node> Parser::parse_toplevel()
 
 Parser::SequenceParseResult Parser::parse_sequence()
 {
-    consume_while(is_any_of(" \t\n;")); // ignore whitespaces or terminators without effect.
-
     NonnullRefPtrVector<AST::Node> left;
+    // Consumes terminators (';' and '\n', plus ' ' and '\t' when consider_tabs_and_spaces is set).
+    // While heredoc initiations are pending, each newline is followed by an attempt to parse the
+    // pending heredoc entries; on failure a SyntaxError node is appended to `left`.
+    auto read_terminators = [&](bool consider_tabs_and_spaces) {
+        if (m_heredoc_initiations.is_empty()) {
+        // Also the target of the goto in the failure path below.
+        discard_terminators:;
+            consume_while(is_any_of(consider_tabs_and_spaces ? " \t\n;" : "\n;"));
+        } else {
+            for (;;) {
+                if (consider_tabs_and_spaces && (peek() == '\t' || peek() == ' ')) {
+                    consume();
+                    continue;
+                }
+                if (peek() == ';') {
+                    consume();
+                    continue;
+                }
+                if (peek() == '\n') {
+                    auto rule_start = push_start();
+                    consume();
+                    // The heredoc entries are expected to start right after this newline.
+                    if (!parse_heredoc_entries()) {
+                        // Build one error message listing every heredoc that is still pending.
+                        StringBuilder error_builder;
+                        error_builder.append("Expected to find heredoc entries for ");
+                        bool first = true;
+                        for (auto& entry : m_heredoc_initiations) {
+                            // NOTE(review): the arguments are passed as column, then line, so this prints
+                            // "column:line" - confirm that this order is intended.
+                            if (first)
+                                error_builder.appendff("{} (at {}:{})", entry.end, entry.node->position().start_line.line_column, entry.node->position().start_line.line_number);
+                            else
+                                error_builder.appendff(", {} (at {}:{})", entry.end, entry.node->position().start_line.line_column, entry.node->position().start_line.line_number);
+                            first = false;
+                        }
+                        left.append(create<AST::SyntaxError>(error_builder.build(), true));
+                        // Just read the rest of the newlines
+                        goto discard_terminators;
+                    }
+                    continue;
+                }
+                // Anything else is not a terminator.
+                break;
+            }
+        }
+    };
+
+    read_terminators(true);
 
     auto rule_start = push_start();
     {
@@ -203,8 +242,10 @@ Parser::SequenceParseResult Parser::parse_sequence()
     switch (peek()) {
     case '}':
         return { move(left), {}, ShouldReadMoreSequences::No };
-    case ';':
-    case '\n': {
+    case '\n':
+        read_terminators(false);
+        [[fallthrough]];
+    case ';': {
         if (left.is_empty())
             break;
 
@@ -235,8 +276,10 @@ Parser::SequenceParseResult Parser::parse_sequence()
 
     pos_before_seps = save_offset();
     switch (peek()) {
-    case ';':
-    case '\n': {
+    case '\n':
+        read_terminators(false);
+        [[fallthrough]];
+    case ';': {
         consume_while(is_any_of("\n;"));
         auto pos_after_seps = save_offset();
         separator_positions.empend(pos_before_seps.offset, pos_after_seps.offset, pos_before_seps.line, pos_after_seps.line);
@@ -960,6 +1003,11 @@ RefPtr<AST::Node> Parser::parse_match_pattern()
 RefPtr<AST::Node> Parser::parse_redirection()
 {
     auto rule_start = push_start();
+
+    // heredoc entry
+    if (next_is("<<-") || next_is("<<~"))
+        return nullptr;
+
     auto pipe_fd = 0;
     auto number = consume_while(is_digit);
     if (number.is_empty()) {
@@ -1091,8 +1139,11 @@ RefPtr<AST::Node> Parser::parse_expression()
         return move(expr);
     };
 
-    if (strchr("&|)} ;<>\n", starting_char) != nullptr)
-        return nullptr;
+    // Heredocs are expressions, so allow them
+    if (!(next_is("<<-") || next_is("<<~"))) {
+        if (strchr("&|)} ;<>\n", starting_char) != nullptr)
+            return nullptr;
+    }
 
     if (m_extra_chars_not_allowed_in_barewords.contains_slow(starting_char))
         return nullptr;
@@ -1188,6 +1239,13 @@ RefPtr<AST::Node> Parser::parse_string_composite()
         return inline_command;
     }
 
+    if (auto heredoc = parse_heredoc_initiation_record()) {
+        if (auto next_part = parse_string_composite())
+            return create<AST::Juxtaposition>(heredoc.release_nonnull(), next_part.release_nonnull()); // Concatenate Heredoc StringComposite
+
+        return heredoc;
+    }
+
     return nullptr;
 }
 
@@ -1852,6 +1910,163 @@ RefPtr<AST::Node> Parser::parse_brace_expansion_spec()
     return create<AST::BraceExpansion>(move(subexpressions));
 }
 
+// Parses a heredoc initiator: '<' '<' ('-' | '~') followed by a bareword or a single-quoted end key.
+// Returns null if the input is not a heredoc initiator (restoring the position if the two '<' were
+// already consumed). Otherwise creates a Heredoc node, appends a record for it to
+// m_heredoc_initiations and returns the node, which always carries a syntax error at this point.
+RefPtr<AST::Node> Parser::parse_heredoc_initiation_record()
+{
+    if (!next_is("<<"))
+        return nullptr;
+
+    auto rule_start = push_start();
+
+    // Skip over the two '<' characters.
+    consume();
+    consume();
+
+    HeredocInitiationRecord record;
+    record.end = "<error>";
+
+    // '-' leaves deindent off, '~' turns it on; anything else is not a heredoc initiator.
+    const auto kind = peek();
+    if (kind != '-' && kind != '~') {
+        restore_to(*rule_start);
+        return nullptr;
+    }
+    record.deindent = kind == '~';
+    consume();
+
+    RefPtr<AST::SyntaxError> syntax_error_node;
+
+    // The end key is either a bareword (interpolation on) or a single-quoted string (interpolation off).
+    if (auto bareword = parse_bareword()) {
+        record.interpolate = true;
+        if (bareword->is_syntax_error())
+            syntax_error_node = bareword->syntax_error_node();
+        else
+            record.end = static_cast<AST::BarewordLiteral*>(bareword.ptr())->text();
+    } else if (peek() == '\'') {
+        consume();
+        record.end = consume_while(is_not('\''));
+        record.interpolate = false;
+        if (!expect('\''))
+            syntax_error_node = create<AST::SyntaxError>("Expected a terminating single quote", true);
+    } else {
+        syntax_error_node = create<AST::SyntaxError>("Expected a bareword or a single-quoted string literal for heredoc end key", true);
+    }
+
+    auto node = create<AST::Heredoc>(record.end, record.interpolate, record.deindent);
+    // Without a key error, the node is still marked as missing its contents.
+    if (!syntax_error_node)
+        syntax_error_node = create<AST::SyntaxError>(String::formatted("Expected heredoc contents for heredoc with end key '{}'", node->end()), true);
+    node->set_is_syntax_error(*syntax_error_node);
+
+    record.node = node;
+    m_heredoc_initiations.append(move(record));
+
+    return node;
+}
+
+// Reads the contents of every pending heredoc in m_heredoc_initiations, starting at the current
+// position, and attaches them to the corresponding Heredoc node via set_contents(). The list of
+// pending heredocs is cleared afterwards.
+// Problems (missing end key, invalid string) are recorded as syntax errors on the attached
+// contents; the return value is always true as written.
+bool Parser::parse_heredoc_entries()
+{
+    // Try to parse heredoc entries, in the order their initiation records were recorded
+    for (auto& record : m_heredoc_initiations) {
+        auto rule_start = push_start();
+        bool found_key = false;
+        if (!record.interpolate) {
+            // Since no interpolation is allowed, just read lines until we hit the key
+            Optional<Offset> last_line_offset;
+            for (;;) {
+                if (at_end())
+                    break;
+                if (peek() == '\n')
+                    consume();
+                // Remember where the line about to be read starts, so that the key line
+                // can be left out of the contents below.
+                last_line_offset = current_position();
+                auto line = consume_while(is_not('\n'));
+                if (peek() == '\n')
+                    consume();
+                // The key matches regardless of surrounding whitespace on its line.
+                if (line.trim_whitespace() == record.end) {
+                    found_key = true;
+                    break;
+                }
+            }
+
+            if (!last_line_offset.has_value())
+                last_line_offset = current_position();
+            // Now just wrap it in a StringLiteral and set it as the node's contents
+            auto node = create<AST::StringLiteral>(m_input.substring_view(rule_start->offset, last_line_offset->offset - rule_start->offset));
+            if (!found_key)
+                node->set_is_syntax_error(*create<AST::SyntaxError>(String::formatted("Expected to find the heredoc key '{}', but found Eof", record.end), true));
+            record.node->set_contents(move(node));
+        } else {
+            // Interpolation is allowed, so we're going to read doublequoted string innards
+            // until we find a line that contains the key
+            // Save the current end condition; it is reinstalled after the string has been parsed.
+            auto end_condition = move(m_end_condition);
+            found_key = false;
+            set_end_condition([this, end = record.end, &found_key] {
+                if (found_key)
+                    return true;
+                auto offset = current_position();
+                // Take the end condition out of the parser while probing ahead (presumably so that
+                // at_end() does not re-enter this lambda); the guard puts it back on every exit path.
+                auto cond = move(m_end_condition);
+                ScopeGuard guard {
+                    [&] {
+                        m_end_condition = move(cond);
+                    }
+                };
+                // Look ahead: is the next line the end key?
+                if (peek() == '\n') {
+                    consume();
+                    auto line = consume_while(is_not('\n'));
+                    if (peek() == '\n')
+                        consume();
+                    if (line.trim_whitespace() == end) {
+                        restore_to(offset.offset, offset.line);
+                        found_key = true;
+                        return true;
+                    }
+                }
+                // Not the key; undo the lookahead.
+                restore_to(offset.offset, offset.line);
+                return false;
+            });
+
+            auto expr = parse_doublequoted_string_inner();
+            set_end_condition(move(end_condition));
+
+            if (found_key) {
+                // Consume the key line itself; go back if it does not match the key after all.
+                auto offset = current_position();
+                if (peek() == '\n')
+                    consume();
+                auto line = consume_while(is_not('\n'));
+                if (peek() == '\n')
+                    consume();
+                if (line.trim_whitespace() != record.end)
+                    restore_to(offset.offset, offset.line);
+            }
+
+            // No expression but a found key means an empty heredoc; every other shortfall is a syntax error.
+            if (!expr && found_key) {
+                expr = create<AST::StringLiteral>("");
+            } else if (!expr) {
+                expr = create<AST::SyntaxError>(String::formatted("Expected to find a valid string inside a heredoc (with end key '{}')", record.end), true);
+            } else if (!found_key) {
+                expr->set_is_syntax_error(*create<AST::SyntaxError>(String::formatted("Expected to find the heredoc key '{}'", record.end), true));
+            }
+
+            record.node->set_contents(create<AST::DoubleQuotedString>(move(expr)));
+        }
+    }
+
+    m_heredoc_initiations.clear();
+    return true;
+}
+
 StringView Parser::consume_while(Function<bool(char)> condition)
 {
     if (at_end())

+ 27 - 2
Userland/Shell/Parser.h

@@ -46,6 +46,13 @@ private:
         ShouldReadMoreSequences decision;
     };
 
+    // Bookkeeping for a heredoc initiator whose contents have not been read yet.
+    struct HeredocInitiationRecord {
+        // The end key that terminates the heredoc's contents.
+        String end;
+        // The Heredoc node that receives the contents once they are parsed.
+        RefPtr<AST::Heredoc> node;
+        // True for a bareword key, false for a single-quoted key.
+        bool interpolate { false };
+        // True for `<<~`, false for `<<-`.
+        bool deindent { false };
+    };
+
     constexpr static size_t max_allowed_nested_rule_depth = 2048;
     RefPtr<AST::Node> parse_toplevel();
     SequenceParseResult parse_sequence();
@@ -81,11 +88,19 @@ private:
     RefPtr<AST::Node> parse_brace_expansion();
     RefPtr<AST::Node> parse_brace_expansion_spec();
     RefPtr<AST::Node> parse_immediate_expression();
+    RefPtr<AST::Node> parse_heredoc_initiation_record();
+    bool parse_heredoc_entries();
 
     template<typename A, typename... Args>
     NonnullRefPtr<A> create(Args... args);
 
-    bool at_end() const { return m_input.length() <= m_offset; }
+    // Installs the extra predicate that at_end() consults before looking at the input length.
+    void set_end_condition(Function<bool()> condition) { m_end_condition = move(condition); }
+    // True when an installed end condition reports the end, or when the offset has reached the input length.
+    bool at_end() const
+    {
+        const bool stopped_by_condition = m_end_condition && m_end_condition();
+        return stopped_by_condition || m_offset >= m_input.length();
+    }
     char peek();
     char consume();
     bool expect(char);
@@ -144,6 +159,8 @@ private:
     Vector<size_t> m_rule_start_offsets;
     Vector<AST::Position::Line> m_rule_start_lines;
 
+    Function<bool()> m_end_condition;
+    Vector<HeredocInitiationRecord> m_heredoc_initiations;
     Vector<char> m_extra_chars_not_allowed_in_barewords;
     bool m_is_in_brace_expansion_spec { false };
     bool m_continuation_controls_allowed { false };
@@ -169,7 +186,9 @@ and_logical_sequence :: pipe_sequence '&' '&' and_logical_sequence
                       | pipe_sequence
 
 terminator :: ';'
-            | '\n'
+            | '\n' [?!heredoc_stack.is_empty] heredoc_entries
+
+heredoc_entries :: { .*? (heredoc_entry) '\n' } [each heredoc_entries]
 
 variable_decls :: identifier '=' expression (' '+ variable_decls)? ' '*
                 | identifier '=' '(' pipe_sequence ')' (' '+ variable_decls)? ' '*
@@ -233,6 +252,12 @@ string_composite :: string string_composite?
                   | bareword string_composite?
                   | glob string_composite?
                   | brace_expansion string_composite?
+                  | heredoc_initiator string_composite?    {append to heredoc_entries}
+
+heredoc_initiator :: '<' '<' '-' bareword         {*bareword, interpolate, no deindent}
+                   | '<' '<' '-' "'" [^']* "'"    {*string, no interpolate, no deindent}
+                   | '<' '<' '~' bareword         {*bareword, interpolate, deindent}
+                   | '<' '<' '~' "'" [^']* "'"    {*string, no interpolate, deindent}
 
 string :: '"' dquoted_string_inner '"'
         | "'" [^']* "'"