123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361 |
- package parser
- // line parsers are dispatch calls that parse a single unit of text into a
- // Node object which contains the whole statement. Dockerfiles have varied
- // (but not usually unique, see ONBUILD for a unique example) parsing rules
- // per-command, and these unify the processing in a way that makes it
- // manageable.
- import (
- "encoding/json"
- "errors"
- "fmt"
- "strings"
- "unicode"
- "unicode/utf8"
- )
// errDockerfileNotStringArray is the sentinel error reported by parseJSON
// when a JSON array argument contains an element that is not a string.
var errDockerfileNotStringArray = errors.New("When using JSON array syntax, arrays must be comprised of strings only.")
- // ignore the current argument. This will still leave a command parsed, but
- // will not incorporate the arguments into the ast.
- func parseIgnore(rest string, d *Directive) (*Node, map[string]bool, error) {
- return &Node{}, nil, nil
- }
- // used for onbuild. Could potentially be used for anything that represents a
- // statement with sub-statements.
- //
- // ONBUILD RUN foo bar -> (onbuild (run foo bar))
- //
- func parseSubCommand(rest string, d *Directive) (*Node, map[string]bool, error) {
- if rest == "" {
- return nil, nil, nil
- }
- _, child, err := ParseLine(rest, d)
- if err != nil {
- return nil, nil, err
- }
- return &Node{Children: []*Node{child}}, nil, nil
- }
- // helper to parse words (i.e space delimited or quoted strings) in a statement.
- // The quotes are preserved as part of this function and they are stripped later
- // as part of processWords().
- func parseWords(rest string, d *Directive) []string {
- const (
- inSpaces = iota // looking for start of a word
- inWord
- inQuote
- )
- words := []string{}
- phase := inSpaces
- word := ""
- quote := '\000'
- blankOK := false
- var ch rune
- var chWidth int
- for pos := 0; pos <= len(rest); pos += chWidth {
- if pos != len(rest) {
- ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
- }
- if phase == inSpaces { // Looking for start of word
- if pos == len(rest) { // end of input
- break
- }
- if unicode.IsSpace(ch) { // skip spaces
- continue
- }
- phase = inWord // found it, fall through
- }
- if (phase == inWord || phase == inQuote) && (pos == len(rest)) {
- if blankOK || len(word) > 0 {
- words = append(words, word)
- }
- break
- }
- if phase == inWord {
- if unicode.IsSpace(ch) {
- phase = inSpaces
- if blankOK || len(word) > 0 {
- words = append(words, word)
- }
- word = ""
- blankOK = false
- continue
- }
- if ch == '\'' || ch == '"' {
- quote = ch
- blankOK = true
- phase = inQuote
- }
- if ch == d.EscapeToken {
- if pos+chWidth == len(rest) {
- continue // just skip an escape token at end of line
- }
- // If we're not quoted and we see an escape token, then always just
- // add the escape token plus the char to the word, even if the char
- // is a quote.
- word += string(ch)
- pos += chWidth
- ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
- }
- word += string(ch)
- continue
- }
- if phase == inQuote {
- if ch == quote {
- phase = inWord
- }
- // The escape token is special except for ' quotes - can't escape anything for '
- if ch == d.EscapeToken && quote != '\'' {
- if pos+chWidth == len(rest) {
- phase = inWord
- continue // just skip the escape token at end
- }
- pos += chWidth
- word += string(ch)
- ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
- }
- word += string(ch)
- }
- }
- return words
- }
- // parse environment like statements. Note that this does *not* handle
- // variable interpolation, which will be handled in the evaluator.
- func parseNameVal(rest string, key string, d *Directive) (*Node, map[string]bool, error) {
- // This is kind of tricky because we need to support the old
- // variant: KEY name value
- // as well as the new one: KEY name=value ...
- // The trigger to know which one is being used will be whether we hit
- // a space or = first. space ==> old, "=" ==> new
- words := parseWords(rest, d)
- if len(words) == 0 {
- return nil, nil, nil
- }
- var rootnode *Node
- // Old format (KEY name value)
- if !strings.Contains(words[0], "=") {
- node := &Node{}
- rootnode = node
- strs := tokenWhitespace.Split(rest, 2)
- if len(strs) < 2 {
- return nil, nil, fmt.Errorf(key + " must have two arguments")
- }
- node.Value = strs[0]
- node.Next = &Node{}
- node.Next.Value = strs[1]
- } else {
- var prevNode *Node
- for i, word := range words {
- if !strings.Contains(word, "=") {
- return nil, nil, fmt.Errorf("Syntax error - can't find = in %q. Must be of the form: name=value", word)
- }
- parts := strings.SplitN(word, "=", 2)
- name := &Node{}
- value := &Node{}
- name.Next = value
- name.Value = parts[0]
- value.Value = parts[1]
- if i == 0 {
- rootnode = name
- } else {
- prevNode.Next = name
- }
- prevNode = value
- }
- }
- return rootnode, nil, nil
- }
- func parseEnv(rest string, d *Directive) (*Node, map[string]bool, error) {
- return parseNameVal(rest, "ENV", d)
- }
- func parseLabel(rest string, d *Directive) (*Node, map[string]bool, error) {
- return parseNameVal(rest, "LABEL", d)
- }
- // parses a statement containing one or more keyword definition(s) and/or
- // value assignments, like `name1 name2= name3="" name4=value`.
- // Note that this is a stricter format than the old format of assignment,
- // allowed by parseNameVal(), in a way that this only allows assignment of the
- // form `keyword=[<value>]` like `name2=`, `name3=""`, and `name4=value` above.
- // In addition, a keyword definition alone is of the form `keyword` like `name1`
- // above. And the assignments `name2=` and `name3=""` are equivalent and
- // assign an empty value to the respective keywords.
- func parseNameOrNameVal(rest string, d *Directive) (*Node, map[string]bool, error) {
- words := parseWords(rest, d)
- if len(words) == 0 {
- return nil, nil, nil
- }
- var (
- rootnode *Node
- prevNode *Node
- )
- for i, word := range words {
- node := &Node{}
- node.Value = word
- if i == 0 {
- rootnode = node
- } else {
- prevNode.Next = node
- }
- prevNode = node
- }
- return rootnode, nil, nil
- }
- // parses a whitespace-delimited set of arguments. The result is effectively a
- // linked list of string arguments.
- func parseStringsWhitespaceDelimited(rest string, d *Directive) (*Node, map[string]bool, error) {
- if rest == "" {
- return nil, nil, nil
- }
- node := &Node{}
- rootnode := node
- prevnode := node
- for _, str := range tokenWhitespace.Split(rest, -1) { // use regexp
- prevnode = node
- node.Value = str
- node.Next = &Node{}
- node = node.Next
- }
- // XXX to get around regexp.Split *always* providing an empty string at the
- // end due to how our loop is constructed, nil out the last node in the
- // chain.
- prevnode.Next = nil
- return rootnode, nil, nil
- }
- // parsestring just wraps the string in quotes and returns a working node.
- func parseString(rest string, d *Directive) (*Node, map[string]bool, error) {
- if rest == "" {
- return nil, nil, nil
- }
- n := &Node{}
- n.Value = rest
- return n, nil, nil
- }
- // parseJSON converts JSON arrays to an AST.
- func parseJSON(rest string, d *Directive) (*Node, map[string]bool, error) {
- rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
- if !strings.HasPrefix(rest, "[") {
- return nil, nil, fmt.Errorf(`Error parsing "%s" as a JSON array`, rest)
- }
- var myJSON []interface{}
- if err := json.NewDecoder(strings.NewReader(rest)).Decode(&myJSON); err != nil {
- return nil, nil, err
- }
- var top, prev *Node
- for _, str := range myJSON {
- s, ok := str.(string)
- if !ok {
- return nil, nil, errDockerfileNotStringArray
- }
- node := &Node{Value: s}
- if prev == nil {
- top = node
- } else {
- prev.Next = node
- }
- prev = node
- }
- return top, map[string]bool{"json": true}, nil
- }
- // parseMaybeJSON determines if the argument appears to be a JSON array. If
- // so, passes to parseJSON; if not, quotes the result and returns a single
- // node.
- func parseMaybeJSON(rest string, d *Directive) (*Node, map[string]bool, error) {
- if rest == "" {
- return nil, nil, nil
- }
- node, attrs, err := parseJSON(rest, d)
- if err == nil {
- return node, attrs, nil
- }
- if err == errDockerfileNotStringArray {
- return nil, nil, err
- }
- node = &Node{}
- node.Value = rest
- return node, nil, nil
- }
- // parseMaybeJSONToList determines if the argument appears to be a JSON array. If
- // so, passes to parseJSON; if not, attempts to parse it as a whitespace
- // delimited string.
- func parseMaybeJSONToList(rest string, d *Directive) (*Node, map[string]bool, error) {
- node, attrs, err := parseJSON(rest, d)
- if err == nil {
- return node, attrs, nil
- }
- if err == errDockerfileNotStringArray {
- return nil, nil, err
- }
- return parseStringsWhitespaceDelimited(rest, d)
- }
- // The HEALTHCHECK command is like parseMaybeJSON, but has an extra type argument.
- func parseHealthConfig(rest string, d *Directive) (*Node, map[string]bool, error) {
- // Find end of first argument
- var sep int
- for ; sep < len(rest); sep++ {
- if unicode.IsSpace(rune(rest[sep])) {
- break
- }
- }
- next := sep
- for ; next < len(rest); next++ {
- if !unicode.IsSpace(rune(rest[next])) {
- break
- }
- }
- if sep == 0 {
- return nil, nil, nil
- }
- typ := rest[:sep]
- cmd, attrs, err := parseMaybeJSON(rest[next:], d)
- if err != nil {
- return nil, nil, err
- }
- return &Node{Value: typ, Next: cmd}, attrs, err
- }
|