parser.go 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. // Package parser implements a parser and parse tree dumper for Dockerfiles.
  2. package parser
  3. import (
  4. "bufio"
  5. "io"
  6. "regexp"
  7. "strings"
  8. "unicode"
  9. "github.com/docker/docker/builder/dockerfile/command"
  10. )
  // Node is one element of the parse tree built for a Dockerfile.
  //
  // Three fields give the tree its shape: Value, Next, and Children. Value is
  // the string value of the current token, Next is always the next non-child
  // token, and Children holds all of the node's children. Written out as an
  // s-expression the layout is:
  //
  //	(value next (child child-next child-next-next) next-next)
  //
  // This layout would be a poor choice for a complex language, but the
  // Dockerfile format is simple enough that it works more effectively here
  // than a "proper" parse tree.
  type Node struct {
  	// Value is the actual content of the token.
  	Value string
  	// Next is the next item in the current sexp.
  	Next *Node
  	// Children are the children of this sexp.
  	Children []*Node
  	// Attributes are special attributes for this node.
  	Attributes map[string]bool
  	// Original is the original line used before parsing.
  	Original string
  	// Flags should only be set on the top Node.
  	Flags []string
  	// StartLine is the line in the original Dockerfile where the node begins.
  	StartLine int
  	// EndLine is the line in the original Dockerfile where the node ends.
  	EndLine int
  }
  33. var (
  34. dispatch map[string]func(string) (*Node, map[string]bool, error)
  35. tokenWhitespace = regexp.MustCompile(`[\t\v\f\r ]+`)
  36. tokenLineContinuation = regexp.MustCompile(`\\[ \t]*$`)
  37. tokenComment = regexp.MustCompile(`^#.*$`)
  38. )
  39. func init() {
  40. // Dispatch Table. see line_parsers.go for the parse functions.
  41. // The command is parsed and mapped to the line parser. The line parser
  42. // receives the arguments but not the command, and returns an AST after
  43. // reformulating the arguments according to the rules in the parser
  44. // functions. Errors are propagated up by Parse() and the resulting AST can
  45. // be incorporated directly into the existing AST as a next.
  46. dispatch = map[string]func(string) (*Node, map[string]bool, error){
  47. command.User: parseString,
  48. command.Onbuild: parseSubCommand,
  49. command.Workdir: parseString,
  50. command.Env: parseEnv,
  51. command.Label: parseLabel,
  52. command.Maintainer: parseString,
  53. command.From: parseString,
  54. command.Add: parseMaybeJSONToList,
  55. command.Copy: parseMaybeJSONToList,
  56. command.Run: parseMaybeJSON,
  57. command.Cmd: parseMaybeJSON,
  58. command.Entrypoint: parseMaybeJSON,
  59. command.Expose: parseStringsWhitespaceDelimited,
  60. command.Volume: parseMaybeJSONToList,
  61. command.StopSignal: parseString,
  62. command.Arg: parseNameOrNameVal,
  63. }
  64. }
  65. // parse a line and return the remainder.
  66. func parseLine(line string) (string, *Node, error) {
  67. if line = stripComments(line); line == "" {
  68. return "", nil, nil
  69. }
  70. if tokenLineContinuation.MatchString(line) {
  71. line = tokenLineContinuation.ReplaceAllString(line, "")
  72. return line, nil, nil
  73. }
  74. cmd, flags, args, err := splitCommand(line)
  75. if err != nil {
  76. return "", nil, err
  77. }
  78. node := &Node{}
  79. node.Value = cmd
  80. sexp, attrs, err := fullDispatch(cmd, args)
  81. if err != nil {
  82. return "", nil, err
  83. }
  84. node.Next = sexp
  85. node.Attributes = attrs
  86. node.Original = line
  87. node.Flags = flags
  88. return "", node, nil
  89. }
  90. // Parse is the main parse routine.
  91. // It handles an io.ReadWriteCloser and returns the root of the AST.
  92. func Parse(rwc io.Reader) (*Node, error) {
  93. currentLine := 0
  94. root := &Node{}
  95. root.StartLine = -1
  96. scanner := bufio.NewScanner(rwc)
  97. for scanner.Scan() {
  98. scannedLine := strings.TrimLeftFunc(scanner.Text(), unicode.IsSpace)
  99. currentLine++
  100. line, child, err := parseLine(scannedLine)
  101. if err != nil {
  102. return nil, err
  103. }
  104. startLine := currentLine
  105. if line != "" && child == nil {
  106. for scanner.Scan() {
  107. newline := scanner.Text()
  108. currentLine++
  109. if stripComments(strings.TrimSpace(newline)) == "" {
  110. continue
  111. }
  112. line, child, err = parseLine(line + newline)
  113. if err != nil {
  114. return nil, err
  115. }
  116. if child != nil {
  117. break
  118. }
  119. }
  120. if child == nil && line != "" {
  121. _, child, err = parseLine(line)
  122. if err != nil {
  123. return nil, err
  124. }
  125. }
  126. }
  127. if child != nil {
  128. // Update the line information for the current child.
  129. child.StartLine = startLine
  130. child.EndLine = currentLine
  131. // Update the line information for the root. The starting line of the root is always the
  132. // starting line of the first child and the ending line is the ending line of the last child.
  133. if root.StartLine < 0 {
  134. root.StartLine = currentLine
  135. }
  136. root.EndLine = currentLine
  137. root.Children = append(root.Children, child)
  138. }
  139. }
  140. return root, nil
  141. }