parser.go

// Package parser implements a parser and parse tree dumper for Dockerfiles.
package parser

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	"regexp"
	"strings"
	"unicode"

	"github.com/docker/docker/builder/dockerfile/command"
)

// Node is a structure used to represent a parse tree.
//
// In the node there are three fields, Value, Next, and Children. Value is the
// current token's string value. Next is always the next non-child token, and
// Children contains all the children. Here's an example:
//
// (value next (child child-next child-next-next) next-next)
//
// This data structure is frankly pretty lousy for handling complex languages,
// but lucky for us the Dockerfile isn't very complicated. This structure
// works a little more effectively than a "proper" parse tree for our needs.
type Node struct {
	Value      string          // actual content
	Next       *Node           // the next item in the current sexp
	Children   []*Node         // the children of this sexp
	Attributes map[string]bool // special attributes for this node
	Original   string          // original line used before parsing
	Flags      []string        // only top Node should have this set
	StartLine  int             // the line in the original dockerfile where the node begins
	EndLine    int             // the line in the original dockerfile where the node ends
}
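
// Illustrative sketch (hypothetical helper, not from the upstream source):
// hand-building the node chain that an instruction such as `ENV key value`
// would roughly produce, to make the Value/Next layout described above
// concrete.
func exampleEnvNode() *Node {
	valueNode := &Node{Value: "value"}
	keyNode := &Node{Value: "key", Next: valueNode}
	return &Node{
		Value:    "env",           // the instruction keyword
		Next:     keyNode,         // first argument; later arguments hang off Next
		Original: "ENV key value", // the raw line as written in the Dockerfile
	}
}
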
var (
	dispatch              map[string]func(string) (*Node, map[string]bool, error)
	tokenWhitespace       = regexp.MustCompile(`[\t\v\f\r ]+`)
	tokenLineContinuation *regexp.Regexp
	tokenEscape           rune
	tokenEscapeCommand    = regexp.MustCompile(`^#[ \t]*escape[ \t]*=[ \t]*(?P<escapechar>.).*$`)
	tokenComment          = regexp.MustCompile(`^#.*$`)
	lookingForDirectives  bool
	directiveEscapeSeen   bool
)

const defaultTokenEscape = "\\"

// setTokenEscape sets the token used for escaping characters in a Dockerfile.
func setTokenEscape(s string) error {
	if s != "`" && s != "\\" {
		return fmt.Errorf("invalid ESCAPE '%s'. Must be ` or \\", s)
	}
	tokenEscape = rune(s[0])
	tokenLineContinuation = regexp.MustCompile(`\` + s + `[ \t]*$`)
	return nil
}
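
// Illustrative sketch (hypothetical helper, not from the upstream source):
// switching the escape token to a backtick, as a Dockerfile would request via
// the `# escape=` parser directive, changes which trailing character marks a
// line continuation.
func exampleBacktickEscape() bool {
	if err := setTokenEscape("`"); err != nil {
		return false
	}
	continued := tokenLineContinuation.MatchString("RUN dir `")
	setTokenEscape(defaultTokenEscape) // restore the default for other callers
	return continued                   // true: the line is treated as continued
}
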
func init() {
	// Dispatch Table. see line_parsers.go for the parse functions.
	// The command is parsed and mapped to the line parser. The line parser
	// receives the arguments but not the command, and returns an AST after
	// reformulating the arguments according to the rules in the parser
	// functions. Errors are propagated up by Parse() and the resulting AST can
	// be incorporated directly into the existing AST as a next.
	dispatch = map[string]func(string) (*Node, map[string]bool, error){
		command.User:        parseString,
		command.Onbuild:     parseSubCommand,
		command.Workdir:     parseString,
		command.Env:         parseEnv,
		command.Label:       parseLabel,
		command.Maintainer:  parseString,
		command.From:        parseString,
		command.Add:         parseMaybeJSONToList,
		command.Copy:        parseMaybeJSONToList,
		command.Run:         parseMaybeJSON,
		command.Cmd:         parseMaybeJSON,
		command.Entrypoint:  parseMaybeJSON,
		command.Expose:      parseStringsWhitespaceDelimited,
		command.Volume:      parseMaybeJSONToList,
		command.StopSignal:  parseString,
		command.Arg:         parseNameOrNameVal,
		command.Healthcheck: parseHealthConfig,
	}
}
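
// Illustrative sketch (hypothetical helper, not from the upstream source):
// how a raw instruction keyword maps onto the dispatch table above. The real
// lookup is performed by fullDispatch, defined elsewhere in this package;
// this only shows the shape of the table.
func exampleHasParser(cmd string) bool {
	_, ok := dispatch[strings.ToLower(cmd)]
	return ok // true for known instructions such as "run", false otherwise
}
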
// ParseLine parses a line and returns the remainder.
func ParseLine(line string) (string, *Node, error) {
	// Handle the parser directive '# escape=<char>'. Parser directives must precede
	// any builder instruction or other comments, and cannot be repeated.
	if lookingForDirectives {
		tecMatch := tokenEscapeCommand.FindStringSubmatch(strings.ToLower(line))
		if len(tecMatch) > 0 {
			if directiveEscapeSeen {
				return "", nil, fmt.Errorf("only one escape parser directive can be used")
			}
			for i, n := range tokenEscapeCommand.SubexpNames() {
				if n == "escapechar" {
					if err := setTokenEscape(tecMatch[i]); err != nil {
						return "", nil, err
					}
					directiveEscapeSeen = true
					return "", nil, nil
				}
			}
		}
	}

	lookingForDirectives = false

	if line = stripComments(line); line == "" {
		return "", nil, nil
	}

	if tokenLineContinuation.MatchString(line) {
		line = tokenLineContinuation.ReplaceAllString(line, "")
		return line, nil, nil
	}

	cmd, flags, args, err := splitCommand(line)
	if err != nil {
		return "", nil, err
	}

	node := &Node{}
	node.Value = cmd

	sexp, attrs, err := fullDispatch(cmd, args)
	if err != nil {
		return "", nil, err
	}

	node.Next = sexp
	node.Attributes = attrs
	node.Original = line
	node.Flags = flags

	return "", node, nil
}
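
// Usage sketch (hypothetical helper, not from the upstream source): when a
// line ends with the escape token, ParseLine returns the stripped remainder
// with a nil node, signalling that the caller should append the next physical
// line and parse again, which is what Parse does below.
func exampleLineContinuation() (*Node, error) {
	if err := setTokenEscape(defaultTokenEscape); err != nil {
		return nil, err // ParseLine needs the escape token to be initialised
	}
	remainder, node, err := ParseLine(`RUN apt-get update && \`)
	if err != nil {
		return nil, err
	}
	if node == nil && remainder != "" {
		// The instruction is incomplete; feed in the following line and re-parse.
		_, node, err = ParseLine(remainder + "apt-get install -y curl")
	}
	return node, err
}
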
// Parse is the main parse routine.
// It reads a Dockerfile from an io.Reader and returns the root of the AST.
func Parse(rwc io.Reader) (*Node, error) {
	directiveEscapeSeen = false
	lookingForDirectives = true
	setTokenEscape(defaultTokenEscape) // Assume the default token for escape

	currentLine := 0
	root := &Node{}
	root.StartLine = -1
	scanner := bufio.NewScanner(rwc)

	utf8bom := []byte{0xEF, 0xBB, 0xBF}
	for scanner.Scan() {
		scannedBytes := scanner.Bytes()
		// Trim the UTF-8 BOM from the first line, if present.
		if currentLine == 0 {
			scannedBytes = bytes.TrimPrefix(scannedBytes, utf8bom)
		}
		scannedLine := strings.TrimLeftFunc(string(scannedBytes), unicode.IsSpace)
		currentLine++
		line, child, err := ParseLine(scannedLine)
		if err != nil {
			return nil, err
		}
		startLine := currentLine

		if line != "" && child == nil {
			for scanner.Scan() {
				newline := scanner.Text()
				currentLine++

				if stripComments(strings.TrimSpace(newline)) == "" {
					continue
				}

				line, child, err = ParseLine(line + newline)
				if err != nil {
					return nil, err
				}

				if child != nil {
					break
				}
			}
			if child == nil && line != "" {
				_, child, err = ParseLine(line)
				if err != nil {
					return nil, err
				}
			}
		}

		if child != nil {
			// Update the line information for the current child.
			child.StartLine = startLine
			child.EndLine = currentLine

			// Update the line information for the root. The starting line of the root
			// is always the starting line of the first child and the ending line is
			// the ending line of the last child.
			if root.StartLine < 0 {
				root.StartLine = currentLine
			}
			root.EndLine = currentLine
			root.Children = append(root.Children, child)
		}
	}

	return root, nil
}
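
// Usage sketch (hypothetical helper and Dockerfile contents, not from the
// upstream source): parsing a small in-memory Dockerfile and walking the
// top-level instructions collected under root.Children.
func exampleParse() ([]string, error) {
	dockerfile := "FROM busybox\nRUN echo hello\nCMD [\"sh\"]\n"
	root, err := Parse(strings.NewReader(dockerfile))
	if err != nil {
		return nil, err
	}
	values := make([]string, 0, len(root.Children))
	for _, child := range root.Children {
		// child.Value holds the lowercased instruction keyword, e.g. "from", "run", "cmd".
		values = append(values, child.Value)
	}
	return values, nil
}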