parser.go 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213
  1. // Package parser implements a parser and parse tree dumper for Dockerfiles.
  2. package parser
  3. import (
  4. "bufio"
  5. "bytes"
  6. "fmt"
  7. "io"
  8. "regexp"
  9. "strings"
  10. "unicode"
  11. "github.com/docker/docker/builder/dockerfile/command"
  12. )
  13. // Node is a structure used to represent a parse tree.
  14. //
  15. // In the node there are three fields, Value, Next, and Children. Value is the
  16. // current token's string value. Next is always the next non-child token, and
  17. // children contains all the children. Here's an example:
  18. //
  19. // (value next (child child-next child-next-next) next-next)
  20. //
  21. // This data structure is frankly pretty lousy for handling complex languages,
  22. // but lucky for us the Dockerfile isn't very complicated. This structure
  23. // works a little more effectively than a "proper" parse tree for our needs.
  24. //
  25. type Node struct {
  26. Value string // actual content
  27. Next *Node // the next item in the current sexp
  28. Children []*Node // the children of this sexp
  29. Attributes map[string]bool // special attributes for this node
  30. Original string // original line used before parsing
  31. Flags []string // only top Node should have this set
  32. StartLine int // the line in the original dockerfile where the node begins
  33. EndLine int // the line in the original dockerfile where the node ends
  34. }
  35. var (
  36. dispatch map[string]func(string) (*Node, map[string]bool, error)
  37. tokenWhitespace = regexp.MustCompile(`[\t\v\f\r ]+`)
  38. tokenLineContinuation *regexp.Regexp
  39. tokenEscape rune
  40. tokenEscapeCommand = regexp.MustCompile(`^#[ \t]*escape[ \t]*=[ \t]*(?P<escapechar>.).*$`)
  41. tokenComment = regexp.MustCompile(`^#.*$`)
  42. lookingForDirectives bool
  43. directiveEscapeSeen bool
  44. )
  45. const defaultTokenEscape = "\\"
  46. // setTokenEscape sets the default token for escaping characters in a Dockerfile.
  47. func setTokenEscape(s string) error {
  48. if s != "`" && s != "\\" {
  49. return fmt.Errorf("invalid ESCAPE '%s'. Must be ` or \\", s)
  50. }
  51. tokenEscape = rune(s[0])
  52. tokenLineContinuation = regexp.MustCompile(`\` + s + `[ \t]*$`)
  53. return nil
  54. }
  55. func init() {
  56. // Dispatch Table. see line_parsers.go for the parse functions.
  57. // The command is parsed and mapped to the line parser. The line parser
  58. // receives the arguments but not the command, and returns an AST after
  59. // reformulating the arguments according to the rules in the parser
  60. // functions. Errors are propagated up by Parse() and the resulting AST can
  61. // be incorporated directly into the existing AST as a next.
  62. dispatch = map[string]func(string) (*Node, map[string]bool, error){
  63. command.Add: parseMaybeJSONToList,
  64. command.Arg: parseNameOrNameVal,
  65. command.Cmd: parseMaybeJSON,
  66. command.Copy: parseMaybeJSONToList,
  67. command.Entrypoint: parseMaybeJSON,
  68. command.Env: parseEnv,
  69. command.Expose: parseStringsWhitespaceDelimited,
  70. command.From: parseString,
  71. command.Healthcheck: parseHealthConfig,
  72. command.Label: parseLabel,
  73. command.Maintainer: parseString,
  74. command.Onbuild: parseSubCommand,
  75. command.Run: parseMaybeJSON,
  76. command.Shell: parseMaybeJSON,
  77. command.StopSignal: parseString,
  78. command.User: parseString,
  79. command.Volume: parseMaybeJSONToList,
  80. command.Workdir: parseString,
  81. }
  82. }
  83. // ParseLine parse a line and return the remainder.
  84. func ParseLine(line string) (string, *Node, error) {
  85. // Handle the parser directive '# escape=<char>. Parser directives must precede
  86. // any builder instruction or other comments, and cannot be repeated.
  87. if lookingForDirectives {
  88. tecMatch := tokenEscapeCommand.FindStringSubmatch(strings.ToLower(line))
  89. if len(tecMatch) > 0 {
  90. if directiveEscapeSeen == true {
  91. return "", nil, fmt.Errorf("only one escape parser directive can be used")
  92. }
  93. for i, n := range tokenEscapeCommand.SubexpNames() {
  94. if n == "escapechar" {
  95. if err := setTokenEscape(tecMatch[i]); err != nil {
  96. return "", nil, err
  97. }
  98. directiveEscapeSeen = true
  99. return "", nil, nil
  100. }
  101. }
  102. }
  103. }
  104. lookingForDirectives = false
  105. if line = stripComments(line); line == "" {
  106. return "", nil, nil
  107. }
  108. if tokenLineContinuation.MatchString(line) {
  109. line = tokenLineContinuation.ReplaceAllString(line, "")
  110. return line, nil, nil
  111. }
  112. cmd, flags, args, err := splitCommand(line)
  113. if err != nil {
  114. return "", nil, err
  115. }
  116. node := &Node{}
  117. node.Value = cmd
  118. sexp, attrs, err := fullDispatch(cmd, args)
  119. if err != nil {
  120. return "", nil, err
  121. }
  122. node.Next = sexp
  123. node.Attributes = attrs
  124. node.Original = line
  125. node.Flags = flags
  126. return "", node, nil
  127. }
  128. // Parse is the main parse routine.
  129. // It handles an io.ReadWriteCloser and returns the root of the AST.
  130. func Parse(rwc io.Reader) (*Node, error) {
  131. directiveEscapeSeen = false
  132. lookingForDirectives = true
  133. setTokenEscape(defaultTokenEscape) // Assume the default token for escape
  134. currentLine := 0
  135. root := &Node{}
  136. root.StartLine = -1
  137. scanner := bufio.NewScanner(rwc)
  138. utf8bom := []byte{0xEF, 0xBB, 0xBF}
  139. for scanner.Scan() {
  140. scannedBytes := scanner.Bytes()
  141. // We trim UTF8 BOM
  142. if currentLine == 0 {
  143. scannedBytes = bytes.TrimPrefix(scannedBytes, utf8bom)
  144. }
  145. scannedLine := strings.TrimLeftFunc(string(scannedBytes), unicode.IsSpace)
  146. currentLine++
  147. line, child, err := ParseLine(scannedLine)
  148. if err != nil {
  149. return nil, err
  150. }
  151. startLine := currentLine
  152. if line != "" && child == nil {
  153. for scanner.Scan() {
  154. newline := scanner.Text()
  155. currentLine++
  156. if stripComments(strings.TrimSpace(newline)) == "" {
  157. continue
  158. }
  159. line, child, err = ParseLine(line + newline)
  160. if err != nil {
  161. return nil, err
  162. }
  163. if child != nil {
  164. break
  165. }
  166. }
  167. if child == nil && line != "" {
  168. _, child, err = ParseLine(line)
  169. if err != nil {
  170. return nil, err
  171. }
  172. }
  173. }
  174. if child != nil {
  175. // Update the line information for the current child.
  176. child.StartLine = startLine
  177. child.EndLine = currentLine
  178. // Update the line information for the root. The starting line of the root is always the
  179. // starting line of the first child and the ending line is the ending line of the last child.
  180. if root.StartLine < 0 {
  181. root.StartLine = currentLine
  182. }
  183. root.EndLine = currentLine
  184. root.Children = append(root.Children, child)
  185. }
  186. }
  187. return root, nil
  188. }