parser.go 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. // Package parser implements a parser and parse tree dumper for Dockerfiles.
  2. package parser
  3. import (
  4. "bufio"
  5. "fmt"
  6. "io"
  7. "regexp"
  8. "strings"
  9. "unicode"
  10. "github.com/docker/docker/builder/dockerfile/command"
  11. )
  12. // Node is a structure used to represent a parse tree.
  13. //
  14. // In the node there are three fields, Value, Next, and Children. Value is the
  15. // current token's string value. Next is always the next non-child token, and
  16. // children contains all the children. Here's an example:
  17. //
  18. // (value next (child child-next child-next-next) next-next)
  19. //
  20. // This data structure is frankly pretty lousy for handling complex languages,
  21. // but lucky for us the Dockerfile isn't very complicated. This structure
  22. // works a little more effectively than a "proper" parse tree for our needs.
  23. //
  24. type Node struct {
  25. Value string // actual content
  26. Next *Node // the next item in the current sexp
  27. Children []*Node // the children of this sexp
  28. Attributes map[string]bool // special attributes for this node
  29. Original string // original line used before parsing
  30. Flags []string // only top Node should have this set
  31. StartLine int // the line in the original dockerfile where the node begins
  32. EndLine int // the line in the original dockerfile where the node ends
  33. }
  34. var (
  35. dispatch map[string]func(string) (*Node, map[string]bool, error)
  36. tokenWhitespace = regexp.MustCompile(`[\t\v\f\r ]+`)
  37. tokenLineContinuation *regexp.Regexp
  38. tokenEscape rune
  39. tokenEscapeCommand = regexp.MustCompile(`^#[ \t]*escape[ \t]*=[ \t]*(?P<escapechar>.).*$`)
  40. tokenComment = regexp.MustCompile(`^#.*$`)
  41. lookingForDirectives bool
  42. directiveEscapeSeen bool
  43. )
  44. const defaultTokenEscape = "\\"
  45. // setTokenEscape sets the default token for escaping characters in a Dockerfile.
  46. func setTokenEscape(s string) error {
  47. if s != "`" && s != "\\" {
  48. return fmt.Errorf("invalid ESCAPE '%s'. Must be ` or \\", s)
  49. }
  50. tokenEscape = rune(s[0])
  51. tokenLineContinuation = regexp.MustCompile(`\` + s + `[ \t]*$`)
  52. return nil
  53. }
  54. func init() {
  55. // Dispatch Table. see line_parsers.go for the parse functions.
  56. // The command is parsed and mapped to the line parser. The line parser
  57. // receives the arguments but not the command, and returns an AST after
  58. // reformulating the arguments according to the rules in the parser
  59. // functions. Errors are propagated up by Parse() and the resulting AST can
  60. // be incorporated directly into the existing AST as a next.
  61. dispatch = map[string]func(string) (*Node, map[string]bool, error){
  62. command.User: parseString,
  63. command.Onbuild: parseSubCommand,
  64. command.Workdir: parseString,
  65. command.Env: parseEnv,
  66. command.Label: parseLabel,
  67. command.Maintainer: parseString,
  68. command.From: parseString,
  69. command.Add: parseMaybeJSONToList,
  70. command.Copy: parseMaybeJSONToList,
  71. command.Run: parseMaybeJSON,
  72. command.Cmd: parseMaybeJSON,
  73. command.Entrypoint: parseMaybeJSON,
  74. command.Expose: parseStringsWhitespaceDelimited,
  75. command.Volume: parseMaybeJSONToList,
  76. command.StopSignal: parseString,
  77. command.Arg: parseNameOrNameVal,
  78. command.Healthcheck: parseHealthConfig,
  79. }
  80. }
  81. // ParseLine parse a line and return the remainder.
  82. func ParseLine(line string) (string, *Node, error) {
  83. // Handle the parser directive '# escape=<char>. Parser directives must preceed
  84. // any builder instruction or other comments, and cannot be repeated.
  85. if lookingForDirectives {
  86. tecMatch := tokenEscapeCommand.FindStringSubmatch(strings.ToLower(line))
  87. if len(tecMatch) > 0 {
  88. if directiveEscapeSeen == true {
  89. return "", nil, fmt.Errorf("only one escape parser directive can be used")
  90. }
  91. for i, n := range tokenEscapeCommand.SubexpNames() {
  92. if n == "escapechar" {
  93. if err := setTokenEscape(tecMatch[i]); err != nil {
  94. return "", nil, err
  95. }
  96. directiveEscapeSeen = true
  97. return "", nil, nil
  98. }
  99. }
  100. }
  101. }
  102. lookingForDirectives = false
  103. if line = stripComments(line); line == "" {
  104. return "", nil, nil
  105. }
  106. if tokenLineContinuation.MatchString(line) {
  107. line = tokenLineContinuation.ReplaceAllString(line, "")
  108. return line, nil, nil
  109. }
  110. cmd, flags, args, err := splitCommand(line)
  111. if err != nil {
  112. return "", nil, err
  113. }
  114. node := &Node{}
  115. node.Value = cmd
  116. sexp, attrs, err := fullDispatch(cmd, args)
  117. if err != nil {
  118. return "", nil, err
  119. }
  120. node.Next = sexp
  121. node.Attributes = attrs
  122. node.Original = line
  123. node.Flags = flags
  124. return "", node, nil
  125. }
  126. // Parse is the main parse routine.
  127. // It handles an io.ReadWriteCloser and returns the root of the AST.
  128. func Parse(rwc io.Reader) (*Node, error) {
  129. directiveEscapeSeen = false
  130. lookingForDirectives = true
  131. setTokenEscape(defaultTokenEscape) // Assume the default token for escape
  132. currentLine := 0
  133. root := &Node{}
  134. root.StartLine = -1
  135. scanner := bufio.NewScanner(rwc)
  136. for scanner.Scan() {
  137. scannedLine := strings.TrimLeftFunc(scanner.Text(), unicode.IsSpace)
  138. currentLine++
  139. line, child, err := ParseLine(scannedLine)
  140. if err != nil {
  141. return nil, err
  142. }
  143. startLine := currentLine
  144. if line != "" && child == nil {
  145. for scanner.Scan() {
  146. newline := scanner.Text()
  147. currentLine++
  148. if stripComments(strings.TrimSpace(newline)) == "" {
  149. continue
  150. }
  151. line, child, err = ParseLine(line + newline)
  152. if err != nil {
  153. return nil, err
  154. }
  155. if child != nil {
  156. break
  157. }
  158. }
  159. if child == nil && line != "" {
  160. _, child, err = ParseLine(line)
  161. if err != nil {
  162. return nil, err
  163. }
  164. }
  165. }
  166. if child != nil {
  167. // Update the line information for the current child.
  168. child.StartLine = startLine
  169. child.EndLine = currentLine
  170. // Update the line information for the root. The starting line of the root is always the
  171. // starting line of the first child and the ending line is the ending line of the last child.
  172. if root.StartLine < 0 {
  173. root.StartLine = currentLine
  174. }
  175. root.EndLine = currentLine
  176. root.Children = append(root.Children, child)
  177. }
  178. }
  179. return root, nil
  180. }