parser.go 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. // Package parser implements a parser and parse tree dumper for Dockerfiles.
  2. package parser
  3. import (
  4. "bufio"
  5. "bytes"
  6. "fmt"
  7. "io"
  8. "regexp"
  9. "strings"
  10. "unicode"
  11. "github.com/docker/docker/builder/dockerfile/command"
  12. )
  // Node is one element of the parse tree produced for a Dockerfile.
  //
  // The tree is laid out like an s-expression: Value holds the text of the
  // current token, Next links to the following non-child token on the same
  // level, and Children holds the nested nodes. For example:
  //
  // (value next (child child-next child-next-next) next-next)
  //
  // This layout would be a poor fit for a complicated language, but the
  // Dockerfile grammar is simple enough that it serves better here than a
  // "proper" parse tree would.
  type Node struct {
  	// Value is the actual content of the token.
  	Value string
  	// Next is the next item in the current sexp.
  	Next *Node
  	// Children are the children of this sexp.
  	Children []*Node
  	// Attributes holds special attributes for this node.
  	Attributes map[string]bool
  	// Original is the original line as it was before parsing.
  	Original string
  	// Flags should only be set on the top Node of an instruction.
  	Flags []string
  	// StartLine is the line in the original Dockerfile where the node begins.
  	StartLine int
  	// EndLine is the line in the original Dockerfile where the node ends.
  	EndLine int
  }
  // Directive carries the parser-directive state for a single build run.
  type Directive struct {
  	// EscapeToken is the escape token currently in effect.
  	EscapeToken rune
  	// LineContinuationRegex is the line continuation regex currently in effect.
  	LineContinuationRegex *regexp.Regexp
  	// LookingForDirectives reports whether the parser is still looking for
  	// parser directives.
  	LookingForDirectives bool
  	// EscapeSeen reports whether the escape directive has already been seen.
  	EscapeSeen bool
  }
  43. var (
  44. dispatch map[string]func(string, *Directive) (*Node, map[string]bool, error)
  45. tokenWhitespace = regexp.MustCompile(`[\t\v\f\r ]+`)
  46. tokenEscapeCommand = regexp.MustCompile(`^#[ \t]*escape[ \t]*=[ \t]*(?P<escapechar>.).*$`)
  47. tokenComment = regexp.MustCompile(`^#.*$`)
  48. )
  // DefaultEscapeToken is the default escape token: a single backslash.
  const DefaultEscapeToken = "\\"
  51. // SetEscapeToken sets the default token for escaping characters in a Dockerfile.
  52. func SetEscapeToken(s string, d *Directive) error {
  53. if s != "`" && s != "\\" {
  54. return fmt.Errorf("invalid ESCAPE '%s'. Must be ` or \\", s)
  55. }
  56. d.EscapeToken = rune(s[0])
  57. d.LineContinuationRegex = regexp.MustCompile(`\` + s + `$`)
  58. return nil
  59. }
  60. func init() {
  61. // Dispatch Table. see line_parsers.go for the parse functions.
  62. // The command is parsed and mapped to the line parser. The line parser
  63. // receives the arguments but not the command, and returns an AST after
  64. // reformulating the arguments according to the rules in the parser
  65. // functions. Errors are propagated up by Parse() and the resulting AST can
  66. // be incorporated directly into the existing AST as a next.
  67. dispatch = map[string]func(string, *Directive) (*Node, map[string]bool, error){
  68. command.Add: parseMaybeJSONToList,
  69. command.Arg: parseNameOrNameVal,
  70. command.Cmd: parseMaybeJSON,
  71. command.Copy: parseMaybeJSONToList,
  72. command.Entrypoint: parseMaybeJSON,
  73. command.Env: parseEnv,
  74. command.Expose: parseStringsWhitespaceDelimited,
  75. command.From: parseString,
  76. command.Healthcheck: parseHealthConfig,
  77. command.Label: parseLabel,
  78. command.Maintainer: parseString,
  79. command.Onbuild: parseSubCommand,
  80. command.Run: parseMaybeJSON,
  81. command.Shell: parseMaybeJSON,
  82. command.StopSignal: parseString,
  83. command.User: parseString,
  84. command.Volume: parseMaybeJSONToList,
  85. command.Workdir: parseString,
  86. }
  87. }
  88. // ParseLine parses a line and returns the remainder.
  89. func ParseLine(line string, d *Directive, ignoreCont bool) (string, *Node, error) {
  90. // Handle the parser directive '# escape=<char>. Parser directives must precede
  91. // any builder instruction or other comments, and cannot be repeated.
  92. if d.LookingForDirectives {
  93. tecMatch := tokenEscapeCommand.FindStringSubmatch(strings.ToLower(line))
  94. if len(tecMatch) > 0 {
  95. if d.EscapeSeen == true {
  96. return "", nil, fmt.Errorf("only one escape parser directive can be used")
  97. }
  98. for i, n := range tokenEscapeCommand.SubexpNames() {
  99. if n == "escapechar" {
  100. if err := SetEscapeToken(tecMatch[i], d); err != nil {
  101. return "", nil, err
  102. }
  103. d.EscapeSeen = true
  104. return "", nil, nil
  105. }
  106. }
  107. }
  108. }
  109. d.LookingForDirectives = false
  110. if line = stripComments(line); line == "" {
  111. return "", nil, nil
  112. }
  113. if !ignoreCont && d.LineContinuationRegex.MatchString(line) {
  114. line = d.LineContinuationRegex.ReplaceAllString(line, "")
  115. return line, nil, nil
  116. }
  117. cmd, flags, args, err := splitCommand(line)
  118. if err != nil {
  119. return "", nil, err
  120. }
  121. node := &Node{}
  122. node.Value = cmd
  123. sexp, attrs, err := fullDispatch(cmd, args, d)
  124. if err != nil {
  125. return "", nil, err
  126. }
  127. node.Next = sexp
  128. node.Attributes = attrs
  129. node.Original = line
  130. node.Flags = flags
  131. return "", node, nil
  132. }
  133. // Parse is the main parse routine.
  134. // It handles an io.ReadWriteCloser and returns the root of the AST.
  135. func Parse(rwc io.Reader, d *Directive) (*Node, error) {
  136. currentLine := 0
  137. root := &Node{}
  138. root.StartLine = -1
  139. scanner := bufio.NewScanner(rwc)
  140. utf8bom := []byte{0xEF, 0xBB, 0xBF}
  141. for scanner.Scan() {
  142. scannedBytes := scanner.Bytes()
  143. // We trim UTF8 BOM
  144. if currentLine == 0 {
  145. scannedBytes = bytes.TrimPrefix(scannedBytes, utf8bom)
  146. }
  147. scannedLine := strings.TrimLeftFunc(string(scannedBytes), unicode.IsSpace)
  148. currentLine++
  149. line, child, err := ParseLine(scannedLine, d, false)
  150. if err != nil {
  151. return nil, err
  152. }
  153. startLine := currentLine
  154. if line != "" && child == nil {
  155. for scanner.Scan() {
  156. newline := scanner.Text()
  157. currentLine++
  158. if stripComments(strings.TrimSpace(newline)) == "" {
  159. continue
  160. }
  161. line, child, err = ParseLine(line+newline, d, false)
  162. if err != nil {
  163. return nil, err
  164. }
  165. if child != nil {
  166. break
  167. }
  168. }
  169. if child == nil && line != "" {
  170. // When we call ParseLine we'll pass in 'true' for
  171. // the ignoreCont param if we're at the EOF. This will
  172. // prevent the func from returning immediately w/o
  173. // parsing the line thinking that there's more input
  174. // to come.
  175. _, child, err = ParseLine(line, d, scanner.Err() == nil)
  176. if err != nil {
  177. return nil, err
  178. }
  179. }
  180. }
  181. if child != nil {
  182. // Update the line information for the current child.
  183. child.StartLine = startLine
  184. child.EndLine = currentLine
  185. // Update the line information for the root. The starting line of the root is always the
  186. // starting line of the first child and the ending line is the ending line of the last child.
  187. if root.StartLine < 0 {
  188. root.StartLine = currentLine
  189. }
  190. root.EndLine = currentLine
  191. root.Children = append(root.Children, child)
  192. }
  193. }
  194. return root, nil
  195. }