parser.go 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. // Package parser implements a parser and parse tree dumper for Dockerfiles.
  2. package parser
  3. import (
  4. "bufio"
  5. "bytes"
  6. "fmt"
  7. "io"
  8. "regexp"
  9. "strconv"
  10. "strings"
  11. "unicode"
  12. "github.com/docker/docker/builder/dockerfile/command"
  13. )
  // Node is one element of the parse tree built for a Dockerfile.
//
// The tree is shaped like an s-expression. Value is the string value of the
// current token, Next points at the following token on the same level, and
// Children holds the nodes nested beneath this one. For example:
//
//	(value next (child child-next child-next-next) next-next)
//
// This layout is too simple for a complex language, but a Dockerfile is not
// complicated, and for this parser it is more convenient than a "proper"
// parse tree.
type Node struct {
	Value      string          // actual content
	Next       *Node           // the next item in the current sexp
	Children   []*Node         // the children of this sexp
	Attributes map[string]bool // special attributes for this node
	Original   string          // original line used before parsing
	Flags      []string        // only top Node should have this set
	StartLine  int             // the line in the original dockerfile where the node begins
	endLine    int             // the line in the original dockerfile where the node ends
}

// Dump renders the AST rooted at node as a list of s-expressions and returns
// a string suitable for printing.
//
// The output is node.Value, then the quoted Flags (when any are set), then
// every child wrapped in parentheses on its own line, then each node of the
// Next chain: quoted when it has no children, dumped recursively otherwise.
// Leading and trailing whitespace is trimmed from the result.
func (node *Node) Dump() string {
	// Accumulate into a buffer: appending to a string inside the loops
	// re-copies everything written so far on every step.
	var buf bytes.Buffer

	buf.WriteString(node.Value)
	if len(node.Flags) > 0 {
		fmt.Fprintf(&buf, " %q", node.Flags)
	}

	for _, child := range node.Children {
		buf.WriteString("(")
		buf.WriteString(child.Dump())
		buf.WriteString(")\n")
	}

	for n := node.Next; n != nil; n = n.Next {
		buf.WriteString(" ")
		if len(n.Children) > 0 {
			buf.WriteString(n.Dump())
		} else {
			buf.WriteString(strconv.Quote(n.Value))
		}
	}

	return strings.TrimSpace(buf.String())
}
  56. var (
  57. dispatch map[string]func(string, *Directive) (*Node, map[string]bool, error)
  58. tokenWhitespace = regexp.MustCompile(`[\t\v\f\r ]+`)
  59. tokenEscapeCommand = regexp.MustCompile(`^#[ \t]*escape[ \t]*=[ \t]*(?P<escapechar>.).*$`)
  60. tokenComment = regexp.MustCompile(`^#.*$`)
  61. )
  62. // DefaultEscapeToken is the default escape token
  63. const DefaultEscapeToken = '\\'
  // Directive carries the parser-directive state used while a Dockerfile is
// being parsed during a build run.
type Directive struct {
	escapeToken           rune           // Current escape token
	lineContinuationRegex *regexp.Regexp // Current line continuation regex
	lookingForDirectives  bool           // Whether we are currently looking for directives
	escapeSeen            bool           // Whether the escape directive has been seen
}

// setEscapeToken makes s the escape token and rebuilds the line-continuation
// regex so that it matches s followed by optional spaces or tabs at the end
// of a line. Only "`" and "\" are accepted; any other value returns an error
// and leaves the directive untouched.
func (d *Directive) setEscapeToken(s string) error {
	switch s {
	case "`", "\\":
		// supported escape tokens
	default:
		return fmt.Errorf("invalid ESCAPE '%s'. Must be ` or \\", s)
	}

	continuation := `\` + s + `[ \t]*$`
	d.escapeToken = rune(s[0])
	d.lineContinuationRegex = regexp.MustCompile(continuation)
	return nil
}
  81. // NewDefaultDirective returns a new Directive with the default escapeToken token
  82. func NewDefaultDirective() *Directive {
  83. directive := Directive{
  84. escapeSeen: false,
  85. lookingForDirectives: true,
  86. }
  87. directive.setEscapeToken(string(DefaultEscapeToken))
  88. return &directive
  89. }
  90. func init() {
  91. // Dispatch Table. see line_parsers.go for the parse functions.
  92. // The command is parsed and mapped to the line parser. The line parser
  93. // receives the arguments but not the command, and returns an AST after
  94. // reformulating the arguments according to the rules in the parser
  95. // functions. Errors are propagated up by Parse() and the resulting AST can
  96. // be incorporated directly into the existing AST as a next.
  97. dispatch = map[string]func(string, *Directive) (*Node, map[string]bool, error){
  98. command.Add: parseMaybeJSONToList,
  99. command.Arg: parseNameOrNameVal,
  100. command.Cmd: parseMaybeJSON,
  101. command.Copy: parseMaybeJSONToList,
  102. command.Entrypoint: parseMaybeJSON,
  103. command.Env: parseEnv,
  104. command.Expose: parseStringsWhitespaceDelimited,
  105. command.From: parseStringsWhitespaceDelimited,
  106. command.Healthcheck: parseHealthConfig,
  107. command.Label: parseLabel,
  108. command.Maintainer: parseString,
  109. command.Onbuild: parseSubCommand,
  110. command.Run: parseMaybeJSON,
  111. command.Shell: parseMaybeJSON,
  112. command.StopSignal: parseString,
  113. command.User: parseString,
  114. command.Volume: parseMaybeJSONToList,
  115. command.Workdir: parseString,
  116. }
  117. }
  118. // ParseLine parses a line and returns the remainder.
  119. func ParseLine(line string, d *Directive, ignoreCont bool) (string, *Node, error) {
  120. if escapeFound, err := handleParserDirective(line, d); err != nil || escapeFound {
  121. d.escapeSeen = escapeFound
  122. return "", nil, err
  123. }
  124. d.lookingForDirectives = false
  125. if line = stripComments(line); line == "" {
  126. return "", nil, nil
  127. }
  128. if !ignoreCont && d.lineContinuationRegex.MatchString(line) {
  129. line = d.lineContinuationRegex.ReplaceAllString(line, "")
  130. return line, nil, nil
  131. }
  132. node, err := newNodeFromLine(line, d)
  133. return "", node, err
  134. }
  135. // newNodeFromLine splits the line into parts, and dispatches to a function
  136. // based on the command and command arguments. A Node is created from the
  137. // result of the dispatch.
  138. func newNodeFromLine(line string, directive *Directive) (*Node, error) {
  139. cmd, flags, args, err := splitCommand(line)
  140. if err != nil {
  141. return nil, err
  142. }
  143. fn := dispatch[cmd]
  144. // Ignore invalid Dockerfile instructions
  145. if fn == nil {
  146. fn = parseIgnore
  147. }
  148. next, attrs, err := fn(args, directive)
  149. if err != nil {
  150. return nil, err
  151. }
  152. return &Node{
  153. Value: cmd,
  154. Original: line,
  155. Flags: flags,
  156. Next: next,
  157. Attributes: attrs,
  158. }, nil
  159. }
  160. // Handle the parser directive '# escapeToken=<char>. Parser directives must precede
  161. // any builder instruction or other comments, and cannot be repeated.
  162. func handleParserDirective(line string, d *Directive) (bool, error) {
  163. if !d.lookingForDirectives {
  164. return false, nil
  165. }
  166. tecMatch := tokenEscapeCommand.FindStringSubmatch(strings.ToLower(line))
  167. if len(tecMatch) == 0 {
  168. return false, nil
  169. }
  170. if d.escapeSeen == true {
  171. return false, fmt.Errorf("only one escape parser directive can be used")
  172. }
  173. for i, n := range tokenEscapeCommand.SubexpNames() {
  174. if n == "escapechar" {
  175. if err := d.setEscapeToken(tecMatch[i]); err != nil {
  176. return false, err
  177. }
  178. return true, nil
  179. }
  180. }
  181. return false, nil
  182. }
  183. // Result is the result of parsing a Dockerfile
  184. type Result struct {
  185. AST *Node
  186. EscapeToken rune
  187. }
  188. // Parse reads lines from a Reader, parses the lines into an AST and returns
  189. // the AST and escape token
  190. func Parse(rwc io.Reader) (*Result, error) {
  191. d := NewDefaultDirective()
  192. currentLine := 0
  193. root := &Node{}
  194. root.StartLine = -1
  195. scanner := bufio.NewScanner(rwc)
  196. utf8bom := []byte{0xEF, 0xBB, 0xBF}
  197. for scanner.Scan() {
  198. scannedBytes := scanner.Bytes()
  199. // We trim UTF8 BOM
  200. if currentLine == 0 {
  201. scannedBytes = bytes.TrimPrefix(scannedBytes, utf8bom)
  202. }
  203. scannedLine := strings.TrimLeftFunc(string(scannedBytes), unicode.IsSpace)
  204. currentLine++
  205. line, child, err := ParseLine(scannedLine, d, false)
  206. if err != nil {
  207. return nil, err
  208. }
  209. startLine := currentLine
  210. if line != "" && child == nil {
  211. for scanner.Scan() {
  212. newline := scanner.Text()
  213. currentLine++
  214. if stripComments(strings.TrimSpace(newline)) == "" {
  215. continue
  216. }
  217. line, child, err = ParseLine(line+newline, d, false)
  218. if err != nil {
  219. return nil, err
  220. }
  221. if child != nil {
  222. break
  223. }
  224. }
  225. if child == nil && line != "" {
  226. // When we call ParseLine we'll pass in 'true' for
  227. // the ignoreCont param if we're at the EOF. This will
  228. // prevent the func from returning immediately w/o
  229. // parsing the line thinking that there's more input
  230. // to come.
  231. _, child, err = ParseLine(line, d, scanner.Err() == nil)
  232. if err != nil {
  233. return nil, err
  234. }
  235. }
  236. }
  237. if child != nil {
  238. // Update the line information for the current child.
  239. child.StartLine = startLine
  240. child.endLine = currentLine
  241. // Update the line information for the root. The starting line of the root is always the
  242. // starting line of the first child and the ending line is the ending line of the last child.
  243. if root.StartLine < 0 {
  244. root.StartLine = currentLine
  245. }
  246. root.endLine = currentLine
  247. root.Children = append(root.Children, child)
  248. }
  249. }
  250. return &Result{AST: root, EscapeToken: d.escapeToken}, nil
  251. }
  252. // covers comments and empty lines. Lines should be trimmed before passing to
  253. // this function.
  254. func stripComments(line string) string {
  255. // string is already trimmed at this point
  256. if tokenComment.MatchString(line) {
  257. return tokenComment.ReplaceAllString(line, "")
  258. }
  259. return line
  260. }