parser.go 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300
  1. // Package parser implements a parser and parse tree dumper for Dockerfiles.
  2. package parser
  3. import (
  4. "bufio"
  5. "bytes"
  6. "fmt"
  7. "io"
  8. "regexp"
  9. "strconv"
  10. "strings"
  11. "unicode"
  12. "github.com/docker/docker/builder/dockerfile/command"
  13. "github.com/pkg/errors"
  14. )
  15. // Node is a structure used to represent a parse tree.
  16. //
  17. // In the node there are three fields, Value, Next, and Children. Value is the
  18. // current token's string value. Next is always the next non-child token, and
  19. // children contains all the children. Here's an example:
  20. //
  21. // (value next (child child-next child-next-next) next-next)
  22. //
  23. // This data structure is frankly pretty lousy for handling complex languages,
  24. // but lucky for us the Dockerfile isn't very complicated. This structure
  25. // works a little more effectively than a "proper" parse tree for our needs.
  26. //
  27. type Node struct {
  28. Value string // actual content
  29. Next *Node // the next item in the current sexp
  30. Children []*Node // the children of this sexp
  31. Attributes map[string]bool // special attributes for this node
  32. Original string // original line used before parsing
  33. Flags []string // only top Node should have this set
  34. StartLine int // the line in the original dockerfile where the node begins
  35. endLine int // the line in the original dockerfile where the node ends
  36. }
  37. // Dump dumps the AST defined by `node` as a list of sexps.
  38. // Returns a string suitable for printing.
  39. func (node *Node) Dump() string {
  40. str := ""
  41. str += node.Value
  42. if len(node.Flags) > 0 {
  43. str += fmt.Sprintf(" %q", node.Flags)
  44. }
  45. for _, n := range node.Children {
  46. str += "(" + n.Dump() + ")\n"
  47. }
  48. for n := node.Next; n != nil; n = n.Next {
  49. if len(n.Children) > 0 {
  50. str += " " + n.Dump()
  51. } else {
  52. str += " " + strconv.Quote(n.Value)
  53. }
  54. }
  55. return strings.TrimSpace(str)
  56. }
  57. var (
  58. dispatch map[string]func(string, *Directive) (*Node, map[string]bool, error)
  59. tokenWhitespace = regexp.MustCompile(`[\t\v\f\r ]+`)
  60. tokenEscapeCommand = regexp.MustCompile(`^#[ \t]*escape[ \t]*=[ \t]*(?P<escapechar>.).*$`)
  61. tokenComment = regexp.MustCompile(`^#.*$`)
  62. )
  63. // DefaultEscapeToken is the default escape token
  64. const DefaultEscapeToken = '\\'
  65. // Directive is the structure used during a build run to hold the state of
  66. // parsing directives.
  67. type Directive struct {
  68. escapeToken rune // Current escape token
  69. lineContinuationRegex *regexp.Regexp // Current line continuation regex
  70. processingComplete bool // Whether we are done looking for directives
  71. escapeSeen bool // Whether the escape directive has been seen
  72. }
  73. // setEscapeToken sets the default token for escaping characters in a Dockerfile.
  74. func (d *Directive) setEscapeToken(s string) error {
  75. if s != "`" && s != "\\" {
  76. return fmt.Errorf("invalid ESCAPE '%s'. Must be ` or \\", s)
  77. }
  78. d.escapeToken = rune(s[0])
  79. d.lineContinuationRegex = regexp.MustCompile(`\` + s + `[ \t]*$`)
  80. return nil
  81. }
  82. // processLine looks for a parser directive '# escapeToken=<char>. Parser
  83. // directives must precede any builder instruction or other comments, and cannot
  84. // be repeated.
  85. func (d *Directive) processLine(line string) error {
  86. if d.processingComplete {
  87. return nil
  88. }
  89. // Processing is finished after the first call
  90. defer func() { d.processingComplete = true }()
  91. tecMatch := tokenEscapeCommand.FindStringSubmatch(strings.ToLower(line))
  92. if len(tecMatch) == 0 {
  93. return nil
  94. }
  95. if d.escapeSeen == true {
  96. return errors.New("only one escape parser directive can be used")
  97. }
  98. for i, n := range tokenEscapeCommand.SubexpNames() {
  99. if n == "escapechar" {
  100. d.escapeSeen = true
  101. return d.setEscapeToken(tecMatch[i])
  102. }
  103. }
  104. return nil
  105. }
  106. // NewDefaultDirective returns a new Directive with the default escapeToken token
  107. func NewDefaultDirective() *Directive {
  108. directive := Directive{}
  109. directive.setEscapeToken(string(DefaultEscapeToken))
  110. return &directive
  111. }
  112. func init() {
  113. // Dispatch Table. see line_parsers.go for the parse functions.
  114. // The command is parsed and mapped to the line parser. The line parser
  115. // receives the arguments but not the command, and returns an AST after
  116. // reformulating the arguments according to the rules in the parser
  117. // functions. Errors are propagated up by Parse() and the resulting AST can
  118. // be incorporated directly into the existing AST as a next.
  119. dispatch = map[string]func(string, *Directive) (*Node, map[string]bool, error){
  120. command.Add: parseMaybeJSONToList,
  121. command.Arg: parseNameOrNameVal,
  122. command.Cmd: parseMaybeJSON,
  123. command.Copy: parseMaybeJSONToList,
  124. command.Entrypoint: parseMaybeJSON,
  125. command.Env: parseEnv,
  126. command.Expose: parseStringsWhitespaceDelimited,
  127. command.From: parseStringsWhitespaceDelimited,
  128. command.Healthcheck: parseHealthConfig,
  129. command.Label: parseLabel,
  130. command.Maintainer: parseString,
  131. command.Onbuild: parseSubCommand,
  132. command.Run: parseMaybeJSON,
  133. command.Shell: parseMaybeJSON,
  134. command.StopSignal: parseString,
  135. command.User: parseString,
  136. command.Volume: parseMaybeJSONToList,
  137. command.Workdir: parseString,
  138. }
  139. }
  140. // ParseLine parses a line and returns the remainder.
  141. func ParseLine(line string, d *Directive, ignoreCont bool) (string, *Node, error) {
  142. if err := d.processLine(line); err != nil {
  143. return "", nil, err
  144. }
  145. if line = stripComments(line); line == "" {
  146. return "", nil, nil
  147. }
  148. if !ignoreCont && d.lineContinuationRegex.MatchString(line) {
  149. line = d.lineContinuationRegex.ReplaceAllString(line, "")
  150. return line, nil, nil
  151. }
  152. node, err := newNodeFromLine(line, d)
  153. return "", node, err
  154. }
  155. // newNodeFromLine splits the line into parts, and dispatches to a function
  156. // based on the command and command arguments. A Node is created from the
  157. // result of the dispatch.
  158. func newNodeFromLine(line string, directive *Directive) (*Node, error) {
  159. cmd, flags, args, err := splitCommand(line)
  160. if err != nil {
  161. return nil, err
  162. }
  163. fn := dispatch[cmd]
  164. // Ignore invalid Dockerfile instructions
  165. if fn == nil {
  166. fn = parseIgnore
  167. }
  168. next, attrs, err := fn(args, directive)
  169. if err != nil {
  170. return nil, err
  171. }
  172. return &Node{
  173. Value: cmd,
  174. Original: line,
  175. Flags: flags,
  176. Next: next,
  177. Attributes: attrs,
  178. }, nil
  179. }
  180. // Result is the result of parsing a Dockerfile
  181. type Result struct {
  182. AST *Node
  183. EscapeToken rune
  184. }
  185. // scanLines is a split function for bufio.Scanner. that augments the default
  186. // line scanner by supporting newline escapes.
  187. func scanLines(data []byte, atEOF bool) (int, []byte, error) {
  188. advance, token, err := bufio.ScanLines(data, atEOF)
  189. return advance, token, err
  190. }
  191. // Parse reads lines from a Reader, parses the lines into an AST and returns
  192. // the AST and escape token
  193. func Parse(rwc io.Reader) (*Result, error) {
  194. d := NewDefaultDirective()
  195. currentLine := 0
  196. root := &Node{StartLine: -1}
  197. scanner := bufio.NewScanner(rwc)
  198. scanner.Split(scanLines)
  199. utf8bom := []byte{0xEF, 0xBB, 0xBF}
  200. for scanner.Scan() {
  201. scannedBytes := scanner.Bytes()
  202. // We trim UTF8 BOM
  203. if currentLine == 0 {
  204. scannedBytes = bytes.TrimPrefix(scannedBytes, utf8bom)
  205. }
  206. scannedLine := strings.TrimLeftFunc(string(scannedBytes), unicode.IsSpace)
  207. currentLine++
  208. line, child, err := ParseLine(scannedLine, d, false)
  209. if err != nil {
  210. return nil, err
  211. }
  212. startLine := currentLine
  213. if line != "" && child == nil {
  214. for scanner.Scan() {
  215. newline := scanner.Text()
  216. currentLine++
  217. if stripComments(strings.TrimSpace(newline)) == "" {
  218. continue
  219. }
  220. line, child, err = ParseLine(line+newline, d, false)
  221. if err != nil {
  222. return nil, err
  223. }
  224. if child != nil {
  225. break
  226. }
  227. }
  228. if child == nil && line != "" {
  229. // When we call ParseLine we'll pass in 'true' for
  230. // the ignoreCont param if we're at the EOF. This will
  231. // prevent the func from returning immediately w/o
  232. // parsing the line thinking that there's more input
  233. // to come.
  234. _, child, err = ParseLine(line, d, scanner.Err() == nil)
  235. if err != nil {
  236. return nil, err
  237. }
  238. }
  239. }
  240. if child != nil {
  241. // Update the line information for the current child.
  242. child.StartLine = startLine
  243. child.endLine = currentLine
  244. // Update the line information for the root. The starting line of the root is always the
  245. // starting line of the first child and the ending line is the ending line of the last child.
  246. if root.StartLine < 0 {
  247. root.StartLine = currentLine
  248. }
  249. root.endLine = currentLine
  250. root.Children = append(root.Children, child)
  251. }
  252. }
  253. return &Result{AST: root, EscapeToken: d.escapeToken}, nil
  254. }
  255. // covers comments and empty lines. Lines should be trimmed before passing to
  256. // this function.
  257. func stripComments(line string) string {
  258. // string is already trimmed at this point
  259. if tokenComment.MatchString(line) {
  260. return tokenComment.ReplaceAllString(line, "")
  261. }
  262. return line
  263. }