// Source: parser.go (9.4 KB)

  1. // Package parser implements a parser and parse tree dumper for Dockerfiles.
  2. package parser // import "github.com/docker/docker/builder/dockerfile/parser"
  3. import (
  4. "bufio"
  5. "bytes"
  6. "fmt"
  7. "io"
  8. "regexp"
  9. "strconv"
  10. "strings"
  11. "unicode"
  12. "github.com/docker/docker/builder/dockerfile/command"
  13. "github.com/pkg/errors"
  14. )
  15. // Node is a structure used to represent a parse tree.
  16. //
  17. // In the node there are three fields, Value, Next, and Children. Value is the
  18. // current token's string value. Next is always the next non-child token, and
  19. // children contains all the children. Here's an example:
  20. //
  21. // (value next (child child-next child-next-next) next-next)
  22. //
  23. // This data structure is frankly pretty lousy for handling complex languages,
  24. // but lucky for us the Dockerfile isn't very complicated. This structure
  25. // works a little more effectively than a "proper" parse tree for our needs.
  26. //
  27. type Node struct {
  28. Value string // actual content
  29. Next *Node // the next item in the current sexp
  30. Children []*Node // the children of this sexp
  31. Attributes map[string]bool // special attributes for this node
  32. Original string // original line used before parsing
  33. Flags []string // only top Node should have this set
  34. StartLine int // the line in the original dockerfile where the node begins
  35. endLine int // the line in the original dockerfile where the node ends
  36. }
  37. // Dump dumps the AST defined by `node` as a list of sexps.
  38. // Returns a string suitable for printing.
  39. func (node *Node) Dump() string {
  40. str := ""
  41. str += node.Value
  42. if len(node.Flags) > 0 {
  43. str += fmt.Sprintf(" %q", node.Flags)
  44. }
  45. for _, n := range node.Children {
  46. str += "(" + n.Dump() + ")\n"
  47. }
  48. for n := node.Next; n != nil; n = n.Next {
  49. if len(n.Children) > 0 {
  50. str += " " + n.Dump()
  51. } else {
  52. str += " " + strconv.Quote(n.Value)
  53. }
  54. }
  55. return strings.TrimSpace(str)
  56. }
  57. func (node *Node) lines(start, end int) {
  58. node.StartLine = start
  59. node.endLine = end
  60. }
  61. // AddChild adds a new child node, and updates line information
  62. func (node *Node) AddChild(child *Node, startLine, endLine int) {
  63. child.lines(startLine, endLine)
  64. if node.StartLine < 0 {
  65. node.StartLine = startLine
  66. }
  67. node.endLine = endLine
  68. node.Children = append(node.Children, child)
  69. }
  70. var (
  71. dispatch map[string]func(string, *Directive) (*Node, map[string]bool, error)
  72. tokenWhitespace = regexp.MustCompile(`[\t\v\f\r ]+`)
  73. tokenEscapeCommand = regexp.MustCompile(`^#[ \t]*escape[ \t]*=[ \t]*(?P<escapechar>.).*$`)
  74. tokenComment = regexp.MustCompile(`^#.*$`)
  75. )
  76. // DefaultEscapeToken is the default escape token
  77. const DefaultEscapeToken = '\\'
  78. // Directive is the structure used during a build run to hold the state of
  79. // parsing directives.
  80. type Directive struct {
  81. escapeToken rune // Current escape token
  82. lineContinuationRegex *regexp.Regexp // Current line continuation regex
  83. processingComplete bool // Whether we are done looking for directives
  84. escapeSeen bool // Whether the escape directive has been seen
  85. }
  86. // setEscapeToken sets the default token for escaping characters in a Dockerfile.
  87. func (d *Directive) setEscapeToken(s string) error {
  88. if s != "`" && s != "\\" {
  89. return fmt.Errorf("invalid ESCAPE '%s'. Must be ` or \\", s)
  90. }
  91. d.escapeToken = rune(s[0])
  92. d.lineContinuationRegex = regexp.MustCompile(`\` + s + `[ \t]*$`)
  93. return nil
  94. }
  95. // possibleParserDirective looks for parser directives, eg '# escapeToken=<char>'.
  96. // Parser directives must precede any builder instruction or other comments,
  97. // and cannot be repeated.
  98. func (d *Directive) possibleParserDirective(line string) error {
  99. if d.processingComplete {
  100. return nil
  101. }
  102. tecMatch := tokenEscapeCommand.FindStringSubmatch(strings.ToLower(line))
  103. if len(tecMatch) != 0 {
  104. for i, n := range tokenEscapeCommand.SubexpNames() {
  105. if n == "escapechar" {
  106. if d.escapeSeen {
  107. return errors.New("only one escape parser directive can be used")
  108. }
  109. d.escapeSeen = true
  110. return d.setEscapeToken(tecMatch[i])
  111. }
  112. }
  113. }
  114. d.processingComplete = true
  115. return nil
  116. }
  117. // NewDefaultDirective returns a new Directive with the default escapeToken token
  118. func NewDefaultDirective() *Directive {
  119. directive := Directive{}
  120. directive.setEscapeToken(string(DefaultEscapeToken))
  121. return &directive
  122. }
  123. func init() {
  124. // Dispatch Table. see line_parsers.go for the parse functions.
  125. // The command is parsed and mapped to the line parser. The line parser
  126. // receives the arguments but not the command, and returns an AST after
  127. // reformulating the arguments according to the rules in the parser
  128. // functions. Errors are propagated up by Parse() and the resulting AST can
  129. // be incorporated directly into the existing AST as a next.
  130. dispatch = map[string]func(string, *Directive) (*Node, map[string]bool, error){
  131. command.Add: parseMaybeJSONToList,
  132. command.Arg: parseNameOrNameVal,
  133. command.Cmd: parseMaybeJSON,
  134. command.Copy: parseMaybeJSONToList,
  135. command.Entrypoint: parseMaybeJSON,
  136. command.Env: parseEnv,
  137. command.Expose: parseStringsWhitespaceDelimited,
  138. command.From: parseStringsWhitespaceDelimited,
  139. command.Healthcheck: parseHealthConfig,
  140. command.Label: parseLabel,
  141. command.Maintainer: parseString,
  142. command.Onbuild: parseSubCommand,
  143. command.Run: parseMaybeJSON,
  144. command.Shell: parseMaybeJSON,
  145. command.StopSignal: parseString,
  146. command.User: parseString,
  147. command.Volume: parseMaybeJSONToList,
  148. command.Workdir: parseString,
  149. }
  150. }
  151. // newNodeFromLine splits the line into parts, and dispatches to a function
  152. // based on the command and command arguments. A Node is created from the
  153. // result of the dispatch.
  154. func newNodeFromLine(line string, directive *Directive) (*Node, error) {
  155. cmd, flags, args, err := splitCommand(line)
  156. if err != nil {
  157. return nil, err
  158. }
  159. fn := dispatch[cmd]
  160. // Ignore invalid Dockerfile instructions
  161. if fn == nil {
  162. fn = parseIgnore
  163. }
  164. next, attrs, err := fn(args, directive)
  165. if err != nil {
  166. return nil, err
  167. }
  168. return &Node{
  169. Value: cmd,
  170. Original: line,
  171. Flags: flags,
  172. Next: next,
  173. Attributes: attrs,
  174. }, nil
  175. }
  176. // Result is the result of parsing a Dockerfile
  177. type Result struct {
  178. AST *Node
  179. EscapeToken rune
  180. Warnings []string
  181. }
  182. // PrintWarnings to the writer
  183. func (r *Result) PrintWarnings(out io.Writer) {
  184. if len(r.Warnings) == 0 {
  185. return
  186. }
  187. fmt.Fprintf(out, strings.Join(r.Warnings, "\n")+"\n")
  188. }
  189. // Parse reads lines from a Reader, parses the lines into an AST and returns
  190. // the AST and escape token
  191. func Parse(rwc io.Reader) (*Result, error) {
  192. d := NewDefaultDirective()
  193. currentLine := 0
  194. root := &Node{StartLine: -1}
  195. scanner := bufio.NewScanner(rwc)
  196. warnings := []string{}
  197. var err error
  198. for scanner.Scan() {
  199. bytesRead := scanner.Bytes()
  200. if currentLine == 0 {
  201. // First line, strip the byte-order-marker if present
  202. bytesRead = bytes.TrimPrefix(bytesRead, utf8bom)
  203. }
  204. bytesRead, err = processLine(d, bytesRead, true)
  205. if err != nil {
  206. return nil, err
  207. }
  208. currentLine++
  209. startLine := currentLine
  210. line, isEndOfLine := trimContinuationCharacter(string(bytesRead), d)
  211. if isEndOfLine && line == "" {
  212. continue
  213. }
  214. var hasEmptyContinuationLine bool
  215. for !isEndOfLine && scanner.Scan() {
  216. bytesRead, err := processLine(d, scanner.Bytes(), false)
  217. if err != nil {
  218. return nil, err
  219. }
  220. currentLine++
  221. if isComment(scanner.Bytes()) {
  222. // original line was a comment (processLine strips comments)
  223. continue
  224. }
  225. if isEmptyContinuationLine(bytesRead) {
  226. hasEmptyContinuationLine = true
  227. continue
  228. }
  229. continuationLine := string(bytesRead)
  230. continuationLine, isEndOfLine = trimContinuationCharacter(continuationLine, d)
  231. line += continuationLine
  232. }
  233. if hasEmptyContinuationLine {
  234. warnings = append(warnings, "[WARNING]: Empty continuation line found in:\n "+line)
  235. }
  236. child, err := newNodeFromLine(line, d)
  237. if err != nil {
  238. return nil, err
  239. }
  240. root.AddChild(child, startLine, currentLine)
  241. }
  242. if len(warnings) > 0 {
  243. warnings = append(warnings, "[WARNING]: Empty continuation lines will become errors in a future release.")
  244. }
  245. return &Result{
  246. AST: root,
  247. Warnings: warnings,
  248. EscapeToken: d.escapeToken,
  249. }, handleScannerError(scanner.Err())
  250. }
  251. func trimComments(src []byte) []byte {
  252. return tokenComment.ReplaceAll(src, []byte{})
  253. }
  254. func trimWhitespace(src []byte) []byte {
  255. return bytes.TrimLeftFunc(src, unicode.IsSpace)
  256. }
  257. func isComment(line []byte) bool {
  258. return tokenComment.Match(trimWhitespace(line))
  259. }
  260. func isEmptyContinuationLine(line []byte) bool {
  261. return len(trimWhitespace(line)) == 0
  262. }
  263. var utf8bom = []byte{0xEF, 0xBB, 0xBF}
  264. func trimContinuationCharacter(line string, d *Directive) (string, bool) {
  265. if d.lineContinuationRegex.MatchString(line) {
  266. line = d.lineContinuationRegex.ReplaceAllString(line, "")
  267. return line, false
  268. }
  269. return line, true
  270. }
  271. // TODO: remove stripLeftWhitespace after deprecation period. It seems silly
  272. // to preserve whitespace on continuation lines. Why is that done?
  273. func processLine(d *Directive, token []byte, stripLeftWhitespace bool) ([]byte, error) {
  274. if stripLeftWhitespace {
  275. token = trimWhitespace(token)
  276. }
  277. return trimComments(token), d.possibleParserDirective(string(token))
  278. }
  279. func handleScannerError(err error) error {
  280. switch err {
  281. case bufio.ErrTooLong:
  282. return errors.Errorf("dockerfile line greater than max allowed size of %d", bufio.MaxScanTokenSize-1)
  283. default:
  284. return err
  285. }
  286. }