parser.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369
  1. // Package parser implements a parser and parse tree dumper for Dockerfiles.
  2. package parser
  3. import (
  4. "bufio"
  5. "bytes"
  6. "fmt"
  7. "io"
  8. "regexp"
  9. "runtime"
  10. "strconv"
  11. "strings"
  12. "unicode"
  13. "github.com/docker/docker/builder/dockerfile/command"
  14. "github.com/docker/docker/pkg/system"
  15. "github.com/pkg/errors"
  16. )
// Node is a structure used to represent a parse tree.
//
// The tree is linked together through three fields: Value, Next, and
// Children. Value is the current token's string value. Next is always the
// next non-child token, and Children contains all the children. Here's an
// example:
//
// (value next (child child-next child-next-next) next-next)
//
// This data structure is frankly pretty lousy for handling complex languages,
// but lucky for us the Dockerfile isn't very complicated. This structure
// works a little more effectively than a "proper" parse tree for our needs.
//
// The remaining fields hold metadata about the node: the attributes returned
// by the line parser, the unparsed source line, the instruction flags and
// the line range the node covers in the Dockerfile.
type Node struct {
	Value      string          // actual content
	Next       *Node           // the next item in the current sexp
	Children   []*Node         // the children of this sexp
	Attributes map[string]bool // special attributes for this node
	Original   string          // original line used before parsing
	Flags      []string        // only top Node should have this set
	StartLine  int             // the line in the original dockerfile where the node begins
	endLine    int             // the line in the original dockerfile where the node ends
}
  39. // Dump dumps the AST defined by `node` as a list of sexps.
  40. // Returns a string suitable for printing.
  41. func (node *Node) Dump() string {
  42. str := ""
  43. str += node.Value
  44. if len(node.Flags) > 0 {
  45. str += fmt.Sprintf(" %q", node.Flags)
  46. }
  47. for _, n := range node.Children {
  48. str += "(" + n.Dump() + ")\n"
  49. }
  50. for n := node.Next; n != nil; n = n.Next {
  51. if len(n.Children) > 0 {
  52. str += " " + n.Dump()
  53. } else {
  54. str += " " + strconv.Quote(n.Value)
  55. }
  56. }
  57. return strings.TrimSpace(str)
  58. }
  59. func (node *Node) lines(start, end int) {
  60. node.StartLine = start
  61. node.endLine = end
  62. }
  63. // AddChild adds a new child node, and updates line information
  64. func (node *Node) AddChild(child *Node, startLine, endLine int) {
  65. child.lines(startLine, endLine)
  66. if node.StartLine < 0 {
  67. node.StartLine = startLine
  68. }
  69. node.endLine = endLine
  70. node.Children = append(node.Children, child)
  71. }
var (
	// dispatch maps an instruction name to the function that parses that
	// instruction's arguments. It is populated in init.
	dispatch map[string]func(string, *Directive) (*Node, map[string]bool, error)
	// tokenWhitespace matches a run of one or more tab, vertical tab, form
	// feed, carriage return or space characters.
	tokenWhitespace = regexp.MustCompile(`[\t\v\f\r ]+`)
	// tokenEscapeCommand matches an escape parser directive of the form
	// `# escape=<char>` and captures a single character of the value in the
	// "escapechar" group.
	tokenEscapeCommand = regexp.MustCompile(`^#[ \t]*escape[ \t]*=[ \t]*(?P<escapechar>.).*$`)
	// tokenPlatformCommand matches a platform parser directive of the form
	// `# platform=<value>` and captures the value in the "platform" group.
	tokenPlatformCommand = regexp.MustCompile(`^#[ \t]*platform[ \t]*=[ \t]*(?P<platform>.*)$`)
	// tokenComment matches a line that begins with '#'.
	tokenComment = regexp.MustCompile(`^#.*$`)
)
// DefaultEscapeToken is the default escape token, a backslash. It is the
// token a Directive created by NewDefaultDirective starts with.
const DefaultEscapeToken = '\\'
// Directive is the structure used during a build run to hold the state of
// parsing directives.
//
// The escape token and the line continuation regex are always set together
// by setEscapeToken; the platform token is set by setPlatformToken. The
// boolean fields track how far directive processing has progressed.
type Directive struct {
	escapeToken           rune           // Current escape token
	platformToken         string         // Current platform token
	lineContinuationRegex *regexp.Regexp // Current line continuation regex
	processingComplete    bool           // Whether we are done looking for directives
	escapeSeen            bool           // Whether the escape directive has been seen
	platformSeen          bool           // Whether the platform directive has been seen
}
  91. // setEscapeToken sets the default token for escaping characters in a Dockerfile.
  92. func (d *Directive) setEscapeToken(s string) error {
  93. if s != "`" && s != "\\" {
  94. return fmt.Errorf("invalid ESCAPE '%s'. Must be ` or \\", s)
  95. }
  96. d.escapeToken = rune(s[0])
  97. d.lineContinuationRegex = regexp.MustCompile(`\` + s + `[ \t]*$`)
  98. return nil
  99. }
  100. // setPlatformToken sets the default platform for pulling images in a Dockerfile.
  101. func (d *Directive) setPlatformToken(s string) error {
  102. s = strings.ToLower(s)
  103. valid := []string{runtime.GOOS}
  104. if system.LCOWSupported() {
  105. valid = append(valid, "linux")
  106. }
  107. for _, item := range valid {
  108. if s == item {
  109. d.platformToken = s
  110. return nil
  111. }
  112. }
  113. return fmt.Errorf("invalid PLATFORM '%s'. Must be one of %v", s, valid)
  114. }
  115. // possibleParserDirective looks for one or more parser directives '# escapeToken=<char>' and
  116. // '# platform=<string>'. Parser directives must precede any builder instruction
  117. // or other comments, and cannot be repeated.
  118. func (d *Directive) possibleParserDirective(line string) error {
  119. if d.processingComplete {
  120. return nil
  121. }
  122. tecMatch := tokenEscapeCommand.FindStringSubmatch(strings.ToLower(line))
  123. if len(tecMatch) != 0 {
  124. for i, n := range tokenEscapeCommand.SubexpNames() {
  125. if n == "escapechar" {
  126. if d.escapeSeen {
  127. return errors.New("only one escape parser directive can be used")
  128. }
  129. d.escapeSeen = true
  130. return d.setEscapeToken(tecMatch[i])
  131. }
  132. }
  133. }
  134. // Only recognise a platform token if LCOW is supported
  135. if system.LCOWSupported() {
  136. tpcMatch := tokenPlatformCommand.FindStringSubmatch(strings.ToLower(line))
  137. if len(tpcMatch) != 0 {
  138. for i, n := range tokenPlatformCommand.SubexpNames() {
  139. if n == "platform" {
  140. if d.platformSeen {
  141. return errors.New("only one platform parser directive can be used")
  142. }
  143. d.platformSeen = true
  144. return d.setPlatformToken(tpcMatch[i])
  145. }
  146. }
  147. }
  148. }
  149. d.processingComplete = true
  150. return nil
  151. }
  152. // NewDefaultDirective returns a new Directive with the default escapeToken token
  153. func NewDefaultDirective() *Directive {
  154. directive := Directive{}
  155. directive.setEscapeToken(string(DefaultEscapeToken))
  156. return &directive
  157. }
  158. func init() {
  159. // Dispatch Table. see line_parsers.go for the parse functions.
  160. // The command is parsed and mapped to the line parser. The line parser
  161. // receives the arguments but not the command, and returns an AST after
  162. // reformulating the arguments according to the rules in the parser
  163. // functions. Errors are propagated up by Parse() and the resulting AST can
  164. // be incorporated directly into the existing AST as a next.
  165. dispatch = map[string]func(string, *Directive) (*Node, map[string]bool, error){
  166. command.Add: parseMaybeJSONToList,
  167. command.Arg: parseNameOrNameVal,
  168. command.Cmd: parseMaybeJSON,
  169. command.Copy: parseMaybeJSONToList,
  170. command.Entrypoint: parseMaybeJSON,
  171. command.Env: parseEnv,
  172. command.Expose: parseStringsWhitespaceDelimited,
  173. command.From: parseStringsWhitespaceDelimited,
  174. command.Healthcheck: parseHealthConfig,
  175. command.Label: parseLabel,
  176. command.Maintainer: parseString,
  177. command.Onbuild: parseSubCommand,
  178. command.Run: parseMaybeJSON,
  179. command.Shell: parseMaybeJSON,
  180. command.StopSignal: parseString,
  181. command.User: parseString,
  182. command.Volume: parseMaybeJSONToList,
  183. command.Workdir: parseString,
  184. }
  185. }
  186. // newNodeFromLine splits the line into parts, and dispatches to a function
  187. // based on the command and command arguments. A Node is created from the
  188. // result of the dispatch.
  189. func newNodeFromLine(line string, directive *Directive) (*Node, error) {
  190. cmd, flags, args, err := splitCommand(line)
  191. if err != nil {
  192. return nil, err
  193. }
  194. fn := dispatch[cmd]
  195. // Ignore invalid Dockerfile instructions
  196. if fn == nil {
  197. fn = parseIgnore
  198. }
  199. next, attrs, err := fn(args, directive)
  200. if err != nil {
  201. return nil, err
  202. }
  203. return &Node{
  204. Value: cmd,
  205. Original: line,
  206. Flags: flags,
  207. Next: next,
  208. Attributes: attrs,
  209. }, nil
  210. }
// Result is the result of parsing a Dockerfile
type Result struct {
	// AST is the root of the parse tree; Parse adds one child node per
	// parsed instruction.
	AST *Node
	// EscapeToken is the escape token that was in effect when parsing
	// finished.
	EscapeToken rune
	// TODO @jhowardmsft - see https://github.com/moby/moby/issues/34617
	// This next field will be removed in a future update for LCOW support.
	// It carries the platform token taken from the parser directive state.
	OS string
	// Warnings holds the non-fatal messages collected while parsing.
	Warnings []string
}
  220. // PrintWarnings to the writer
  221. func (r *Result) PrintWarnings(out io.Writer) {
  222. if len(r.Warnings) == 0 {
  223. return
  224. }
  225. fmt.Fprintf(out, strings.Join(r.Warnings, "\n")+"\n")
  226. }
  227. // Parse reads lines from a Reader, parses the lines into an AST and returns
  228. // the AST and escape token
  229. func Parse(rwc io.Reader) (*Result, error) {
  230. d := NewDefaultDirective()
  231. currentLine := 0
  232. root := &Node{StartLine: -1}
  233. scanner := bufio.NewScanner(rwc)
  234. warnings := []string{}
  235. var err error
  236. for scanner.Scan() {
  237. bytesRead := scanner.Bytes()
  238. if currentLine == 0 {
  239. // First line, strip the byte-order-marker if present
  240. bytesRead = bytes.TrimPrefix(bytesRead, utf8bom)
  241. }
  242. bytesRead, err = processLine(d, bytesRead, true)
  243. if err != nil {
  244. return nil, err
  245. }
  246. currentLine++
  247. startLine := currentLine
  248. line, isEndOfLine := trimContinuationCharacter(string(bytesRead), d)
  249. if isEndOfLine && line == "" {
  250. continue
  251. }
  252. var hasEmptyContinuationLine bool
  253. for !isEndOfLine && scanner.Scan() {
  254. bytesRead, err := processLine(d, scanner.Bytes(), false)
  255. if err != nil {
  256. return nil, err
  257. }
  258. currentLine++
  259. if isComment(scanner.Bytes()) {
  260. // original line was a comment (processLine strips comments)
  261. continue
  262. }
  263. if isEmptyContinuationLine(bytesRead) {
  264. hasEmptyContinuationLine = true
  265. continue
  266. }
  267. continuationLine := string(bytesRead)
  268. continuationLine, isEndOfLine = trimContinuationCharacter(continuationLine, d)
  269. line += continuationLine
  270. }
  271. if hasEmptyContinuationLine {
  272. warning := "[WARNING]: Empty continuation line found in:\n " + line
  273. warnings = append(warnings, warning)
  274. }
  275. child, err := newNodeFromLine(line, d)
  276. if err != nil {
  277. return nil, err
  278. }
  279. root.AddChild(child, startLine, currentLine)
  280. }
  281. if len(warnings) > 0 {
  282. warnings = append(warnings, "[WARNING]: Empty continuation lines will become errors in a future release.")
  283. }
  284. return &Result{
  285. AST: root,
  286. Warnings: warnings,
  287. EscapeToken: d.escapeToken,
  288. OS: d.platformToken,
  289. }, handleScannerError(scanner.Err())
  290. }
  291. func trimComments(src []byte) []byte {
  292. return tokenComment.ReplaceAll(src, []byte{})
  293. }
  294. func trimWhitespace(src []byte) []byte {
  295. return bytes.TrimLeftFunc(src, unicode.IsSpace)
  296. }
  297. func isComment(line []byte) bool {
  298. return tokenComment.Match(trimWhitespace(line))
  299. }
  300. func isEmptyContinuationLine(line []byte) bool {
  301. return len(trimWhitespace(line)) == 0
  302. }
// utf8bom is the UTF-8 encoding of the byte order mark. Parse strips it from
// the beginning of the first line.
var utf8bom = []byte{0xEF, 0xBB, 0xBF}
  304. func trimContinuationCharacter(line string, d *Directive) (string, bool) {
  305. if d.lineContinuationRegex.MatchString(line) {
  306. line = d.lineContinuationRegex.ReplaceAllString(line, "")
  307. return line, false
  308. }
  309. return line, true
  310. }
  311. // TODO: remove stripLeftWhitespace after deprecation period. It seems silly
  312. // to preserve whitespace on continuation lines. Why is that done?
  313. func processLine(d *Directive, token []byte, stripLeftWhitespace bool) ([]byte, error) {
  314. if stripLeftWhitespace {
  315. token = trimWhitespace(token)
  316. }
  317. return trimComments(token), d.possibleParserDirective(string(token))
  318. }
  319. func handleScannerError(err error) error {
  320. switch err {
  321. case bufio.ErrTooLong:
  322. return errors.Errorf("dockerfile line greater than max allowed size of %d", bufio.MaxScanTokenSize-1)
  323. default:
  324. return err
  325. }
  326. }