line_parsers.go 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368
  1. package parser // import "github.com/docker/docker/builder/dockerfile/parser"
  2. // line parsers are dispatch calls that parse a single unit of text into a
  3. // Node object which contains the whole statement. Dockerfiles have varied
  4. // (but not usually unique, see ONBUILD for a unique example) parsing rules
  5. // per-command, and these unify the processing in a way that makes it
  6. // manageable.
  7. import (
  8. "encoding/json"
  9. "errors"
  10. "fmt"
  11. "strings"
  12. "unicode"
  13. "unicode/utf8"
  14. )
  15. var (
  16. errDockerfileNotStringArray = errors.New("when using JSON array syntax, arrays must be comprised of strings only")
  17. )
  18. const (
  19. commandLabel = "LABEL"
  20. )
  21. // ignore the current argument. This will still leave a command parsed, but
  22. // will not incorporate the arguments into the ast.
  23. func parseIgnore(rest string, d *Directive) (*Node, map[string]bool, error) {
  24. return &Node{}, nil, nil
  25. }
  26. // used for onbuild. Could potentially be used for anything that represents a
  27. // statement with sub-statements.
  28. //
  29. // ONBUILD RUN foo bar -> (onbuild (run foo bar))
  30. //
  31. func parseSubCommand(rest string, d *Directive) (*Node, map[string]bool, error) {
  32. if rest == "" {
  33. return nil, nil, nil
  34. }
  35. child, err := newNodeFromLine(rest, d)
  36. if err != nil {
  37. return nil, nil, err
  38. }
  39. return &Node{Children: []*Node{child}}, nil, nil
  40. }
  41. // helper to parse words (i.e space delimited or quoted strings) in a statement.
  42. // The quotes are preserved as part of this function and they are stripped later
  43. // as part of processWords().
  44. func parseWords(rest string, d *Directive) []string {
  45. const (
  46. inSpaces = iota // looking for start of a word
  47. inWord
  48. inQuote
  49. )
  50. words := []string{}
  51. phase := inSpaces
  52. word := ""
  53. quote := '\000'
  54. blankOK := false
  55. var ch rune
  56. var chWidth int
  57. for pos := 0; pos <= len(rest); pos += chWidth {
  58. if pos != len(rest) {
  59. ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
  60. }
  61. if phase == inSpaces { // Looking for start of word
  62. if pos == len(rest) { // end of input
  63. break
  64. }
  65. if unicode.IsSpace(ch) { // skip spaces
  66. continue
  67. }
  68. phase = inWord // found it, fall through
  69. }
  70. if (phase == inWord || phase == inQuote) && (pos == len(rest)) {
  71. if blankOK || len(word) > 0 {
  72. words = append(words, word)
  73. }
  74. break
  75. }
  76. if phase == inWord {
  77. if unicode.IsSpace(ch) {
  78. phase = inSpaces
  79. if blankOK || len(word) > 0 {
  80. words = append(words, word)
  81. }
  82. word = ""
  83. blankOK = false
  84. continue
  85. }
  86. if ch == '\'' || ch == '"' {
  87. quote = ch
  88. blankOK = true
  89. phase = inQuote
  90. }
  91. if ch == d.escapeToken {
  92. if pos+chWidth == len(rest) {
  93. continue // just skip an escape token at end of line
  94. }
  95. // If we're not quoted and we see an escape token, then always just
  96. // add the escape token plus the char to the word, even if the char
  97. // is a quote.
  98. word += string(ch)
  99. pos += chWidth
  100. ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
  101. }
  102. word += string(ch)
  103. continue
  104. }
  105. if phase == inQuote {
  106. if ch == quote {
  107. phase = inWord
  108. }
  109. // The escape token is special except for ' quotes - can't escape anything for '
  110. if ch == d.escapeToken && quote != '\'' {
  111. if pos+chWidth == len(rest) {
  112. phase = inWord
  113. continue // just skip the escape token at end
  114. }
  115. pos += chWidth
  116. word += string(ch)
  117. ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
  118. }
  119. word += string(ch)
  120. }
  121. }
  122. return words
  123. }
  124. // parse environment like statements. Note that this does *not* handle
  125. // variable interpolation, which will be handled in the evaluator.
  126. func parseNameVal(rest string, key string, d *Directive) (*Node, error) {
  127. // This is kind of tricky because we need to support the old
  128. // variant: KEY name value
  129. // as well as the new one: KEY name=value ...
  130. // The trigger to know which one is being used will be whether we hit
  131. // a space or = first. space ==> old, "=" ==> new
  132. words := parseWords(rest, d)
  133. if len(words) == 0 {
  134. return nil, nil
  135. }
  136. // Old format (KEY name value)
  137. if !strings.Contains(words[0], "=") {
  138. parts := tokenWhitespace.Split(rest, 2)
  139. if len(parts) < 2 {
  140. return nil, fmt.Errorf(key + " must have two arguments")
  141. }
  142. return newKeyValueNode(parts[0], parts[1]), nil
  143. }
  144. var rootNode *Node
  145. var prevNode *Node
  146. for _, word := range words {
  147. if !strings.Contains(word, "=") {
  148. return nil, fmt.Errorf("Syntax error - can't find = in %q. Must be of the form: name=value", word)
  149. }
  150. parts := strings.SplitN(word, "=", 2)
  151. node := newKeyValueNode(parts[0], parts[1])
  152. rootNode, prevNode = appendKeyValueNode(node, rootNode, prevNode)
  153. }
  154. return rootNode, nil
  155. }
  156. func newKeyValueNode(key, value string) *Node {
  157. return &Node{
  158. Value: key,
  159. Next: &Node{Value: value},
  160. }
  161. }
  162. func appendKeyValueNode(node, rootNode, prevNode *Node) (*Node, *Node) {
  163. if rootNode == nil {
  164. rootNode = node
  165. }
  166. if prevNode != nil {
  167. prevNode.Next = node
  168. }
  169. prevNode = node.Next
  170. return rootNode, prevNode
  171. }
  172. func parseEnv(rest string, d *Directive) (*Node, map[string]bool, error) {
  173. node, err := parseNameVal(rest, "ENV", d)
  174. return node, nil, err
  175. }
  176. func parseLabel(rest string, d *Directive) (*Node, map[string]bool, error) {
  177. node, err := parseNameVal(rest, commandLabel, d)
  178. return node, nil, err
  179. }
  180. // parses a statement containing one or more keyword definition(s) and/or
  181. // value assignments, like `name1 name2= name3="" name4=value`.
  182. // Note that this is a stricter format than the old format of assignment,
  183. // allowed by parseNameVal(), in a way that this only allows assignment of the
  184. // form `keyword=[<value>]` like `name2=`, `name3=""`, and `name4=value` above.
  185. // In addition, a keyword definition alone is of the form `keyword` like `name1`
  186. // above. And the assignments `name2=` and `name3=""` are equivalent and
  187. // assign an empty value to the respective keywords.
  188. func parseNameOrNameVal(rest string, d *Directive) (*Node, map[string]bool, error) {
  189. words := parseWords(rest, d)
  190. if len(words) == 0 {
  191. return nil, nil, nil
  192. }
  193. var (
  194. rootnode *Node
  195. prevNode *Node
  196. )
  197. for i, word := range words {
  198. node := &Node{}
  199. node.Value = word
  200. if i == 0 {
  201. rootnode = node
  202. } else {
  203. prevNode.Next = node
  204. }
  205. prevNode = node
  206. }
  207. return rootnode, nil, nil
  208. }
  209. // parses a whitespace-delimited set of arguments. The result is effectively a
  210. // linked list of string arguments.
  211. func parseStringsWhitespaceDelimited(rest string, d *Directive) (*Node, map[string]bool, error) {
  212. if rest == "" {
  213. return nil, nil, nil
  214. }
  215. node := &Node{}
  216. rootnode := node
  217. prevnode := node
  218. for _, str := range tokenWhitespace.Split(rest, -1) { // use regexp
  219. prevnode = node
  220. node.Value = str
  221. node.Next = &Node{}
  222. node = node.Next
  223. }
  224. // XXX to get around regexp.Split *always* providing an empty string at the
  225. // end due to how our loop is constructed, nil out the last node in the
  226. // chain.
  227. prevnode.Next = nil
  228. return rootnode, nil, nil
  229. }
  230. // parseString just wraps the string in quotes and returns a working node.
  231. func parseString(rest string, d *Directive) (*Node, map[string]bool, error) {
  232. if rest == "" {
  233. return nil, nil, nil
  234. }
  235. n := &Node{}
  236. n.Value = rest
  237. return n, nil, nil
  238. }
  239. // parseJSON converts JSON arrays to an AST.
  240. func parseJSON(rest string, d *Directive) (*Node, map[string]bool, error) {
  241. rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
  242. if !strings.HasPrefix(rest, "[") {
  243. return nil, nil, fmt.Errorf(`Error parsing "%s" as a JSON array`, rest)
  244. }
  245. var myJSON []interface{}
  246. if err := json.NewDecoder(strings.NewReader(rest)).Decode(&myJSON); err != nil {
  247. return nil, nil, err
  248. }
  249. var top, prev *Node
  250. for _, str := range myJSON {
  251. s, ok := str.(string)
  252. if !ok {
  253. return nil, nil, errDockerfileNotStringArray
  254. }
  255. node := &Node{Value: s}
  256. if prev == nil {
  257. top = node
  258. } else {
  259. prev.Next = node
  260. }
  261. prev = node
  262. }
  263. return top, map[string]bool{"json": true}, nil
  264. }
  265. // parseMaybeJSON determines if the argument appears to be a JSON array. If
  266. // so, passes to parseJSON; if not, quotes the result and returns a single
  267. // node.
  268. func parseMaybeJSON(rest string, d *Directive) (*Node, map[string]bool, error) {
  269. if rest == "" {
  270. return nil, nil, nil
  271. }
  272. node, attrs, err := parseJSON(rest, d)
  273. if err == nil {
  274. return node, attrs, nil
  275. }
  276. if err == errDockerfileNotStringArray {
  277. return nil, nil, err
  278. }
  279. node = &Node{}
  280. node.Value = rest
  281. return node, nil, nil
  282. }
  283. // parseMaybeJSONToList determines if the argument appears to be a JSON array. If
  284. // so, passes to parseJSON; if not, attempts to parse it as a whitespace
  285. // delimited string.
  286. func parseMaybeJSONToList(rest string, d *Directive) (*Node, map[string]bool, error) {
  287. node, attrs, err := parseJSON(rest, d)
  288. if err == nil {
  289. return node, attrs, nil
  290. }
  291. if err == errDockerfileNotStringArray {
  292. return nil, nil, err
  293. }
  294. return parseStringsWhitespaceDelimited(rest, d)
  295. }
  296. // The HEALTHCHECK command is like parseMaybeJSON, but has an extra type argument.
  297. func parseHealthConfig(rest string, d *Directive) (*Node, map[string]bool, error) {
  298. // Find end of first argument
  299. var sep int
  300. for ; sep < len(rest); sep++ {
  301. if unicode.IsSpace(rune(rest[sep])) {
  302. break
  303. }
  304. }
  305. next := sep
  306. for ; next < len(rest); next++ {
  307. if !unicode.IsSpace(rune(rest[next])) {
  308. break
  309. }
  310. }
  311. if sep == 0 {
  312. return nil, nil, nil
  313. }
  314. typ := rest[:sep]
  315. cmd, attrs, err := parseMaybeJSON(rest[next:], d)
  316. if err != nil {
  317. return nil, nil, err
  318. }
  319. return &Node{Value: typ, Next: cmd}, attrs, err
  320. }