line_parsers.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399
  1. package parser
  2. // line parsers are dispatch calls that parse a single unit of text into a
  3. // Node object which contains the whole statement. Dockerfiles have varied
  4. // (but not usually unique, see ONBUILD for a unique example) parsing rules
  5. // per-command, and these unify the processing in a way that makes it
  6. // manageable.
  7. import (
  8. "encoding/json"
  9. "errors"
  10. "fmt"
  11. "sort"
  12. "strings"
  13. "unicode"
  14. "unicode/utf8"
  15. "github.com/docker/docker/builder/dockerfile/command"
  16. )
  17. var (
  18. errDockerfileNotStringArray = errors.New("when using JSON array syntax, arrays must be comprised of strings only")
  19. )
  20. const (
  21. commandLabel = "LABEL"
  22. )
  23. // ignore the current argument. This will still leave a command parsed, but
  24. // will not incorporate the arguments into the ast.
  25. func parseIgnore(rest string, d *Directive) (*Node, map[string]bool, error) {
  26. return &Node{}, nil, nil
  27. }
  28. // used for onbuild. Could potentially be used for anything that represents a
  29. // statement with sub-statements.
  30. //
  31. // ONBUILD RUN foo bar -> (onbuild (run foo bar))
  32. //
  33. func parseSubCommand(rest string, d *Directive) (*Node, map[string]bool, error) {
  34. if rest == "" {
  35. return nil, nil, nil
  36. }
  37. child, err := newNodeFromLine(rest, d)
  38. if err != nil {
  39. return nil, nil, err
  40. }
  41. return &Node{Children: []*Node{child}}, nil, nil
  42. }
  43. // helper to parse words (i.e space delimited or quoted strings) in a statement.
  44. // The quotes are preserved as part of this function and they are stripped later
  45. // as part of processWords().
  46. func parseWords(rest string, d *Directive) []string {
  47. const (
  48. inSpaces = iota // looking for start of a word
  49. inWord
  50. inQuote
  51. )
  52. words := []string{}
  53. phase := inSpaces
  54. word := ""
  55. quote := '\000'
  56. blankOK := false
  57. var ch rune
  58. var chWidth int
  59. for pos := 0; pos <= len(rest); pos += chWidth {
  60. if pos != len(rest) {
  61. ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
  62. }
  63. if phase == inSpaces { // Looking for start of word
  64. if pos == len(rest) { // end of input
  65. break
  66. }
  67. if unicode.IsSpace(ch) { // skip spaces
  68. continue
  69. }
  70. phase = inWord // found it, fall through
  71. }
  72. if (phase == inWord || phase == inQuote) && (pos == len(rest)) {
  73. if blankOK || len(word) > 0 {
  74. words = append(words, word)
  75. }
  76. break
  77. }
  78. if phase == inWord {
  79. if unicode.IsSpace(ch) {
  80. phase = inSpaces
  81. if blankOK || len(word) > 0 {
  82. words = append(words, word)
  83. }
  84. word = ""
  85. blankOK = false
  86. continue
  87. }
  88. if ch == '\'' || ch == '"' {
  89. quote = ch
  90. blankOK = true
  91. phase = inQuote
  92. }
  93. if ch == d.escapeToken {
  94. if pos+chWidth == len(rest) {
  95. continue // just skip an escape token at end of line
  96. }
  97. // If we're not quoted and we see an escape token, then always just
  98. // add the escape token plus the char to the word, even if the char
  99. // is a quote.
  100. word += string(ch)
  101. pos += chWidth
  102. ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
  103. }
  104. word += string(ch)
  105. continue
  106. }
  107. if phase == inQuote {
  108. if ch == quote {
  109. phase = inWord
  110. }
  111. // The escape token is special except for ' quotes - can't escape anything for '
  112. if ch == d.escapeToken && quote != '\'' {
  113. if pos+chWidth == len(rest) {
  114. phase = inWord
  115. continue // just skip the escape token at end
  116. }
  117. pos += chWidth
  118. word += string(ch)
  119. ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
  120. }
  121. word += string(ch)
  122. }
  123. }
  124. return words
  125. }
  126. // parse environment like statements. Note that this does *not* handle
  127. // variable interpolation, which will be handled in the evaluator.
  128. func parseNameVal(rest string, key string, d *Directive) (*Node, error) {
  129. // This is kind of tricky because we need to support the old
  130. // variant: KEY name value
  131. // as well as the new one: KEY name=value ...
  132. // The trigger to know which one is being used will be whether we hit
  133. // a space or = first. space ==> old, "=" ==> new
  134. words := parseWords(rest, d)
  135. if len(words) == 0 {
  136. return nil, nil
  137. }
  138. // Old format (KEY name value)
  139. if !strings.Contains(words[0], "=") {
  140. parts := tokenWhitespace.Split(rest, 2)
  141. if len(parts) < 2 {
  142. return nil, fmt.Errorf(key + " must have two arguments")
  143. }
  144. return newKeyValueNode(parts[0], parts[1]), nil
  145. }
  146. var rootNode *Node
  147. var prevNode *Node
  148. for _, word := range words {
  149. if !strings.Contains(word, "=") {
  150. return nil, fmt.Errorf("Syntax error - can't find = in %q. Must be of the form: name=value", word)
  151. }
  152. parts := strings.SplitN(word, "=", 2)
  153. node := newKeyValueNode(parts[0], parts[1])
  154. rootNode, prevNode = appendKeyValueNode(node, rootNode, prevNode)
  155. }
  156. return rootNode, nil
  157. }
  158. func newKeyValueNode(key, value string) *Node {
  159. return &Node{
  160. Value: key,
  161. Next: &Node{Value: value},
  162. }
  163. }
  164. func appendKeyValueNode(node, rootNode, prevNode *Node) (*Node, *Node) {
  165. if rootNode == nil {
  166. rootNode = node
  167. }
  168. if prevNode != nil {
  169. prevNode.Next = node
  170. }
  171. prevNode = node.Next
  172. return rootNode, prevNode
  173. }
  174. func parseEnv(rest string, d *Directive) (*Node, map[string]bool, error) {
  175. node, err := parseNameVal(rest, "ENV", d)
  176. return node, nil, err
  177. }
  178. func parseLabel(rest string, d *Directive) (*Node, map[string]bool, error) {
  179. node, err := parseNameVal(rest, commandLabel, d)
  180. return node, nil, err
  181. }
  182. // NodeFromLabels returns a Node for the injected labels
  183. func NodeFromLabels(labels map[string]string) *Node {
  184. keys := []string{}
  185. for key := range labels {
  186. keys = append(keys, key)
  187. }
  188. // Sort the label to have a repeatable order
  189. sort.Strings(keys)
  190. labelPairs := []string{}
  191. var rootNode *Node
  192. var prevNode *Node
  193. for _, key := range keys {
  194. value := labels[key]
  195. labelPairs = append(labelPairs, fmt.Sprintf("%q='%s'", key, value))
  196. // Value must be single quoted to prevent env variable expansion
  197. // See https://github.com/docker/docker/issues/26027
  198. node := newKeyValueNode(key, "'"+value+"'")
  199. rootNode, prevNode = appendKeyValueNode(node, rootNode, prevNode)
  200. }
  201. return &Node{
  202. Value: command.Label,
  203. Original: commandLabel + " " + strings.Join(labelPairs, " "),
  204. Next: rootNode,
  205. }
  206. }
  207. // parses a statement containing one or more keyword definition(s) and/or
  208. // value assignments, like `name1 name2= name3="" name4=value`.
  209. // Note that this is a stricter format than the old format of assignment,
  210. // allowed by parseNameVal(), in a way that this only allows assignment of the
  211. // form `keyword=[<value>]` like `name2=`, `name3=""`, and `name4=value` above.
  212. // In addition, a keyword definition alone is of the form `keyword` like `name1`
  213. // above. And the assignments `name2=` and `name3=""` are equivalent and
  214. // assign an empty value to the respective keywords.
  215. func parseNameOrNameVal(rest string, d *Directive) (*Node, map[string]bool, error) {
  216. words := parseWords(rest, d)
  217. if len(words) == 0 {
  218. return nil, nil, nil
  219. }
  220. var (
  221. rootnode *Node
  222. prevNode *Node
  223. )
  224. for i, word := range words {
  225. node := &Node{}
  226. node.Value = word
  227. if i == 0 {
  228. rootnode = node
  229. } else {
  230. prevNode.Next = node
  231. }
  232. prevNode = node
  233. }
  234. return rootnode, nil, nil
  235. }
  236. // parses a whitespace-delimited set of arguments. The result is effectively a
  237. // linked list of string arguments.
  238. func parseStringsWhitespaceDelimited(rest string, d *Directive) (*Node, map[string]bool, error) {
  239. if rest == "" {
  240. return nil, nil, nil
  241. }
  242. node := &Node{}
  243. rootnode := node
  244. prevnode := node
  245. for _, str := range tokenWhitespace.Split(rest, -1) { // use regexp
  246. prevnode = node
  247. node.Value = str
  248. node.Next = &Node{}
  249. node = node.Next
  250. }
  251. // XXX to get around regexp.Split *always* providing an empty string at the
  252. // end due to how our loop is constructed, nil out the last node in the
  253. // chain.
  254. prevnode.Next = nil
  255. return rootnode, nil, nil
  256. }
  257. // parseString just wraps the string in quotes and returns a working node.
  258. func parseString(rest string, d *Directive) (*Node, map[string]bool, error) {
  259. if rest == "" {
  260. return nil, nil, nil
  261. }
  262. n := &Node{}
  263. n.Value = rest
  264. return n, nil, nil
  265. }
  266. // parseJSON converts JSON arrays to an AST.
  267. func parseJSON(rest string, d *Directive) (*Node, map[string]bool, error) {
  268. rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
  269. if !strings.HasPrefix(rest, "[") {
  270. return nil, nil, fmt.Errorf(`Error parsing "%s" as a JSON array`, rest)
  271. }
  272. var myJSON []interface{}
  273. if err := json.NewDecoder(strings.NewReader(rest)).Decode(&myJSON); err != nil {
  274. return nil, nil, err
  275. }
  276. var top, prev *Node
  277. for _, str := range myJSON {
  278. s, ok := str.(string)
  279. if !ok {
  280. return nil, nil, errDockerfileNotStringArray
  281. }
  282. node := &Node{Value: s}
  283. if prev == nil {
  284. top = node
  285. } else {
  286. prev.Next = node
  287. }
  288. prev = node
  289. }
  290. return top, map[string]bool{"json": true}, nil
  291. }
  292. // parseMaybeJSON determines if the argument appears to be a JSON array. If
  293. // so, passes to parseJSON; if not, quotes the result and returns a single
  294. // node.
  295. func parseMaybeJSON(rest string, d *Directive) (*Node, map[string]bool, error) {
  296. if rest == "" {
  297. return nil, nil, nil
  298. }
  299. node, attrs, err := parseJSON(rest, d)
  300. if err == nil {
  301. return node, attrs, nil
  302. }
  303. if err == errDockerfileNotStringArray {
  304. return nil, nil, err
  305. }
  306. node = &Node{}
  307. node.Value = rest
  308. return node, nil, nil
  309. }
  310. // parseMaybeJSONToList determines if the argument appears to be a JSON array. If
  311. // so, passes to parseJSON; if not, attempts to parse it as a whitespace
  312. // delimited string.
  313. func parseMaybeJSONToList(rest string, d *Directive) (*Node, map[string]bool, error) {
  314. node, attrs, err := parseJSON(rest, d)
  315. if err == nil {
  316. return node, attrs, nil
  317. }
  318. if err == errDockerfileNotStringArray {
  319. return nil, nil, err
  320. }
  321. return parseStringsWhitespaceDelimited(rest, d)
  322. }
  323. // The HEALTHCHECK command is like parseMaybeJSON, but has an extra type argument.
  324. func parseHealthConfig(rest string, d *Directive) (*Node, map[string]bool, error) {
  325. // Find end of first argument
  326. var sep int
  327. for ; sep < len(rest); sep++ {
  328. if unicode.IsSpace(rune(rest[sep])) {
  329. break
  330. }
  331. }
  332. next := sep
  333. for ; next < len(rest); next++ {
  334. if !unicode.IsSpace(rune(rest[next])) {
  335. break
  336. }
  337. }
  338. if sep == 0 {
  339. return nil, nil, nil
  340. }
  341. typ := rest[:sep]
  342. cmd, attrs, err := parseMaybeJSON(rest[next:], d)
  343. if err != nil {
  344. return nil, nil, err
  345. }
  346. return &Node{Value: typ, Next: cmd}, attrs, err
  347. }