line_parsers.go 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361
  1. package parser
  2. // line parsers are dispatch calls that parse a single unit of text into a
  3. // Node object which contains the whole statement. Dockerfiles have varied
  4. // (but not usually unique, see ONBUILD for a unique example) parsing rules
  5. // per-command, and these unify the processing in a way that makes it
  6. // manageable.
  7. import (
  8. "encoding/json"
  9. "errors"
  10. "fmt"
  11. "strings"
  12. "unicode"
  13. "unicode/utf8"
  14. )
  15. var (
  16. errDockerfileNotStringArray = errors.New("When using JSON array syntax, arrays must be comprised of strings only.")
  17. )
  18. // ignore the current argument. This will still leave a command parsed, but
  19. // will not incorporate the arguments into the ast.
  20. func parseIgnore(rest string, d *Directive) (*Node, map[string]bool, error) {
  21. return &Node{}, nil, nil
  22. }
  23. // used for onbuild. Could potentially be used for anything that represents a
  24. // statement with sub-statements.
  25. //
  26. // ONBUILD RUN foo bar -> (onbuild (run foo bar))
  27. //
  28. func parseSubCommand(rest string, d *Directive) (*Node, map[string]bool, error) {
  29. if rest == "" {
  30. return nil, nil, nil
  31. }
  32. _, child, err := ParseLine(rest, d)
  33. if err != nil {
  34. return nil, nil, err
  35. }
  36. return &Node{Children: []*Node{child}}, nil, nil
  37. }
  38. // helper to parse words (i.e space delimited or quoted strings) in a statement.
  39. // The quotes are preserved as part of this function and they are stripped later
  40. // as part of processWords().
  41. func parseWords(rest string, d *Directive) []string {
  42. const (
  43. inSpaces = iota // looking for start of a word
  44. inWord
  45. inQuote
  46. )
  47. words := []string{}
  48. phase := inSpaces
  49. word := ""
  50. quote := '\000'
  51. blankOK := false
  52. var ch rune
  53. var chWidth int
  54. for pos := 0; pos <= len(rest); pos += chWidth {
  55. if pos != len(rest) {
  56. ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
  57. }
  58. if phase == inSpaces { // Looking for start of word
  59. if pos == len(rest) { // end of input
  60. break
  61. }
  62. if unicode.IsSpace(ch) { // skip spaces
  63. continue
  64. }
  65. phase = inWord // found it, fall through
  66. }
  67. if (phase == inWord || phase == inQuote) && (pos == len(rest)) {
  68. if blankOK || len(word) > 0 {
  69. words = append(words, word)
  70. }
  71. break
  72. }
  73. if phase == inWord {
  74. if unicode.IsSpace(ch) {
  75. phase = inSpaces
  76. if blankOK || len(word) > 0 {
  77. words = append(words, word)
  78. }
  79. word = ""
  80. blankOK = false
  81. continue
  82. }
  83. if ch == '\'' || ch == '"' {
  84. quote = ch
  85. blankOK = true
  86. phase = inQuote
  87. }
  88. if ch == d.EscapeToken {
  89. if pos+chWidth == len(rest) {
  90. continue // just skip an escape token at end of line
  91. }
  92. // If we're not quoted and we see an escape token, then always just
  93. // add the escape token plus the char to the word, even if the char
  94. // is a quote.
  95. word += string(ch)
  96. pos += chWidth
  97. ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
  98. }
  99. word += string(ch)
  100. continue
  101. }
  102. if phase == inQuote {
  103. if ch == quote {
  104. phase = inWord
  105. }
  106. // The escape token is special except for ' quotes - can't escape anything for '
  107. if ch == d.EscapeToken && quote != '\'' {
  108. if pos+chWidth == len(rest) {
  109. phase = inWord
  110. continue // just skip the escape token at end
  111. }
  112. pos += chWidth
  113. word += string(ch)
  114. ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
  115. }
  116. word += string(ch)
  117. }
  118. }
  119. return words
  120. }
  121. // parse environment like statements. Note that this does *not* handle
  122. // variable interpolation, which will be handled in the evaluator.
  123. func parseNameVal(rest string, key string, d *Directive) (*Node, map[string]bool, error) {
  124. // This is kind of tricky because we need to support the old
  125. // variant: KEY name value
  126. // as well as the new one: KEY name=value ...
  127. // The trigger to know which one is being used will be whether we hit
  128. // a space or = first. space ==> old, "=" ==> new
  129. words := parseWords(rest, d)
  130. if len(words) == 0 {
  131. return nil, nil, nil
  132. }
  133. var rootnode *Node
  134. // Old format (KEY name value)
  135. if !strings.Contains(words[0], "=") {
  136. node := &Node{}
  137. rootnode = node
  138. strs := tokenWhitespace.Split(rest, 2)
  139. if len(strs) < 2 {
  140. return nil, nil, fmt.Errorf(key + " must have two arguments")
  141. }
  142. node.Value = strs[0]
  143. node.Next = &Node{}
  144. node.Next.Value = strs[1]
  145. } else {
  146. var prevNode *Node
  147. for i, word := range words {
  148. if !strings.Contains(word, "=") {
  149. return nil, nil, fmt.Errorf("Syntax error - can't find = in %q. Must be of the form: name=value", word)
  150. }
  151. parts := strings.SplitN(word, "=", 2)
  152. name := &Node{}
  153. value := &Node{}
  154. name.Next = value
  155. name.Value = parts[0]
  156. value.Value = parts[1]
  157. if i == 0 {
  158. rootnode = name
  159. } else {
  160. prevNode.Next = name
  161. }
  162. prevNode = value
  163. }
  164. }
  165. return rootnode, nil, nil
  166. }
  167. func parseEnv(rest string, d *Directive) (*Node, map[string]bool, error) {
  168. return parseNameVal(rest, "ENV", d)
  169. }
  170. func parseLabel(rest string, d *Directive) (*Node, map[string]bool, error) {
  171. return parseNameVal(rest, "LABEL", d)
  172. }
  173. // parses a statement containing one or more keyword definition(s) and/or
  174. // value assignments, like `name1 name2= name3="" name4=value`.
  175. // Note that this is a stricter format than the old format of assignment,
  176. // allowed by parseNameVal(), in a way that this only allows assignment of the
  177. // form `keyword=[<value>]` like `name2=`, `name3=""`, and `name4=value` above.
  178. // In addition, a keyword definition alone is of the form `keyword` like `name1`
  179. // above. And the assignments `name2=` and `name3=""` are equivalent and
  180. // assign an empty value to the respective keywords.
  181. func parseNameOrNameVal(rest string, d *Directive) (*Node, map[string]bool, error) {
  182. words := parseWords(rest, d)
  183. if len(words) == 0 {
  184. return nil, nil, nil
  185. }
  186. var (
  187. rootnode *Node
  188. prevNode *Node
  189. )
  190. for i, word := range words {
  191. node := &Node{}
  192. node.Value = word
  193. if i == 0 {
  194. rootnode = node
  195. } else {
  196. prevNode.Next = node
  197. }
  198. prevNode = node
  199. }
  200. return rootnode, nil, nil
  201. }
  202. // parses a whitespace-delimited set of arguments. The result is effectively a
  203. // linked list of string arguments.
  204. func parseStringsWhitespaceDelimited(rest string, d *Directive) (*Node, map[string]bool, error) {
  205. if rest == "" {
  206. return nil, nil, nil
  207. }
  208. node := &Node{}
  209. rootnode := node
  210. prevnode := node
  211. for _, str := range tokenWhitespace.Split(rest, -1) { // use regexp
  212. prevnode = node
  213. node.Value = str
  214. node.Next = &Node{}
  215. node = node.Next
  216. }
  217. // XXX to get around regexp.Split *always* providing an empty string at the
  218. // end due to how our loop is constructed, nil out the last node in the
  219. // chain.
  220. prevnode.Next = nil
  221. return rootnode, nil, nil
  222. }
  223. // parsestring just wraps the string in quotes and returns a working node.
  224. func parseString(rest string, d *Directive) (*Node, map[string]bool, error) {
  225. if rest == "" {
  226. return nil, nil, nil
  227. }
  228. n := &Node{}
  229. n.Value = rest
  230. return n, nil, nil
  231. }
  232. // parseJSON converts JSON arrays to an AST.
  233. func parseJSON(rest string, d *Directive) (*Node, map[string]bool, error) {
  234. rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
  235. if !strings.HasPrefix(rest, "[") {
  236. return nil, nil, fmt.Errorf(`Error parsing "%s" as a JSON array`, rest)
  237. }
  238. var myJSON []interface{}
  239. if err := json.NewDecoder(strings.NewReader(rest)).Decode(&myJSON); err != nil {
  240. return nil, nil, err
  241. }
  242. var top, prev *Node
  243. for _, str := range myJSON {
  244. s, ok := str.(string)
  245. if !ok {
  246. return nil, nil, errDockerfileNotStringArray
  247. }
  248. node := &Node{Value: s}
  249. if prev == nil {
  250. top = node
  251. } else {
  252. prev.Next = node
  253. }
  254. prev = node
  255. }
  256. return top, map[string]bool{"json": true}, nil
  257. }
  258. // parseMaybeJSON determines if the argument appears to be a JSON array. If
  259. // so, passes to parseJSON; if not, quotes the result and returns a single
  260. // node.
  261. func parseMaybeJSON(rest string, d *Directive) (*Node, map[string]bool, error) {
  262. if rest == "" {
  263. return nil, nil, nil
  264. }
  265. node, attrs, err := parseJSON(rest, d)
  266. if err == nil {
  267. return node, attrs, nil
  268. }
  269. if err == errDockerfileNotStringArray {
  270. return nil, nil, err
  271. }
  272. node = &Node{}
  273. node.Value = rest
  274. return node, nil, nil
  275. }
  276. // parseMaybeJSONToList determines if the argument appears to be a JSON array. If
  277. // so, passes to parseJSON; if not, attempts to parse it as a whitespace
  278. // delimited string.
  279. func parseMaybeJSONToList(rest string, d *Directive) (*Node, map[string]bool, error) {
  280. node, attrs, err := parseJSON(rest, d)
  281. if err == nil {
  282. return node, attrs, nil
  283. }
  284. if err == errDockerfileNotStringArray {
  285. return nil, nil, err
  286. }
  287. return parseStringsWhitespaceDelimited(rest, d)
  288. }
  289. // The HEALTHCHECK command is like parseMaybeJSON, but has an extra type argument.
  290. func parseHealthConfig(rest string, d *Directive) (*Node, map[string]bool, error) {
  291. // Find end of first argument
  292. var sep int
  293. for ; sep < len(rest); sep++ {
  294. if unicode.IsSpace(rune(rest[sep])) {
  295. break
  296. }
  297. }
  298. next := sep
  299. for ; next < len(rest); next++ {
  300. if !unicode.IsSpace(rune(rest[next])) {
  301. break
  302. }
  303. }
  304. if sep == 0 {
  305. return nil, nil, nil
  306. }
  307. typ := rest[:sep]
  308. cmd, attrs, err := parseMaybeJSON(rest[next:], d)
  309. if err != nil {
  310. return nil, nil, err
  311. }
  312. return &Node{Value: typ, Next: cmd}, attrs, err
  313. }