line_parsers.go 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331
  1. package parser
  2. // line parsers are dispatch calls that parse a single unit of text into a
  3. // Node object which contains the whole statement. Dockerfiles have varied
  4. // (but not usually unique, see ONBUILD for a unique example) parsing rules
  5. // per-command, and these unify the processing in a way that makes it
  6. // manageable.
  7. import (
  8. "encoding/json"
  9. "errors"
  10. "fmt"
  11. "strings"
  12. "unicode"
  13. )
  14. var (
  15. errDockerfileNotStringArray = errors.New("When using JSON array syntax, arrays must be comprised of strings only.")
  16. )
  17. // ignore the current argument. This will still leave a command parsed, but
  18. // will not incorporate the arguments into the ast.
  19. func parseIgnore(rest string) (*Node, map[string]bool, error) {
  20. return &Node{}, nil, nil
  21. }
  22. // used for onbuild. Could potentially be used for anything that represents a
  23. // statement with sub-statements.
  24. //
  25. // ONBUILD RUN foo bar -> (onbuild (run foo bar))
  26. //
  27. func parseSubCommand(rest string) (*Node, map[string]bool, error) {
  28. if rest == "" {
  29. return nil, nil, nil
  30. }
  31. _, child, err := parseLine(rest)
  32. if err != nil {
  33. return nil, nil, err
  34. }
  35. return &Node{Children: []*Node{child}}, nil, nil
  36. }
  37. // helper to parse words (i.e space delimited or quoted strings) in a statement.
  38. // The quotes are preserved as part of this function and they are stripped later
  39. // as part of processWords().
  40. func parseWords(rest string) []string {
  41. const (
  42. inSpaces = iota // looking for start of a word
  43. inWord
  44. inQuote
  45. )
  46. words := []string{}
  47. phase := inSpaces
  48. word := ""
  49. quote := '\000'
  50. blankOK := false
  51. var ch rune
  52. for pos := 0; pos <= len(rest); pos++ {
  53. if pos != len(rest) {
  54. ch = rune(rest[pos])
  55. }
  56. if phase == inSpaces { // Looking for start of word
  57. if pos == len(rest) { // end of input
  58. break
  59. }
  60. if unicode.IsSpace(ch) { // skip spaces
  61. continue
  62. }
  63. phase = inWord // found it, fall through
  64. }
  65. if (phase == inWord || phase == inQuote) && (pos == len(rest)) {
  66. if blankOK || len(word) > 0 {
  67. words = append(words, word)
  68. }
  69. break
  70. }
  71. if phase == inWord {
  72. if unicode.IsSpace(ch) {
  73. phase = inSpaces
  74. if blankOK || len(word) > 0 {
  75. words = append(words, word)
  76. }
  77. word = ""
  78. blankOK = false
  79. continue
  80. }
  81. if ch == '\'' || ch == '"' {
  82. quote = ch
  83. blankOK = true
  84. phase = inQuote
  85. }
  86. if ch == '\\' {
  87. if pos+1 == len(rest) {
  88. continue // just skip \ at end
  89. }
  90. // If we're not quoted and we see a \, then always just
  91. // add \ plus the char to the word, even if the char
  92. // is a quote.
  93. word += string(ch)
  94. pos++
  95. ch = rune(rest[pos])
  96. }
  97. word += string(ch)
  98. continue
  99. }
  100. if phase == inQuote {
  101. if ch == quote {
  102. phase = inWord
  103. }
  104. // \ is special except for ' quotes - can't escape anything for '
  105. if ch == '\\' && quote != '\'' {
  106. if pos+1 == len(rest) {
  107. phase = inWord
  108. continue // just skip \ at end
  109. }
  110. pos++
  111. nextCh := rune(rest[pos])
  112. word += string(ch)
  113. ch = nextCh
  114. }
  115. word += string(ch)
  116. }
  117. }
  118. return words
  119. }
  120. // parse environment like statements. Note that this does *not* handle
  121. // variable interpolation, which will be handled in the evaluator.
  122. func parseNameVal(rest string, key string) (*Node, map[string]bool, error) {
  123. // This is kind of tricky because we need to support the old
  124. // variant: KEY name value
  125. // as well as the new one: KEY name=value ...
  126. // The trigger to know which one is being used will be whether we hit
  127. // a space or = first. space ==> old, "=" ==> new
  128. words := parseWords(rest)
  129. if len(words) == 0 {
  130. return nil, nil, nil
  131. }
  132. var rootnode *Node
  133. // Old format (KEY name value)
  134. if !strings.Contains(words[0], "=") {
  135. node := &Node{}
  136. rootnode = node
  137. strs := tokenWhitespace.Split(rest, 2)
  138. if len(strs) < 2 {
  139. return nil, nil, fmt.Errorf(key + " must have two arguments")
  140. }
  141. node.Value = strs[0]
  142. node.Next = &Node{}
  143. node.Next.Value = strs[1]
  144. } else {
  145. var prevNode *Node
  146. for i, word := range words {
  147. if !strings.Contains(word, "=") {
  148. return nil, nil, fmt.Errorf("Syntax error - can't find = in %q. Must be of the form: name=value", word)
  149. }
  150. parts := strings.SplitN(word, "=", 2)
  151. name := &Node{}
  152. value := &Node{}
  153. name.Next = value
  154. name.Value = parts[0]
  155. value.Value = parts[1]
  156. if i == 0 {
  157. rootnode = name
  158. } else {
  159. prevNode.Next = name
  160. }
  161. prevNode = value
  162. }
  163. }
  164. return rootnode, nil, nil
  165. }
  166. func parseEnv(rest string) (*Node, map[string]bool, error) {
  167. return parseNameVal(rest, "ENV")
  168. }
  169. func parseLabel(rest string) (*Node, map[string]bool, error) {
  170. return parseNameVal(rest, "LABEL")
  171. }
  172. // parses a statement containing one or more keyword definition(s) and/or
  173. // value assignments, like `name1 name2= name3="" name4=value`.
  174. // Note that this is a stricter format than the old format of assignment,
  175. // allowed by parseNameVal(), in a way that this only allows assignment of the
  176. // form `keyword=[<value>]` like `name2=`, `name3=""`, and `name4=value` above.
  177. // In addition, a keyword definition alone is of the form `keyword` like `name1`
  178. // above. And the assignments `name2=` and `name3=""` are equivalent and
  179. // assign an empty value to the respective keywords.
  180. func parseNameOrNameVal(rest string) (*Node, map[string]bool, error) {
  181. words := parseWords(rest)
  182. if len(words) == 0 {
  183. return nil, nil, nil
  184. }
  185. var (
  186. rootnode *Node
  187. prevNode *Node
  188. )
  189. for i, word := range words {
  190. node := &Node{}
  191. node.Value = word
  192. if i == 0 {
  193. rootnode = node
  194. } else {
  195. prevNode.Next = node
  196. }
  197. prevNode = node
  198. }
  199. return rootnode, nil, nil
  200. }
  201. // parses a whitespace-delimited set of arguments. The result is effectively a
  202. // linked list of string arguments.
  203. func parseStringsWhitespaceDelimited(rest string) (*Node, map[string]bool, error) {
  204. if rest == "" {
  205. return nil, nil, nil
  206. }
  207. node := &Node{}
  208. rootnode := node
  209. prevnode := node
  210. for _, str := range tokenWhitespace.Split(rest, -1) { // use regexp
  211. prevnode = node
  212. node.Value = str
  213. node.Next = &Node{}
  214. node = node.Next
  215. }
  216. // XXX to get around regexp.Split *always* providing an empty string at the
  217. // end due to how our loop is constructed, nil out the last node in the
  218. // chain.
  219. prevnode.Next = nil
  220. return rootnode, nil, nil
  221. }
  222. // parsestring just wraps the string in quotes and returns a working node.
  223. func parseString(rest string) (*Node, map[string]bool, error) {
  224. if rest == "" {
  225. return nil, nil, nil
  226. }
  227. n := &Node{}
  228. n.Value = rest
  229. return n, nil, nil
  230. }
  231. // parseJSON converts JSON arrays to an AST.
  232. func parseJSON(rest string) (*Node, map[string]bool, error) {
  233. rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
  234. if !strings.HasPrefix(rest, "[") {
  235. return nil, nil, fmt.Errorf(`Error parsing "%s" as a JSON array`, rest)
  236. }
  237. var myJSON []interface{}
  238. if err := json.NewDecoder(strings.NewReader(rest)).Decode(&myJSON); err != nil {
  239. return nil, nil, err
  240. }
  241. var top, prev *Node
  242. for _, str := range myJSON {
  243. s, ok := str.(string)
  244. if !ok {
  245. return nil, nil, errDockerfileNotStringArray
  246. }
  247. node := &Node{Value: s}
  248. if prev == nil {
  249. top = node
  250. } else {
  251. prev.Next = node
  252. }
  253. prev = node
  254. }
  255. return top, map[string]bool{"json": true}, nil
  256. }
  257. // parseMaybeJSON determines if the argument appears to be a JSON array. If
  258. // so, passes to parseJSON; if not, quotes the result and returns a single
  259. // node.
  260. func parseMaybeJSON(rest string) (*Node, map[string]bool, error) {
  261. if rest == "" {
  262. return nil, nil, nil
  263. }
  264. node, attrs, err := parseJSON(rest)
  265. if err == nil {
  266. return node, attrs, nil
  267. }
  268. if err == errDockerfileNotStringArray {
  269. return nil, nil, err
  270. }
  271. node = &Node{}
  272. node.Value = rest
  273. return node, nil, nil
  274. }
  275. // parseMaybeJSONToList determines if the argument appears to be a JSON array. If
  276. // so, passes to parseJSON; if not, attempts to parse it as a whitespace
  277. // delimited string.
  278. func parseMaybeJSONToList(rest string) (*Node, map[string]bool, error) {
  279. node, attrs, err := parseJSON(rest)
  280. if err == nil {
  281. return node, attrs, nil
  282. }
  283. if err == errDockerfileNotStringArray {
  284. return nil, nil, err
  285. }
  286. return parseStringsWhitespaceDelimited(rest)
  287. }