shell_parser.go 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344
  1. package dockerfile
  2. import (
  3. "bytes"
  4. "strings"
  5. "text/scanner"
  6. "unicode"
  7. "github.com/pkg/errors"
  8. )
  9. // ShellLex performs shell word splitting and variable expansion.
  10. //
  11. // ShellLex takes a string and an array of env variables and
  12. // process all quotes (" and ') as well as $xxx and ${xxx} env variable
  13. // tokens. Tries to mimic bash shell process.
  14. // It doesn't support all flavors of ${xx:...} formats but new ones can
  15. // be added by adding code to the "special ${} format processing" section
  16. type ShellLex struct {
  17. escapeToken rune
  18. }
  19. // NewShellLex creates a new ShellLex which uses escapeToken to escape quotes.
  20. func NewShellLex(escapeToken rune) *ShellLex {
  21. return &ShellLex{escapeToken: escapeToken}
  22. }
  23. // ProcessWord will use the 'env' list of environment variables,
  24. // and replace any env var references in 'word'.
  25. func (s *ShellLex) ProcessWord(word string, env []string) (string, error) {
  26. word, _, err := s.process(word, env)
  27. return word, err
  28. }
  29. // ProcessWords will use the 'env' list of environment variables,
  30. // and replace any env var references in 'word' then it will also
  31. // return a slice of strings which represents the 'word'
  32. // split up based on spaces - taking into account quotes. Note that
  33. // this splitting is done **after** the env var substitutions are done.
  34. // Note, each one is trimmed to remove leading and trailing spaces (unless
  35. // they are quoted", but ProcessWord retains spaces between words.
  36. func (s *ShellLex) ProcessWords(word string, env []string) ([]string, error) {
  37. _, words, err := s.process(word, env)
  38. return words, err
  39. }
  40. func (s *ShellLex) process(word string, env []string) (string, []string, error) {
  41. sw := &shellWord{
  42. envs: env,
  43. escapeToken: s.escapeToken,
  44. }
  45. sw.scanner.Init(strings.NewReader(word))
  46. return sw.process(word)
  47. }
  48. type shellWord struct {
  49. scanner scanner.Scanner
  50. envs []string
  51. escapeToken rune
  52. }
  53. func (sw *shellWord) process(source string) (string, []string, error) {
  54. word, words, err := sw.processStopOn(scanner.EOF)
  55. if err != nil {
  56. err = errors.Wrapf(err, "failed to process %q", source)
  57. }
  58. return word, words, err
  59. }
  60. type wordsStruct struct {
  61. word string
  62. words []string
  63. inWord bool
  64. }
  65. func (w *wordsStruct) addChar(ch rune) {
  66. if unicode.IsSpace(ch) && w.inWord {
  67. if len(w.word) != 0 {
  68. w.words = append(w.words, w.word)
  69. w.word = ""
  70. w.inWord = false
  71. }
  72. } else if !unicode.IsSpace(ch) {
  73. w.addRawChar(ch)
  74. }
  75. }
  76. func (w *wordsStruct) addRawChar(ch rune) {
  77. w.word += string(ch)
  78. w.inWord = true
  79. }
  80. func (w *wordsStruct) addString(str string) {
  81. var scan scanner.Scanner
  82. scan.Init(strings.NewReader(str))
  83. for scan.Peek() != scanner.EOF {
  84. w.addChar(scan.Next())
  85. }
  86. }
  87. func (w *wordsStruct) addRawString(str string) {
  88. w.word += str
  89. w.inWord = true
  90. }
  91. func (w *wordsStruct) getWords() []string {
  92. if len(w.word) > 0 {
  93. w.words = append(w.words, w.word)
  94. // Just in case we're called again by mistake
  95. w.word = ""
  96. w.inWord = false
  97. }
  98. return w.words
  99. }
  100. // Process the word, starting at 'pos', and stop when we get to the
  101. // end of the word or the 'stopChar' character
  102. func (sw *shellWord) processStopOn(stopChar rune) (string, []string, error) {
  103. var result bytes.Buffer
  104. var words wordsStruct
  105. var charFuncMapping = map[rune]func() (string, error){
  106. '\'': sw.processSingleQuote,
  107. '"': sw.processDoubleQuote,
  108. '$': sw.processDollar,
  109. }
  110. for sw.scanner.Peek() != scanner.EOF {
  111. ch := sw.scanner.Peek()
  112. if stopChar != scanner.EOF && ch == stopChar {
  113. sw.scanner.Next()
  114. break
  115. }
  116. if fn, ok := charFuncMapping[ch]; ok {
  117. // Call special processing func for certain chars
  118. tmp, err := fn()
  119. if err != nil {
  120. return "", []string{}, err
  121. }
  122. result.WriteString(tmp)
  123. if ch == rune('$') {
  124. words.addString(tmp)
  125. } else {
  126. words.addRawString(tmp)
  127. }
  128. } else {
  129. // Not special, just add it to the result
  130. ch = sw.scanner.Next()
  131. if ch == sw.escapeToken {
  132. // '\' (default escape token, but ` allowed) escapes, except end of line
  133. ch = sw.scanner.Next()
  134. if ch == scanner.EOF {
  135. break
  136. }
  137. words.addRawChar(ch)
  138. } else {
  139. words.addChar(ch)
  140. }
  141. result.WriteRune(ch)
  142. }
  143. }
  144. return result.String(), words.getWords(), nil
  145. }
  146. func (sw *shellWord) processSingleQuote() (string, error) {
  147. // All chars between single quotes are taken as-is
  148. // Note, you can't escape '
  149. //
  150. // From the "sh" man page:
  151. // Single Quotes
  152. // Enclosing characters in single quotes preserves the literal meaning of
  153. // all the characters (except single quotes, making it impossible to put
  154. // single-quotes in a single-quoted string).
  155. var result bytes.Buffer
  156. sw.scanner.Next()
  157. for {
  158. ch := sw.scanner.Next()
  159. switch ch {
  160. case scanner.EOF:
  161. return "", errors.New("unexpected end of statement while looking for matching single-quote")
  162. case '\'':
  163. return result.String(), nil
  164. }
  165. result.WriteRune(ch)
  166. }
  167. }
  168. func (sw *shellWord) processDoubleQuote() (string, error) {
  169. // All chars up to the next " are taken as-is, even ', except any $ chars
  170. // But you can escape " with a \ (or ` if escape token set accordingly)
  171. //
  172. // From the "sh" man page:
  173. // Double Quotes
  174. // Enclosing characters within double quotes preserves the literal meaning
  175. // of all characters except dollarsign ($), backquote (`), and backslash
  176. // (\). The backslash inside double quotes is historically weird, and
  177. // serves to quote only the following characters:
  178. // $ ` " \ <newline>.
  179. // Otherwise it remains literal.
  180. var result bytes.Buffer
  181. sw.scanner.Next()
  182. for {
  183. switch sw.scanner.Peek() {
  184. case scanner.EOF:
  185. return "", errors.New("unexpected end of statement while looking for matching double-quote")
  186. case '"':
  187. sw.scanner.Next()
  188. return result.String(), nil
  189. case '$':
  190. value, err := sw.processDollar()
  191. if err != nil {
  192. return "", err
  193. }
  194. result.WriteString(value)
  195. default:
  196. ch := sw.scanner.Next()
  197. if ch == sw.escapeToken {
  198. switch sw.scanner.Peek() {
  199. case scanner.EOF:
  200. // Ignore \ at end of word
  201. continue
  202. case '"', '$', sw.escapeToken:
  203. // These chars can be escaped, all other \'s are left as-is
  204. // Note: for now don't do anything special with ` chars.
  205. // Not sure what to do with them anyway since we're not going
  206. // to execute the text in there (not now anyway).
  207. ch = sw.scanner.Next()
  208. }
  209. }
  210. result.WriteRune(ch)
  211. }
  212. }
  213. }
  214. func (sw *shellWord) processDollar() (string, error) {
  215. sw.scanner.Next()
  216. // $xxx case
  217. if sw.scanner.Peek() != '{' {
  218. name := sw.processName()
  219. if name == "" {
  220. return "$", nil
  221. }
  222. return sw.getEnv(name), nil
  223. }
  224. sw.scanner.Next()
  225. name := sw.processName()
  226. ch := sw.scanner.Peek()
  227. if ch == '}' {
  228. // Normal ${xx} case
  229. sw.scanner.Next()
  230. return sw.getEnv(name), nil
  231. }
  232. if ch == ':' {
  233. // Special ${xx:...} format processing
  234. // Yes it allows for recursive $'s in the ... spot
  235. sw.scanner.Next() // skip over :
  236. modifier := sw.scanner.Next()
  237. word, _, err := sw.processStopOn('}')
  238. if err != nil {
  239. return "", err
  240. }
  241. // Grab the current value of the variable in question so we
  242. // can use to to determine what to do based on the modifier
  243. newValue := sw.getEnv(name)
  244. switch modifier {
  245. case '+':
  246. if newValue != "" {
  247. newValue = word
  248. }
  249. return newValue, nil
  250. case '-':
  251. if newValue == "" {
  252. newValue = word
  253. }
  254. return newValue, nil
  255. default:
  256. return "", errors.Errorf("unsupported modifier (%c) in substitution", modifier)
  257. }
  258. }
  259. return "", errors.Errorf("missing ':' in substitution")
  260. }
  261. func (sw *shellWord) processName() string {
  262. // Read in a name (alphanumeric or _)
  263. // If it starts with a numeric then just return $#
  264. var name bytes.Buffer
  265. for sw.scanner.Peek() != scanner.EOF {
  266. ch := sw.scanner.Peek()
  267. if name.Len() == 0 && unicode.IsDigit(ch) {
  268. ch = sw.scanner.Next()
  269. return string(ch)
  270. }
  271. if !unicode.IsLetter(ch) && !unicode.IsDigit(ch) && ch != '_' {
  272. break
  273. }
  274. ch = sw.scanner.Next()
  275. name.WriteRune(ch)
  276. }
  277. return name.String()
  278. }
  279. func (sw *shellWord) getEnv(name string) string {
  280. for _, env := range sw.envs {
  281. i := strings.Index(env, "=")
  282. if i < 0 {
  283. if equalEnvKeys(name, env) {
  284. // Should probably never get here, but just in case treat
  285. // it like "var" and "var=" are the same
  286. return ""
  287. }
  288. continue
  289. }
  290. compareName := env[:i]
  291. if !equalEnvKeys(name, compareName) {
  292. continue
  293. }
  294. return env[i+1:]
  295. }
  296. return ""
  297. }