lex.go 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373
  1. package shell // import "github.com/docker/docker/builder/dockerfile/shell"
  2. import (
  3. "bytes"
  4. "strings"
  5. "text/scanner"
  6. "unicode"
  7. "github.com/pkg/errors"
  8. )
  9. // Lex performs shell word splitting and variable expansion.
  10. //
  11. // Lex takes a string and an array of env variables and
  12. // process all quotes (" and ') as well as $xxx and ${xxx} env variable
  13. // tokens. Tries to mimic bash shell process.
  14. // It doesn't support all flavors of ${xx:...} formats but new ones can
  15. // be added by adding code to the "special ${} format processing" section
  16. type Lex struct {
  17. escapeToken rune
  18. }
  19. // NewLex creates a new Lex which uses escapeToken to escape quotes.
  20. func NewLex(escapeToken rune) *Lex {
  21. return &Lex{escapeToken: escapeToken}
  22. }
  23. // ProcessWord will use the 'env' list of environment variables,
  24. // and replace any env var references in 'word'.
  25. func (s *Lex) ProcessWord(word string, env []string) (string, error) {
  26. word, _, err := s.process(word, env)
  27. return word, err
  28. }
  29. // ProcessWords will use the 'env' list of environment variables,
  30. // and replace any env var references in 'word' then it will also
  31. // return a slice of strings which represents the 'word'
  32. // split up based on spaces - taking into account quotes. Note that
  33. // this splitting is done **after** the env var substitutions are done.
  34. // Note, each one is trimmed to remove leading and trailing spaces (unless
  35. // they are quoted", but ProcessWord retains spaces between words.
  36. func (s *Lex) ProcessWords(word string, env []string) ([]string, error) {
  37. _, words, err := s.process(word, env)
  38. return words, err
  39. }
  40. func (s *Lex) process(word string, env []string) (string, []string, error) {
  41. sw := &shellWord{
  42. envs: env,
  43. escapeToken: s.escapeToken,
  44. }
  45. sw.scanner.Init(strings.NewReader(word))
  46. return sw.process(word)
  47. }
  48. type shellWord struct {
  49. scanner scanner.Scanner
  50. envs []string
  51. escapeToken rune
  52. }
  53. func (sw *shellWord) process(source string) (string, []string, error) {
  54. word, words, err := sw.processStopOn(scanner.EOF)
  55. if err != nil {
  56. err = errors.Wrapf(err, "failed to process %q", source)
  57. }
  58. return word, words, err
  59. }
  60. type wordsStruct struct {
  61. word string
  62. words []string
  63. inWord bool
  64. }
  65. func (w *wordsStruct) addChar(ch rune) {
  66. if unicode.IsSpace(ch) && w.inWord {
  67. if len(w.word) != 0 {
  68. w.words = append(w.words, w.word)
  69. w.word = ""
  70. w.inWord = false
  71. }
  72. } else if !unicode.IsSpace(ch) {
  73. w.addRawChar(ch)
  74. }
  75. }
  76. func (w *wordsStruct) addRawChar(ch rune) {
  77. w.word += string(ch)
  78. w.inWord = true
  79. }
  80. func (w *wordsStruct) addString(str string) {
  81. var scan scanner.Scanner
  82. scan.Init(strings.NewReader(str))
  83. for scan.Peek() != scanner.EOF {
  84. w.addChar(scan.Next())
  85. }
  86. }
  87. func (w *wordsStruct) addRawString(str string) {
  88. w.word += str
  89. w.inWord = true
  90. }
  91. func (w *wordsStruct) getWords() []string {
  92. if len(w.word) > 0 {
  93. w.words = append(w.words, w.word)
  94. // Just in case we're called again by mistake
  95. w.word = ""
  96. w.inWord = false
  97. }
  98. return w.words
  99. }
  100. // Process the word, starting at 'pos', and stop when we get to the
  101. // end of the word or the 'stopChar' character
  102. func (sw *shellWord) processStopOn(stopChar rune) (string, []string, error) {
  103. var result bytes.Buffer
  104. var words wordsStruct
  105. var charFuncMapping = map[rune]func() (string, error){
  106. '\'': sw.processSingleQuote,
  107. '"': sw.processDoubleQuote,
  108. '$': sw.processDollar,
  109. }
  110. for sw.scanner.Peek() != scanner.EOF {
  111. ch := sw.scanner.Peek()
  112. if stopChar != scanner.EOF && ch == stopChar {
  113. sw.scanner.Next()
  114. return result.String(), words.getWords(), nil
  115. }
  116. if fn, ok := charFuncMapping[ch]; ok {
  117. // Call special processing func for certain chars
  118. tmp, err := fn()
  119. if err != nil {
  120. return "", []string{}, err
  121. }
  122. result.WriteString(tmp)
  123. if ch == rune('$') {
  124. words.addString(tmp)
  125. } else {
  126. words.addRawString(tmp)
  127. }
  128. } else {
  129. // Not special, just add it to the result
  130. ch = sw.scanner.Next()
  131. if ch == sw.escapeToken {
  132. // '\' (default escape token, but ` allowed) escapes, except end of line
  133. ch = sw.scanner.Next()
  134. if ch == scanner.EOF {
  135. break
  136. }
  137. words.addRawChar(ch)
  138. } else {
  139. words.addChar(ch)
  140. }
  141. result.WriteRune(ch)
  142. }
  143. }
  144. if stopChar != scanner.EOF {
  145. return "", []string{}, errors.Errorf("unexpected end of statement while looking for matching %s", string(stopChar))
  146. }
  147. return result.String(), words.getWords(), nil
  148. }
  149. func (sw *shellWord) processSingleQuote() (string, error) {
  150. // All chars between single quotes are taken as-is
  151. // Note, you can't escape '
  152. //
  153. // From the "sh" man page:
  154. // Single Quotes
  155. // Enclosing characters in single quotes preserves the literal meaning of
  156. // all the characters (except single quotes, making it impossible to put
  157. // single-quotes in a single-quoted string).
  158. var result bytes.Buffer
  159. sw.scanner.Next()
  160. for {
  161. ch := sw.scanner.Next()
  162. switch ch {
  163. case scanner.EOF:
  164. return "", errors.New("unexpected end of statement while looking for matching single-quote")
  165. case '\'':
  166. return result.String(), nil
  167. }
  168. result.WriteRune(ch)
  169. }
  170. }
  171. func (sw *shellWord) processDoubleQuote() (string, error) {
  172. // All chars up to the next " are taken as-is, even ', except any $ chars
  173. // But you can escape " with a \ (or ` if escape token set accordingly)
  174. //
  175. // From the "sh" man page:
  176. // Double Quotes
  177. // Enclosing characters within double quotes preserves the literal meaning
  178. // of all characters except dollarsign ($), backquote (`), and backslash
  179. // (\). The backslash inside double quotes is historically weird, and
  180. // serves to quote only the following characters:
  181. // $ ` " \ <newline>.
  182. // Otherwise it remains literal.
  183. var result bytes.Buffer
  184. sw.scanner.Next()
  185. for {
  186. switch sw.scanner.Peek() {
  187. case scanner.EOF:
  188. return "", errors.New("unexpected end of statement while looking for matching double-quote")
  189. case '"':
  190. sw.scanner.Next()
  191. return result.String(), nil
  192. case '$':
  193. value, err := sw.processDollar()
  194. if err != nil {
  195. return "", err
  196. }
  197. result.WriteString(value)
  198. default:
  199. ch := sw.scanner.Next()
  200. if ch == sw.escapeToken {
  201. switch sw.scanner.Peek() {
  202. case scanner.EOF:
  203. // Ignore \ at end of word
  204. continue
  205. case '"', '$', sw.escapeToken:
  206. // These chars can be escaped, all other \'s are left as-is
  207. // Note: for now don't do anything special with ` chars.
  208. // Not sure what to do with them anyway since we're not going
  209. // to execute the text in there (not now anyway).
  210. ch = sw.scanner.Next()
  211. }
  212. }
  213. result.WriteRune(ch)
  214. }
  215. }
  216. }
  217. func (sw *shellWord) processDollar() (string, error) {
  218. sw.scanner.Next()
  219. // $xxx case
  220. if sw.scanner.Peek() != '{' {
  221. name := sw.processName()
  222. if name == "" {
  223. return "$", nil
  224. }
  225. return sw.getEnv(name), nil
  226. }
  227. sw.scanner.Next()
  228. switch sw.scanner.Peek() {
  229. case scanner.EOF:
  230. return "", errors.New("syntax error: missing '}'")
  231. case '{', '}', ':':
  232. // Invalid ${{xx}, ${:xx}, ${:}. ${} case
  233. return "", errors.New("syntax error: bad substitution")
  234. }
  235. name := sw.processName()
  236. ch := sw.scanner.Next()
  237. switch ch {
  238. case '}':
  239. // Normal ${xx} case
  240. return sw.getEnv(name), nil
  241. case ':':
  242. // Special ${xx:...} format processing
  243. // Yes it allows for recursive $'s in the ... spot
  244. modifier := sw.scanner.Next()
  245. word, _, err := sw.processStopOn('}')
  246. if err != nil {
  247. if sw.scanner.Peek() == scanner.EOF {
  248. return "", errors.New("syntax error: missing '}'")
  249. }
  250. return "", err
  251. }
  252. // Grab the current value of the variable in question so we
  253. // can use to to determine what to do based on the modifier
  254. newValue := sw.getEnv(name)
  255. switch modifier {
  256. case '+':
  257. if newValue != "" {
  258. newValue = word
  259. }
  260. return newValue, nil
  261. case '-':
  262. if newValue == "" {
  263. newValue = word
  264. }
  265. return newValue, nil
  266. default:
  267. return "", errors.Errorf("unsupported modifier (%c) in substitution", modifier)
  268. }
  269. }
  270. return "", errors.Errorf("missing ':' in substitution")
  271. }
  272. func (sw *shellWord) processName() string {
  273. // Read in a name (alphanumeric or _)
  274. // If it starts with a numeric then just return $#
  275. var name bytes.Buffer
  276. for sw.scanner.Peek() != scanner.EOF {
  277. ch := sw.scanner.Peek()
  278. if name.Len() == 0 && unicode.IsDigit(ch) {
  279. for sw.scanner.Peek() != scanner.EOF && unicode.IsDigit(sw.scanner.Peek()) {
  280. // Keep reading until the first non-digit character, or EOF
  281. ch = sw.scanner.Next()
  282. name.WriteRune(ch)
  283. }
  284. return name.String()
  285. }
  286. if name.Len() == 0 && isSpecialParam(ch) {
  287. ch = sw.scanner.Next()
  288. return string(ch)
  289. }
  290. if !unicode.IsLetter(ch) && !unicode.IsDigit(ch) && ch != '_' {
  291. break
  292. }
  293. ch = sw.scanner.Next()
  294. name.WriteRune(ch)
  295. }
  296. return name.String()
  297. }
  298. // isSpecialParam checks if the provided character is a special parameters,
  299. // as defined in http://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_05_02
  300. func isSpecialParam(char rune) bool {
  301. switch char {
  302. case '@', '*', '#', '?', '-', '$', '!', '0':
  303. // Special parameters
  304. // http://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_05_02
  305. return true
  306. }
  307. return false
  308. }
  309. func (sw *shellWord) getEnv(name string) string {
  310. for _, env := range sw.envs {
  311. i := strings.Index(env, "=")
  312. if i < 0 {
  313. if EqualEnvKeys(name, env) {
  314. // Should probably never get here, but just in case treat
  315. // it like "var" and "var=" are the same
  316. return ""
  317. }
  318. continue
  319. }
  320. compareName := env[:i]
  321. if !EqualEnvKeys(name, compareName) {
  322. continue
  323. }
  324. return env[i+1:]
  325. }
  326. return ""
  327. }