lex.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466
  1. package shell
  2. import (
  3. "bytes"
  4. "fmt"
  5. "strings"
  6. "text/scanner"
  7. "unicode"
  8. "github.com/pkg/errors"
  9. )
// Lex performs shell word splitting and variable expansion.
//
// Lex takes a string and a set of env variables and processes all
// quotes (" and ') as well as $xxx and ${xxx} env variable tokens,
// trying to mimic what the bash shell does.
// It doesn't support all flavors of ${xx:...} formats, but new ones can
// be added by adding code to the "special ${} format processing" section.
type Lex struct {
	// escapeToken is the rune that escapes the character following it.
	escapeToken rune
	// RawQuotes, when true, keeps the enclosing quote characters of
	// single- and double-quoted sections in the output.
	RawQuotes bool
	// SkipUnsetEnv, when true, leaves references to variables that are
	// not present in the environment in the output in their original
	// $xxx / ${xxx...} form instead of expanding them.
	SkipUnsetEnv bool
}
  22. // NewLex creates a new Lex which uses escapeToken to escape quotes.
  23. func NewLex(escapeToken rune) *Lex {
  24. return &Lex{escapeToken: escapeToken}
  25. }
  26. // ProcessWord will use the 'env' list of environment variables,
  27. // and replace any env var references in 'word'.
  28. func (s *Lex) ProcessWord(word string, env []string) (string, error) {
  29. word, _, err := s.process(word, BuildEnvs(env))
  30. return word, err
  31. }
  32. // ProcessWords will use the 'env' list of environment variables,
  33. // and replace any env var references in 'word' then it will also
  34. // return a slice of strings which represents the 'word'
  35. // split up based on spaces - taking into account quotes. Note that
  36. // this splitting is done **after** the env var substitutions are done.
  37. // Note, each one is trimmed to remove leading and trailing spaces (unless
  38. // they are quoted", but ProcessWord retains spaces between words.
  39. func (s *Lex) ProcessWords(word string, env []string) ([]string, error) {
  40. _, words, err := s.process(word, BuildEnvs(env))
  41. return words, err
  42. }
  43. // ProcessWordWithMap will use the 'env' list of environment variables,
  44. // and replace any env var references in 'word'.
  45. func (s *Lex) ProcessWordWithMap(word string, env map[string]string) (string, error) {
  46. word, _, err := s.process(word, env)
  47. return word, err
  48. }
  49. func (s *Lex) ProcessWordsWithMap(word string, env map[string]string) ([]string, error) {
  50. _, words, err := s.process(word, env)
  51. return words, err
  52. }
  53. func (s *Lex) process(word string, env map[string]string) (string, []string, error) {
  54. sw := &shellWord{
  55. envs: env,
  56. escapeToken: s.escapeToken,
  57. skipUnsetEnv: s.SkipUnsetEnv,
  58. rawQuotes: s.RawQuotes,
  59. }
  60. sw.scanner.Init(strings.NewReader(word))
  61. return sw.process(word)
  62. }
// shellWord holds the state needed to process one input string: the
// scanner positioned over the input plus the settings copied from Lex.
type shellWord struct {
	// scanner supplies the input characters; every processXxx method
	// reads from (and advances) it.
	scanner scanner.Scanner
	// envs maps variable names to the values used for expansion.
	envs map[string]string
	// escapeToken is the rune that escapes the character following it.
	escapeToken rune
	// rawQuotes, when true, keeps enclosing quote characters in the output.
	rawQuotes bool
	// skipUnsetEnv, when true, leaves references to variables missing
	// from envs unexpanded in the output.
	skipUnsetEnv bool
}
  70. func (sw *shellWord) process(source string) (string, []string, error) {
  71. word, words, err := sw.processStopOn(scanner.EOF)
  72. if err != nil {
  73. err = errors.Wrapf(err, "failed to process %q", source)
  74. }
  75. return word, words, err
  76. }
  77. type wordsStruct struct {
  78. word string
  79. words []string
  80. inWord bool
  81. }
  82. func (w *wordsStruct) addChar(ch rune) {
  83. if unicode.IsSpace(ch) && w.inWord {
  84. if len(w.word) != 0 {
  85. w.words = append(w.words, w.word)
  86. w.word = ""
  87. w.inWord = false
  88. }
  89. } else if !unicode.IsSpace(ch) {
  90. w.addRawChar(ch)
  91. }
  92. }
  93. func (w *wordsStruct) addRawChar(ch rune) {
  94. w.word += string(ch)
  95. w.inWord = true
  96. }
  97. func (w *wordsStruct) addString(str string) {
  98. for _, ch := range str {
  99. w.addChar(ch)
  100. }
  101. }
  102. func (w *wordsStruct) addRawString(str string) {
  103. w.word += str
  104. w.inWord = true
  105. }
  106. func (w *wordsStruct) getWords() []string {
  107. if len(w.word) > 0 {
  108. w.words = append(w.words, w.word)
  109. // Just in case we're called again by mistake
  110. w.word = ""
  111. w.inWord = false
  112. }
  113. return w.words
  114. }
  115. // Process the word, starting at 'pos', and stop when we get to the
  116. // end of the word or the 'stopChar' character
  117. func (sw *shellWord) processStopOn(stopChar rune) (string, []string, error) {
  118. var result bytes.Buffer
  119. var words wordsStruct
  120. var charFuncMapping = map[rune]func() (string, error){
  121. '\'': sw.processSingleQuote,
  122. '"': sw.processDoubleQuote,
  123. '$': sw.processDollar,
  124. }
  125. for sw.scanner.Peek() != scanner.EOF {
  126. ch := sw.scanner.Peek()
  127. if stopChar != scanner.EOF && ch == stopChar {
  128. sw.scanner.Next()
  129. return result.String(), words.getWords(), nil
  130. }
  131. if fn, ok := charFuncMapping[ch]; ok {
  132. // Call special processing func for certain chars
  133. tmp, err := fn()
  134. if err != nil {
  135. return "", []string{}, err
  136. }
  137. result.WriteString(tmp)
  138. if ch == rune('$') {
  139. words.addString(tmp)
  140. } else {
  141. words.addRawString(tmp)
  142. }
  143. } else {
  144. // Not special, just add it to the result
  145. ch = sw.scanner.Next()
  146. if ch == sw.escapeToken {
  147. // '\' (default escape token, but ` allowed) escapes, except end of line
  148. ch = sw.scanner.Next()
  149. if ch == scanner.EOF {
  150. break
  151. }
  152. words.addRawChar(ch)
  153. } else {
  154. words.addChar(ch)
  155. }
  156. result.WriteRune(ch)
  157. }
  158. }
  159. if stopChar != scanner.EOF {
  160. return "", []string{}, errors.Errorf("unexpected end of statement while looking for matching %s", string(stopChar))
  161. }
  162. return result.String(), words.getWords(), nil
  163. }
  164. func (sw *shellWord) processSingleQuote() (string, error) {
  165. // All chars between single quotes are taken as-is
  166. // Note, you can't escape '
  167. //
  168. // From the "sh" man page:
  169. // Single Quotes
  170. // Enclosing characters in single quotes preserves the literal meaning of
  171. // all the characters (except single quotes, making it impossible to put
  172. // single-quotes in a single-quoted string).
  173. var result bytes.Buffer
  174. ch := sw.scanner.Next()
  175. if sw.rawQuotes {
  176. result.WriteRune(ch)
  177. }
  178. for {
  179. ch = sw.scanner.Next()
  180. switch ch {
  181. case scanner.EOF:
  182. return "", errors.New("unexpected end of statement while looking for matching single-quote")
  183. case '\'':
  184. if sw.rawQuotes {
  185. result.WriteRune(ch)
  186. }
  187. return result.String(), nil
  188. }
  189. result.WriteRune(ch)
  190. }
  191. }
  192. func (sw *shellWord) processDoubleQuote() (string, error) {
  193. // All chars up to the next " are taken as-is, even ', except any $ chars
  194. // But you can escape " with a \ (or ` if escape token set accordingly)
  195. //
  196. // From the "sh" man page:
  197. // Double Quotes
  198. // Enclosing characters within double quotes preserves the literal meaning
  199. // of all characters except dollarsign ($), backquote (`), and backslash
  200. // (\). The backslash inside double quotes is historically weird, and
  201. // serves to quote only the following characters:
  202. // $ ` " \ <newline>.
  203. // Otherwise it remains literal.
  204. var result bytes.Buffer
  205. ch := sw.scanner.Next()
  206. if sw.rawQuotes {
  207. result.WriteRune(ch)
  208. }
  209. for {
  210. switch sw.scanner.Peek() {
  211. case scanner.EOF:
  212. return "", errors.New("unexpected end of statement while looking for matching double-quote")
  213. case '"':
  214. ch := sw.scanner.Next()
  215. if sw.rawQuotes {
  216. result.WriteRune(ch)
  217. }
  218. return result.String(), nil
  219. case '$':
  220. value, err := sw.processDollar()
  221. if err != nil {
  222. return "", err
  223. }
  224. result.WriteString(value)
  225. default:
  226. ch := sw.scanner.Next()
  227. if ch == sw.escapeToken {
  228. switch sw.scanner.Peek() {
  229. case scanner.EOF:
  230. // Ignore \ at end of word
  231. continue
  232. case '"', '$', sw.escapeToken:
  233. // These chars can be escaped, all other \'s are left as-is
  234. // Note: for now don't do anything special with ` chars.
  235. // Not sure what to do with them anyway since we're not going
  236. // to execute the text in there (not now anyway).
  237. ch = sw.scanner.Next()
  238. }
  239. }
  240. result.WriteRune(ch)
  241. }
  242. }
  243. }
  244. func (sw *shellWord) processDollar() (string, error) {
  245. sw.scanner.Next()
  246. // $xxx case
  247. if sw.scanner.Peek() != '{' {
  248. name := sw.processName()
  249. if name == "" {
  250. return "$", nil
  251. }
  252. value, found := sw.getEnv(name)
  253. if !found && sw.skipUnsetEnv {
  254. return "$" + name, nil
  255. }
  256. return value, nil
  257. }
  258. sw.scanner.Next()
  259. switch sw.scanner.Peek() {
  260. case scanner.EOF:
  261. return "", errors.New("syntax error: missing '}'")
  262. case '{', '}', ':':
  263. // Invalid ${{xx}, ${:xx}, ${:}. ${} case
  264. return "", errors.New("syntax error: bad substitution")
  265. }
  266. name := sw.processName()
  267. ch := sw.scanner.Next()
  268. switch ch {
  269. case '}':
  270. // Normal ${xx} case
  271. value, found := sw.getEnv(name)
  272. if !found && sw.skipUnsetEnv {
  273. return fmt.Sprintf("${%s}", name), nil
  274. }
  275. return value, nil
  276. case '?':
  277. word, _, err := sw.processStopOn('}')
  278. if err != nil {
  279. if sw.scanner.Peek() == scanner.EOF {
  280. return "", errors.New("syntax error: missing '}'")
  281. }
  282. return "", err
  283. }
  284. newValue, found := sw.getEnv(name)
  285. if !found {
  286. if sw.skipUnsetEnv {
  287. return fmt.Sprintf("${%s?%s}", name, word), nil
  288. }
  289. message := "is not allowed to be unset"
  290. if word != "" {
  291. message = word
  292. }
  293. return "", errors.Errorf("%s: %s", name, message)
  294. }
  295. return newValue, nil
  296. case ':':
  297. // Special ${xx:...} format processing
  298. // Yes it allows for recursive $'s in the ... spot
  299. modifier := sw.scanner.Next()
  300. word, _, err := sw.processStopOn('}')
  301. if err != nil {
  302. if sw.scanner.Peek() == scanner.EOF {
  303. return "", errors.New("syntax error: missing '}'")
  304. }
  305. return "", err
  306. }
  307. // Grab the current value of the variable in question so we
  308. // can use to to determine what to do based on the modifier
  309. newValue, found := sw.getEnv(name)
  310. switch modifier {
  311. case '+':
  312. if newValue != "" {
  313. newValue = word
  314. }
  315. if !found && sw.skipUnsetEnv {
  316. return fmt.Sprintf("${%s:%s%s}", name, string(modifier), word), nil
  317. }
  318. return newValue, nil
  319. case '-':
  320. if newValue == "" {
  321. newValue = word
  322. }
  323. if !found && sw.skipUnsetEnv {
  324. return fmt.Sprintf("${%s:%s%s}", name, string(modifier), word), nil
  325. }
  326. return newValue, nil
  327. case '?':
  328. if !found {
  329. if sw.skipUnsetEnv {
  330. return fmt.Sprintf("${%s:%s%s}", name, string(modifier), word), nil
  331. }
  332. message := "is not allowed to be unset"
  333. if word != "" {
  334. message = word
  335. }
  336. return "", errors.Errorf("%s: %s", name, message)
  337. }
  338. if newValue == "" {
  339. message := "is not allowed to be empty"
  340. if word != "" {
  341. message = word
  342. }
  343. return "", errors.Errorf("%s: %s", name, message)
  344. }
  345. return newValue, nil
  346. default:
  347. return "", errors.Errorf("unsupported modifier (%c) in substitution", modifier)
  348. }
  349. }
  350. return "", errors.Errorf("missing ':' in substitution")
  351. }
  352. func (sw *shellWord) processName() string {
  353. // Read in a name (alphanumeric or _)
  354. // If it starts with a numeric then just return $#
  355. var name bytes.Buffer
  356. for sw.scanner.Peek() != scanner.EOF {
  357. ch := sw.scanner.Peek()
  358. if name.Len() == 0 && unicode.IsDigit(ch) {
  359. for sw.scanner.Peek() != scanner.EOF && unicode.IsDigit(sw.scanner.Peek()) {
  360. // Keep reading until the first non-digit character, or EOF
  361. ch = sw.scanner.Next()
  362. name.WriteRune(ch)
  363. }
  364. return name.String()
  365. }
  366. if name.Len() == 0 && isSpecialParam(ch) {
  367. ch = sw.scanner.Next()
  368. return string(ch)
  369. }
  370. if !unicode.IsLetter(ch) && !unicode.IsDigit(ch) && ch != '_' {
  371. break
  372. }
  373. ch = sw.scanner.Next()
  374. name.WriteRune(ch)
  375. }
  376. return name.String()
  377. }
  378. // isSpecialParam checks if the provided character is a special parameters,
  379. // as defined in http://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_05_02
  380. func isSpecialParam(char rune) bool {
  381. switch char {
  382. case '@', '*', '#', '?', '-', '$', '!', '0':
  383. // Special parameters
  384. // http://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_05_02
  385. return true
  386. }
  387. return false
  388. }
  389. func (sw *shellWord) getEnv(name string) (string, bool) {
  390. for key, value := range sw.envs {
  391. if EqualEnvKeys(name, key) {
  392. return value, true
  393. }
  394. }
  395. return "", false
  396. }
  397. func BuildEnvs(env []string) map[string]string {
  398. envs := map[string]string{}
  399. for _, e := range env {
  400. i := strings.Index(e, "=")
  401. if i < 0 {
  402. envs[e] = ""
  403. } else {
  404. k := e[:i]
  405. v := e[i+1:]
  406. // overwrite value if key already exists
  407. envs[k] = v
  408. }
  409. }
  410. return envs
  411. }