// parser.go

// TOML Parser.
  2. package toml
  3. import (
  4. "errors"
  5. "fmt"
  6. "math"
  7. "reflect"
  8. "regexp"
  9. "strconv"
  10. "strings"
  11. "time"
  12. )
  13. type tomlParser struct {
  14. flowIdx int
  15. flow []token
  16. tree *Tree
  17. currentTable []string
  18. seenTableKeys []string
  19. }
  20. type tomlParserStateFn func() tomlParserStateFn
  21. // Formats and panics an error message based on a token
  22. func (p *tomlParser) raiseError(tok *token, msg string, args ...interface{}) {
  23. panic(tok.Position.String() + ": " + fmt.Sprintf(msg, args...))
  24. }
  25. func (p *tomlParser) run() {
  26. for state := p.parseStart; state != nil; {
  27. state = state()
  28. }
  29. }
  30. func (p *tomlParser) peek() *token {
  31. if p.flowIdx >= len(p.flow) {
  32. return nil
  33. }
  34. return &p.flow[p.flowIdx]
  35. }
  36. func (p *tomlParser) assume(typ tokenType) {
  37. tok := p.getToken()
  38. if tok == nil {
  39. p.raiseError(tok, "was expecting token %s, but token stream is empty", tok)
  40. }
  41. if tok.typ != typ {
  42. p.raiseError(tok, "was expecting token %s, but got %s instead", typ, tok)
  43. }
  44. }
  45. func (p *tomlParser) getToken() *token {
  46. tok := p.peek()
  47. if tok == nil {
  48. return nil
  49. }
  50. p.flowIdx++
  51. return tok
  52. }
  53. func (p *tomlParser) parseStart() tomlParserStateFn {
  54. tok := p.peek()
  55. // end of stream, parsing is finished
  56. if tok == nil {
  57. return nil
  58. }
  59. switch tok.typ {
  60. case tokenDoubleLeftBracket:
  61. return p.parseGroupArray
  62. case tokenLeftBracket:
  63. return p.parseGroup
  64. case tokenKey:
  65. return p.parseAssign
  66. case tokenEOF:
  67. return nil
  68. case tokenError:
  69. p.raiseError(tok, "parsing error: %s", tok.String())
  70. default:
  71. p.raiseError(tok, "unexpected token %s", tok.typ)
  72. }
  73. return nil
  74. }
  75. func (p *tomlParser) parseGroupArray() tomlParserStateFn {
  76. startToken := p.getToken() // discard the [[
  77. key := p.getToken()
  78. if key.typ != tokenKeyGroupArray {
  79. p.raiseError(key, "unexpected token %s, was expecting a table array key", key)
  80. }
  81. // get or create table array element at the indicated part in the path
  82. keys, err := parseKey(key.val)
  83. if err != nil {
  84. p.raiseError(key, "invalid table array key: %s", err)
  85. }
  86. p.tree.createSubTree(keys[:len(keys)-1], startToken.Position) // create parent entries
  87. destTree := p.tree.GetPath(keys)
  88. var array []*Tree
  89. if destTree == nil {
  90. array = make([]*Tree, 0)
  91. } else if target, ok := destTree.([]*Tree); ok && target != nil {
  92. array = destTree.([]*Tree)
  93. } else {
  94. p.raiseError(key, "key %s is already assigned and not of type table array", key)
  95. }
  96. p.currentTable = keys
  97. // add a new tree to the end of the table array
  98. newTree := newTree()
  99. newTree.position = startToken.Position
  100. array = append(array, newTree)
  101. p.tree.SetPath(p.currentTable, array)
  102. // remove all keys that were children of this table array
  103. prefix := key.val + "."
  104. found := false
  105. for ii := 0; ii < len(p.seenTableKeys); {
  106. tableKey := p.seenTableKeys[ii]
  107. if strings.HasPrefix(tableKey, prefix) {
  108. p.seenTableKeys = append(p.seenTableKeys[:ii], p.seenTableKeys[ii+1:]...)
  109. } else {
  110. found = (tableKey == key.val)
  111. ii++
  112. }
  113. }
  114. // keep this key name from use by other kinds of assignments
  115. if !found {
  116. p.seenTableKeys = append(p.seenTableKeys, key.val)
  117. }
  118. // move to next parser state
  119. p.assume(tokenDoubleRightBracket)
  120. return p.parseStart
  121. }
  122. func (p *tomlParser) parseGroup() tomlParserStateFn {
  123. startToken := p.getToken() // discard the [
  124. key := p.getToken()
  125. if key.typ != tokenKeyGroup {
  126. p.raiseError(key, "unexpected token %s, was expecting a table key", key)
  127. }
  128. for _, item := range p.seenTableKeys {
  129. if item == key.val {
  130. p.raiseError(key, "duplicated tables")
  131. }
  132. }
  133. p.seenTableKeys = append(p.seenTableKeys, key.val)
  134. keys, err := parseKey(key.val)
  135. if err != nil {
  136. p.raiseError(key, "invalid table array key: %s", err)
  137. }
  138. if err := p.tree.createSubTree(keys, startToken.Position); err != nil {
  139. p.raiseError(key, "%s", err)
  140. }
  141. destTree := p.tree.GetPath(keys)
  142. if target, ok := destTree.(*Tree); ok && target != nil && target.inline {
  143. p.raiseError(key, "could not re-define exist inline table or its sub-table : %s",
  144. strings.Join(keys, "."))
  145. }
  146. p.assume(tokenRightBracket)
  147. p.currentTable = keys
  148. return p.parseStart
  149. }
  150. func (p *tomlParser) parseAssign() tomlParserStateFn {
  151. key := p.getToken()
  152. p.assume(tokenEqual)
  153. parsedKey, err := parseKey(key.val)
  154. if err != nil {
  155. p.raiseError(key, "invalid key: %s", err.Error())
  156. }
  157. value := p.parseRvalue()
  158. var tableKey []string
  159. if len(p.currentTable) > 0 {
  160. tableKey = p.currentTable
  161. } else {
  162. tableKey = []string{}
  163. }
  164. prefixKey := parsedKey[0 : len(parsedKey)-1]
  165. tableKey = append(tableKey, prefixKey...)
  166. // find the table to assign, looking out for arrays of tables
  167. var targetNode *Tree
  168. switch node := p.tree.GetPath(tableKey).(type) {
  169. case []*Tree:
  170. targetNode = node[len(node)-1]
  171. case *Tree:
  172. targetNode = node
  173. case nil:
  174. // create intermediate
  175. if err := p.tree.createSubTree(tableKey, key.Position); err != nil {
  176. p.raiseError(key, "could not create intermediate group: %s", err)
  177. }
  178. targetNode = p.tree.GetPath(tableKey).(*Tree)
  179. default:
  180. p.raiseError(key, "Unknown table type for path: %s",
  181. strings.Join(tableKey, "."))
  182. }
  183. if targetNode.inline {
  184. p.raiseError(key, "could not add key or sub-table to exist inline table or its sub-table : %s",
  185. strings.Join(tableKey, "."))
  186. }
  187. // assign value to the found table
  188. keyVal := parsedKey[len(parsedKey)-1]
  189. localKey := []string{keyVal}
  190. finalKey := append(tableKey, keyVal)
  191. if targetNode.GetPath(localKey) != nil {
  192. p.raiseError(key, "The following key was defined twice: %s",
  193. strings.Join(finalKey, "."))
  194. }
  195. var toInsert interface{}
  196. switch value.(type) {
  197. case *Tree, []*Tree:
  198. toInsert = value
  199. default:
  200. toInsert = &tomlValue{value: value, position: key.Position}
  201. }
  202. targetNode.values[keyVal] = toInsert
  203. return p.parseStart
  204. }
  205. var numberUnderscoreInvalidRegexp *regexp.Regexp
  206. var hexNumberUnderscoreInvalidRegexp *regexp.Regexp
  207. func numberContainsInvalidUnderscore(value string) error {
  208. if numberUnderscoreInvalidRegexp.MatchString(value) {
  209. return errors.New("invalid use of _ in number")
  210. }
  211. return nil
  212. }
  213. func hexNumberContainsInvalidUnderscore(value string) error {
  214. if hexNumberUnderscoreInvalidRegexp.MatchString(value) {
  215. return errors.New("invalid use of _ in hex number")
  216. }
  217. return nil
  218. }
  219. func cleanupNumberToken(value string) string {
  220. cleanedVal := strings.Replace(value, "_", "", -1)
  221. return cleanedVal
  222. }
  223. func (p *tomlParser) parseRvalue() interface{} {
  224. tok := p.getToken()
  225. if tok == nil || tok.typ == tokenEOF {
  226. p.raiseError(tok, "expecting a value")
  227. }
  228. switch tok.typ {
  229. case tokenString:
  230. return tok.val
  231. case tokenTrue:
  232. return true
  233. case tokenFalse:
  234. return false
  235. case tokenInf:
  236. if tok.val[0] == '-' {
  237. return math.Inf(-1)
  238. }
  239. return math.Inf(1)
  240. case tokenNan:
  241. return math.NaN()
  242. case tokenInteger:
  243. cleanedVal := cleanupNumberToken(tok.val)
  244. var err error
  245. var val int64
  246. if len(cleanedVal) >= 3 && cleanedVal[0] == '0' {
  247. switch cleanedVal[1] {
  248. case 'x':
  249. err = hexNumberContainsInvalidUnderscore(tok.val)
  250. if err != nil {
  251. p.raiseError(tok, "%s", err)
  252. }
  253. val, err = strconv.ParseInt(cleanedVal[2:], 16, 64)
  254. case 'o':
  255. err = numberContainsInvalidUnderscore(tok.val)
  256. if err != nil {
  257. p.raiseError(tok, "%s", err)
  258. }
  259. val, err = strconv.ParseInt(cleanedVal[2:], 8, 64)
  260. case 'b':
  261. err = numberContainsInvalidUnderscore(tok.val)
  262. if err != nil {
  263. p.raiseError(tok, "%s", err)
  264. }
  265. val, err = strconv.ParseInt(cleanedVal[2:], 2, 64)
  266. default:
  267. panic("invalid base") // the lexer should catch this first
  268. }
  269. } else {
  270. err = numberContainsInvalidUnderscore(tok.val)
  271. if err != nil {
  272. p.raiseError(tok, "%s", err)
  273. }
  274. val, err = strconv.ParseInt(cleanedVal, 10, 64)
  275. }
  276. if err != nil {
  277. p.raiseError(tok, "%s", err)
  278. }
  279. return val
  280. case tokenFloat:
  281. err := numberContainsInvalidUnderscore(tok.val)
  282. if err != nil {
  283. p.raiseError(tok, "%s", err)
  284. }
  285. cleanedVal := cleanupNumberToken(tok.val)
  286. val, err := strconv.ParseFloat(cleanedVal, 64)
  287. if err != nil {
  288. p.raiseError(tok, "%s", err)
  289. }
  290. return val
  291. case tokenDate:
  292. layout := time.RFC3339Nano
  293. if !strings.Contains(tok.val, "T") {
  294. layout = strings.Replace(layout, "T", " ", 1)
  295. }
  296. val, err := time.ParseInLocation(layout, tok.val, time.UTC)
  297. if err != nil {
  298. p.raiseError(tok, "%s", err)
  299. }
  300. return val
  301. case tokenLocalDate:
  302. v := strings.Replace(tok.val, " ", "T", -1)
  303. isDateTime := false
  304. isTime := false
  305. for _, c := range v {
  306. if c == 'T' || c == 't' {
  307. isDateTime = true
  308. break
  309. }
  310. if c == ':' {
  311. isTime = true
  312. break
  313. }
  314. }
  315. var val interface{}
  316. var err error
  317. if isDateTime {
  318. val, err = ParseLocalDateTime(v)
  319. } else if isTime {
  320. val, err = ParseLocalTime(v)
  321. } else {
  322. val, err = ParseLocalDate(v)
  323. }
  324. if err != nil {
  325. p.raiseError(tok, "%s", err)
  326. }
  327. return val
  328. case tokenLeftBracket:
  329. return p.parseArray()
  330. case tokenLeftCurlyBrace:
  331. return p.parseInlineTable()
  332. case tokenEqual:
  333. p.raiseError(tok, "cannot have multiple equals for the same key")
  334. case tokenError:
  335. p.raiseError(tok, "%s", tok)
  336. }
  337. p.raiseError(tok, "never reached")
  338. return nil
  339. }
  340. func tokenIsComma(t *token) bool {
  341. return t != nil && t.typ == tokenComma
  342. }
  343. func (p *tomlParser) parseInlineTable() *Tree {
  344. tree := newTree()
  345. var previous *token
  346. Loop:
  347. for {
  348. follow := p.peek()
  349. if follow == nil || follow.typ == tokenEOF {
  350. p.raiseError(follow, "unterminated inline table")
  351. }
  352. switch follow.typ {
  353. case tokenRightCurlyBrace:
  354. p.getToken()
  355. break Loop
  356. case tokenKey, tokenInteger, tokenString:
  357. if !tokenIsComma(previous) && previous != nil {
  358. p.raiseError(follow, "comma expected between fields in inline table")
  359. }
  360. key := p.getToken()
  361. p.assume(tokenEqual)
  362. parsedKey, err := parseKey(key.val)
  363. if err != nil {
  364. p.raiseError(key, "invalid key: %s", err)
  365. }
  366. value := p.parseRvalue()
  367. tree.SetPath(parsedKey, value)
  368. case tokenComma:
  369. if tokenIsComma(previous) {
  370. p.raiseError(follow, "need field between two commas in inline table")
  371. }
  372. p.getToken()
  373. default:
  374. p.raiseError(follow, "unexpected token type in inline table: %s", follow.String())
  375. }
  376. previous = follow
  377. }
  378. if tokenIsComma(previous) {
  379. p.raiseError(previous, "trailing comma at the end of inline table")
  380. }
  381. tree.inline = true
  382. return tree
  383. }
  384. func (p *tomlParser) parseArray() interface{} {
  385. var array []interface{}
  386. arrayType := reflect.TypeOf(newTree())
  387. for {
  388. follow := p.peek()
  389. if follow == nil || follow.typ == tokenEOF {
  390. p.raiseError(follow, "unterminated array")
  391. }
  392. if follow.typ == tokenRightBracket {
  393. p.getToken()
  394. break
  395. }
  396. val := p.parseRvalue()
  397. if reflect.TypeOf(val) != arrayType {
  398. arrayType = nil
  399. }
  400. array = append(array, val)
  401. follow = p.peek()
  402. if follow == nil || follow.typ == tokenEOF {
  403. p.raiseError(follow, "unterminated array")
  404. }
  405. if follow.typ != tokenRightBracket && follow.typ != tokenComma {
  406. p.raiseError(follow, "missing comma")
  407. }
  408. if follow.typ == tokenComma {
  409. p.getToken()
  410. }
  411. }
  412. // if the array is a mixed-type array or its length is 0,
  413. // don't convert it to a table array
  414. if len(array) <= 0 {
  415. arrayType = nil
  416. }
  417. // An array of Trees is actually an array of inline
  418. // tables, which is a shorthand for a table array. If the
  419. // array was not converted from []interface{} to []*Tree,
  420. // the two notations would not be equivalent.
  421. if arrayType == reflect.TypeOf(newTree()) {
  422. tomlArray := make([]*Tree, len(array))
  423. for i, v := range array {
  424. tomlArray[i] = v.(*Tree)
  425. }
  426. return tomlArray
  427. }
  428. return array
  429. }
  430. func parseToml(flow []token) *Tree {
  431. result := newTree()
  432. result.position = Position{1, 1}
  433. parser := &tomlParser{
  434. flowIdx: 0,
  435. flow: flow,
  436. tree: result,
  437. currentTable: make([]string, 0),
  438. seenTableKeys: make([]string, 0),
  439. }
  440. parser.run()
  441. return result
  442. }
  443. func init() {
  444. numberUnderscoreInvalidRegexp = regexp.MustCompile(`([^\d]_|_[^\d])|_$|^_`)
  445. hexNumberUnderscoreInvalidRegexp = regexp.MustCompile(`(^0x_)|([^\da-f]_|_[^\da-f])|_$|^_`)
  446. }