ini_lexer.go

package ini

import (
    "bytes"
    "io"
    "io/ioutil"
)
// TokenType represents the various different token types.
type TokenType int

func (t TokenType) String() string {
    switch t {
    case TokenNone:
        return "none"
    case TokenLit:
        return "literal"
    case TokenSep:
        return "sep"
    case TokenOp:
        return "op"
    case TokenWS:
        return "ws"
    case TokenNL:
        return "newline"
    case TokenComment:
        return "comment"
    case TokenComma:
        return "comma"
    default:
        return ""
    }
}
// TokenType enums
const (
    TokenNone = TokenType(iota)
    TokenLit
    TokenSep
    TokenComma
    TokenOp
    TokenWS
    TokenNL
    TokenComment
)
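// For illustration, the String mapping above means:
//
//	TokenSep.String()      // "sep"
//	TokenNL.String()       // "newline"
//	TokenType(-1).String() // "" (unknown types stringify to the empty string)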
type iniLexer struct{}

// Tokenize will return a list of tokens during lexical analysis of the
// io.Reader.
func (l *iniLexer) Tokenize(r io.Reader) ([]Token, error) {
    b, err := ioutil.ReadAll(r)
    if err != nil {
        return nil, &UnableToReadFile{Err: err}
    }

    return l.tokenize(b)
}
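// A minimal usage sketch (the input string is a hypothetical example; since
// iniLexer is unexported, this only applies within the package):
//
//	lex := &iniLexer{}
//	tokens, err := lex.Tokenize(strings.NewReader("[default]\nregion = us-west-2\n"))
//	if err != nil {
//	    // handle the read or lexing error
//	}
//	for _, tok := range tokens {
//	    fmt.Printf("%s %q\n", tok.Type(), string(tok.Raw()))
//	}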
// tokenize performs the lexical analysis over the raw runes, pre-sizing the
// result slice with countTokens.
func (l *iniLexer) tokenize(b []byte) ([]Token, error) {
    runes := bytes.Runes(b)
    var err error
    n := 0
    tokenAmount := countTokens(runes)
    tokens := make([]Token, tokenAmount)
    count := 0

    for len(runes) > 0 && count < tokenAmount {
        switch {
        case isWhitespace(runes[0]):
            tokens[count], n, err = newWSToken(runes)
        case isComma(runes[0]):
            tokens[count], n = newCommaToken(), 1
        case isComment(runes):
            tokens[count], n, err = newCommentToken(runes)
        case isNewline(runes):
            tokens[count], n, err = newNewlineToken(runes)
        case isSep(runes):
            tokens[count], n, err = newSepToken(runes)
        case isOp(runes):
            tokens[count], n, err = newOpToken(runes)
        default:
            tokens[count], n, err = newLitToken(runes)
        }

        if err != nil {
            return nil, err
        }

        count++
        runes = runes[n:]
    }

    return tokens[:count], nil
}
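// Design note: tokenize walks the input twice. countTokens below makes a
// dry-run pass purely to size the token slice, so the real pass can fill a
// preallocated slice rather than growing one with append. The trade-off is
// that every token constructor runs twice over the input.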
func countTokens(runes []rune) int {
    count, n := 0, 0
    var err error

    for len(runes) > 0 {
        switch {
        case isWhitespace(runes[0]):
            _, n, err = newWSToken(runes)
        case isComma(runes[0]):
            _, n = newCommaToken(), 1
        case isComment(runes):
            _, n, err = newCommentToken(runes)
        case isNewline(runes):
            _, n, err = newNewlineToken(runes)
        case isSep(runes):
            _, n, err = newSepToken(runes)
        case isOp(runes):
            _, n, err = newOpToken(runes)
        default:
            _, n, err = newLitToken(runes)
        }

        if err != nil {
            // Stop counting at the error, but leave headroom so tokenize
            // still reaches the failing token and reports the error itself.
            return count + 1
        }

        count++
        runes = runes[n:]
    }

    // One extra slot of headroom; tokenize trims the result with tokens[:count].
    return count + 1
}
// Token indicates metadata about a given value.
type Token struct {
    t         TokenType
    ValueType ValueType
    base      int
    raw       []rune
}

var emptyValue = Value{}

func newToken(t TokenType, raw []rune, v ValueType) Token {
    return Token{
        t:         t,
        raw:       raw,
        ValueType: v,
    }
}
// Raw returns the raw runes that were consumed.
func (tok Token) Raw() []rune {
    return tok.raw
}

// Type returns the token type.
func (tok Token) Type() TokenType {
    return tok.t
}
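// For example, assuming isOp matches the '=' operator (as is conventional
// for INI), a token lexed from "=" would report:
//
//	tok.Type() // TokenOp
//	tok.Raw()  // []rune{'='}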