Token.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. /*
  2. * Copyright (c) 2020, Stephan Unverwerth <s.unverwerth@gmx.de>
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. *
  8. * 1. Redistributions of source code must retain the above copyright notice, this
  9. * list of conditions and the following disclaimer.
  10. *
  11. * 2. Redistributions in binary form must reproduce the above copyright notice,
  12. * this list of conditions and the following disclaimer in the documentation
  13. * and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  18. * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  19. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  21. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  22. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  23. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #pragma once
  27. #include <AK/String.h>
  28. #include <AK/StringView.h>
  29. namespace JS {
  30. // U+2028 LINE SEPARATOR
  31. constexpr const char line_separator_chars[] { (char)0xe2, (char)0x80, (char)0xa8, 0 };
  32. constexpr const StringView LINE_SEPARATOR { line_separator_chars };
  33. // U+2029 PARAGRAPH SEPARATOR
  34. constexpr const char paragraph_separator_chars[] { (char)0xe2, (char)0x80, (char)0xa9, 0 };
  35. constexpr const StringView PARAGRAPH_SEPARATOR { paragraph_separator_chars };
  36. #define ENUMERATE_JS_TOKENS \
  37. __ENUMERATE_JS_TOKEN(Ampersand, Operator) \
  38. __ENUMERATE_JS_TOKEN(AmpersandEquals, Operator) \
  39. __ENUMERATE_JS_TOKEN(Arrow, Operator) \
  40. __ENUMERATE_JS_TOKEN(Asterisk, Operator) \
  41. __ENUMERATE_JS_TOKEN(AsteriskEquals, Operator) \
  42. __ENUMERATE_JS_TOKEN(Async, Keyword) \
  43. __ENUMERATE_JS_TOKEN(Await, Keyword) \
  44. __ENUMERATE_JS_TOKEN(BigIntLiteral, Number) \
  45. __ENUMERATE_JS_TOKEN(BoolLiteral, Keyword) \
  46. __ENUMERATE_JS_TOKEN(BracketClose, Punctuation) \
  47. __ENUMERATE_JS_TOKEN(BracketOpen, Punctuation) \
  48. __ENUMERATE_JS_TOKEN(Break, Keyword) \
  49. __ENUMERATE_JS_TOKEN(Caret, Operator) \
  50. __ENUMERATE_JS_TOKEN(CaretEquals, Operator) \
  51. __ENUMERATE_JS_TOKEN(Case, ControlKeyword) \
  52. __ENUMERATE_JS_TOKEN(Catch, ControlKeyword) \
  53. __ENUMERATE_JS_TOKEN(Class, Keyword) \
  54. __ENUMERATE_JS_TOKEN(Colon, Punctuation) \
  55. __ENUMERATE_JS_TOKEN(Comma, Punctuation) \
  56. __ENUMERATE_JS_TOKEN(Const, Keyword) \
  57. __ENUMERATE_JS_TOKEN(Continue, ControlKeyword) \
  58. __ENUMERATE_JS_TOKEN(CurlyClose, Punctuation) \
  59. __ENUMERATE_JS_TOKEN(CurlyOpen, Punctuation) \
  60. __ENUMERATE_JS_TOKEN(Debugger, Keyword) \
  61. __ENUMERATE_JS_TOKEN(Default, ControlKeyword) \
  62. __ENUMERATE_JS_TOKEN(Delete, Keyword) \
  63. __ENUMERATE_JS_TOKEN(Do, ControlKeyword) \
  64. __ENUMERATE_JS_TOKEN(DoubleAmpersand, Operator) \
  65. __ENUMERATE_JS_TOKEN(DoubleAmpersandEquals, Operator) \
  66. __ENUMERATE_JS_TOKEN(DoubleAsterisk, Operator) \
  67. __ENUMERATE_JS_TOKEN(DoubleAsteriskEquals, Operator) \
  68. __ENUMERATE_JS_TOKEN(DoublePipe, Operator) \
  69. __ENUMERATE_JS_TOKEN(DoublePipeEquals, Operator) \
  70. __ENUMERATE_JS_TOKEN(DoubleQuestionMark, Operator) \
  71. __ENUMERATE_JS_TOKEN(DoubleQuestionMarkEquals, Operator) \
  72. __ENUMERATE_JS_TOKEN(Else, ControlKeyword) \
  73. __ENUMERATE_JS_TOKEN(Enum, Keyword) \
  74. __ENUMERATE_JS_TOKEN(Eof, Invalid) \
  75. __ENUMERATE_JS_TOKEN(Equals, Operator) \
  76. __ENUMERATE_JS_TOKEN(EqualsEquals, Operator) \
  77. __ENUMERATE_JS_TOKEN(EqualsEqualsEquals, Operator) \
  78. __ENUMERATE_JS_TOKEN(ExclamationMark, Operator) \
  79. __ENUMERATE_JS_TOKEN(ExclamationMarkEquals, Operator) \
  80. __ENUMERATE_JS_TOKEN(ExclamationMarkEqualsEquals, Operator) \
  81. __ENUMERATE_JS_TOKEN(Export, Keyword) \
  82. __ENUMERATE_JS_TOKEN(Extends, Keyword) \
  83. __ENUMERATE_JS_TOKEN(Finally, ControlKeyword) \
  84. __ENUMERATE_JS_TOKEN(For, ControlKeyword) \
  85. __ENUMERATE_JS_TOKEN(Function, Keyword) \
  86. __ENUMERATE_JS_TOKEN(GreaterThan, Operator) \
  87. __ENUMERATE_JS_TOKEN(GreaterThanEquals, Operator) \
  88. __ENUMERATE_JS_TOKEN(Identifier, Identifier) \
  89. __ENUMERATE_JS_TOKEN(If, ControlKeyword) \
  90. __ENUMERATE_JS_TOKEN(Implements, Keyword) \
  91. __ENUMERATE_JS_TOKEN(Import, Keyword) \
  92. __ENUMERATE_JS_TOKEN(In, Keyword) \
  93. __ENUMERATE_JS_TOKEN(Instanceof, Keyword) \
  94. __ENUMERATE_JS_TOKEN(Interface, Keyword) \
  95. __ENUMERATE_JS_TOKEN(Invalid, Invalid) \
  96. __ENUMERATE_JS_TOKEN(LessThan, Operator) \
  97. __ENUMERATE_JS_TOKEN(LessThanEquals, Operator) \
  98. __ENUMERATE_JS_TOKEN(Let, Keyword) \
  99. __ENUMERATE_JS_TOKEN(Minus, Operator) \
  100. __ENUMERATE_JS_TOKEN(MinusEquals, Operator) \
  101. __ENUMERATE_JS_TOKEN(MinusMinus, Operator) \
  102. __ENUMERATE_JS_TOKEN(New, Keyword) \
  103. __ENUMERATE_JS_TOKEN(NullLiteral, Keyword) \
  104. __ENUMERATE_JS_TOKEN(NumericLiteral, Number) \
  105. __ENUMERATE_JS_TOKEN(Package, Keyword) \
  106. __ENUMERATE_JS_TOKEN(ParenClose, Punctuation) \
  107. __ENUMERATE_JS_TOKEN(ParenOpen, Punctuation) \
  108. __ENUMERATE_JS_TOKEN(Percent, Operator) \
  109. __ENUMERATE_JS_TOKEN(PercentEquals, Operator) \
  110. __ENUMERATE_JS_TOKEN(Period, Operator) \
  111. __ENUMERATE_JS_TOKEN(Pipe, Operator) \
  112. __ENUMERATE_JS_TOKEN(PipeEquals, Operator) \
  113. __ENUMERATE_JS_TOKEN(Plus, Operator) \
  114. __ENUMERATE_JS_TOKEN(PlusEquals, Operator) \
  115. __ENUMERATE_JS_TOKEN(PlusPlus, Operator) \
  116. __ENUMERATE_JS_TOKEN(Private, Keyword) \
  117. __ENUMERATE_JS_TOKEN(Protected, Keyword) \
  118. __ENUMERATE_JS_TOKEN(Public, Keyword) \
  119. __ENUMERATE_JS_TOKEN(QuestionMark, Operator) \
  120. __ENUMERATE_JS_TOKEN(QuestionMarkPeriod, Operator) \
  121. __ENUMERATE_JS_TOKEN(RegexFlags, String) \
  122. __ENUMERATE_JS_TOKEN(RegexLiteral, String) \
  123. __ENUMERATE_JS_TOKEN(Return, ControlKeyword) \
  124. __ENUMERATE_JS_TOKEN(Semicolon, Punctuation) \
  125. __ENUMERATE_JS_TOKEN(ShiftLeft, Operator) \
  126. __ENUMERATE_JS_TOKEN(ShiftLeftEquals, Operator) \
  127. __ENUMERATE_JS_TOKEN(ShiftRight, Operator) \
  128. __ENUMERATE_JS_TOKEN(ShiftRightEquals, Operator) \
  129. __ENUMERATE_JS_TOKEN(Slash, Operator) \
  130. __ENUMERATE_JS_TOKEN(SlashEquals, Operator) \
  131. __ENUMERATE_JS_TOKEN(Static, Keyword) \
  132. __ENUMERATE_JS_TOKEN(StringLiteral, String) \
  133. __ENUMERATE_JS_TOKEN(Super, Keyword) \
  134. __ENUMERATE_JS_TOKEN(Switch, ControlKeyword) \
  135. __ENUMERATE_JS_TOKEN(TemplateLiteralEnd, String) \
  136. __ENUMERATE_JS_TOKEN(TemplateLiteralExprEnd, Punctuation) \
  137. __ENUMERATE_JS_TOKEN(TemplateLiteralExprStart, Punctuation) \
  138. __ENUMERATE_JS_TOKEN(TemplateLiteralStart, String) \
  139. __ENUMERATE_JS_TOKEN(TemplateLiteralString, String) \
  140. __ENUMERATE_JS_TOKEN(This, Keyword) \
  141. __ENUMERATE_JS_TOKEN(Throw, ControlKeyword) \
  142. __ENUMERATE_JS_TOKEN(Tilde, Operator) \
  143. __ENUMERATE_JS_TOKEN(TripleDot, Operator) \
  144. __ENUMERATE_JS_TOKEN(Try, ControlKeyword) \
  145. __ENUMERATE_JS_TOKEN(Typeof, Keyword) \
  146. __ENUMERATE_JS_TOKEN(UnsignedShiftRight, Operator) \
  147. __ENUMERATE_JS_TOKEN(UnsignedShiftRightEquals, Operator) \
  148. __ENUMERATE_JS_TOKEN(UnterminatedRegexLiteral, String) \
  149. __ENUMERATE_JS_TOKEN(UnterminatedStringLiteral, String) \
  150. __ENUMERATE_JS_TOKEN(UnterminatedTemplateLiteral, String) \
  151. __ENUMERATE_JS_TOKEN(Var, Keyword) \
  152. __ENUMERATE_JS_TOKEN(Void, Keyword) \
  153. __ENUMERATE_JS_TOKEN(While, ControlKeyword) \
  154. __ENUMERATE_JS_TOKEN(With, ControlKeyword) \
  155. __ENUMERATE_JS_TOKEN(Yield, ControlKeyword)
  156. enum class TokenType {
  157. #define __ENUMERATE_JS_TOKEN(type, category) type,
  158. ENUMERATE_JS_TOKENS
  159. #undef __ENUMERATE_JS_TOKEN
  160. _COUNT_OF_TOKENS
  161. };
  162. constexpr size_t cs_num_of_js_tokens = static_cast<size_t>(TokenType::_COUNT_OF_TOKENS);
  163. enum class TokenCategory {
  164. Invalid,
  165. Number,
  166. String,
  167. Punctuation,
  168. Operator,
  169. Keyword,
  170. ControlKeyword,
  171. Identifier
  172. };
  173. class Token {
  174. public:
  175. Token(TokenType type, StringView trivia, StringView value, size_t line_number, size_t line_column)
  176. : m_type(type)
  177. , m_trivia(trivia)
  178. , m_value(value)
  179. , m_line_number(line_number)
  180. , m_line_column(line_column)
  181. {
  182. }
  183. TokenType type() const { return m_type; }
  184. TokenCategory category() const;
  185. static TokenCategory category(TokenType);
  186. const char* name() const;
  187. static const char* name(TokenType);
  188. const StringView& trivia() const { return m_trivia; }
  189. const StringView& value() const { return m_value; }
  190. size_t line_number() const { return m_line_number; }
  191. size_t line_column() const { return m_line_column; }
  192. double double_value() const;
  193. bool bool_value() const;
  194. enum class StringValueStatus {
  195. Ok,
  196. MalformedHexEscape,
  197. MalformedUnicodeEscape,
  198. UnicodeEscapeOverflow,
  199. };
  200. String string_value(StringValueStatus& status) const;
  201. bool is_identifier_name() const;
  202. bool trivia_contains_line_terminator() const;
  203. private:
  204. TokenType m_type;
  205. StringView m_trivia;
  206. StringView m_value;
  207. size_t m_line_number;
  208. size_t m_line_column;
  209. };
  210. }