Token.h 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212
  /*
   * Copyright (c) 2020, Stephan Unverwerth <s.unverwerth@serenityos.org>
   *
   * SPDX-License-Identifier: BSD-2-Clause
   */
  6. #pragma once
  7. #include <AK/String.h>
  8. #include <AK/StringView.h>
  9. namespace JS {
  10. // U+2028 LINE SEPARATOR
  11. constexpr const char line_separator_chars[] { (char)0xe2, (char)0x80, (char)0xa8, 0 };
  12. constexpr const StringView LINE_SEPARATOR { line_separator_chars };
  13. // U+2029 PARAGRAPH SEPARATOR
  14. constexpr const char paragraph_separator_chars[] { (char)0xe2, (char)0x80, (char)0xa9, 0 };
  15. constexpr const StringView PARAGRAPH_SEPARATOR { paragraph_separator_chars };
  // X-macro listing every JavaScript token type together with its category.
  //
  // Usage: define __ENUMERATE_JS_TOKEN(type, category), expand
  // ENUMERATE_JS_TOKENS, then #undef __ENUMERATE_JS_TOKEN again. The macro is
  // invoked once per entry, in the order written below.
  //
  //   type     - identifier of the token type; it becomes an enumerator of
  //              TokenType, so the position of an entry fixes its numeric value.
  //   category - one of the TokenCategory enumerator names.
  //
  // Entries are kept in alphabetical order. Inserting or reordering entries
  // changes the numeric values of the TokenType enumerators that follow.
  // Do not place comments inside the list: every line is joined to the next by
  // the trailing backslash.
  #define ENUMERATE_JS_TOKENS \
      __ENUMERATE_JS_TOKEN(Ampersand, Operator) \
      __ENUMERATE_JS_TOKEN(AmpersandEquals, Operator) \
      __ENUMERATE_JS_TOKEN(Arrow, Operator) \
      __ENUMERATE_JS_TOKEN(Asterisk, Operator) \
      __ENUMERATE_JS_TOKEN(AsteriskEquals, Operator) \
      __ENUMERATE_JS_TOKEN(Async, Keyword) \
      __ENUMERATE_JS_TOKEN(Await, Keyword) \
      __ENUMERATE_JS_TOKEN(BigIntLiteral, Number) \
      __ENUMERATE_JS_TOKEN(BoolLiteral, Keyword) \
      __ENUMERATE_JS_TOKEN(BracketClose, Punctuation) \
      __ENUMERATE_JS_TOKEN(BracketOpen, Punctuation) \
      __ENUMERATE_JS_TOKEN(Break, Keyword) \
      __ENUMERATE_JS_TOKEN(Caret, Operator) \
      __ENUMERATE_JS_TOKEN(CaretEquals, Operator) \
      __ENUMERATE_JS_TOKEN(Case, ControlKeyword) \
      __ENUMERATE_JS_TOKEN(Catch, ControlKeyword) \
      __ENUMERATE_JS_TOKEN(Class, Keyword) \
      __ENUMERATE_JS_TOKEN(Colon, Punctuation) \
      __ENUMERATE_JS_TOKEN(Comma, Punctuation) \
      __ENUMERATE_JS_TOKEN(Const, Keyword) \
      __ENUMERATE_JS_TOKEN(Continue, ControlKeyword) \
      __ENUMERATE_JS_TOKEN(CurlyClose, Punctuation) \
      __ENUMERATE_JS_TOKEN(CurlyOpen, Punctuation) \
      __ENUMERATE_JS_TOKEN(Debugger, Keyword) \
      __ENUMERATE_JS_TOKEN(Default, ControlKeyword) \
      __ENUMERATE_JS_TOKEN(Delete, Keyword) \
      __ENUMERATE_JS_TOKEN(Do, ControlKeyword) \
      __ENUMERATE_JS_TOKEN(DoubleAmpersand, Operator) \
      __ENUMERATE_JS_TOKEN(DoubleAmpersandEquals, Operator) \
      __ENUMERATE_JS_TOKEN(DoubleAsterisk, Operator) \
      __ENUMERATE_JS_TOKEN(DoubleAsteriskEquals, Operator) \
      __ENUMERATE_JS_TOKEN(DoublePipe, Operator) \
      __ENUMERATE_JS_TOKEN(DoublePipeEquals, Operator) \
      __ENUMERATE_JS_TOKEN(DoubleQuestionMark, Operator) \
      __ENUMERATE_JS_TOKEN(DoubleQuestionMarkEquals, Operator) \
      __ENUMERATE_JS_TOKEN(Else, ControlKeyword) \
      __ENUMERATE_JS_TOKEN(Enum, Keyword) \
      __ENUMERATE_JS_TOKEN(Eof, Invalid) \
      __ENUMERATE_JS_TOKEN(Equals, Operator) \
      __ENUMERATE_JS_TOKEN(EqualsEquals, Operator) \
      __ENUMERATE_JS_TOKEN(EqualsEqualsEquals, Operator) \
      __ENUMERATE_JS_TOKEN(ExclamationMark, Operator) \
      __ENUMERATE_JS_TOKEN(ExclamationMarkEquals, Operator) \
      __ENUMERATE_JS_TOKEN(ExclamationMarkEqualsEquals, Operator) \
      __ENUMERATE_JS_TOKEN(Export, Keyword) \
      __ENUMERATE_JS_TOKEN(Extends, Keyword) \
      __ENUMERATE_JS_TOKEN(Finally, ControlKeyword) \
      __ENUMERATE_JS_TOKEN(For, ControlKeyword) \
      __ENUMERATE_JS_TOKEN(Function, Keyword) \
      __ENUMERATE_JS_TOKEN(GreaterThan, Operator) \
      __ENUMERATE_JS_TOKEN(GreaterThanEquals, Operator) \
      __ENUMERATE_JS_TOKEN(Identifier, Identifier) \
      __ENUMERATE_JS_TOKEN(If, ControlKeyword) \
      __ENUMERATE_JS_TOKEN(Implements, Keyword) \
      __ENUMERATE_JS_TOKEN(Import, Keyword) \
      __ENUMERATE_JS_TOKEN(In, Keyword) \
      __ENUMERATE_JS_TOKEN(Instanceof, Keyword) \
      __ENUMERATE_JS_TOKEN(Interface, Keyword) \
      __ENUMERATE_JS_TOKEN(Invalid, Invalid) \
      __ENUMERATE_JS_TOKEN(LessThan, Operator) \
      __ENUMERATE_JS_TOKEN(LessThanEquals, Operator) \
      __ENUMERATE_JS_TOKEN(Let, Keyword) \
      __ENUMERATE_JS_TOKEN(Minus, Operator) \
      __ENUMERATE_JS_TOKEN(MinusEquals, Operator) \
      __ENUMERATE_JS_TOKEN(MinusMinus, Operator) \
      __ENUMERATE_JS_TOKEN(New, Keyword) \
      __ENUMERATE_JS_TOKEN(NullLiteral, Keyword) \
      __ENUMERATE_JS_TOKEN(NumericLiteral, Number) \
      __ENUMERATE_JS_TOKEN(Package, Keyword) \
      __ENUMERATE_JS_TOKEN(ParenClose, Punctuation) \
      __ENUMERATE_JS_TOKEN(ParenOpen, Punctuation) \
      __ENUMERATE_JS_TOKEN(Percent, Operator) \
      __ENUMERATE_JS_TOKEN(PercentEquals, Operator) \
      __ENUMERATE_JS_TOKEN(Period, Operator) \
      __ENUMERATE_JS_TOKEN(Pipe, Operator) \
      __ENUMERATE_JS_TOKEN(PipeEquals, Operator) \
      __ENUMERATE_JS_TOKEN(Plus, Operator) \
      __ENUMERATE_JS_TOKEN(PlusEquals, Operator) \
      __ENUMERATE_JS_TOKEN(PlusPlus, Operator) \
      __ENUMERATE_JS_TOKEN(Private, Keyword) \
      __ENUMERATE_JS_TOKEN(Protected, Keyword) \
      __ENUMERATE_JS_TOKEN(Public, Keyword) \
      __ENUMERATE_JS_TOKEN(QuestionMark, Operator) \
      __ENUMERATE_JS_TOKEN(QuestionMarkPeriod, Operator) \
      __ENUMERATE_JS_TOKEN(RegexFlags, String) \
      __ENUMERATE_JS_TOKEN(RegexLiteral, String) \
      __ENUMERATE_JS_TOKEN(Return, ControlKeyword) \
      __ENUMERATE_JS_TOKEN(Semicolon, Punctuation) \
      __ENUMERATE_JS_TOKEN(ShiftLeft, Operator) \
      __ENUMERATE_JS_TOKEN(ShiftLeftEquals, Operator) \
      __ENUMERATE_JS_TOKEN(ShiftRight, Operator) \
      __ENUMERATE_JS_TOKEN(ShiftRightEquals, Operator) \
      __ENUMERATE_JS_TOKEN(Slash, Operator) \
      __ENUMERATE_JS_TOKEN(SlashEquals, Operator) \
      __ENUMERATE_JS_TOKEN(Static, Keyword) \
      __ENUMERATE_JS_TOKEN(StringLiteral, String) \
      __ENUMERATE_JS_TOKEN(Super, Keyword) \
      __ENUMERATE_JS_TOKEN(Switch, ControlKeyword) \
      __ENUMERATE_JS_TOKEN(TemplateLiteralEnd, String) \
      __ENUMERATE_JS_TOKEN(TemplateLiteralExprEnd, Punctuation) \
      __ENUMERATE_JS_TOKEN(TemplateLiteralExprStart, Punctuation) \
      __ENUMERATE_JS_TOKEN(TemplateLiteralStart, String) \
      __ENUMERATE_JS_TOKEN(TemplateLiteralString, String) \
      __ENUMERATE_JS_TOKEN(This, Keyword) \
      __ENUMERATE_JS_TOKEN(Throw, ControlKeyword) \
      __ENUMERATE_JS_TOKEN(Tilde, Operator) \
      __ENUMERATE_JS_TOKEN(TripleDot, Operator) \
      __ENUMERATE_JS_TOKEN(Try, ControlKeyword) \
      __ENUMERATE_JS_TOKEN(Typeof, Keyword) \
      __ENUMERATE_JS_TOKEN(UnsignedShiftRight, Operator) \
      __ENUMERATE_JS_TOKEN(UnsignedShiftRightEquals, Operator) \
      __ENUMERATE_JS_TOKEN(UnterminatedRegexLiteral, String) \
      __ENUMERATE_JS_TOKEN(UnterminatedStringLiteral, String) \
      __ENUMERATE_JS_TOKEN(UnterminatedTemplateLiteral, String) \
      __ENUMERATE_JS_TOKEN(Var, Keyword) \
      __ENUMERATE_JS_TOKEN(Void, Keyword) \
      __ENUMERATE_JS_TOKEN(While, ControlKeyword) \
      __ENUMERATE_JS_TOKEN(With, ControlKeyword) \
      __ENUMERATE_JS_TOKEN(Yield, ControlKeyword)
  136. enum class TokenType {
  137. #define __ENUMERATE_JS_TOKEN(type, category) type,
  138. ENUMERATE_JS_TOKENS
  139. #undef __ENUMERATE_JS_TOKEN
  140. _COUNT_OF_TOKENS
  141. };
  142. constexpr size_t cs_num_of_js_tokens = static_cast<size_t>(TokenType::_COUNT_OF_TOKENS);
  // Coarse grouping of token types. These names are what the second argument
  // of each ENUMERATE_JS_TOKENS entry refers to.
  enum class TokenCategory {
      Invalid,        // Eof and Invalid
      Number,         // NumericLiteral and BigIntLiteral
      String,         // string, regex and template literal pieces, incl. unterminated ones
      Punctuation,    // brackets, parens, curlies, colon, comma, semicolon, template expr delimiters
      Operator,       // arithmetic, logical, comparison and assignment operators, Arrow, Period, TripleDot
      Keyword,        // keywords not listed as ControlKeyword (also BoolLiteral and NullLiteral)
      ControlKeyword, // control-flow keywords such as If, For, Return, Throw, Yield
      Identifier      // Identifier
  };
  153. class Token {
  154. public:
  155. Token(TokenType type, String message, StringView trivia, StringView value, StringView filename, size_t line_number, size_t line_column)
  156. : m_type(type)
  157. , m_message(message)
  158. , m_trivia(trivia)
  159. , m_value(value)
  160. , m_filename(filename)
  161. , m_line_number(line_number)
  162. , m_line_column(line_column)
  163. {
  164. }
  165. TokenType type() const { return m_type; }
  166. TokenCategory category() const;
  167. static TokenCategory category(TokenType);
  168. const char* name() const;
  169. static const char* name(TokenType);
  170. const String& message() const { return m_message; }
  171. const StringView& trivia() const { return m_trivia; }
  172. const StringView& value() const { return m_value; }
  173. const StringView& filename() const { return m_filename; }
  174. size_t line_number() const { return m_line_number; }
  175. size_t line_column() const { return m_line_column; }
  176. double double_value() const;
  177. bool bool_value() const;
  178. enum class StringValueStatus {
  179. Ok,
  180. MalformedHexEscape,
  181. MalformedUnicodeEscape,
  182. UnicodeEscapeOverflow,
  183. LegacyOctalEscapeSequence,
  184. };
  185. String string_value(StringValueStatus& status) const;
  186. bool is_identifier_name() const;
  187. bool trivia_contains_line_terminator() const;
  188. private:
  189. TokenType m_type;
  190. String m_message;
  191. StringView m_trivia;
  192. StringView m_value;
  193. StringView m_filename;
  194. size_t m_line_number;
  195. size_t m_line_column;
  196. };
  197. }