// Token.h (11 KB)
  1. /*
  2. * Copyright (c) 2020, Stephan Unverwerth <s.unverwerth@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #pragma once
  7. #include <AK/FlyString.h>
  8. #include <AK/String.h>
  9. #include <AK/StringView.h>
  10. #include <AK/Variant.h>
  11. namespace JS {
  12. // U+2028 LINE SEPARATOR
  13. constexpr const char line_separator_chars[] { (char)0xe2, (char)0x80, (char)0xa8, 0 };
  14. constexpr const StringView LINE_SEPARATOR_STRING { line_separator_chars };
  15. constexpr const u32 LINE_SEPARATOR { 0x2028 };
  16. // U+2029 PARAGRAPH SEPARATOR
  17. constexpr const char paragraph_separator_chars[] { (char)0xe2, (char)0x80, (char)0xa9, 0 };
  18. constexpr const StringView PARAGRAPH_SEPARATOR_STRING { paragraph_separator_chars };
  19. constexpr const u32 PARAGRAPH_SEPARATOR { 0x2029 };
  20. // U+00A0 NO BREAK SPACE
  21. constexpr const u32 NO_BREAK_SPACE { 0x00A0 };
  22. // U+200C ZERO WIDTH NON-JOINER
  23. constexpr const u32 ZERO_WIDTH_NON_JOINER { 0x200C };
  24. // U+FEFF ZERO WIDTH NO-BREAK SPACE
  25. constexpr const u32 ZERO_WIDTH_NO_BREAK_SPACE { 0xFEFF };
  26. // U+200D ZERO WIDTH JOINER
  27. constexpr const u32 ZERO_WIDTH_JOINER { 0x200D };
  28. #define ENUMERATE_JS_TOKENS \
  29. __ENUMERATE_JS_TOKEN(Ampersand, Operator) \
  30. __ENUMERATE_JS_TOKEN(AmpersandEquals, Operator) \
  31. __ENUMERATE_JS_TOKEN(Arrow, Operator) \
  32. __ENUMERATE_JS_TOKEN(Asterisk, Operator) \
  33. __ENUMERATE_JS_TOKEN(AsteriskEquals, Operator) \
  34. __ENUMERATE_JS_TOKEN(Async, Keyword) \
  35. __ENUMERATE_JS_TOKEN(Await, Keyword) \
  36. __ENUMERATE_JS_TOKEN(BigIntLiteral, Number) \
  37. __ENUMERATE_JS_TOKEN(BoolLiteral, Keyword) \
  38. __ENUMERATE_JS_TOKEN(BracketClose, Punctuation) \
  39. __ENUMERATE_JS_TOKEN(BracketOpen, Punctuation) \
  40. __ENUMERATE_JS_TOKEN(Break, Keyword) \
  41. __ENUMERATE_JS_TOKEN(Caret, Operator) \
  42. __ENUMERATE_JS_TOKEN(CaretEquals, Operator) \
  43. __ENUMERATE_JS_TOKEN(Case, ControlKeyword) \
  44. __ENUMERATE_JS_TOKEN(Catch, ControlKeyword) \
  45. __ENUMERATE_JS_TOKEN(Class, Keyword) \
  46. __ENUMERATE_JS_TOKEN(Colon, Punctuation) \
  47. __ENUMERATE_JS_TOKEN(Comma, Punctuation) \
  48. __ENUMERATE_JS_TOKEN(Const, Keyword) \
  49. __ENUMERATE_JS_TOKEN(Continue, ControlKeyword) \
  50. __ENUMERATE_JS_TOKEN(CurlyClose, Punctuation) \
  51. __ENUMERATE_JS_TOKEN(CurlyOpen, Punctuation) \
  52. __ENUMERATE_JS_TOKEN(Debugger, Keyword) \
  53. __ENUMERATE_JS_TOKEN(Default, ControlKeyword) \
  54. __ENUMERATE_JS_TOKEN(Delete, Keyword) \
  55. __ENUMERATE_JS_TOKEN(Do, ControlKeyword) \
  56. __ENUMERATE_JS_TOKEN(DoubleAmpersand, Operator) \
  57. __ENUMERATE_JS_TOKEN(DoubleAmpersandEquals, Operator) \
  58. __ENUMERATE_JS_TOKEN(DoubleAsterisk, Operator) \
  59. __ENUMERATE_JS_TOKEN(DoubleAsteriskEquals, Operator) \
  60. __ENUMERATE_JS_TOKEN(DoublePipe, Operator) \
  61. __ENUMERATE_JS_TOKEN(DoublePipeEquals, Operator) \
  62. __ENUMERATE_JS_TOKEN(DoubleQuestionMark, Operator) \
  63. __ENUMERATE_JS_TOKEN(DoubleQuestionMarkEquals, Operator) \
  64. __ENUMERATE_JS_TOKEN(Else, ControlKeyword) \
  65. __ENUMERATE_JS_TOKEN(Enum, Keyword) \
  66. __ENUMERATE_JS_TOKEN(Eof, Invalid) \
  67. __ENUMERATE_JS_TOKEN(Equals, Operator) \
  68. __ENUMERATE_JS_TOKEN(EqualsEquals, Operator) \
  69. __ENUMERATE_JS_TOKEN(EqualsEqualsEquals, Operator) \
  70. __ENUMERATE_JS_TOKEN(EscapedKeyword, Identifier) \
  71. __ENUMERATE_JS_TOKEN(ExclamationMark, Operator) \
  72. __ENUMERATE_JS_TOKEN(ExclamationMarkEquals, Operator) \
  73. __ENUMERATE_JS_TOKEN(ExclamationMarkEqualsEquals, Operator) \
  74. __ENUMERATE_JS_TOKEN(Export, Keyword) \
  75. __ENUMERATE_JS_TOKEN(Extends, Keyword) \
  76. __ENUMERATE_JS_TOKEN(Finally, ControlKeyword) \
  77. __ENUMERATE_JS_TOKEN(For, ControlKeyword) \
  78. __ENUMERATE_JS_TOKEN(Function, Keyword) \
  79. __ENUMERATE_JS_TOKEN(GreaterThan, Operator) \
  80. __ENUMERATE_JS_TOKEN(GreaterThanEquals, Operator) \
  81. __ENUMERATE_JS_TOKEN(Identifier, Identifier) \
  82. __ENUMERATE_JS_TOKEN(If, ControlKeyword) \
  83. __ENUMERATE_JS_TOKEN(Implements, Keyword) \
  84. __ENUMERATE_JS_TOKEN(Import, Keyword) \
  85. __ENUMERATE_JS_TOKEN(In, Keyword) \
  86. __ENUMERATE_JS_TOKEN(Instanceof, Keyword) \
  87. __ENUMERATE_JS_TOKEN(Interface, Keyword) \
  88. __ENUMERATE_JS_TOKEN(Invalid, Invalid) \
  89. __ENUMERATE_JS_TOKEN(LessThan, Operator) \
  90. __ENUMERATE_JS_TOKEN(LessThanEquals, Operator) \
  91. __ENUMERATE_JS_TOKEN(Let, Keyword) \
  92. __ENUMERATE_JS_TOKEN(Minus, Operator) \
  93. __ENUMERATE_JS_TOKEN(MinusEquals, Operator) \
  94. __ENUMERATE_JS_TOKEN(MinusMinus, Operator) \
  95. __ENUMERATE_JS_TOKEN(New, Keyword) \
  96. __ENUMERATE_JS_TOKEN(NullLiteral, Keyword) \
  97. __ENUMERATE_JS_TOKEN(NumericLiteral, Number) \
  98. __ENUMERATE_JS_TOKEN(Package, Keyword) \
  99. __ENUMERATE_JS_TOKEN(ParenClose, Punctuation) \
  100. __ENUMERATE_JS_TOKEN(ParenOpen, Punctuation) \
  101. __ENUMERATE_JS_TOKEN(Percent, Operator) \
  102. __ENUMERATE_JS_TOKEN(PercentEquals, Operator) \
  103. __ENUMERATE_JS_TOKEN(Period, Operator) \
  104. __ENUMERATE_JS_TOKEN(Pipe, Operator) \
  105. __ENUMERATE_JS_TOKEN(PipeEquals, Operator) \
  106. __ENUMERATE_JS_TOKEN(Plus, Operator) \
  107. __ENUMERATE_JS_TOKEN(PlusEquals, Operator) \
  108. __ENUMERATE_JS_TOKEN(PlusPlus, Operator) \
  109. __ENUMERATE_JS_TOKEN(Private, Keyword) \
  110. __ENUMERATE_JS_TOKEN(Protected, Keyword) \
  111. __ENUMERATE_JS_TOKEN(Public, Keyword) \
  112. __ENUMERATE_JS_TOKEN(QuestionMark, Operator) \
  113. __ENUMERATE_JS_TOKEN(QuestionMarkPeriod, Operator) \
  114. __ENUMERATE_JS_TOKEN(RegexFlags, String) \
  115. __ENUMERATE_JS_TOKEN(RegexLiteral, String) \
  116. __ENUMERATE_JS_TOKEN(Return, ControlKeyword) \
  117. __ENUMERATE_JS_TOKEN(Semicolon, Punctuation) \
  118. __ENUMERATE_JS_TOKEN(ShiftLeft, Operator) \
  119. __ENUMERATE_JS_TOKEN(ShiftLeftEquals, Operator) \
  120. __ENUMERATE_JS_TOKEN(ShiftRight, Operator) \
  121. __ENUMERATE_JS_TOKEN(ShiftRightEquals, Operator) \
  122. __ENUMERATE_JS_TOKEN(Slash, Operator) \
  123. __ENUMERATE_JS_TOKEN(SlashEquals, Operator) \
  124. __ENUMERATE_JS_TOKEN(Static, Keyword) \
  125. __ENUMERATE_JS_TOKEN(StringLiteral, String) \
  126. __ENUMERATE_JS_TOKEN(Super, Keyword) \
  127. __ENUMERATE_JS_TOKEN(Switch, ControlKeyword) \
  128. __ENUMERATE_JS_TOKEN(TemplateLiteralEnd, String) \
  129. __ENUMERATE_JS_TOKEN(TemplateLiteralExprEnd, Punctuation) \
  130. __ENUMERATE_JS_TOKEN(TemplateLiteralExprStart, Punctuation) \
  131. __ENUMERATE_JS_TOKEN(TemplateLiteralStart, String) \
  132. __ENUMERATE_JS_TOKEN(TemplateLiteralString, String) \
  133. __ENUMERATE_JS_TOKEN(This, Keyword) \
  134. __ENUMERATE_JS_TOKEN(Throw, ControlKeyword) \
  135. __ENUMERATE_JS_TOKEN(Tilde, Operator) \
  136. __ENUMERATE_JS_TOKEN(TripleDot, Operator) \
  137. __ENUMERATE_JS_TOKEN(Try, ControlKeyword) \
  138. __ENUMERATE_JS_TOKEN(Typeof, Keyword) \
  139. __ENUMERATE_JS_TOKEN(UnsignedShiftRight, Operator) \
  140. __ENUMERATE_JS_TOKEN(UnsignedShiftRightEquals, Operator) \
  141. __ENUMERATE_JS_TOKEN(UnterminatedRegexLiteral, String) \
  142. __ENUMERATE_JS_TOKEN(UnterminatedStringLiteral, String) \
  143. __ENUMERATE_JS_TOKEN(UnterminatedTemplateLiteral, String) \
  144. __ENUMERATE_JS_TOKEN(Var, Keyword) \
  145. __ENUMERATE_JS_TOKEN(Void, Keyword) \
  146. __ENUMERATE_JS_TOKEN(While, ControlKeyword) \
  147. __ENUMERATE_JS_TOKEN(With, ControlKeyword) \
  148. __ENUMERATE_JS_TOKEN(Yield, ControlKeyword)
  149. enum class TokenType {
  150. #define __ENUMERATE_JS_TOKEN(type, category) type,
  151. ENUMERATE_JS_TOKENS
  152. #undef __ENUMERATE_JS_TOKEN
  153. _COUNT_OF_TOKENS
  154. };
  155. constexpr size_t cs_num_of_js_tokens = static_cast<size_t>(TokenType::_COUNT_OF_TOKENS);
  156. enum class TokenCategory {
  157. Invalid,
  158. Number,
  159. String,
  160. Punctuation,
  161. Operator,
  162. Keyword,
  163. ControlKeyword,
  164. Identifier
  165. };
  166. class Token {
  167. public:
  168. Token() = default;
  169. Token(TokenType type, String message, StringView trivia, StringView value, StringView filename, size_t line_number, size_t line_column, size_t offset)
  170. : m_type(type)
  171. , m_message(message)
  172. , m_trivia(trivia)
  173. , m_original_value(value)
  174. , m_value(value)
  175. , m_filename(filename)
  176. , m_line_number(line_number)
  177. , m_line_column(line_column)
  178. , m_offset(offset)
  179. {
  180. }
  181. TokenType type() const { return m_type; }
  182. TokenCategory category() const;
  183. static TokenCategory category(TokenType);
  184. const char* name() const;
  185. static const char* name(TokenType);
  186. const String& message() const { return m_message; }
  187. const StringView& trivia() const { return m_trivia; }
  188. const StringView& original_value() const { return m_original_value; }
  189. StringView value() const
  190. {
  191. return m_value.visit(
  192. [](StringView const& view) { return view; },
  193. [](FlyString const& identifier) { return identifier.view(); },
  194. [](Empty) -> StringView { VERIFY_NOT_REACHED(); });
  195. }
  196. const StringView& filename() const { return m_filename; }
  197. size_t line_number() const { return m_line_number; }
  198. size_t line_column() const { return m_line_column; }
  199. size_t offset() const { return m_offset; }
  200. double double_value() const;
  201. bool bool_value() const;
  202. enum class StringValueStatus {
  203. Ok,
  204. MalformedHexEscape,
  205. MalformedUnicodeEscape,
  206. UnicodeEscapeOverflow,
  207. LegacyOctalEscapeSequence,
  208. };
  209. String string_value(StringValueStatus& status) const;
  210. String raw_template_value() const;
  211. void set_identifier_value(FlyString value)
  212. {
  213. m_value = move(value);
  214. }
  215. bool is_identifier_name() const;
  216. bool trivia_contains_line_terminator() const;
  217. private:
  218. TokenType m_type { TokenType::Invalid };
  219. String m_message;
  220. StringView m_trivia;
  221. StringView m_original_value;
  222. Variant<Empty, StringView, FlyString> m_value { Empty {} };
  223. StringView m_filename;
  224. size_t m_line_number { 0 };
  225. size_t m_line_column { 0 };
  226. size_t m_offset { 0 };
  227. };
  228. }