Token.cpp 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
  1. /*
  2. * Copyright (c) 2020, Stephan Unverwerth <s.unverwerth@serenityos.org>
  3. * Copyright (c) 2020, Linus Groh <linusg@serenityos.org>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include "Token.h"
  8. #include <AK/Assertions.h>
  9. #include <AK/CharacterTypes.h>
  10. #include <AK/GenericLexer.h>
  11. #include <AK/StringBuilder.h>
  12. namespace JS {
  13. const char* Token::name(TokenType type)
  14. {
  15. switch (type) {
  16. #define __ENUMERATE_JS_TOKEN(type, category) \
  17. case TokenType::type: \
  18. return #type;
  19. ENUMERATE_JS_TOKENS
  20. #undef __ENUMERATE_JS_TOKEN
  21. default:
  22. VERIFY_NOT_REACHED();
  23. return "<Unknown>";
  24. }
  25. }
  26. const char* Token::name() const
  27. {
  28. return name(m_type);
  29. }
  30. TokenCategory Token::category(TokenType type)
  31. {
  32. switch (type) {
  33. #define __ENUMERATE_JS_TOKEN(type, category) \
  34. case TokenType::type: \
  35. return TokenCategory::category;
  36. ENUMERATE_JS_TOKENS
  37. #undef __ENUMERATE_JS_TOKEN
  38. default:
  39. VERIFY_NOT_REACHED();
  40. }
  41. }
  42. TokenCategory Token::category() const
  43. {
  44. return category(m_type);
  45. }
  46. double Token::double_value() const
  47. {
  48. VERIFY(type() == TokenType::NumericLiteral);
  49. StringBuilder builder;
  50. for (auto ch : m_value) {
  51. if (ch == '_')
  52. continue;
  53. builder.append(ch);
  54. }
  55. String value_string = builder.to_string();
  56. if (value_string[0] == '0' && value_string.length() >= 2) {
  57. if (value_string[1] == 'x' || value_string[1] == 'X') {
  58. // hexadecimal
  59. return static_cast<double>(strtoul(value_string.characters() + 2, nullptr, 16));
  60. } else if (value_string[1] == 'o' || value_string[1] == 'O') {
  61. // octal
  62. return static_cast<double>(strtoul(value_string.characters() + 2, nullptr, 8));
  63. } else if (value_string[1] == 'b' || value_string[1] == 'B') {
  64. // binary
  65. return static_cast<double>(strtoul(value_string.characters() + 2, nullptr, 2));
  66. } else if (is_ascii_digit(value_string[1])) {
  67. // also octal, but syntax error in strict mode
  68. if (!m_value.contains('8') && !m_value.contains('9'))
  69. return static_cast<double>(strtoul(value_string.characters() + 1, nullptr, 8));
  70. }
  71. }
  72. return strtod(value_string.characters(), nullptr);
  73. }
  74. static u32 hex2int(char x)
  75. {
  76. VERIFY(is_ascii_hex_digit(x));
  77. if (x >= '0' && x <= '9')
  78. return x - '0';
  79. return 10u + (to_ascii_lowercase(x) - 'a');
  80. }
  81. String Token::string_value(StringValueStatus& status) const
  82. {
  83. VERIFY(type() == TokenType::StringLiteral || type() == TokenType::TemplateLiteralString);
  84. auto is_template = type() == TokenType::TemplateLiteralString;
  85. GenericLexer lexer(is_template ? m_value : m_value.substring_view(1, m_value.length() - 2));
  86. auto encoding_failure = [&status](StringValueStatus parse_status) -> String {
  87. status = parse_status;
  88. return {};
  89. };
  90. StringBuilder builder;
  91. while (!lexer.is_eof()) {
  92. // No escape, consume one char and continue
  93. if (!lexer.next_is('\\')) {
  94. builder.append(lexer.consume());
  95. continue;
  96. }
  97. lexer.ignore();
  98. VERIFY(!lexer.is_eof());
  99. // Line continuation
  100. if (lexer.next_is('\n') || lexer.next_is('\r')) {
  101. lexer.ignore();
  102. continue;
  103. }
  104. // Line continuation
  105. if (lexer.next_is(LINE_SEPARATOR) || lexer.next_is(PARAGRAPH_SEPARATOR)) {
  106. lexer.ignore(3);
  107. continue;
  108. }
  109. // Null-byte escape
  110. if (lexer.next_is('0') && !is_ascii_digit(lexer.peek(1))) {
  111. lexer.ignore();
  112. builder.append('\0');
  113. continue;
  114. }
  115. // Hex escape
  116. if (lexer.next_is('x')) {
  117. lexer.ignore();
  118. if (!is_ascii_hex_digit(lexer.peek()) || !is_ascii_hex_digit(lexer.peek(1)))
  119. return encoding_failure(StringValueStatus::MalformedHexEscape);
  120. auto code_point = hex2int(lexer.consume()) * 16 + hex2int(lexer.consume());
  121. VERIFY(code_point <= 255);
  122. builder.append_code_point(code_point);
  123. continue;
  124. }
  125. // Unicode escape
  126. if (lexer.next_is('u')) {
  127. lexer.ignore();
  128. u32 code_point = 0;
  129. if (lexer.next_is('{')) {
  130. lexer.ignore();
  131. while (true) {
  132. if (!lexer.next_is(is_ascii_hex_digit))
  133. return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
  134. auto new_code_point = (code_point << 4u) | hex2int(lexer.consume());
  135. if (new_code_point < code_point)
  136. return encoding_failure(StringValueStatus::UnicodeEscapeOverflow);
  137. code_point = new_code_point;
  138. if (lexer.next_is('}'))
  139. break;
  140. }
  141. lexer.ignore();
  142. } else {
  143. for (int j = 0; j < 4; ++j) {
  144. if (!lexer.next_is(is_ascii_hex_digit))
  145. return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
  146. code_point = (code_point << 4u) | hex2int(lexer.consume());
  147. }
  148. }
  149. builder.append_code_point(code_point);
  150. continue;
  151. }
  152. // In non-strict mode LegacyOctalEscapeSequence is allowed in strings:
  153. // https://tc39.es/ecma262/#sec-additional-syntax-string-literals
  154. String octal_str;
  155. auto is_octal_digit = [](char ch) { return ch >= '0' && ch <= '7'; };
  156. auto is_zero_to_three = [](char ch) { return ch >= '0' && ch <= '3'; };
  157. auto is_four_to_seven = [](char ch) { return ch >= '4' && ch <= '7'; };
  158. // OctalDigit [lookahead ∉ OctalDigit]
  159. if (is_octal_digit(lexer.peek()) && !is_octal_digit(lexer.peek(1)))
  160. octal_str = lexer.consume(1);
  161. // ZeroToThree OctalDigit [lookahead ∉ OctalDigit]
  162. else if (is_zero_to_three(lexer.peek()) && is_octal_digit(lexer.peek(1)) && !is_octal_digit(lexer.peek(2)))
  163. octal_str = lexer.consume(2);
  164. // FourToSeven OctalDigit
  165. else if (is_four_to_seven(lexer.peek()) && is_octal_digit(lexer.peek(1)))
  166. octal_str = lexer.consume(2);
  167. // ZeroToThree OctalDigit OctalDigit
  168. else if (is_zero_to_three(lexer.peek()) && is_octal_digit(lexer.peek(1)) && is_octal_digit(lexer.peek(2)))
  169. octal_str = lexer.consume(3);
  170. if (!octal_str.is_null()) {
  171. status = StringValueStatus::LegacyOctalEscapeSequence;
  172. auto code_point = strtoul(octal_str.characters(), nullptr, 8);
  173. VERIFY(code_point <= 255);
  174. builder.append_code_point(code_point);
  175. continue;
  176. }
  177. lexer.retreat();
  178. builder.append(lexer.consume_escaped_character('\\', "b\bf\fn\nr\rt\tv\v"));
  179. }
  180. return builder.to_string();
  181. }
  182. bool Token::bool_value() const
  183. {
  184. VERIFY(type() == TokenType::BoolLiteral);
  185. return m_value == "true";
  186. }
  187. bool Token::is_identifier_name() const
  188. {
  189. // IdentifierNames are Identifiers + ReservedWords
  190. // The standard defines this reversed: Identifiers are IdentifierNames except reserved words
  191. // https://tc39.es/ecma262/#prod-Identifier
  192. return m_type == TokenType::Identifier
  193. || m_type == TokenType::Await
  194. || m_type == TokenType::BoolLiteral
  195. || m_type == TokenType::Break
  196. || m_type == TokenType::Case
  197. || m_type == TokenType::Catch
  198. || m_type == TokenType::Class
  199. || m_type == TokenType::Const
  200. || m_type == TokenType::Continue
  201. || m_type == TokenType::Default
  202. || m_type == TokenType::Delete
  203. || m_type == TokenType::Do
  204. || m_type == TokenType::Else
  205. || m_type == TokenType::Enum
  206. || m_type == TokenType::Export
  207. || m_type == TokenType::Extends
  208. || m_type == TokenType::Finally
  209. || m_type == TokenType::For
  210. || m_type == TokenType::Function
  211. || m_type == TokenType::If
  212. || m_type == TokenType::Import
  213. || m_type == TokenType::In
  214. || m_type == TokenType::Instanceof
  215. || m_type == TokenType::Interface
  216. || m_type == TokenType::Let
  217. || m_type == TokenType::New
  218. || m_type == TokenType::NullLiteral
  219. || m_type == TokenType::Return
  220. || m_type == TokenType::Super
  221. || m_type == TokenType::Switch
  222. || m_type == TokenType::This
  223. || m_type == TokenType::Throw
  224. || m_type == TokenType::Try
  225. || m_type == TokenType::Typeof
  226. || m_type == TokenType::Var
  227. || m_type == TokenType::Void
  228. || m_type == TokenType::While
  229. || m_type == TokenType::Yield;
  230. }
  231. bool Token::trivia_contains_line_terminator() const
  232. {
  233. return m_trivia.contains('\n') || m_trivia.contains('\r') || m_trivia.contains(LINE_SEPARATOR) || m_trivia.contains(PARAGRAPH_SEPARATOR);
  234. }
  235. }