test-invalid-unicode-js.cpp 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144
  1. /*
  2. * Copyright (c) 2021, David Tuin <davidot@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <LibJS/Parser.h>
  7. #include <LibTest/TestCase.h>
  8. static bool produces_eof_tokens(JS::Lexer& lexer)
  9. {
  10. for (auto i = 0; i < 10; i++) {
  11. auto eof_token = lexer.next();
  12. if (eof_token.type() != JS::TokenType::Eof)
  13. return false;
  14. }
  15. return true;
  16. }
  17. static bool triggers_immediate_unicode_fault(StringView code)
  18. {
  19. auto lexer = JS::Lexer(code);
  20. auto first_token = lexer.next();
  21. if (first_token.type() != JS::TokenType::Invalid)
  22. return false;
  23. return produces_eof_tokens(lexer);
  24. }
  // Every byte after the leading byte of a multi-byte UTF-8 sequence must have
  // the form 0b10xxxxxx. Looking at the high nibble of such a byte, all of
  // these options are therefore invalid:
  // \x0y = 0000 y (or \x1y, \x2y and \x3y)
  // \x4y = 0100 y (or \x5y, \x6y and \x7y)
  // \xCy = 1100 y (or \xDy, \xEy and \xFy)
  // And the only valid option is:
  // \x8y = 1000 y (or \x9y, \xAy and \xBy)
  32. TEST_CASE(no_input_only_gives_eof)
  33. {
  34. char const* code = "";
  35. auto lexer = JS::Lexer(code);
  36. EXPECT(produces_eof_tokens(lexer));
  37. }
  38. TEST_CASE(invalid_start_code_point)
  39. {
  40. EXPECT(triggers_immediate_unicode_fault("\x80"sv));
  41. EXPECT(triggers_immediate_unicode_fault("\x90"sv));
  42. EXPECT(triggers_immediate_unicode_fault("\xA0"sv));
  43. EXPECT(triggers_immediate_unicode_fault("\xB0"sv));
  44. EXPECT(triggers_immediate_unicode_fault("\xF8"sv));
  45. EXPECT(triggers_immediate_unicode_fault("\xFF"sv));
  46. }
  47. TEST_CASE(code_points_of_length_2)
  48. {
  49. // Initial 110xxxxx -> \xCy or \xDy
  50. EXPECT(triggers_immediate_unicode_fault("\xC5"sv));
  51. EXPECT(triggers_immediate_unicode_fault("\xC5\x02"sv));
  52. EXPECT(triggers_immediate_unicode_fault("\xC5\x52"sv));
  53. EXPECT(triggers_immediate_unicode_fault("\xC5\xD2"sv));
  54. EXPECT(triggers_immediate_unicode_fault("\xD5"sv));
  55. EXPECT(triggers_immediate_unicode_fault("\xD5\x23"sv));
  56. EXPECT(triggers_immediate_unicode_fault("\xD5\x74"sv));
  57. EXPECT(triggers_immediate_unicode_fault("\xD5\xF5"sv));
  58. }
  59. TEST_CASE(code_points_of_length_3)
  60. {
  61. // Initial 1110xxxx -> \xEy
  62. EXPECT(triggers_immediate_unicode_fault("\xE5"sv));
  63. EXPECT(triggers_immediate_unicode_fault("\xE5\x02"sv));
  64. EXPECT(triggers_immediate_unicode_fault("\xE5\x52"sv));
  65. EXPECT(triggers_immediate_unicode_fault("\xE5\xD2"sv));
  66. EXPECT(triggers_immediate_unicode_fault("\xEA\x80"sv));
  67. EXPECT(triggers_immediate_unicode_fault("\xEA\x81\x07"sv));
  68. EXPECT(triggers_immediate_unicode_fault("\xEA\x82\x57"sv));
  69. EXPECT(triggers_immediate_unicode_fault("\xEA\x83\xD7"sv));
  70. }
  71. TEST_CASE(code_points_of_length_4)
  72. {
  73. // Initial 11110xxx -> \xF{0..7}
  74. EXPECT(triggers_immediate_unicode_fault("\xF0"sv));
  75. EXPECT(triggers_immediate_unicode_fault("\xF1\x02"sv));
  76. EXPECT(triggers_immediate_unicode_fault("\xF2\x52"sv));
  77. EXPECT(triggers_immediate_unicode_fault("\xF3\xD2"sv));
  78. EXPECT(triggers_immediate_unicode_fault("\xF4\x80"sv));
  79. EXPECT(triggers_immediate_unicode_fault("\xF5\x81\x07"sv));
  80. EXPECT(triggers_immediate_unicode_fault("\xF6\x82\x57"sv));
  81. EXPECT(triggers_immediate_unicode_fault("\xF7\x83\xD7"sv));
  82. EXPECT(triggers_immediate_unicode_fault("\xF4\x80\x80"sv));
  83. EXPECT(triggers_immediate_unicode_fault("\xF5\x91\x80\x07"sv));
  84. EXPECT(triggers_immediate_unicode_fault("\xF6\xA2\x80\x57"sv));
  85. EXPECT(triggers_immediate_unicode_fault("\xF7\xB3\x80\xD7"sv));
  86. }
  87. TEST_CASE(gives_valid_part_until_fault)
  88. {
  89. auto code = "abc\xF5\x81\x80\x07; abc\xF5\x81\x80\x07 += 4"sv;
  90. JS::Lexer lexer(code);
  91. auto first_token = lexer.next();
  92. EXPECT_EQ(first_token.type(), JS::TokenType::Identifier);
  93. EXPECT_EQ(first_token.value(), "abc"sv);
  94. auto second_token = lexer.next();
  95. EXPECT_EQ(second_token.type(), JS::TokenType::Invalid);
  96. EXPECT(produces_eof_tokens(lexer));
  97. }
  98. TEST_CASE(gives_fully_parsed_tokens_even_if_invalid_unicode_follows)
  99. {
  100. auto code = "let \xE5\xD2"sv;
  101. JS::Lexer lexer(code);
  102. auto first_token = lexer.next();
  103. EXPECT_EQ(first_token.type(), JS::TokenType::Let);
  104. auto second_token = lexer.next();
  105. EXPECT_EQ(second_token.type(), JS::TokenType::Invalid);
  106. EXPECT(produces_eof_tokens(lexer));
  107. }
  108. TEST_CASE(invalid_unicode_and_valid_code)
  109. {
  110. EXPECT(triggers_immediate_unicode_fault("\xEA\xFDthrow 1;"sv));
  111. }
  112. TEST_CASE(long_invalid_unicode_and_valid_code)
  113. {
  114. EXPECT(triggers_immediate_unicode_fault("\xF7throw 1;"sv));
  115. }
  116. TEST_CASE(invalid_unicode_after_valid_code_and_before_eof)
  117. {
  118. char const* code = "let \xEA\xFD;";
  119. auto lexer = JS::Lexer(code);
  120. auto let_token = lexer.next();
  121. EXPECT_EQ(let_token.type(), JS::TokenType::Let);
  122. auto invalid_token = lexer.next();
  123. EXPECT_EQ(invalid_token.type(), JS::TokenType::Invalid);
  124. EXPECT(produces_eof_tokens(lexer));
  125. }