|
@@ -7,54 +7,129 @@
|
|
|
#include <LibJS/Parser.h>
|
|
|
#include <LibTest/TestCase.h>
|
|
|
|
|
|
-TEST_CASE(invalid_unicode_only)
|
|
|
+static bool produces_eof_tokens(JS::Lexer& lexer)
|
|
|
{
|
|
|
- char const* code = "\xEA\xFD";
|
|
|
- auto lexer = JS::Lexer(code);
|
|
|
- auto token = lexer.next();
|
|
|
- EXPECT_EQ(token.type(), JS::TokenType::Invalid);
|
|
|
-
|
|
|
- // After this we can get as many eof tokens as we like.
|
|
|
for (auto i = 0; i < 10; i++) {
|
|
|
auto eof_token = lexer.next();
|
|
|
- EXPECT_EQ(eof_token.type(), JS::TokenType::Eof);
|
|
|
+ if (eof_token.type() != JS::TokenType::Eof)
|
|
|
+ return false;
|
|
|
}
|
|
|
+ return true;
|
|
|
}
|
|
|
|
|
|
-TEST_CASE(long_invalid_unicode)
|
|
|
+static bool triggers_immediate_unicode_fault(StringView code)
|
|
|
{
|
|
|
- char const* code = "\xF7";
|
|
|
auto lexer = JS::Lexer(code);
|
|
|
- auto token = lexer.next();
|
|
|
- EXPECT_EQ(token.type(), JS::TokenType::Invalid);
|
|
|
+ auto first_token = lexer.next();
|
|
|
|
|
|
- // After this we can get as many eof tokens as we like.
|
|
|
- for (auto i = 0; i < 10; i++) {
|
|
|
- auto eof_token = lexer.next();
|
|
|
- EXPECT_EQ(eof_token.type(), JS::TokenType::Eof);
|
|
|
- }
|
|
|
+ if (first_token.type() != JS::TokenType::Invalid)
|
|
|
+ return false;
|
|
|
+
|
|
|
+ return produces_eof_tokens(lexer);
|
|
|
}
|
|
|
+// Every non-leading (continuation) byte must start with 0b10xxxxxx.
|
|
|
+// Thus all these options are invalid:
|
|
|
+// \x0y = 0000 y (or \x1y, \x2y and \x3y)
|
|
|
+// \x4y = 0100 y (or \x5y, \x6y and \x7y)
|
|
|
+// \xCy = 1100 y (or \xDy, \xEy and \xFy)
|
|
|
+// And the only valid option is:
|
|
|
+// \x8y = 1000 y (or \x9y, \xAy and \xBy)
|
|
|
|
|
|
-TEST_CASE(invalid_unicode_and_valid_code)
|
|
|
+TEST_CASE(no_input_only_gives_eof)
|
|
|
{
|
|
|
- char const* code = "\xEA\xFDthrow 1;";
|
|
|
+ char const* code = "";
|
|
|
auto lexer = JS::Lexer(code);
|
|
|
- auto invalid_token = lexer.next();
|
|
|
- EXPECT_EQ(invalid_token.type(), JS::TokenType::Invalid);
|
|
|
- // 0xEA is the start of a three character unicode code point thus it consumes the 't'.
|
|
|
- auto token_after = lexer.next();
|
|
|
- EXPECT_EQ(token_after.value(), "hrow");
|
|
|
+ EXPECT(produces_eof_tokens(lexer));
|
|
|
+}
|
|
|
+
|
|
|
+TEST_CASE(invalid_start_code_point)
|
|
|
+{
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\x80"sv));
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\x90"sv));
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xA0"sv));
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xB0"sv));
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xF8"sv));
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xFF"sv));
|
|
|
+}
|
|
|
+
|
|
|
+TEST_CASE(code_points_of_length_2)
|
|
|
+{
|
|
|
+ // Initial 110xxxxx -> \xCy or \xDy
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xC5"sv));
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xC5\x02"sv));
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xC5\x52"sv));
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xC5\xD2"sv));
|
|
|
+
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xD5"sv));
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xD5\x23"sv));
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xD5\x74"sv));
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xD5\xF5"sv));
|
|
|
+}
|
|
|
+
|
|
|
+TEST_CASE(code_points_of_length_3)
|
|
|
+{
|
|
|
+ // Initial 1110xxxx -> \xEy
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xE5"sv));
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xE5\x02"sv));
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xE5\x52"sv));
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xE5\xD2"sv));
|
|
|
+
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xEA\x80"sv));
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xEA\x81\x07"sv));
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xEA\x82\x57"sv));
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xEA\x83\xD7"sv));
|
|
|
+}
|
|
|
+
|
|
|
+TEST_CASE(code_points_of_length_4)
|
|
|
+{
|
|
|
+ // Initial 11110xxx -> \xF{0..7}
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xF0"sv));
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xF1\x02"sv));
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xF2\x52"sv));
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xF3\xD2"sv));
|
|
|
+
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xF4\x80"sv));
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xF5\x81\x07"sv));
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xF6\x82\x57"sv));
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xF7\x83\xD7"sv));
|
|
|
+
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xF4\x80\x80"sv));
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xF5\x91\x80\x07"sv));
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xF6\xA2\x80\x57"sv));
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xF7\xB3\x80\xD7"sv));
|
|
|
+}
|
|
|
+
|
|
|
+TEST_CASE(gives_valid_part_until_fault)
|
|
|
+{
|
|
|
+ auto code = "abc\xF5\x81\x80\x07; abc\xF5\x81\x80\x07 += 4"sv;
|
|
|
+ JS::Lexer lexer(code);
|
|
|
+ auto first_token = lexer.next();
|
|
|
+ EXPECT_EQ(first_token.type(), JS::TokenType::Identifier);
|
|
|
+ EXPECT_EQ(first_token.value(), "abc"sv);
|
|
|
+ auto second_token = lexer.next();
|
|
|
+ EXPECT_EQ(second_token.type(), JS::TokenType::Invalid);
|
|
|
+ EXPECT(produces_eof_tokens(lexer));
|
|
|
+}
|
|
|
+
|
|
|
+TEST_CASE(gives_fully_parsed_tokens_even_if_invalid_unicode_follows)
|
|
|
+{
|
|
|
+ auto code = "let \xE5\xD2"sv;
|
|
|
+ JS::Lexer lexer(code);
|
|
|
+ auto first_token = lexer.next();
|
|
|
+ EXPECT_EQ(first_token.type(), JS::TokenType::Let);
|
|
|
+ auto second_token = lexer.next();
|
|
|
+ EXPECT_EQ(second_token.type(), JS::TokenType::Invalid);
|
|
|
+ EXPECT(produces_eof_tokens(lexer));
|
|
|
+}
|
|
|
+
|
|
|
+TEST_CASE(invalid_unicode_and_valid_code)
|
|
|
+{
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xEA\xFDthrow 1;"sv));
|
|
|
}
|
|
|
|
|
|
TEST_CASE(long_invalid_unicode_and_valid_code)
|
|
|
{
|
|
|
- char const* code = "\xF7throw 1;";
|
|
|
- auto lexer = JS::Lexer(code);
|
|
|
- auto invalid_token = lexer.next();
|
|
|
- EXPECT_EQ(invalid_token.type(), JS::TokenType::Invalid);
|
|
|
- // 0xF7 is the start of a four character unicode code point thus it consumes 'thr'.
|
|
|
- auto token_after = lexer.next();
|
|
|
- EXPECT_EQ(token_after.value(), "ow");
|
|
|
+ EXPECT(triggers_immediate_unicode_fault("\xF7throw 1;"sv));
|
|
|
}
|
|
|
|
|
|
TEST_CASE(invalid_unicode_after_valid_code_and_before_eof)
|
|
@@ -65,12 +140,5 @@ TEST_CASE(invalid_unicode_after_valid_code_and_before_eof)
|
|
|
EXPECT_EQ(let_token.type(), JS::TokenType::Let);
|
|
|
auto invalid_token = lexer.next();
|
|
|
EXPECT_EQ(invalid_token.type(), JS::TokenType::Invalid);
|
|
|
- // It should still get the valid trivia in front.
|
|
|
- EXPECT_EQ(invalid_token.trivia(), " ");
|
|
|
-
|
|
|
- // After this we can get as many eof tokens as we like.
|
|
|
- for (auto i = 0; i < 10; i++) {
|
|
|
- auto eof_token = lexer.next();
|
|
|
- EXPECT_EQ(eof_token.type(), JS::TokenType::Eof);
|
|
|
- }
|
|
|
+ EXPECT(produces_eof_tokens(lexer));
|
|
|
}
|