3 years ago · ac2c3a73b1
--- a/Meta/Lagom/CMakeLists.txt
+++ b/Meta/Lagom/CMakeLists.txt
@@ -528,6 +528,9 @@ if (BUILD_LAGOM)
 
				         )
			
 
				         set_tests_properties(JS PROPERTIES ENVIRONMENT SERENITY_SOURCE_DIR=${SERENITY_PROJECT_ROOT})
			
 
				 
			
 
				+        # test-invalid-unicode-js: LibJS lexer tests for invalid Unicode input, built from Tests/LibJS and linked against LagomJS
			
 
				+        lagom_test(../../Tests/LibJS/test-invalid-unicode-js.cpp LIBS LagomJS)
			
 
				+
			
 
				         # Markdown
			
 
				         include(commonmark_spec)
			
 
				         file(GLOB LIBMARKDOWN_TEST_SOURCES CONFIGURE_DEPENDS "../../Tests/LibMarkdown/*.cpp")
			
--- a/Tests/LibJS/CMakeLists.txt
+++ b/Tests/LibJS/CMakeLists.txt
@@ -1,2 +1,5 @@
 
				 serenity_testjs_test(test-js.cpp test-js)
			
 
				+
			
 
				 install(TARGETS test-js RUNTIME DESTINATION bin OPTIONAL)
			
 
				+
			
 
				+# Lexer tests that feed invalid Unicode byte sequences to LibJS.
				+serenity_test(test-invalid-unicode-js.cpp LibJS LIBS LibJS)
			
--- a/Tests/LibJS/test-invalid-unicode-js.cpp
+++ b/Tests/LibJS/test-invalid-unicode-js.cpp
@@ -0,0 +1,76 @@
 
				+/*
			
 
				+ * Copyright (c) 2021, David Tuin <davidot@serenityos.org>
			
 
				+ *
			
 
				+ * SPDX-License-Identifier: BSD-2-Clause
			
 
				+ */
			
 
				+
			
 
				+#include <LibJS/Parser.h>
			
 
				+#include <LibTest/TestCase.h>
			
 
				+
			
 
				+// A source made up of nothing but the bytes 0xEA 0xFD is expected to lex as
				+// one Invalid token; every later request must then report Eof.
				+TEST_CASE(invalid_unicode_only)
				+{
				+    char const* source = "\xEA\xFD";
				+    JS::Lexer lexer(source);
				+
				+    EXPECT_EQ(lexer.next().type(), JS::TokenType::Invalid);
				+
				+    // Keep asking past the end of input: the answer must stay Eof each time.
				+    for (int remaining = 10; remaining > 0; --remaining) {
				+        EXPECT_EQ(lexer.next().type(), JS::TokenType::Eof);
				+    }
				+}
			
 
				+
			
 
				+// A source holding only the single byte 0xF7 is expected to lex as one
				+// Invalid token; every later request must then report Eof.
				+TEST_CASE(long_invalid_unicode)
				+{
				+    char const* source = "\xF7";
				+    JS::Lexer lexer(source);
				+
				+    EXPECT_EQ(lexer.next().type(), JS::TokenType::Invalid);
				+
				+    // Keep asking past the end of input: the answer must stay Eof each time.
				+    for (int remaining = 10; remaining > 0; --remaining) {
				+        EXPECT_EQ(lexer.next().type(), JS::TokenType::Eof);
				+    }
				+}
			
 
				+
			
 
				+// Invalid bytes directly followed by valid code: the bad sequence becomes an
				+// Invalid token and lexing resumes inside the text that follows it.
				+TEST_CASE(invalid_unicode_and_valid_code)
				+{
				+    char const* source = "\xEA\xFDthrow 1;";
				+    JS::Lexer lexer(source);
				+
				+    auto rejected = lexer.next();
				+    EXPECT_EQ(rejected.type(), JS::TokenType::Invalid);
				+
				+    // 0xEA opens a three-byte sequence, so the 't' is swallowed together with
				+    // the invalid bytes and the next token starts at the 'h'.
				+    auto following = lexer.next();
				+    EXPECT_EQ(following.value(), "hrow");
				+}
			
 
				+
			
 
				+// Same scenario as the three-byte case, but with a lead byte that announces
				+// a four-byte sequence, so more of the trailing text is lost.
				+TEST_CASE(long_invalid_unicode_and_valid_code)
				+{
				+    char const* source = "\xF7throw 1;";
				+    JS::Lexer lexer(source);
				+
				+    auto rejected = lexer.next();
				+    EXPECT_EQ(rejected.type(), JS::TokenType::Invalid);
				+
				+    // 0xF7 opens a four-byte sequence, so 't', 'h' and 'r' are swallowed with
				+    // it and the next token is whatever is left of the word.
				+    auto following = lexer.next();
				+    EXPECT_EQ(following.value(), "ow");
				+}
			
 
				+
			
 
				+// Valid code first, then invalid bytes near the end of the input: the `let`
				+// keyword must still lex normally, the bad bytes yield an Invalid token that
				+// keeps its leading trivia, and only Eof tokens follow.
				+TEST_CASE(invalid_unicode_after_valid_code_and_before_eof)
				+{
				+    char const* source = "let \xEA\xFD;";
				+    JS::Lexer lexer(source);
				+
				+    EXPECT_EQ(lexer.next().type(), JS::TokenType::Let);
				+
				+    auto rejected = lexer.next();
				+    EXPECT_EQ(rejected.type(), JS::TokenType::Invalid);
				+    // The space between `let` and the bad bytes must survive as trivia.
				+    EXPECT_EQ(rejected.trivia(), " ");
				+
				+    // Keep asking past the end of input: the answer must stay Eof each time.
				+    for (int remaining = 10; remaining > 0; --remaining) {
				+        EXPECT_EQ(lexer.next().type(), JS::TokenType::Eof);
				+    }
				+}
			
--- a/Userland/Libraries/LibJS/Lexer.cpp
+++ b/Userland/Libraries/LibJS/Lexer.cpp
@@ -145,7 +145,7 @@ void Lexer::consume()
 
				             return false;
			
 
				         m_eof = true;
			
 
				         m_current_char = '\0';
			
 
				-        m_position++;
			
 
				+        m_position = m_source.length() + 1;
			
 
				         m_line_column++;
			
 
				         return true;
			
 
				     };