LibJS: Add a specific test for invalid unicode characters in the lexer
Also fixes the lexer trying to create substrings past the end of the source when it had overrun the source length.
This commit is contained in:
parent
962298b040
commit
ac2c3a73b1
Notes:
sideshowbarker
2024-07-18 03:08:40 +09:00
Author: https://github.com/davidot Commit: https://github.com/SerenityOS/serenity/commit/ac2c3a73b13 Pull-request: https://github.com/SerenityOS/serenity/pull/10333 Reviewed-by: https://github.com/linusg ✅
4 changed files with 83 additions and 1 deletions
|
@ -528,6 +528,9 @@ if (BUILD_LAGOM)
|
|||
)
|
||||
set_tests_properties(JS PROPERTIES ENVIRONMENT SERENITY_SOURCE_DIR=${SERENITY_PROJECT_ROOT})
|
||||
|
||||
# test-invalid-unicode-js
|
||||
lagom_test(../../Tests/LibJS/test-invalid-unicode-js.cpp LIBS LagomJS)
|
||||
|
||||
# Markdown
|
||||
include(commonmark_spec)
|
||||
file(GLOB LIBMARKDOWN_TEST_SOURCES CONFIGURE_DEPENDS "../../Tests/LibMarkdown/*.cpp")
|
||||
|
|
|
@ -1,2 +1,5 @@
|
|||
# Register the test-js runner built from test-js.cpp.
# NOTE(review): serenity_testjs_test is a project helper defined elsewhere; its exact behavior is not visible here.
serenity_testjs_test(test-js.cpp test-js)
|
||||
|
||||
# Install the test-js executable into bin; OPTIONAL makes a missing target file a non-error at install time.
install(TARGETS test-js RUNTIME DESTINATION bin OPTIONAL)
|
||||
|
||||
# Separate test built from test-invalid-unicode-js.cpp, with LibJS passed after LIBS.
# NOTE(review): the meaning of the positional "LibJS" argument depends on serenity_test — confirm against its definition.
serenity_test(test-invalid-unicode-js.cpp LibJS LIBS LibJS)
|
||||
|
|
76
Tests/LibJS/test-invalid-unicode-js.cpp
Normal file
76
Tests/LibJS/test-invalid-unicode-js.cpp
Normal file
|
@ -0,0 +1,76 @@
|
|||
/*
|
||||
* Copyright (c) 2021, David Tuin <davidot@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <LibJS/Parser.h>
|
||||
#include <LibTest/TestCase.h>
|
||||
|
||||
// Input consisting only of the bytes 0xEA 0xFD: the first token must be Invalid,
// and every later request must produce Eof.
TEST_CASE(invalid_unicode_only)
{
    char const* source = "\xEA\xFD";
    JS::Lexer lexer(source);

    auto first_token = lexer.next();
    EXPECT_EQ(first_token.type(), JS::TokenType::Invalid);

    // Asking for more tokens past the end must keep yielding Eof (checked ten times).
    int remaining_checks = 10;
    while (remaining_checks-- > 0) {
        auto trailing_token = lexer.next();
        EXPECT_EQ(trailing_token.type(), JS::TokenType::Eof);
    }
}
|
||||
|
||||
// Input consisting of the single byte 0xF7, whose bit pattern announces a four-byte
// UTF-8 sequence that the source never supplies: expect Invalid, then Eof repeatedly.
TEST_CASE(long_invalid_unicode)
{
    char const* source = "\xF7";
    JS::Lexer lexer(source);

    auto first_token = lexer.next();
    EXPECT_EQ(first_token.type(), JS::TokenType::Invalid);

    // Asking for more tokens past the end must keep yielding Eof (checked ten times).
    for (int attempt = 1; attempt <= 10; ++attempt) {
        auto trailing_token = lexer.next();
        EXPECT_EQ(trailing_token.type(), JS::TokenType::Eof);
    }
}
|
||||
|
||||
// Invalid bytes followed by ordinary code: the first token is Invalid and the
// next token's value is the remainder of the partially consumed "throw".
TEST_CASE(invalid_unicode_and_valid_code)
{
    char const* source = "\xEA\xFDthrow 1;";
    JS::Lexer lexer(source);

    auto broken_token = lexer.next();
    EXPECT_EQ(broken_token.type(), JS::TokenType::Invalid);

    // 0xEA announces a three-byte UTF-8 sequence, so the invalid token also swallows the 't'.
    auto following_token = lexer.next();
    EXPECT_EQ(following_token.value(), "hrow");
}
|
||||
|
||||
// A lone 0xF7 byte followed by ordinary code: the first token is Invalid and the
// next token's value is whatever is left of "throw" after the bytes it consumed.
TEST_CASE(long_invalid_unicode_and_valid_code)
{
    char const* source = "\xF7throw 1;";
    JS::Lexer lexer(source);

    auto broken_token = lexer.next();
    EXPECT_EQ(broken_token.type(), JS::TokenType::Invalid);

    // 0xF7 announces a four-byte UTF-8 sequence, so the invalid token also swallows "thr".
    auto following_token = lexer.next();
    EXPECT_EQ(following_token.value(), "ow");
}
|
||||
|
||||
// Valid code, then invalid bytes near the end of the source: expect a Let token,
// then an Invalid token that still carries the preceding whitespace as trivia,
// then Eof on every further request.
TEST_CASE(invalid_unicode_after_valid_code_and_before_eof)
{
    char const* source = "let \xEA\xFD;";
    JS::Lexer lexer(source);

    auto keyword_token = lexer.next();
    EXPECT_EQ(keyword_token.type(), JS::TokenType::Let);

    auto broken_token = lexer.next();
    EXPECT_EQ(broken_token.type(), JS::TokenType::Invalid);
    // The space between "let" and the bad bytes must be reported as the token's trivia.
    EXPECT_EQ(broken_token.trivia(), " ");

    // Asking for more tokens past the end must keep yielding Eof (checked ten times).
    int remaining_checks = 10;
    while (remaining_checks-- > 0) {
        auto trailing_token = lexer.next();
        EXPECT_EQ(trailing_token.type(), JS::TokenType::Eof);
    }
}
|
|
@ -145,7 +145,7 @@ void Lexer::consume()
|
|||
return false;
|
||||
m_eof = true;
|
||||
m_current_char = '\0';
|
||||
m_position++;
|
||||
m_position = m_source.length() + 1;
|
||||
m_line_column++;
|
||||
return true;
|
||||
};
|
||||
|
|
Loading…
Add table
Reference in a new issue