LibWeb: Fix assertion failure when tokenizing JS regex literals

This fixes parsing the following regular expression: /</g;

It also adds a simple script element containing that specific regex to
the HTMLTokenizer regression test.
This commit is contained in:
Max Wipfli 2021-07-15 01:25:34 +02:00 committed by Andreas Kling
parent c82c652ee4
commit 2404ad6897
Notes: sideshowbarker 2024-07-18 09:00:07 +09:00
3 changed files with 10 additions and 1 deletion

View file

@@ -193,5 +193,5 @@ TEST_CASE(regression)
auto file_contents = file.value()->read_all(); auto file_contents = file.value()->read_all();
auto tokens = run_tokenizer(file_contents); auto tokens = run_tokenizer(file_contents);
u32 hash = hash_tokens(tokens); u32 hash = hash_tokens(tokens);
EXPECT_EQ(hash, 2891738465u); EXPECT_EQ(hash, 2203864459u);
} }

View file

@@ -3,6 +3,13 @@
<head> <head>
<meta charset="UTF-8"> <meta charset="UTF-8">
<title>This is a test page :^)</title> <title>This is a test page :^)</title>
<script>
let foo = 2;
var bar = 3;
if (foo < bar)
alert("Check happens with HTML special <characters>.");
let regex = /</g;
</script>
</head> </head>
<body> <body>
<p>This is the first paragraph.</p> <p>This is the first paragraph.</p>

View file

@@ -2561,6 +2561,8 @@ _StartOfFunction:
{ {
m_queued_tokens.enqueue(HTMLToken::make_character('<')); m_queued_tokens.enqueue(HTMLToken::make_character('<'));
m_queued_tokens.enqueue(HTMLToken::make_character('/')); m_queued_tokens.enqueue(HTMLToken::make_character('/'));
// NOTE: The spec doesn't mention this, but it seems that m_current_token (an end tag) is just dropped in this case.
m_current_builder.clear();
for (auto code_point : m_temporary_buffer) for (auto code_point : m_temporary_buffer)
m_queued_tokens.enqueue(HTMLToken::make_character(code_point)); m_queued_tokens.enqueue(HTMLToken::make_character(code_point));
RECONSUME_IN(ScriptData); RECONSUME_IN(ScriptData);