From f8995d37a2bf71e98e273f4aa0764d9c022ac127 Mon Sep 17 00:00:00 2001
From: Sam Atkins
Date: Mon, 14 Oct 2024 16:18:57 +0100
Subject: [PATCH] LibWeb/CSS: Tokenize comments as whitespace tokens

This is in a weird position where the spec tells us to discard the
comments, but then we have to preserve the original source text which
may include comments. As a compromise, I'm treating each comment as a
whitespace token - comments are functionally equivalent to whitespace
so this should not have any behaviour changes beyond preserving the
original text.
---
 Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp
index d0728dac65c..443fc93af50 100644
--- a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp
+++ b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp
@@ -1064,11 +1064,20 @@ Token Tokenizer::consume_a_token()
     // This section describes how to consume a token from a stream of code points.
     // It will return a single token of any type.
 
+    auto start_byte_offset = current_byte_offset();
+
     // Consume comments.
     consume_comments();
 
+    // AD-HOC: Preserve comments as whitespace tokens, for serializing custom properties.
+    auto after_comments_byte_offset = current_byte_offset();
+    if (after_comments_byte_offset != start_byte_offset) {
+        auto token = create_new_token(Token::Type::Whitespace);
+        token.m_original_source_text = input_since(start_byte_offset);
+        return token;
+    }
+
     // Consume the next input code point.
-    auto start_byte_offset = current_byte_offset();
     auto input = next_code_point();
 
     // whitespace