From f8995d37a2bf71e98e273f4aa0764d9c022ac127 Mon Sep 17 00:00:00 2001
From: Sam Atkins <sam@ladybird.org>
Date: Mon, 14 Oct 2024 16:18:57 +0100
Subject: [PATCH] LibWeb/CSS: Tokenize comments as whitespace tokens

This is in a weird position: the spec tells us to discard comments, but
we also have to preserve the original source text, which may include
comments. As a compromise, I'm treating each comment as a whitespace
token. Comments are functionally equivalent to whitespace, so this
should not cause any behaviour changes beyond preserving the original
text.
---
 Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp
index d0728dac65c..443fc93af50 100644
--- a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp
+++ b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp
@@ -1064,11 +1064,20 @@ Token Tokenizer::consume_a_token()
     // This section describes how to consume a token from a stream of code points.
     // It will return a single token of any type.
 
+    auto start_byte_offset = current_byte_offset();
+
     // Consume comments.
     consume_comments();
 
+    // AD-HOC: Preserve comments as whitespace tokens, for serializing custom properties.
+    auto after_comments_byte_offset = current_byte_offset();
+    if (after_comments_byte_offset != start_byte_offset) {
+        auto token = create_new_token(Token::Type::Whitespace);
+        token.m_original_source_text = input_since(start_byte_offset);
+        return token;
+    }
+
     // Consume the next input code point.
-    auto start_byte_offset = current_byte_offset();
     auto input = next_code_point();
 
     // whitespace