ladybird/Tests/LibWeb/TestCSSTokenStream.cpp
Sam Atkins b645e26e9b LibWeb/CSS: Bring TokenStream in line with spec
When the TokenStream code was originally written, there was no such
concept in the CSS Syntax spec. It has since been officially added
(https://drafts.csswg.org/css-syntax/#css-token-stream), and the
parsing algorithms are now described in terms of it. This patch brings
our implementation in line with the spec. A few deprecated TokenStream
methods are left around until their users are also updated to match
the newer spec.

There are a few differences:

- They name things differently. The main confusing one is that we had
  a `next_token()` which consumed a token and returned it, but the
  spec's `next_token()` only peeks at the next token without consuming
  it. The spec names are honestly better than what I'd come up with.
  (`discard_a_token()` is a nice addition too!)

- We used to store the index of the token that was just consumed, and
  they instead store the index of the token that will be consumed next.
  This is a perfect breeding ground for off-by-one errors, so I've
  finally added a test suite for TokenStream itself.

- We use a transaction system for rewinding, whereas the spec uses a
  stack of "marks" which can be manually rewound to. The two should be
  able to coexist as long as we stick with marks in the parser spec
  algorithms and with transactions elsewhere. (A rough sketch of that
  coexistence follows below, after the commit details.)
2024-10-09 17:29:29 +01:00
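
A rough sketch of how the two rewinding mechanisms could coexist is
shown below. It is not part of the committed file: the mark() calls
match the spec-style API exercised in the test further down, while
begin_transaction() and commit() are assumed to be the existing
transaction interface, so treat those names as illustrative rather
than authoritative.

// Sketch only: spec-style marks and the existing transactions rewinding
// the same stream. begin_transaction()/commit() are assumptions about
// the transaction API, not a reference for its real signatures.
#include <AK/FlyString.h>
#include <AK/Vector.h>
#include <LibWeb/CSS/Parser/TokenStream.h>

namespace Web::CSS::Parser {

static void rewinding_side_by_side()
{
    Vector<Token> tokens {
        Token::create_ident("a"_fly_string),
        Token::create_ident("b"_fly_string),
    };
    TokenStream stream { tokens };

    // Spec-style rewinding: push a mark, consume, then restore it.
    stream.mark();
    stream.discard_a_token();
    stream.restore_a_mark(); // Position is back at "a".

    // Existing rewinding: a transaction rolls back unless committed.
    {
        auto transaction = stream.begin_transaction();
        stream.discard_a_token();
        // No transaction.commit(), so the position rolls back when
        // `transaction` goes out of scope.
    }

    // Both mechanisms only restore the stream's position, so keeping
    // marks inside the spec algorithms and transactions everywhere else
    // should let them coexist without interfering.
}

}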


/*
 * Copyright (c) 2024, Sam Atkins <sam@ladybird.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <AK/FlyString.h>
#include <AK/Vector.h>
#include <LibTest/TestCase.h>
#include <LibWeb/CSS/Parser/TokenStream.h>

namespace Web::CSS::Parser {

TEST_CASE(basic)
{
    Vector<Token> tokens {
        Token::create_ident("hello"_fly_string),
    };
    TokenStream stream { tokens };

    EXPECT(!stream.is_empty());
    EXPECT(stream.has_next_token());
    EXPECT_EQ(stream.remaining_token_count(), 1u);

    // next_token() doesn't consume it
    auto const& next = stream.next_token();
    EXPECT(!stream.is_empty());
    EXPECT(stream.has_next_token());
    EXPECT_EQ(stream.remaining_token_count(), 1u);
    // Check what the token is
    EXPECT(next.is(Token::Type::Ident));
    EXPECT_EQ(next.ident(), "hello"_fly_string);

    // consume_a_token() does consume it
    auto const& consumed = stream.consume_a_token();
    EXPECT(stream.is_empty());
    EXPECT(!stream.has_next_token());
    EXPECT_EQ(stream.remaining_token_count(), 0u);
    // Check what the token is
    EXPECT(consumed.is(Token::Type::Ident));
    EXPECT_EQ(consumed.ident(), "hello"_fly_string);

    // Now, any further tokens should be EOF
    EXPECT(stream.next_token().is(Token::Type::EndOfFile));
    EXPECT(stream.consume_a_token().is(Token::Type::EndOfFile));
}

TEST_CASE(marks)
{
    Vector<Token> tokens {
        Token::create_ident("a"_fly_string),
        Token::create_ident("b"_fly_string),
        Token::create_ident("c"_fly_string),
        Token::create_ident("d"_fly_string),
        Token::create_ident("e"_fly_string),
        Token::create_ident("f"_fly_string),
        Token::create_ident("g"_fly_string),
    };
    TokenStream stream { tokens };

    stream.mark(); // 0
    EXPECT_EQ(stream.remaining_token_count(), 7u);

    stream.discard_a_token();
    stream.discard_a_token();
    stream.discard_a_token();
    EXPECT_EQ(stream.remaining_token_count(), 4u);

    stream.mark(); // 3
    stream.discard_a_token();
    EXPECT_EQ(stream.remaining_token_count(), 3u);

    stream.restore_a_mark(); // Back to 3
    EXPECT_EQ(stream.remaining_token_count(), 4u);

    stream.discard_a_token();
    stream.discard_a_token();
    stream.discard_a_token();
    EXPECT_EQ(stream.remaining_token_count(), 1u);

    stream.mark(); // 6
    stream.discard_a_mark();
    EXPECT_EQ(stream.remaining_token_count(), 1u);

    stream.restore_a_mark(); // Back to 0
    EXPECT_EQ(stream.remaining_token_count(), 7u);
}

}