mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-21 23:20:20 +00:00
AK/GenericLexer: constexpr where possible
Problem: - Much of the `GenericLexer` can be `constexpr`, but is not. Solution: - Make it `constexpr` and de-duplicate code. - Extend parts of `StringView` with `constexpr` to support this. - Add tests to ensure `constexpr` behavior. Note: - Construction of `StringView` from a pointer and a length is not `constexpr`-compatible at the moment because the VERIFY it performs cannot be, yet.
This commit is contained in:
parent
c2280a907d
commit
254e010c75
Notes:
sideshowbarker
2024-07-18 19:14:28 +09:00
Author: https://github.com/ldm5180 Commit: https://github.com/SerenityOS/serenity/commit/254e010c754 Pull-request: https://github.com/SerenityOS/serenity/pull/6553 Reviewed-by: https://github.com/linusg
6 changed files with 303 additions and 188 deletions
|
@ -10,92 +10,6 @@
|
|||
#include <AK/StringBuilder.h>
|
||||
|
||||
namespace AK {
|
||||
|
||||
GenericLexer::GenericLexer(const StringView& input)
|
||||
: m_input(input)
|
||||
{
|
||||
}
|
||||
|
||||
GenericLexer::~GenericLexer()
|
||||
{
|
||||
}
|
||||
|
||||
// Tells whether the parser's index has reached input's end
|
||||
bool GenericLexer::is_eof() const
|
||||
{
|
||||
return m_index >= m_input.length();
|
||||
}
|
||||
|
||||
// Returns the current character at the parser index, plus `offset` if specified
|
||||
char GenericLexer::peek(size_t offset) const
|
||||
{
|
||||
return (m_index + offset < m_input.length()) ? m_input[m_index + offset] : '\0';
|
||||
}
|
||||
|
||||
// Tests the next character in the input
|
||||
bool GenericLexer::next_is(char expected) const
|
||||
{
|
||||
return peek() == expected;
|
||||
}
|
||||
|
||||
// Tests if the `expected` string comes next in the input
|
||||
bool GenericLexer::next_is(StringView expected) const
|
||||
{
|
||||
for (size_t i = 0; i < expected.length(); ++i)
|
||||
if (peek(i) != expected[i])
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Tests if the `expected` string comes next in the input
|
||||
bool GenericLexer::next_is(const char* expected) const
|
||||
{
|
||||
for (size_t i = 0; expected[i] != '\0'; ++i)
|
||||
if (peek(i) != expected[i])
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Go back to the previous character
|
||||
void GenericLexer::retreat()
|
||||
{
|
||||
VERIFY(m_index > 0);
|
||||
m_index--;
|
||||
}
|
||||
|
||||
// Consume a character and advance the parser index
|
||||
char GenericLexer::consume()
|
||||
{
|
||||
VERIFY(!is_eof());
|
||||
return m_input[m_index++];
|
||||
}
|
||||
|
||||
// Consume the given character if it is next in the input
|
||||
bool GenericLexer::consume_specific(char specific)
|
||||
{
|
||||
if (peek() != specific)
|
||||
return false;
|
||||
|
||||
ignore();
|
||||
return true;
|
||||
}
|
||||
|
||||
// Consume the given string if it is next in the input
|
||||
bool GenericLexer::consume_specific(StringView str)
|
||||
{
|
||||
if (!next_is(str))
|
||||
return false;
|
||||
|
||||
ignore(str.length());
|
||||
return true;
|
||||
}
|
||||
|
||||
// Consume the given string if it is next in the input
|
||||
bool GenericLexer::consume_specific(const char* str)
|
||||
{
|
||||
return consume_specific(StringView(str));
|
||||
}
|
||||
|
||||
// Consume a number of characters
|
||||
StringView GenericLexer::consume(size_t count)
|
||||
{
|
||||
|
@ -214,46 +128,4 @@ String GenericLexer::consume_and_unescape_string(char escape_char)
|
|||
return builder.to_string();
|
||||
}
|
||||
|
||||
char GenericLexer::consume_escaped_character(char escape_char, const StringView& escape_map)
|
||||
{
|
||||
if (!consume_specific(escape_char))
|
||||
return consume();
|
||||
|
||||
auto c = consume();
|
||||
|
||||
for (size_t i = 0; i < escape_map.length(); i += 2) {
|
||||
if (c == escape_map[i])
|
||||
return escape_map[i + 1];
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
// Ignore a number of characters (1 by default)
|
||||
void GenericLexer::ignore(size_t count)
|
||||
{
|
||||
count = min(count, m_input.length() - m_index);
|
||||
m_index += count;
|
||||
}
|
||||
|
||||
// Ignore characters until `stop` is peek'd
|
||||
// The `stop` character is ignored as it is user-defined
|
||||
void GenericLexer::ignore_until(char stop)
|
||||
{
|
||||
while (!is_eof() && peek() != stop)
|
||||
m_index++;
|
||||
|
||||
ignore();
|
||||
}
|
||||
|
||||
// Ignore characters until the string `stop` is found
|
||||
// The `stop` string is ignored, as it is user-defined
|
||||
void GenericLexer::ignore_until(const char* stop)
|
||||
{
|
||||
while (!is_eof() && !next_is(stop))
|
||||
m_index++;
|
||||
|
||||
ignore(__builtin_strlen(stop));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -12,29 +12,95 @@ namespace AK {
|
|||
|
||||
class GenericLexer {
|
||||
public:
|
||||
explicit GenericLexer(const StringView& input);
|
||||
virtual ~GenericLexer();
|
||||
constexpr explicit GenericLexer(const StringView& input)
|
||||
: m_input(input)
|
||||
{
|
||||
}
|
||||
|
||||
size_t tell() const { return m_index; }
|
||||
size_t tell_remaining() const { return m_input.length() - m_index; }
|
||||
// Returns the current index into the input.
constexpr size_t tell() const
{
    return m_index;
}
|
||||
// Returns how many characters lie between the current index and the end of the input.
constexpr size_t tell_remaining() const
{
    const size_t total = m_input.length();
    return total - m_index;
}
|
||||
|
||||
// Returns a view of the input from the current index onwards; the index is not moved.
StringView remaining() const
{
    return m_input.substring_view(m_index);
}
|
||||
|
||||
bool is_eof() const;
|
||||
// Tells whether the index has reached (or passed) the end of the input.
constexpr bool is_eof() const
{
    return !(m_index < m_input.length());
}
|
||||
|
||||
char peek(size_t offset = 0) const;
|
||||
// Returns the character `offset` positions past the current index without consuming it,
// or '\0' when that position lies at or beyond the end of the input.
constexpr char peek(size_t offset = 0) const
{
    // Compare `offset` against the remaining length instead of computing `m_index + offset`
    // first: that sum wraps around for very large offsets, which would pass a naive bounds
    // check and index into the input.
    if (m_index >= m_input.length() || offset >= m_input.length() - m_index)
        return '\0';
    return m_input[m_index + offset];
}
|
||||
|
||||
bool next_is(char) const;
|
||||
bool next_is(StringView) const;
|
||||
bool next_is(const char*) const;
|
||||
constexpr bool next_is(char expected) const
|
||||
{
|
||||
return peek() == expected;
|
||||
}
|
||||
|
||||
void retreat();
|
||||
// Tells whether the input, starting at the current index, continues with `expected`.
constexpr bool next_is(StringView expected) const
{
    const size_t wanted = expected.length();
    size_t matched = 0;
    // Advance over the matching prefix; stop at the first mismatch.
    while (matched < wanted && peek(matched) == expected[matched])
        ++matched;
    return matched == wanted;
}
|
||||
|
||||
constexpr bool next_is(const char* expected) const
|
||||
{
|
||||
for (size_t i = 0; expected[i] != '\0'; ++i)
|
||||
if (peek(i) != expected[i])
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
constexpr void retreat()
|
||||
{
|
||||
VERIFY(m_index > 0);
|
||||
--m_index;
|
||||
}
|
||||
|
||||
constexpr char consume()
|
||||
{
|
||||
VERIFY(!is_eof());
|
||||
return m_input[m_index++];
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
constexpr bool consume_specific(const T& next)
|
||||
{
|
||||
if (!next_is(next))
|
||||
return false;
|
||||
|
||||
if constexpr (requires { next.length(); }) {
|
||||
ignore(next.length());
|
||||
} else {
|
||||
ignore(sizeof(next));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool consume_specific(const String& next)
|
||||
{
|
||||
return consume_specific(StringView { next });
|
||||
}
|
||||
|
||||
constexpr bool consume_specific(const char* next)
|
||||
{
|
||||
return consume_specific(StringView { next });
|
||||
}
|
||||
|
||||
constexpr char consume_escaped_character(char escape_char = '\\', const StringView& escape_map = "n\nr\rt\tb\bf\f")
|
||||
{
|
||||
if (!consume_specific(escape_char))
|
||||
return consume();
|
||||
|
||||
auto c = consume();
|
||||
|
||||
for (size_t i = 0; i < escape_map.length(); i += 2) {
|
||||
if (c == escape_map[i])
|
||||
return escape_map[i + 1];
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
char consume();
|
||||
bool consume_specific(char);
|
||||
bool consume_specific(StringView);
|
||||
bool consume_specific(const char*);
|
||||
char consume_escaped_character(char escape_char = '\\', const StringView& escape_map = "n\nr\rt\tb\bf\f");
|
||||
StringView consume(size_t count);
|
||||
StringView consume_all();
|
||||
StringView consume_line();
|
||||
|
@ -43,9 +109,27 @@ public:
|
|||
StringView consume_quoted_string(char escape_char = 0);
|
||||
String consume_and_unescape_string(char escape_char = '\\');
|
||||
|
||||
void ignore(size_t count = 1);
|
||||
void ignore_until(char);
|
||||
void ignore_until(const char*);
|
||||
// Skips `count` characters (one by default), stopping at the end of the input.
constexpr void ignore(size_t count = 1)
{
    const size_t available = m_input.length() - m_index;
    m_index += min(count, available);
}
|
||||
|
||||
constexpr void ignore_until(char stop)
|
||||
{
|
||||
while (!is_eof() && peek() != stop) {
|
||||
++m_index;
|
||||
}
|
||||
ignore();
|
||||
}
|
||||
|
||||
constexpr void ignore_until(const char* stop)
|
||||
{
|
||||
while (!is_eof() && !next_is(stop)) {
|
||||
++m_index;
|
||||
}
|
||||
ignore(__builtin_strlen(stop));
|
||||
}
|
||||
|
||||
/*
|
||||
* Conditions are used to match arbitrary characters. You can use lambdas,
|
||||
|
@ -57,19 +141,19 @@ public:
|
|||
*/
|
||||
|
||||
// Test the next character against a Condition
|
||||
template<typename C>
|
||||
bool next_is(C condition) const
|
||||
template<typename TPredicate>
|
||||
constexpr bool next_is(TPredicate pred) const
|
||||
{
|
||||
return condition(peek());
|
||||
return pred(peek());
|
||||
}
|
||||
|
||||
// Consume and return characters while `condition` returns true
|
||||
template<typename C>
|
||||
StringView consume_while(C condition)
|
||||
// Consume and return characters while `pred` returns true
|
||||
template<typename TPredicate>
|
||||
StringView consume_while(TPredicate pred)
|
||||
{
|
||||
size_t start = m_index;
|
||||
while (!is_eof() && condition(peek()))
|
||||
m_index++;
|
||||
while (!is_eof() && pred(peek()))
|
||||
++m_index;
|
||||
size_t length = m_index - start;
|
||||
|
||||
if (length == 0)
|
||||
|
@ -77,13 +161,13 @@ public:
|
|||
return m_input.substring_view(start, length);
|
||||
}
|
||||
|
||||
// Consume and return characters until `condition` return true
|
||||
template<typename C>
|
||||
StringView consume_until(C condition)
|
||||
// Consume and return characters until `pred` returns true
|
||||
template<typename TPredicate>
|
||||
StringView consume_until(TPredicate pred)
|
||||
{
|
||||
size_t start = m_index;
|
||||
while (!is_eof() && !condition(peek()))
|
||||
m_index++;
|
||||
while (!is_eof() && !pred(peek()))
|
||||
++m_index;
|
||||
size_t length = m_index - start;
|
||||
|
||||
if (length == 0)
|
||||
|
@ -91,21 +175,21 @@ public:
|
|||
return m_input.substring_view(start, length);
|
||||
}
|
||||
|
||||
// Ignore characters while `condition` returns true
|
||||
template<typename C>
|
||||
void ignore_while(C condition)
|
||||
// Ignore characters while `pred` returns true
|
||||
template<typename TPredicate>
|
||||
constexpr void ignore_while(TPredicate pred)
|
||||
{
|
||||
while (!is_eof() && condition(peek()))
|
||||
m_index++;
|
||||
while (!is_eof() && pred(peek()))
|
||||
++m_index;
|
||||
}
|
||||
|
||||
// Ignore characters until `condition` return true
|
||||
// Ignore characters until `pred` returns true
|
||||
// We don't skip the stop character as it may not be a unique value
|
||||
template<typename C>
|
||||
void ignore_until(C condition)
|
||||
template<typename TPredicate>
|
||||
constexpr void ignore_until(TPredicate pred)
|
||||
{
|
||||
while (!is_eof() && !condition(peek()))
|
||||
m_index++;
|
||||
while (!is_eof() && !pred(peek()))
|
||||
++m_index;
|
||||
}
|
||||
|
||||
protected:
|
||||
|
|
|
@ -172,17 +172,6 @@ bool StringView::equals_ignoring_case(const StringView& other) const
|
|||
return StringUtils::equals_ignoring_case(*this, other);
|
||||
}
|
||||
|
||||
StringView StringView::substring_view(size_t start, size_t length) const
|
||||
{
|
||||
VERIFY(start + length <= m_length);
|
||||
return { m_characters + start, length };
|
||||
}
|
||||
StringView StringView::substring_view(size_t start) const
|
||||
{
|
||||
VERIFY(start <= m_length);
|
||||
return { m_characters + start, length() - start };
|
||||
}
|
||||
|
||||
StringView StringView::substring_view_starting_from_substring(const StringView& substring) const
|
||||
{
|
||||
const char* remaining_characters = substring.characters_without_null_termination();
|
||||
|
|
|
@ -46,15 +46,15 @@ public:
|
|||
StringView(const String&);
|
||||
StringView(const FlyString&);
|
||||
|
||||
[[nodiscard]] bool is_null() const { return !m_characters; }
|
||||
[[nodiscard]] bool is_empty() const { return m_length == 0; }
|
||||
// True when the view has no character pointer at all.
[[nodiscard]] constexpr bool is_null() const
{
    return m_characters == nullptr;
}
|
||||
// True when the view spans zero characters.
[[nodiscard]] constexpr bool is_empty() const
{
    return m_length == 0;
}
|
||||
|
||||
[[nodiscard]] const char* characters_without_null_termination() const { return m_characters; }
|
||||
[[nodiscard]] size_t length() const { return m_length; }
|
||||
// Number of characters spanned by the view.
[[nodiscard]] constexpr size_t length() const
{
    return m_length;
}
|
||||
|
||||
[[nodiscard]] ReadonlyBytes bytes() const { return { m_characters, m_length }; }
|
||||
|
||||
const char& operator[](size_t index) const { return m_characters[index]; }
|
||||
// Returns the character at `index`; no bounds check is performed here.
constexpr const char& operator[](size_t index) const
{
    return *(m_characters + index);
}
|
||||
|
||||
using ConstIterator = SimpleIterator<const StringView, const char>;
|
||||
|
||||
|
@ -84,8 +84,17 @@ public:
|
|||
Optional<size_t> find(const StringView&) const;
|
||||
Optional<size_t> find(char c) const;
|
||||
|
||||
[[nodiscard]] StringView substring_view(size_t start, size_t length) const;
|
||||
[[nodiscard]] StringView substring_view(size_t start) const;
|
||||
// Returns the sub-view of `length` characters beginning at `start`.
// VERIFYs that the requested range lies entirely within this view.
[[nodiscard]] constexpr StringView substring_view(size_t start, size_t length) const
{
    // Two separate comparisons instead of `start + length <= m_length`: the sum can
    // wrap around for large arguments and slip past a single combined check.
    VERIFY(start <= m_length);
    VERIFY(length <= m_length - start);
    return { m_characters + start, length };
}
|
||||
|
||||
// Returns the sub-view from `start` to the end of this view.
// VERIFYs that `start` does not lie past the end.
[[nodiscard]] constexpr StringView substring_view(size_t start) const
{
    // Reject out-of-range starts here: `length() - start` would otherwise underflow
    // to a huge length before any range check gets to see sensible values.
    VERIFY(start <= m_length);
    return substring_view(start, length() - start);
}
|
||||
|
||||
[[nodiscard]] Vector<StringView> split_view(char, bool keep_empty = false) const;
|
||||
[[nodiscard]] Vector<StringView> split_view(const StringView&, bool keep_empty = false) const;
|
||||
|
||||
|
@ -166,7 +175,7 @@ public:
|
|||
|
||||
bool operator==(const String&) const;
|
||||
|
||||
bool operator==(const StringView& other) const
|
||||
constexpr bool operator==(const StringView& other) const
|
||||
{
|
||||
if (is_null())
|
||||
return other.is_null();
|
||||
|
@ -177,7 +186,7 @@ public:
|
|||
return !__builtin_memcmp(m_characters, other.m_characters, m_length);
|
||||
}
|
||||
|
||||
bool operator!=(const StringView& other) const
|
||||
constexpr bool operator!=(const StringView& other) const
|
||||
{
|
||||
return !(*this == other);
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@ set(AK_TEST_SOURCES
|
|||
TestEnumBits.cpp
|
||||
TestFind.cpp
|
||||
TestFormat.cpp
|
||||
TestGenericLexer.cpp
|
||||
TestHashFunctions.cpp
|
||||
TestHashMap.cpp
|
||||
TestHashTable.cpp
|
||||
|
|
160
AK/Tests/TestGenericLexer.cpp
Normal file
160
AK/Tests/TestGenericLexer.cpp
Normal file
|
@ -0,0 +1,160 @@
|
|||
/*
|
||||
* Copyright (c) 2021, the SerenityOS developers.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/TestSuite.h>
|
||||
|
||||
#include <AK/GenericLexer.h>
|
||||
#include <AK/StringView.h>
|
||||
|
||||
// A lexer over a default-constructed StringView reports end of input immediately.
TEST_CASE(should_constexpr_construct_from_empty_string_view)
{
    constexpr GenericLexer sut { StringView {} };
    static_assert(sut.is_eof());
}
|
||||
|
||||
// A lexer over non-empty input is not at end of input.
TEST_CASE(should_construct_from_string_view)
{
    constexpr GenericLexer sut { StringView { "abcdef" } };
    static_assert(!sut.is_eof());
}
|
||||
|
||||
// A freshly constructed lexer reports index 0.
TEST_CASE(should_constexpr_tell)
{
    constexpr GenericLexer sut { StringView { "abcdef" } };
    static_assert(sut.tell() == 0);
}
|
||||
|
||||
// A freshly constructed lexer has the whole input remaining.
TEST_CASE(should_constexpr_tell_remaining)
{
    constexpr GenericLexer sut { StringView { "abcdef" } };
    static_assert(sut.tell_remaining() == 6);
}
|
||||
|
||||
// peek() reads at the index, at an offset, and yields '\0' past the end.
TEST_CASE(should_constexpr_peek)
{
    constexpr GenericLexer sut { StringView { "abcdef" } };
    static_assert(sut.peek() == 'a');
    static_assert(sut.peek(2) == 'c');
    static_assert(sut.peek(100) == '\0');
}
|
||||
|
||||
// next_is() accepts a char, a C string and a StringView.
TEST_CASE(should_constexpr_next_is)
{
    constexpr GenericLexer sut { StringView { "abcdef" } };
    static_assert(sut.next_is('a'));
    static_assert(sut.next_is("abc"));
    static_assert(sut.next_is(StringView { "abc" }));
}
|
||||
|
||||
// consume() followed by retreat() leaves the lexer back at the first character.
TEST_CASE(should_constexpr_retreat)
{
    constexpr auto sut = [] {
        GenericLexer lexer { StringView { "abcdef" } };
        lexer.consume();
        lexer.retreat();
        return lexer;
    }();
    static_assert(sut.peek() == 'a');
}
|
||||
|
||||
// consume() advances past exactly one character.
TEST_CASE(should_constexpr_consume_1)
{
    constexpr auto sut = [] {
        GenericLexer lexer { StringView { "abcdef" } };
        lexer.consume();
        return lexer;
    }();
    static_assert(sut.peek() == 'b');
}
|
||||
|
||||
// consume_specific(char) skips the matching character.
TEST_CASE(should_constexpr_consume_specific_char)
{
    constexpr auto sut = [] {
        GenericLexer lexer { StringView { "abcdef" } };
        lexer.consume_specific('a');
        return lexer;
    }();
    static_assert(sut.peek() == 'b');
}
|
||||
|
||||
// consume_specific(StringView) skips the whole matching prefix.
TEST_CASE(should_constexpr_consume_specific_string_view)
{
    constexpr auto sut = [] {
        GenericLexer lexer { StringView { "abcdef" } };
        lexer.consume_specific(StringView { "ab" });
        return lexer;
    }();
    static_assert(sut.peek() == 'c');
}
|
||||
|
||||
// consume_specific(const char*) skips the whole matching prefix.
TEST_CASE(should_constexpr_consume_specific_cstring)
{
    constexpr auto sut = [] {
        GenericLexer lexer { StringView { "abcdef" } };
        lexer.consume_specific("abcd");
        return lexer;
    }();
    static_assert(sut.peek() == 'e');
}
|
||||
|
||||
// ignore_until(char) skips up to and including the stop character.
TEST_CASE(should_constexpr_ignore_until)
{
    constexpr auto sut = [] {
        GenericLexer lexer { StringView { "abcdef" } };
        lexer.ignore_until('d');
        return lexer;
    }();
    static_assert(sut.peek() == 'e');
}
|
||||
|
||||
// ignore_until(const char*) skips up to and including the stop string.
TEST_CASE(should_constexpr_ignore_until_cstring)
{
    constexpr auto sut = [] {
        GenericLexer lexer { StringView { "abcdef" } };
        lexer.ignore_until("cde");
        return lexer;
    }();
    static_assert(sut.peek() == 'f');
}
|
||||
|
||||
// next_is() also accepts a predicate, applied to the peeked character.
TEST_CASE(should_constexpr_next_is_pred)
{
    constexpr auto is_a = [](auto c) { return c == 'a'; };
    constexpr GenericLexer sut { StringView { "abcdef" } };
    static_assert(sut.next_is(is_a));
}
|
||||
|
||||
// ignore_while(pred) skips the leading run of characters that satisfy the predicate.
TEST_CASE(should_constexpr_ignore_while_pred)
{
    constexpr auto sut = [] {
        constexpr auto is_a = [](auto c) { return c == 'a'; };

        GenericLexer lexer { StringView { "abcdef" } };
        lexer.ignore_while(is_a);
        return lexer;
    }();
    static_assert(sut.peek() == 'b');
}
|
||||
|
||||
// ignore_until(pred) stops at, and does not skip, the first character satisfying the predicate.
TEST_CASE(should_constexpr_ignore_until_pred)
{
    constexpr auto sut = [] {
        constexpr auto is_c = [](auto c) { return c == 'c'; };

        GenericLexer lexer { StringView { "abcdef" } };
        lexer.ignore_until(is_c);
        return lexer;
    }();
    static_assert(sut.peek() == 'c');
}
|
||||
|
||||
TEST_MAIN(GenericLexer)
|
Loading…
Reference in a new issue