AK: Make GenericLexer (and the StringView operations it relies on) constexpr

Everything above the first file header is commentary; `git apply` and `patch`
skip leading text of this kind.

What the patch does
  * AK/GenericLexer.cpp, AK/GenericLexer.h: the constructor, is_eof(), peek(),
    next_is(), retreat(), consume(), consume_specific(),
    consume_escaped_character(), ignore() and ignore_until() move out of the
    .cpp file into the class body and are marked constexpr. The three
    consume_specific() overloads (char, StringView, const char*) become one
    template plus `const String&` and `const char*` overloads. The virtual
    destructor is removed.
  * AK/StringView.cpp, AK/StringView.h: both substring_view() overloads move
    into the header; they, is_null(), is_empty(), length(), operator[],
    operator== and operator!= are marked constexpr.
  * AK/Tests: adds TestGenericLexer.cpp, whose cases check the lexer through
    static_assert, and lists it in CMakeLists.txt.

Review changes folded into this revision
  * StringView::substring_view(size_t start) checks `start <= m_length` again.
    The removed out-of-line version had that VERIFY; the inline version only
    forwarded `length() - start`, and because size_t arithmetic wraps,
    `start + (length() - start)` always equals m_length, so the two-argument
    overload's VERIFY could never catch an out-of-range start.
  * consume_escaped_character() loops while `i + 1 < escape_map.length()`, so
    an odd-length escape_map is no longer indexed one element past its end.
  * The one-line comments that sat on the functions in GenericLexer.cpp are
    carried over to the header instead of being dropped.
  * "return true" -> "returns true" in two comments, and the second test case
    is renamed to should_constexpr_construct_from_string_view to match its
    siblings.

NOTE(review)
  * Text inside angle brackets (#include targets, template parameter lists,
    the Optional/Vector/SimpleIterator arguments) was missing from the copy of
    this patch under review and has been filled in from usage; blank context
    lines were placed so that every hunk matches its line counts. Confirm both
    against the tree before applying.
  * The `index` lines still carry the original blob ids; for GenericLexer.h,
    StringView.h and TestGenericLexer.cpp they no longer describe the
    resulting content.
  * GenericLexer keeps a `protected:` section but loses its virtual
    destructor, presumably so the class can be used in constant expressions;
    confirm that nothing destroys a derived lexer through a GenericLexer
    pointer.

diff --git a/AK/GenericLexer.cpp b/AK/GenericLexer.cpp
index 559c15a43c0..2cbabf3d3e3 100644
--- a/AK/GenericLexer.cpp
+++ b/AK/GenericLexer.cpp
@@ -10,92 +10,6 @@
 #include <AK/StringBuilder.h>
 
 namespace AK {
-
-GenericLexer::GenericLexer(const StringView& input)
-    : m_input(input)
-{
-}
-
-GenericLexer::~GenericLexer()
-{
-}
-
-// Tells whether the parser's index has reached input's end
-bool GenericLexer::is_eof() const
-{
-    return m_index >= m_input.length();
-}
-
-// Returns the current character at the parser index, plus `offset` if specified
-char GenericLexer::peek(size_t offset) const
-{
-    return (m_index + offset < m_input.length()) ? m_input[m_index + offset] : '\0';
-}
-
-// Tests the next character in the input
-bool GenericLexer::next_is(char expected) const
-{
-    return peek() == expected;
-}
-
-// Tests if the `expected` string comes next in the input
-bool GenericLexer::next_is(StringView expected) const
-{
-    for (size_t i = 0; i < expected.length(); ++i)
-        if (peek(i) != expected[i])
-            return false;
-    return true;
-}
-
-// Tests if the `expected` string comes next in the input
-bool GenericLexer::next_is(const char* expected) const
-{
-    for (size_t i = 0; expected[i] != '\0'; ++i)
-        if (peek(i) != expected[i])
-            return false;
-    return true;
-}
-
-// Go back to the previous character
-void GenericLexer::retreat()
-{
-    VERIFY(m_index > 0);
-    m_index--;
-}
-
-// Consume a character and advance the parser index
-char GenericLexer::consume()
-{
-    VERIFY(!is_eof());
-    return m_input[m_index++];
-}
-
-// Consume the given character if it is next in the input
-bool GenericLexer::consume_specific(char specific)
-{
-    if (peek() != specific)
-        return false;
-
-    ignore();
-    return true;
-}
-
-// Consume the given string if it is next in the input
-bool GenericLexer::consume_specific(StringView str)
-{
-    if (!next_is(str))
-        return false;
-
-    ignore(str.length());
-    return true;
-}
-
-// Consume the given string if it is next in the input
-bool GenericLexer::consume_specific(const char* str)
-{
-    return consume_specific(StringView(str));
-}
-
 // Consume a number of characters
 StringView GenericLexer::consume(size_t count)
 {
@@ -214,46 +128,4 @@ String GenericLexer::consume_and_unescape_string(char escape_char)
     return builder.to_string();
 }
 
-char GenericLexer::consume_escaped_character(char escape_char, const StringView& escape_map)
-{
-    if (!consume_specific(escape_char))
-        return consume();
-
-    auto c = consume();
-
-    for (size_t i = 0; i < escape_map.length(); i += 2) {
-        if (c == escape_map[i])
-            return escape_map[i + 1];
-    }
-
-    return c;
-}
-
-// Ignore a number of characters (1 by default)
-void GenericLexer::ignore(size_t count)
-{
-    count = min(count, m_input.length() - m_index);
-    m_index += count;
-}
-
-// Ignore characters until `stop` is peek'd
-// The `stop` character is ignored as it is user-defined
-void GenericLexer::ignore_until(char stop)
-{
-    while (!is_eof() && peek() != stop)
-        m_index++;
-
-    ignore();
-}
-
-// Ignore characters until the string `stop` is found
-// The `stop` string is ignored, as it is user-defined
-void GenericLexer::ignore_until(const char* stop)
-{
-    while (!is_eof() && !next_is(stop))
-        m_index++;
-
-    ignore(__builtin_strlen(stop));
-}
-
 }
diff --git a/AK/GenericLexer.h b/AK/GenericLexer.h
index 4499c0e14b4..131c0b24dd8 100644
--- a/AK/GenericLexer.h
+++ b/AK/GenericLexer.h
@@ -12,29 +12,109 @@ namespace AK {
 
 class GenericLexer {
 public:
-    explicit GenericLexer(const StringView& input);
-    virtual ~GenericLexer();
+    constexpr explicit GenericLexer(const StringView& input)
+        : m_input(input)
+    {
+    }
 
-    size_t tell() const { return m_index; }
-    size_t tell_remaining() const { return m_input.length() - m_index; }
+    constexpr size_t tell() const { return m_index; }
+    constexpr size_t tell_remaining() const { return m_input.length() - m_index; }
 
     StringView remaining() const { return m_input.substring_view(m_index); }
 
-    bool is_eof() const;
+    // Tells whether the parser's index has reached input's end
+    constexpr bool is_eof() const { return m_index >= m_input.length(); }
 
-    char peek(size_t offset = 0) const;
+    // Returns the current character at the parser index, plus `offset` if specified
+    constexpr char peek(size_t offset = 0) const
+    {
+        return (m_index + offset < m_input.length()) ? m_input[m_index + offset] : '\0';
+    }
 
-    bool next_is(char) const;
-    bool next_is(StringView) const;
-    bool next_is(const char*) const;
+    // Tests the next character in the input
+    constexpr bool next_is(char expected) const
+    {
+        return peek() == expected;
+    }
 
-    void retreat();
+    // Tests if the `expected` string comes next in the input
+    constexpr bool next_is(StringView expected) const
+    {
+        for (size_t i = 0; i < expected.length(); ++i)
+            if (peek(i) != expected[i])
+                return false;
+        return true;
+    }
+
+    // Tests if the `expected` string comes next in the input
+    constexpr bool next_is(const char* expected) const
+    {
+        for (size_t i = 0; expected[i] != '\0'; ++i)
+            if (peek(i) != expected[i])
+                return false;
+        return true;
+    }
+
+    // Go back to the previous character
+    constexpr void retreat()
+    {
+        VERIFY(m_index > 0);
+        --m_index;
+    }
+
+    // Consume a character and advance the parser index
+    constexpr char consume()
+    {
+        VERIFY(!is_eof());
+        return m_input[m_index++];
+    }
+
+    // Consume the given character or string if it is next in the input
+    template<typename T>
+    constexpr bool consume_specific(const T& next)
+    {
+        if (!next_is(next))
+            return false;
+
+        if constexpr (requires { next.length(); }) {
+            ignore(next.length());
+        } else {
+            ignore(sizeof(next));
+        }
+        return true;
+    }
+
+    // Consume the given string if it is next in the input
+    bool consume_specific(const String& next)
+    {
+        return consume_specific(StringView { next });
+    }
+
+    // Consume the given string if it is next in the input
+    constexpr bool consume_specific(const char* next)
+    {
+        return consume_specific(StringView { next });
+    }
+
+    // Consume one character. If it is `escape_char`, consume the character after it
+    // as well and return its replacement from `escape_map` (or the character itself)
+    constexpr char consume_escaped_character(char escape_char = '\\', const StringView& escape_map = "n\nr\rt\tb\bf\f")
+    {
+        if (!consume_specific(escape_char))
+            return consume();
+
+        auto c = consume();
+
+        // `escape_map` is read as (escaped, replacement) pairs; the `i + 1` bound
+        // keeps a trailing unpaired entry from being read past the end of the map
+        for (size_t i = 0; i + 1 < escape_map.length(); i += 2) {
+            if (c == escape_map[i])
+                return escape_map[i + 1];
+        }
+
+        return c;
+    }
 
-    char consume();
-    bool consume_specific(char);
-    bool consume_specific(StringView);
-    bool consume_specific(const char*);
-    char consume_escaped_character(char escape_char = '\\', const StringView& escape_map = "n\nr\rt\tb\bf\f");
     StringView consume(size_t count);
     StringView consume_all();
     StringView consume_line();
@@ -43,9 +123,32 @@ public:
     StringView consume_quoted_string(char escape_char = 0);
     String consume_and_unescape_string(char escape_char = '\\');
 
-    void ignore(size_t count = 1);
-    void ignore_until(char);
-    void ignore_until(const char*);
+    // Ignore a number of characters (1 by default)
+    constexpr void ignore(size_t count = 1)
+    {
+        count = min(count, m_input.length() - m_index);
+        m_index += count;
+    }
+
+    // Ignore characters until `stop` is peek'd
+    // The `stop` character is ignored as it is user-defined
+    constexpr void ignore_until(char stop)
+    {
+        while (!is_eof() && peek() != stop) {
+            ++m_index;
+        }
+        ignore();
+    }
+
+    // Ignore characters until the string `stop` is found
+    // The `stop` string is ignored, as it is user-defined
+    constexpr void ignore_until(const char* stop)
+    {
+        while (!is_eof() && !next_is(stop)) {
+            ++m_index;
+        }
+        ignore(__builtin_strlen(stop));
+    }
 
     /*
      * Conditions are used to match arbitrary characters. You can use lambdas,
@@ -57,19 +160,19 @@ public:
      */
 
     // Test the next character against a Condition
-    template<typename C>
-    bool next_is(C condition) const
+    template<typename TPredicate>
+    constexpr bool next_is(TPredicate pred) const
     {
-        return condition(peek());
+        return pred(peek());
     }
 
-    // Consume and return characters while `condition` returns true
-    template<typename C>
-    StringView consume_while(C condition)
+    // Consume and return characters while `pred` returns true
+    template<typename TPredicate>
+    StringView consume_while(TPredicate pred)
     {
         size_t start = m_index;
-        while (!is_eof() && condition(peek()))
-            m_index++;
+        while (!is_eof() && pred(peek()))
+            ++m_index;
         size_t length = m_index - start;
 
         if (length == 0)
@@ -77,13 +180,13 @@ public:
         return m_input.substring_view(start, length);
     }
 
-    // Consume and return characters until `condition` return true
-    template<typename C>
-    StringView consume_until(C condition)
+    // Consume and return characters until `pred` returns true
+    template<typename TPredicate>
+    StringView consume_until(TPredicate pred)
     {
         size_t start = m_index;
-        while (!is_eof() && !condition(peek()))
-            m_index++;
+        while (!is_eof() && !pred(peek()))
+            ++m_index;
         size_t length = m_index - start;
 
         if (length == 0)
@@ -91,21 +194,21 @@ public:
         return m_input.substring_view(start, length);
     }
 
-    // Ignore characters while `condition` returns true
-    template<typename C>
-    void ignore_while(C condition)
+    // Ignore characters while `pred` returns true
+    template<typename TPredicate>
+    constexpr void ignore_while(TPredicate pred)
     {
-        while (!is_eof() && condition(peek()))
-            m_index++;
+        while (!is_eof() && pred(peek()))
+            ++m_index;
     }
 
-    // Ignore characters until `condition` return true
+    // Ignore characters until `pred` returns true
     // We don't skip the stop character as it may not be a unique value
-    template<typename C>
-    void ignore_until(C condition)
+    template<typename TPredicate>
+    constexpr void ignore_until(TPredicate pred)
     {
-        while (!is_eof() && !condition(peek()))
-            m_index++;
+        while (!is_eof() && !pred(peek()))
+            ++m_index;
     }
 
 protected:
diff --git a/AK/StringView.cpp b/AK/StringView.cpp
index 63c7b0dc2e4..3f8e283f7c9 100644
--- a/AK/StringView.cpp
+++ b/AK/StringView.cpp
@@ -172,17 +172,6 @@ bool StringView::equals_ignoring_case(const StringView& other) const
     return StringUtils::equals_ignoring_case(*this, other);
 }
 
-StringView StringView::substring_view(size_t start, size_t length) const
-{
-    VERIFY(start + length <= m_length);
-    return { m_characters + start, length };
-}
-StringView StringView::substring_view(size_t start) const
-{
-    VERIFY(start <= m_length);
-    return { m_characters + start, length() - start };
-}
-
 StringView StringView::substring_view_starting_from_substring(const StringView& substring) const
 {
     const char* remaining_characters = substring.characters_without_null_termination();
diff --git a/AK/StringView.h b/AK/StringView.h
index bbcf09db6e4..f32de9c8ccf 100644
--- a/AK/StringView.h
+++ b/AK/StringView.h
@@ -46,15 +46,15 @@ public:
     StringView(const String&);
     StringView(const FlyString&);
 
-    [[nodiscard]] bool is_null() const { return !m_characters; }
-    [[nodiscard]] bool is_empty() const { return m_length == 0; }
+    [[nodiscard]] constexpr bool is_null() const { return !m_characters; }
+    [[nodiscard]] constexpr bool is_empty() const { return m_length == 0; }
 
     [[nodiscard]] const char* characters_without_null_termination() const { return m_characters; }
-    [[nodiscard]] size_t length() const { return m_length; }
+    [[nodiscard]] constexpr size_t length() const { return m_length; }
 
     [[nodiscard]] ReadonlyBytes bytes() const { return { m_characters, m_length }; }
 
-    const char& operator[](size_t index) const { return m_characters[index]; }
+    constexpr const char& operator[](size_t index) const { return m_characters[index]; }
 
     using ConstIterator = SimpleIterator<const StringView, const char>;
 
@@ -84,8 +84,20 @@ public:
     Optional<size_t> find(const StringView&) const;
     Optional<size_t> find(char c) const;
 
-    [[nodiscard]] StringView substring_view(size_t start, size_t length) const;
-    [[nodiscard]] StringView substring_view(size_t start) const;
+    [[nodiscard]] constexpr StringView substring_view(size_t start, size_t length) const
+    {
+        VERIFY(start + length <= m_length);
+        return { m_characters + start, length };
+    }
+
+    [[nodiscard]] constexpr StringView substring_view(size_t start) const
+    {
+        // Check `start` here: `length() - start` wraps around for an out-of-range
+        // `start`, which would make the two-argument overload's check pass
+        VERIFY(start <= m_length);
+        return substring_view(start, length() - start);
+    }
+
     [[nodiscard]] Vector<StringView> split_view(char, bool keep_empty = false) const;
     [[nodiscard]] Vector<StringView> split_view(const StringView&, bool keep_empty = false) const;
 
@@ -166,7 +178,7 @@ public:
 
     bool operator==(const String&) const;
 
-    bool operator==(const StringView& other) const
+    constexpr bool operator==(const StringView& other) const
     {
         if (is_null())
             return other.is_null();
@@ -177,7 +189,7 @@ public:
         return !__builtin_memcmp(m_characters, other.m_characters, m_length);
     }
 
-    bool operator!=(const StringView& other) const
+    constexpr bool operator!=(const StringView& other) const
     {
         return !(*this == other);
     }
diff --git a/AK/Tests/CMakeLists.txt b/AK/Tests/CMakeLists.txt
index ef4ceac2f76..07c5e99afd5 100644
--- a/AK/Tests/CMakeLists.txt
+++ b/AK/Tests/CMakeLists.txt
@@ -21,6 +21,7 @@ set(AK_TEST_SOURCES
     TestEnumBits.cpp
     TestFind.cpp
     TestFormat.cpp
+    TestGenericLexer.cpp
     TestHashFunctions.cpp
     TestHashMap.cpp
     TestHashTable.cpp
diff --git a/AK/Tests/TestGenericLexer.cpp b/AK/Tests/TestGenericLexer.cpp
new file mode 100644
index 00000000000..6b5d09bef29
--- /dev/null
+++ b/AK/Tests/TestGenericLexer.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2021, the SerenityOS developers.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <AK/TestSuite.h>
+
+#include <AK/GenericLexer.h>
+#include <AK/StringView.h>
+
+TEST_CASE(should_constexpr_construct_from_empty_string_view)
+{
+    constexpr GenericLexer sut(StringView {});
+    static_assert(sut.is_eof());
+}
+
+TEST_CASE(should_constexpr_construct_from_string_view)
+{
+    constexpr GenericLexer sut(StringView { "abcdef" });
+    static_assert(!sut.is_eof());
+}
+
+TEST_CASE(should_constexpr_tell)
+{
+    constexpr GenericLexer sut(StringView { "abcdef" });
+    static_assert(sut.tell() == 0);
+}
+
+TEST_CASE(should_constexpr_tell_remaining)
+{
+    constexpr GenericLexer sut(StringView { "abcdef" });
+    static_assert(sut.tell_remaining() == 6);
+}
+
+TEST_CASE(should_constexpr_peek)
+{
+    constexpr GenericLexer sut(StringView { "abcdef" });
+    static_assert(sut.peek() == 'a');
+    static_assert(sut.peek(2) == 'c');
+    static_assert(sut.peek(100) == '\0');
+}
+
+TEST_CASE(should_constexpr_next_is)
+{
+    constexpr GenericLexer sut(StringView { "abcdef" });
+    static_assert(sut.next_is('a'));
+    static_assert(sut.next_is("abc"));
+    static_assert(sut.next_is(StringView { "abc" }));
+}
+
+TEST_CASE(should_constexpr_retreat)
+{
+    constexpr auto sut = [] {
+        GenericLexer sut(StringView { "abcdef" });
+        sut.consume();
+        sut.retreat();
+        return sut;
+    }();
+    static_assert(sut.peek() == 'a');
+}
+
+TEST_CASE(should_constexpr_consume_1)
+{
+    constexpr auto sut = [] {
+        GenericLexer sut(StringView { "abcdef" });
+        sut.consume();
+        return sut;
+    }();
+    static_assert(sut.peek() == 'b');
+}
+
+TEST_CASE(should_constexpr_consume_specific_char)
+{
+    constexpr auto sut = [] {
+        GenericLexer sut(StringView { "abcdef" });
+        sut.consume_specific('a');
+        return sut;
+    }();
+    static_assert(sut.peek() == 'b');
+}
+
+TEST_CASE(should_constexpr_consume_specific_string_view)
+{
+    constexpr auto sut = [] {
+        GenericLexer sut(StringView { "abcdef" });
+        sut.consume_specific(StringView { "ab" });
+        return sut;
+    }();
+    static_assert(sut.peek() == 'c');
+}
+
+TEST_CASE(should_constexpr_consume_specific_cstring)
+{
+    constexpr auto sut = [] {
+        GenericLexer sut(StringView { "abcdef" });
+        sut.consume_specific("abcd");
+        return sut;
+    }();
+    static_assert(sut.peek() == 'e');
+}
+
+TEST_CASE(should_constexpr_ignore_until)
+{
+    constexpr auto sut = [] {
+        GenericLexer sut(StringView { "abcdef" });
+        sut.ignore_until('d');
+        return sut;
+    }();
+    static_assert(sut.peek() == 'e');
+}
+
+TEST_CASE(should_constexpr_ignore_until_cstring)
+{
+    constexpr auto sut = [] {
+        GenericLexer sut(StringView { "abcdef" });
+        sut.ignore_until("cde");
+        return sut;
+    }();
+    static_assert(sut.peek() == 'f');
+}
+
+TEST_CASE(should_constexpr_next_is_pred)
+{
+    constexpr auto pred = [](auto c) {
+        return c == 'a';
+    };
+    constexpr GenericLexer sut(StringView { "abcdef" });
+    static_assert(sut.next_is(pred));
+}
+
+TEST_CASE(should_constexpr_ignore_while_pred)
+{
+    constexpr auto sut = [] {
+        constexpr auto pred = [](auto c) {
+            return c == 'a';
+        };
+
+        GenericLexer sut(StringView { "abcdef" });
+        sut.ignore_while(pred);
+        return sut;
+    }();
+    static_assert(sut.peek() == 'b');
+}
+
+TEST_CASE(should_constexpr_ignore_until_pred)
+{
+    constexpr auto sut = [] {
+        constexpr auto pred = [](auto c) {
+            return c == 'c';
+        };
+
+        GenericLexer sut(StringView { "abcdef" });
+        sut.ignore_until(pred);
+        return sut;
+    }();
+    static_assert(sut.peek() == 'c');
+}
+
+TEST_MAIN(GenericLexer)