diff --git a/AK/Utf32View.h b/AK/Utf32View.h
index 539443df1d7..4c6c7f8a2cc 100644
--- a/AK/Utf32View.h
+++ b/AK/Utf32View.h
@@ -32,8 +32,63 @@
 
 namespace AK {
 
+class Utf32View;
+
+// Forward iterator over the code points of a Utf32View. Every code point
+// occupies exactly one u32, so advancing is a plain pointer increment.
+class Utf32CodepointIterator {
+    friend class Utf32View;
+
+public:
+    Utf32CodepointIterator() = default;
+    ~Utf32CodepointIterator() = default;
+
+    bool operator==(const Utf32CodepointIterator& other) const
+    {
+        return m_ptr == other.m_ptr && m_length == other.m_length;
+    }
+    bool operator!=(const Utf32CodepointIterator& other) const
+    {
+        return !(*this == other);
+    }
+    Utf32CodepointIterator& operator++()
+    {
+        ASSERT(m_length > 0);
+        m_ptr++;
+        m_length--;
+        return *this;
+    }
+    // Distance from `other` to this iterator, measured in code points.
+    ssize_t operator-(const Utf32CodepointIterator& other) const
+    {
+        return m_ptr - other.m_ptr;
+    }
+    u32 operator*() const
+    {
+        ASSERT(m_length > 0);
+        return *m_ptr;
+    }
+
+    constexpr int code_point_length_in_bytes() const { return sizeof(u32); }
+    // True once no code points remain. A default-constructed iterator has
+    // m_length == -1 and therefore does not report done().
+    bool done() const { return !m_length; }
+
+private:
+    Utf32CodepointIterator(const u32* ptr, size_t length)
+        : m_ptr(ptr)
+        , m_length(static_cast<ssize_t>(length))
+    {
+    }
+    const u32* m_ptr { nullptr };
+    // Number of code points remaining; -1 marks a default-constructed iterator.
+    ssize_t m_length { -1 };
+};
+
 class Utf32View {
 public:
+    using Iterator = Utf32CodepointIterator;
+
     Utf32View() { }
     Utf32View(const u32* code_points, size_t length)
         : m_code_points(code_points)
@@ -42,6 +97,18 @@ public:
         ASSERT(code_points || length == 0);
     }
 
+    // Iterator at the first code point, with all m_length code points remaining.
+    Utf32CodepointIterator begin() const
+    {
+        return { begin_ptr(), m_length };
+    }
+
+    // Iterator one past the last code point, with nothing remaining.
+    Utf32CodepointIterator end() const
+    {
+        return { end_ptr(), 0 };
+    }
+
     const u32* code_points() const { return m_code_points; }
     bool is_empty() const { return m_length == 0; }
     size_t length() const { return m_length; }
@@ -57,6 +124,15 @@ public:
     }
 
 private:
+    const u32* begin_ptr() const
+    {
+        return m_code_points;
+    }
+    const u32* end_ptr() const
+    {
+        return m_code_points + m_length;
+    }
+
     const u32* m_code_points { nullptr };
     size_t m_length { 0 };
 };
diff --git a/AK/Utf8View.cpp b/AK/Utf8View.cpp
index 0cec7a2f5df..cb3556bd857 100644
--- a/AK/Utf8View.cpp
+++ b/AK/Utf8View.cpp
@@ -135,7 +135,7 @@ bool Utf8View::validate(size_t& valid_bytes) const
     return true;
 }
 
-size_t Utf8View::length_in_code_points() const
+size_t Utf8View::calculate_length() const
 {
     size_t length = 0;
     for (auto code_point : *this) {
diff --git a/AK/Utf8View.h b/AK/Utf8View.h
index a24e7ea42a4..f03dc682a29 100644
--- a/AK/Utf8View.h
+++ b/AK/Utf8View.h
@@ -45,6 +45,13 @@ public:
     Utf8CodepointIterator& operator++();
     u32 operator*() const;
 
+    // NOTE(review): m_ptr is declared outside this hunk; if it addresses the raw UTF-8
+    // bytes, this is a byte distance, not a code point count -- confirm with callers.
+    ssize_t operator-(const Utf8CodepointIterator& other) const
+    {
+        return m_ptr - other.m_ptr;
+    }
+
     int code_point_length_in_bytes() const;
     bool done() const { return !m_length; }
 
@@ -56,6 +63,8 @@ private:
 
 class Utf8View {
 public:
+    using Iterator = Utf8CodepointIterator;
+
     Utf8View() { }
     explicit Utf8View(const String&);
     explicit Utf8View(const StringView&);
@@ -80,13 +89,26 @@ public:
         return validate(valid_bytes);
     }
 
-    size_t length_in_code_points() const;
+    // Length of the view (this accessor replaces length_in_code_points()).
+    // Computed by calculate_length() on first use, then served from the cache.
+    size_t length() const
+    {
+        if (!m_have_length) {
+            m_length = calculate_length();
+            m_have_length = true;
+        }
+        return m_length;
+    }
 
 private:
     const unsigned char* begin_ptr() const;
     const unsigned char* end_ptr() const;
+    size_t calculate_length() const;
 
     StringView m_string;
+    // Lazily filled cache for length(); m_length is valid only when m_have_length is true.
+    mutable size_t m_length { 0 };
+    mutable bool m_have_length { false };
 };
 
 }
diff --git a/Libraries/LibWeb/HTML/Parser/HTMLToken.h b/Libraries/LibWeb/HTML/Parser/HTMLToken.h
index fe43c250341..db74661ba2e 100644
--- a/Libraries/LibWeb/HTML/Parser/HTMLToken.h
+++ b/Libraries/LibWeb/HTML/Parser/HTMLToken.h
@@ -77,7 +77,7 @@ public:
     {
         ASSERT(is_character());
         Utf8View view(m_comment_or_character.data.string_view());
-        ASSERT(view.length_in_code_points() == 1);
+        ASSERT(view.length() == 1);
         return *view.begin();
     }
 
diff --git a/Userland/ls.cpp b/Userland/ls.cpp
index 3054f7d895a..8aa18a006fb 100644
--- a/Userland/ls.cpp
+++ b/Userland/ls.cpp
@@ -145,7 +145,7 @@ static int print_escaped(const char* name)
     Utf8View utf8_name(name);
     if (utf8_name.validate()) {
         printf("%s", name);
-        return utf8_name.length_in_code_points();
+        return utf8_name.length();
     }
 
     for (int i = 0; name[i] != '\0'; i++) {