Kaynağa Gözat

UTF-8: Add Utf8CodepointIterator::codepoint_length_in_bytes()

This allows you to retrieve the length (in bytes) of the codepoint the
iterator is currently pointing at.
Andreas Kling 5 yıl önce
ebeveyn
işleme
f4e6dae6fe
2 değiştirilmiş dosya ile 12 ekleme ve 0 silme
  1. 10 0
      AK/Utf8View.cpp
  2. 2 0
      AK/Utf8View.h

+ 10 - 0
AK/Utf8View.cpp

@@ -139,6 +139,16 @@ Utf8CodepointIterator& Utf8CodepointIterator::operator++()
     return *this;
 }
 
+int Utf8CodepointIterator::codepoint_length_in_bytes() const // Returns the length, in bytes, of the codepoint the iterator currently points at.
+{
+    ASSERT(m_length > 0); // Precondition: m_length must be positive (presumably the bytes left to iterate; confirm against the class).
+    int codepoint_length_in_bytes; // Passed to decode_first_byte() below and returned; note this local shadows the method's own name.
+    u32 value; // Also passed to decode_first_byte(); not read again in this function.
+    bool first_byte_makes_sense = decode_first_byte(*m_ptr, codepoint_length_in_bytes, value); // Only the single byte at m_ptr is handed to the helper.
+    ASSERT(first_byte_makes_sense); // A false result from decode_first_byte() is treated as a fatal assertion failure, not a recoverable error.
+    return codepoint_length_in_bytes;
+}
+
 u32 Utf8CodepointIterator::operator*() const
 {
     ASSERT(m_length > 0);

+ 2 - 0
AK/Utf8View.h

@@ -18,6 +18,8 @@ public:
     Utf8CodepointIterator& operator++();
     u32 operator*() const;
 
+    int codepoint_length_in_bytes() const; // Length (in bytes) of the codepoint the iterator is currently pointing at.
+
 private:
     Utf8CodepointIterator(const unsigned char*, int);
     const unsigned char* m_ptr { nullptr };