Explorar o código

LibGfx: Do not exclude all ASCII code points from emoji lookups

Keycap emoji, for example, begin with ASCII digits. Instead, check the
first code point for the Emoji Unicode property.

On a profile of scrolling around on the welcome page in the Browser,
this raises the runtime percentage of Font::glyph_or_emoji_width from
about 0.8% to 1.3%.
Timothy Flynn hai 2 anos
pai
achega
fcd4535a55
Modificouse 1 ficheiro con 23 adicións e 2 borrados
  1. 23 2
      Userland/Libraries/LibGfx/Font/Emoji.cpp

+ 23 - 2
Userland/Libraries/LibGfx/Font/Emoji.cpp

@@ -5,7 +5,6 @@
  * SPDX-License-Identifier: BSD-2-Clause
  */
 
-#include <AK/CharacterTypes.h>
 #include <AK/DeprecatedString.h>
 #include <AK/HashMap.h>
 #include <AK/Span.h>
@@ -13,6 +12,7 @@
 #include <AK/Utf8View.h>
 #include <LibGfx/Bitmap.h>
 #include <LibGfx/Font/Emoji.h>
+#include <LibUnicode/CharacterTypes.h>
 
 namespace Gfx {
 
@@ -46,6 +46,27 @@ Bitmap const* Emoji::emoji_for_code_points(ReadonlySpan<u32> const& code_points)
     return bitmap.ptr();
 }
 
+template<typename CodePointIterator>
+static bool could_be_emoji(CodePointIterator const& it)
+{
+    // Cheap pre-filter: decides whether the code point at `it` may begin an emoji sequence.
+    if (it.done())
+        return false;
+    auto const first_code_point = *it;
+
+    // Custom Serenity emoji live in Supplementary Private Use Area-B, which starts at U+100000.
+    static constexpr u32 custom_emoji_range_start = 0x100000;
+    if (first_code_point >= custom_emoji_range_start)
+        return true;
+
+    static auto const emoji_property = Unicode::property_from_string("Emoji"sv);
+
+    // A missing property means Unicode data generation is disabled; always check the disk then.
+    if (emoji_property.has_value())
+        return Unicode::code_point_has_property(first_code_point, *emoji_property);
+    return true;
+}
+
 template<typename CodePointIterator>
 static Bitmap const* emoji_for_code_point_iterator_impl(CodePointIterator& it)
 {
@@ -53,7 +74,7 @@ static Bitmap const* emoji_for_code_point_iterator_impl(CodePointIterator& it)
     // into a certain range in the loop below (emojis, modifiers, variation selectors, ZWJ),
     // and bailing out early if not. Current worst case is 10 file lookups for any sequence of
     // code points (if the first glyph isn't part of the font in regular text rendering).
-    if (is_ascii(*it))
+    if (!could_be_emoji(it))
         return nullptr;
 
     constexpr size_t max_emoji_code_point_sequence_length = 10;