Browse Source

LibJS: Skip ID_{Start,Continue} property lookup for any ASCII characters

Before this change, Lexer::is_identifier_{start,middle}() would do a
Unicode property lookup via Unicode::code_point_has_property() quite
frequently, especially for common characters like .,;{}[]() etc.

Since these and any other ASCII characters not covered by the alpha /
alphanumeric check are known not to have the ID_Start / ID_Continue
properties (except '_', which is now special-cased), we can easily
avoid this function call.
Linus Groh 3 years ago
parent
commit
a50e33abe3
1 changed files with 12 additions and 0 deletions
  1. 12 0
      Userland/Libraries/LibJS/Lexer.cpp

+ 12 - 0
Userland/Libraries/LibJS/Lexer.cpp

@@ -408,6 +408,11 @@ Optional<u32> Lexer::is_identifier_start(size_t& identifier_length) const
     if (is_ascii_alpha(code_point) || code_point == '_' || code_point == '$')
         return code_point;
 
+    // Optimization: the first codepoint with the ID_Start property after A-Za-z is outside the
+    // ASCII range (0x00AA), so we can skip code_point_has_property() for any ASCII characters.
+    if (is_ascii(code_point))
+        return {};
+
     static auto id_start_category = Unicode::property_from_string("ID_Start"sv);
     if (id_start_category.has_value() && Unicode::code_point_has_property(code_point, *id_start_category))
         return code_point;
@@ -436,6 +441,13 @@ Optional<u32> Lexer::is_identifier_middle(size_t& identifier_length) const
     if (is_ascii_alphanumeric(code_point) || (code_point == '$') || (code_point == ZERO_WIDTH_NON_JOINER) || (code_point == ZERO_WIDTH_JOINER))
         return code_point;
 
+    // Optimization: the first codepoint with the ID_Continue property after A-Za-z0-9_ is outside the
+    // ASCII range (0x00AA), so we can skip code_point_has_property() for any ASCII characters.
+    if (code_point == '_')
+        return code_point;
+    if (is_ascii(code_point))
+        return {};
+
     static auto id_continue_category = Unicode::property_from_string("ID_Continue"sv);
     if (id_continue_category.has_value() && Unicode::code_point_has_property(code_point, *id_continue_category))
         return code_point;