Переглянути джерело

LibJS: Resolve rope strings directly to UTF-16 when preferable

When someone calls PrimitiveString::utf16_string() on a rope string,
we know for sure that the client wants a UTF-16 string and may not
be interested in a UTF-8 version at all.

To avoid round-tripping through UTF-8 in this scenario, callers can
now inform resolve_rope_if_needed() about their preferred encoding,
should rope resolution take place. The UTF-16 case is actually a lot
simpler than the UTF-8 case, since we can simply ask for UTF-16 data
for each fiber of the rope, and then concatenate all the fibers.

Since LibJS always uses UTF-16 for regular expression matching, this
avoids round-tripping through UTF-8 whenever the input to a regex test
is already UTF-16. :^)
Andreas Kling 2 роки тому
батько
коміт
a3e4535f34

+ 19 - 22
Userland/Libraries/LibJS/Runtime/PrimitiveString.cpp

@@ -77,7 +77,7 @@ bool PrimitiveString::is_empty() const
 ThrowCompletionOr<String> PrimitiveString::utf8_string() const
 {
     auto& vm = this->vm();
-    TRY(resolve_rope_if_needed());
+    TRY(resolve_rope_if_needed(EncodingPreference::UTF8));
 
     if (!has_utf8_string()) {
         if (has_deprecated_string())
@@ -99,7 +99,7 @@ ThrowCompletionOr<StringView> PrimitiveString::utf8_string_view() const
 
 ThrowCompletionOr<DeprecatedString> PrimitiveString::deprecated_string() const
 {
-    TRY(resolve_rope_if_needed());
+    TRY(resolve_rope_if_needed(EncodingPreference::UTF8));
 
     if (!has_deprecated_string()) {
         if (has_utf8_string())
@@ -115,7 +115,7 @@ ThrowCompletionOr<DeprecatedString> PrimitiveString::deprecated_string() const
 
 ThrowCompletionOr<Utf16String> PrimitiveString::utf16_string() const
 {
-    TRY(resolve_rope_if_needed());
+    TRY(resolve_rope_if_needed(EncodingPreference::UTF16));
 
     if (!has_utf16_string()) {
         if (has_utf8_string()) {
@@ -245,31 +245,13 @@ NonnullGCPtr<PrimitiveString> PrimitiveString::create(VM& vm, PrimitiveString& l
     return vm.heap().allocate_without_realm<PrimitiveString>(lhs, rhs);
 }
 
-ThrowCompletionOr<void> PrimitiveString::resolve_rope_if_needed() const
+ThrowCompletionOr<void> PrimitiveString::resolve_rope_if_needed(EncodingPreference preference) const
 {
     if (!m_is_rope)
         return {};
 
     auto& vm = this->vm();
 
-    // NOTE: Special case for two concatenated UTF-16 strings.
-    //       This is here as an optimization, although I'm unsure how valuable it is.
-    if (m_lhs->has_utf16_string() && m_rhs->has_utf16_string()) {
-        auto const& lhs_string = m_lhs->m_utf16_string.value();
-        auto const& rhs_string = m_rhs->m_utf16_string.value();
-
-        Utf16Data combined;
-        TRY_OR_THROW_OOM(vm, combined.try_ensure_capacity(lhs_string.length_in_code_units() + rhs_string.length_in_code_units()));
-        combined.extend(lhs_string.string());
-        combined.extend(rhs_string.string());
-
-        m_utf16_string = TRY(Utf16String::create(vm, move(combined)));
-        m_is_rope = false;
-        m_lhs = nullptr;
-        m_rhs = nullptr;
-        return {};
-    }
-
     // This vector will hold all the pieces of the rope that need to be assembled
     // into the resolved string.
     Vector<PrimitiveString const*> pieces;
@@ -289,6 +271,21 @@ ThrowCompletionOr<void> PrimitiveString::resolve_rope_if_needed() const
         TRY_OR_THROW_OOM(vm, pieces.try_append(current));
     }
 
+    if (preference == EncodingPreference::UTF16) {
+        // The caller wants a UTF-16 string, so we can simply concatenate all the pieces
+        // into a UTF-16 code unit buffer and create a Utf16String from it.
+
+        Utf16Data code_units;
+        for (auto const* current : pieces)
+            code_units.extend(TRY(current->utf16_string()).string());
+
+        m_utf16_string = TRY(Utf16String::create(vm, move(code_units)));
+        m_is_rope = false;
+        m_lhs = nullptr;
+        m_rhs = nullptr;
+        return {};
+    }
+
     // Now that we have all the pieces, we can concatenate them using a StringBuilder.
     ThrowableStringBuilder builder(vm);
 

+ 5 - 1
Userland/Libraries/LibJS/Runtime/PrimitiveString.h

@@ -59,7 +59,11 @@ private:
 
     virtual void visit_edges(Cell::Visitor&) override;
 
-    ThrowCompletionOr<void> resolve_rope_if_needed() const;
+    enum class EncodingPreference {
+        UTF8,
+        UTF16,
+    };
+    ThrowCompletionOr<void> resolve_rope_if_needed(EncodingPreference) const;
 
     mutable bool m_is_rope { false };