LibJS: Resolve rope strings directly to UTF-16 when preferable
When someone calls PrimitiveString::utf16_string() on a rope string, we know for sure that the client wants a UTF-16 string and may not be interested in a UTF-8 version at all.

To avoid round-tripping through UTF-8 in this scenario, callers can now inform resolve_rope_if_needed() about their preferred encoding, should rope resolution take place.

The UTF-16 case is actually a lot simpler than the UTF-8 case, since we can simply ask for UTF-16 data for each fiber of the rope, and then concatenate all the fibers.

Since LibJS always uses UTF-16 for regular expression matching, this avoids round-tripping through UTF-8 whenever the input to a regex test is already UTF-16. :^)
This commit is contained in:
parent
e78ea08ed9
commit
a3e4535f34
Notes:
sideshowbarker
2024-07-17 03:05:16 +09:00
Author: https://github.com/awesomekling Commit: https://github.com/SerenityOS/serenity/commit/a3e4535f34 Pull-request: https://github.com/SerenityOS/serenity/pull/20000
2 changed files with 24 additions and 23 deletions
|
@ -77,7 +77,7 @@ bool PrimitiveString::is_empty() const
|
|||
ThrowCompletionOr<String> PrimitiveString::utf8_string() const
|
||||
{
|
||||
auto& vm = this->vm();
|
||||
TRY(resolve_rope_if_needed());
|
||||
TRY(resolve_rope_if_needed(EncodingPreference::UTF8));
|
||||
|
||||
if (!has_utf8_string()) {
|
||||
if (has_deprecated_string())
|
||||
|
@ -99,7 +99,7 @@ ThrowCompletionOr<StringView> PrimitiveString::utf8_string_view() const
|
|||
|
||||
ThrowCompletionOr<DeprecatedString> PrimitiveString::deprecated_string() const
|
||||
{
|
||||
TRY(resolve_rope_if_needed());
|
||||
TRY(resolve_rope_if_needed(EncodingPreference::UTF8));
|
||||
|
||||
if (!has_deprecated_string()) {
|
||||
if (has_utf8_string())
|
||||
|
@ -115,7 +115,7 @@ ThrowCompletionOr<DeprecatedString> PrimitiveString::deprecated_string() const
|
|||
|
||||
ThrowCompletionOr<Utf16String> PrimitiveString::utf16_string() const
|
||||
{
|
||||
TRY(resolve_rope_if_needed());
|
||||
TRY(resolve_rope_if_needed(EncodingPreference::UTF16));
|
||||
|
||||
if (!has_utf16_string()) {
|
||||
if (has_utf8_string()) {
|
||||
|
@ -245,31 +245,13 @@ NonnullGCPtr<PrimitiveString> PrimitiveString::create(VM& vm, PrimitiveString& l
|
|||
return vm.heap().allocate_without_realm<PrimitiveString>(lhs, rhs);
|
||||
}
|
||||
|
||||
ThrowCompletionOr<void> PrimitiveString::resolve_rope_if_needed() const
|
||||
ThrowCompletionOr<void> PrimitiveString::resolve_rope_if_needed(EncodingPreference preference) const
|
||||
{
|
||||
if (!m_is_rope)
|
||||
return {};
|
||||
|
||||
auto& vm = this->vm();
|
||||
|
||||
// NOTE: Special case for two concatenated UTF-16 strings.
|
||||
// This is here as an optimization, although I'm unsure how valuable it is.
|
||||
if (m_lhs->has_utf16_string() && m_rhs->has_utf16_string()) {
|
||||
auto const& lhs_string = m_lhs->m_utf16_string.value();
|
||||
auto const& rhs_string = m_rhs->m_utf16_string.value();
|
||||
|
||||
Utf16Data combined;
|
||||
TRY_OR_THROW_OOM(vm, combined.try_ensure_capacity(lhs_string.length_in_code_units() + rhs_string.length_in_code_units()));
|
||||
combined.extend(lhs_string.string());
|
||||
combined.extend(rhs_string.string());
|
||||
|
||||
m_utf16_string = TRY(Utf16String::create(vm, move(combined)));
|
||||
m_is_rope = false;
|
||||
m_lhs = nullptr;
|
||||
m_rhs = nullptr;
|
||||
return {};
|
||||
}
|
||||
|
||||
// This vector will hold all the pieces of the rope that need to be assembled
|
||||
// into the resolved string.
|
||||
Vector<PrimitiveString const*> pieces;
|
||||
|
@ -289,6 +271,21 @@ ThrowCompletionOr<void> PrimitiveString::resolve_rope_if_needed() const
|
|||
TRY_OR_THROW_OOM(vm, pieces.try_append(current));
|
||||
}
|
||||
|
||||
if (preference == EncodingPreference::UTF16) {
|
||||
// The caller wants a UTF-16 string, so we can simply concatenate all the pieces
|
||||
// into a UTF-16 code unit buffer and create a Utf16String from it.
|
||||
|
||||
Utf16Data code_units;
|
||||
for (auto const* current : pieces)
|
||||
code_units.extend(TRY(current->utf16_string()).string());
|
||||
|
||||
m_utf16_string = TRY(Utf16String::create(vm, move(code_units)));
|
||||
m_is_rope = false;
|
||||
m_lhs = nullptr;
|
||||
m_rhs = nullptr;
|
||||
return {};
|
||||
}
|
||||
|
||||
// Now that we have all the pieces, we can concatenate them using a StringBuilder.
|
||||
ThrowableStringBuilder builder(vm);
|
||||
|
||||
|
|
|
@ -59,7 +59,11 @@ private:
|
|||
|
||||
virtual void visit_edges(Cell::Visitor&) override;
|
||||
|
||||
ThrowCompletionOr<void> resolve_rope_if_needed() const;
|
||||
enum class EncodingPreference {
|
||||
UTF8,
|
||||
UTF16,
|
||||
};
|
||||
ThrowCompletionOr<void> resolve_rope_if_needed(EncodingPreference) const;
|
||||
|
||||
mutable bool m_is_rope { false };
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue