瀏覽代碼

AK+LibJS: Implement String.from{CharCode,CodePoint} using UTF-16 strings

Most of String.prototype and RegExp.prototype is implemented with UTF-16,
so building these strings directly as UTF-16 avoids extra copying of the string data.
Timothy Flynn 3 年之前
父節點
當前提交
70080feab2
共有 3 個文件被更改,包括 31 次插入和 18 次刪除
  1. 15 9
      AK/Utf16View.cpp
  2. 1 0
      AK/Utf16View.h
  3. 15 9
      Userland/Libraries/LibJS/Runtime/StringConstructor.cpp

+ 15 - 9
AK/Utf16View.cpp

@@ -25,15 +25,8 @@ static Vector<u16> to_utf16_impl(UtfViewType const& view) requires(IsSame<UtfVie
 {
 {
     Vector<u16> utf16_data;
     Vector<u16> utf16_data;
 
 
-    for (auto code_point : view) {
-        if (code_point < first_supplementary_plane_code_point) {
-            utf16_data.append(static_cast<u16>(code_point));
-        } else {
-            code_point -= first_supplementary_plane_code_point;
-            utf16_data.append(static_cast<u16>(high_surrogate_min | (code_point >> 10)));
-            utf16_data.append(static_cast<u16>(low_surrogate_min | (code_point & 0x3ff)));
-        }
-    }
+    for (auto code_point : view)
+        code_point_to_utf16(utf16_data, code_point);
 
 
     return utf16_data;
     return utf16_data;
 }
 }
@@ -53,6 +46,19 @@ Vector<u16> utf32_to_utf16(Utf32View const& utf32_view)
     return to_utf16_impl(utf32_view);
     return to_utf16_impl(utf32_view);
 }
 }
 
 
+void code_point_to_utf16(Vector<u16>& string, u32 code_point)
+{
+    VERIFY(is_unicode(code_point));
+
+    if (code_point < first_supplementary_plane_code_point) {
+        string.append(static_cast<u16>(code_point));
+    } else {
+        code_point -= first_supplementary_plane_code_point;
+        string.append(static_cast<u16>(high_surrogate_min | (code_point >> 10)));
+        string.append(static_cast<u16>(low_surrogate_min | (code_point & 0x3ff)));
+    }
+}
+
 bool Utf16View::is_high_surrogate(u16 code_unit)
 bool Utf16View::is_high_surrogate(u16 code_unit)
 {
 {
     return (code_unit >= high_surrogate_min) && (code_unit <= high_surrogate_max);
     return (code_unit >= high_surrogate_min) && (code_unit <= high_surrogate_max);

+ 1 - 0
AK/Utf16View.h

@@ -18,6 +18,7 @@ namespace AK {
 Vector<u16> utf8_to_utf16(StringView const&);
 Vector<u16> utf8_to_utf16(StringView const&);
 Vector<u16> utf8_to_utf16(Utf8View const&);
 Vector<u16> utf8_to_utf16(Utf8View const&);
 Vector<u16> utf32_to_utf16(Utf32View const&);
 Vector<u16> utf32_to_utf16(Utf32View const&);
+void code_point_to_utf16(Vector<u16>&, u32);
 
 
 class Utf16View;
 class Utf16View;
 
 

+ 15 - 9
Userland/Libraries/LibJS/Runtime/StringConstructor.cpp

@@ -5,6 +5,7 @@
  */
  */
 
 
 #include <AK/StringBuilder.h>
 #include <AK/StringBuilder.h>
+#include <AK/Utf16View.h>
 #include <AK/Utf32View.h>
 #include <AK/Utf32View.h>
 #include <LibJS/Runtime/AbstractOperations.h>
 #include <LibJS/Runtime/AbstractOperations.h>
 #include <LibJS/Runtime/Array.h>
 #include <LibJS/Runtime/Array.h>
@@ -124,22 +125,25 @@ JS_DEFINE_NATIVE_FUNCTION(StringConstructor::raw)
 // 22.1.2.1 String.fromCharCode ( ...codeUnits ), https://tc39.es/ecma262/#sec-string.fromcharcode
 // 22.1.2.1 String.fromCharCode ( ...codeUnits ), https://tc39.es/ecma262/#sec-string.fromcharcode
 JS_DEFINE_NATIVE_FUNCTION(StringConstructor::from_char_code)
 JS_DEFINE_NATIVE_FUNCTION(StringConstructor::from_char_code)
 {
 {
-    StringBuilder builder;
+    Vector<u16> string;
+    string.ensure_capacity(vm.argument_count());
+
     for (size_t i = 0; i < vm.argument_count(); ++i) {
     for (size_t i = 0; i < vm.argument_count(); ++i) {
-        auto char_code = vm.argument(i).to_i32(global_object);
+        auto code_unit = vm.argument(i).to_u16(global_object);
         if (vm.exception())
         if (vm.exception())
             return {};
             return {};
-        auto truncated = char_code & 0xffff;
-        // FIXME: We need an Utf16View :^)
-        builder.append(Utf32View((u32*)&truncated, 1));
+        string.append(code_unit);
     }
     }
-    return js_string(vm, builder.build());
+
+    return js_string(vm, move(string));
 }
 }
 
 
 // 22.1.2.2 String.fromCodePoint ( ...codePoints ), https://tc39.es/ecma262/#sec-string.fromcodepoint
 // 22.1.2.2 String.fromCodePoint ( ...codePoints ), https://tc39.es/ecma262/#sec-string.fromcodepoint
 JS_DEFINE_NATIVE_FUNCTION(StringConstructor::from_code_point)
 JS_DEFINE_NATIVE_FUNCTION(StringConstructor::from_code_point)
 {
 {
-    StringBuilder builder;
+    Vector<u16> string;
+    string.ensure_capacity(vm.argument_count()); // This will be an under-estimate if any code point is > 0xffff.
+
     for (size_t i = 0; i < vm.argument_count(); ++i) {
     for (size_t i = 0; i < vm.argument_count(); ++i) {
         auto next_code_point = vm.argument(i).to_number(global_object);
         auto next_code_point = vm.argument(i).to_number(global_object);
         if (vm.exception())
         if (vm.exception())
@@ -153,9 +157,11 @@ JS_DEFINE_NATIVE_FUNCTION(StringConstructor::from_code_point)
             vm.throw_exception<RangeError>(global_object, ErrorType::InvalidCodePoint, next_code_point.to_string_without_side_effects());
             vm.throw_exception<RangeError>(global_object, ErrorType::InvalidCodePoint, next_code_point.to_string_without_side_effects());
             return {};
             return {};
         }
         }
-        builder.append_code_point(code_point);
+
+        AK::code_point_to_utf16(string, static_cast<u32>(code_point));
     }
     }
-    return js_string(vm, builder.build());
+
+    return js_string(vm, move(string));
 }
 }
 
 
 }
 }