Преглед изворног кода

LibJS: Implement String.prototype.codePointAt with UTF-16 code units

This also implements the CodePointAt abstract operation. This is needed
to handle invalid code units specific to the JavaScript spec, rather
than e.g. inserting replacement code units. This abstraction is public
because RegExp.prototype will also need it.
Timothy Flynn пре 4 година
родитељ
комит
a05ce330b8

+ 31 - 8
Userland/Libraries/LibJS/Runtime/StringPrototype.cpp

@@ -51,6 +51,29 @@ static Optional<size_t> split_match(const String& haystack, size_t start, const
     return start + r;
 }
 
+// 11.1.4 CodePointAt ( string, position ), https://tc39.es/ecma262/#sec-codepointat
+// Decodes the code point that starts at code unit index `position`, reporting its width and whether it is a lone surrogate.
+CodePoint code_point_at(Utf16View const& string, size_t position)
+{
+    auto const size = string.length_in_code_units();
+    VERIFY(position < size);
+    auto const lead = string.code_unit_at(position);
+    auto const lead_value = static_cast<u32>(lead);
+
+    // A non-surrogate code unit is a complete code point by itself.
+    if (!(Utf16View::is_high_surrogate(lead) || Utf16View::is_low_surrogate(lead)))
+        return { lead_value, 1, false };
+    // A low surrogate cannot open a pair, and a high surrogate in the last slot has no partner.
+    if (Utf16View::is_low_surrogate(lead) || position + 1 == size)
+        return { lead_value, 1, true };
+
+    auto const trail = string.code_unit_at(position + 1);
+    if (!Utf16View::is_low_surrogate(trail))
+        return { lead_value, 1, true }; // High surrogate followed by something other than a low surrogate.
+    u32 const combined = Utf16View::decode_surrogate_pair(lead, trail);
+    return { combined, 2, false };
+}
+
 StringPrototype::StringPrototype(GlobalObject& global_object)
     : StringObject(*js_string(global_object.heap(), String::empty()), *global_object.object_prototype())
 {
@@ -162,19 +185,19 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::char_code_at)
 // 22.1.3.3 String.prototype.codePointAt ( pos ), https://tc39.es/ecma262/#sec-string.prototype.codepointat
 JS_DEFINE_NATIVE_FUNCTION(StringPrototype::code_point_at)
 {
-    auto string = ak_string_from(vm, global_object);
-    if (!string.has_value())
+    auto string = utf16_string_from(vm, global_object); // NOTE(review): presumably the this-value as UTF-16 code units; helper is defined outside this hunk.
+    if (vm.exception())
         return {};
     auto position = vm.argument(0).to_integer_or_infinity(global_object);
     if (vm.exception())
         return {};
-    auto view = Utf8View(*string);
-    if (position < 0 || position >= view.length())
+
+    Utf16View utf16_string_view { string };
+    if (position < 0 || position >= utf16_string_view.length_in_code_units()) // Bounds are now measured in UTF-16 code units.
         return js_undefined();
-    auto it = view.begin();
-    for (auto i = 0; i < position; ++i)
-        ++it;
-    return Value(*it);
+
+    auto code_point = JS::code_point_at(utf16_string_view, position);
+    return Value(code_point.code_point); // Only the decoded value is returned; the unit count and surrogate flag are not used here.
 }
 
 // 22.1.3.16 String.prototype.repeat ( count ), https://tc39.es/ecma262/#sec-string.prototype.repeat

+ 8 - 0
Userland/Libraries/LibJS/Runtime/StringPrototype.h

@@ -10,6 +10,14 @@
 
 namespace JS {
 
+struct CodePoint { // Result of decoding one position of a UTF-16 string (returned by code_point_at).
+    u32 code_point { 0 }; // Decoded value, or the raw code unit value when the surrogate is unpaired.
+    size_t code_unit_count { 0 }; // Number of code units consumed; code_point_at yields 1 or 2.
+    bool is_unpaired_surrogate { false }; // True when a surrogate code unit had no valid partner.
+};
+
+CodePoint code_point_at(Utf16View const& string, size_t position); // position must be below the string's code unit length (the implementation VERIFYs this).
+
 class StringPrototype final : public StringObject {
     JS_OBJECT(StringPrototype, StringObject);
 

+ 29 - 0
Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.codePointAt.js

@@ -0,0 +1,29 @@
+test("basic functionality", () => {
+    expect(String.prototype.charAt).toHaveLength(1);
+
+    var s = "Foobar";
+    expect(typeof s).toBe("string");
+    expect(s).toHaveLength(6);
+
+    expect(s.codePointAt(0)).toBe(70);
+    expect(s.codePointAt(1)).toBe(111);
+    expect(s.codePointAt(2)).toBe(111);
+    expect(s.codePointAt(3)).toBe(98);
+    expect(s.codePointAt(4)).toBe(97);
+    expect(s.codePointAt(5)).toBe(114);
+    expect(s.codePointAt(6)).toBe(undefined);
+    expect(s.codePointAt(-1)).toBe(undefined);
+
+    expect(s.codePointAt()).toBe(70);
+    expect(s.codePointAt(NaN)).toBe(70);
+    expect(s.codePointAt("foo")).toBe(70);
+    expect(s.codePointAt(undefined)).toBe(70);
+});
+
+test("UTF-16", () => {
+    var s = "😀";
+    expect(s).toHaveLength(2);
+    expect(s.codePointAt(0)).toBe(0x1f600);
+    expect(s.codePointAt(1)).toBe(0xde00);
+    expect(s.codePointAt(2)).toBe(undefined);
+});