Selaa lähdekoodia

LibJS: Implement String.prototype.split

This adds a String.prototype.split implementation modelled after
the ECMA-262 specification.

Additionally, `Value::to_u32` was added as an implementation of
the standard `ToUint32` abstract operation.

There is a tiny kludge for when the separator is an empty string. 
Basic tests and visiting google.com prove that this is working.
Marcin Gasperowicz 4 vuotta sitten
vanhempi
commit
b24ce0b5ee

+ 81 - 0
Libraries/LibJS/Runtime/StringPrototype.cpp

@@ -28,6 +28,7 @@
 #include <AK/Function.h>
 #include <AK/StringBuilder.h>
 #include <LibJS/Heap/Heap.h>
+#include <LibJS/Runtime/Array.h>
 #include <LibJS/Runtime/Error.h>
 #include <LibJS/Runtime/GlobalObject.h>
 #include <LibJS/Runtime/PrimitiveString.h>
@@ -59,6 +60,17 @@ static String ak_string_from(VM& vm, GlobalObject& global_object)
     return Value(this_object).to_string(global_object);
 }
 
+// Checks whether `needle` occurs in `haystack` exactly at offset `start`.
+// Returns the offset just past the match, or an empty Optional when the
+// needle does not fit in the remaining text or does not match there.
+static Optional<size_t> split_match(const String& haystack, size_t start, const String& needle)
+{
+    const auto needle_length = needle.length();
+    const bool fits = start + needle_length <= haystack.length();
+    if (fits && haystack.substring_view(start).starts_with(needle))
+        return start + needle_length;
+    return {};
+}
+
 StringPrototype::StringPrototype(GlobalObject& global_object)
     : StringObject(*js_string(global_object.heap(), String::empty()), *global_object.object_prototype())
 {
@@ -90,6 +102,7 @@ void StringPrototype::initialize(GlobalObject& global_object)
     define_native_function(vm.names.substring, substring, 2, attr);
     define_native_function(vm.names.includes, includes, 1, attr);
     define_native_function(vm.names.slice, slice, 2, attr);
+    define_native_function(vm.names.split, split, 2, attr);
     define_native_function(vm.names.lastIndexOf, last_index_of, 1, attr);
     define_native_function(vm.well_known_symbol_iterator(), symbol_iterator, 0, attr);
 }
@@ -507,6 +520,74 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::slice)
     return js_string(vm, string_part);
 }
 
+// String.prototype.split(separator, limit) for string separators.
+// Builds a new Array holding the pieces of the receiver string that lie
+// between occurrences of `separator`, storing at most `limit` elements.
+// Returns an empty Value when converting the receiver or an argument
+// leaves an exception on the VM.
+JS_DEFINE_NATIVE_FUNCTION(StringPrototype::split)
+{
+    // FIXME Implement the @@split part
+
+    auto string = ak_string_from(vm, global_object);
+    if (string.is_null())
+        return {};
+
+    auto* result = Array::create(global_object);
+    size_t result_len = 0;
+
+    // An undefined limit means "no limit", expressed as the largest u32.
+    auto limit = static_cast<u32>(MAX_U32);
+    if (!vm.argument(1).is_undefined()) {
+        limit = vm.argument(1).to_u32(global_object);
+        if (vm.exception())
+            return {};
+    }
+
+    // The separator is stringified after the limit is converted and before
+    // any early return, so its conversion side effects always happen.
+    auto separator = vm.argument(0).to_string(global_object);
+    if (vm.exception())
+        return {};
+
+    if (limit == 0)
+        return result;
+
+    // No separator given: the whole string is the only element.
+    if (vm.argument(0).is_undefined()) {
+        result->define_property(0, js_string(vm, string));
+        return result;
+    }
+
+    auto len = string.length();
+    auto separator_len = separator.length();
+    if (len == 0) {
+        // An empty separator matches the empty string, giving an empty array;
+        // any other separator cannot match, giving [""].
+        if (separator_len > 0)
+            result->define_property(0, js_string(vm, string));
+        return result;
+    }
+
+    size_t start = 0;
+    auto pos = start;
+    if (separator_len == 0) {
+        // Empty separator: emit every substring of length 1, but never more
+        // than `limit` of them.
+        for (pos = 0; pos < len && pos < limit; pos++)
+            result->define_property(pos, js_string(vm, string.substring(pos, 1)));
+        return result;
+    }
+
+    // `start` marks the beginning of the current piece, `pos` scans for the
+    // next separator occurrence.
+    while (pos != len) {
+        auto e = split_match(string, pos, separator);
+        if (!e.has_value()) {
+            pos += 1;
+            continue;
+        }
+
+        auto segment = string.substring_view(start, pos - start);
+        result->define_property(result_len, js_string(vm, segment));
+        result_len++;
+        if (result_len == limit)
+            return result;
+        // Resume scanning right after the matched separator.
+        start = e.value();
+        pos = start;
+    }
+
+    // Whatever follows the last separator (possibly empty) is the final piece.
+    auto rest = string.substring(start, len - start);
+    result->define_property(result_len, js_string(vm, rest));
+
+    return result;
+}
+
 JS_DEFINE_NATIVE_FUNCTION(StringPrototype::last_index_of)
 {
     auto string = ak_string_from(vm, global_object);

+ 1 - 0
Libraries/LibJS/Runtime/StringPrototype.h

@@ -61,6 +61,7 @@ private:
     JS_DECLARE_NATIVE_FUNCTION(concat);
     JS_DECLARE_NATIVE_FUNCTION(includes);
     JS_DECLARE_NATIVE_FUNCTION(slice);
+    JS_DECLARE_NATIVE_FUNCTION(split);
     JS_DECLARE_NATIVE_FUNCTION(last_index_of);
 
     JS_DECLARE_NATIVE_FUNCTION(symbol_iterator);

+ 19 - 0
Libraries/LibJS/Runtime/Value.cpp

@@ -470,6 +470,12 @@ i32 Value::as_i32() const
     return static_cast<i32>(as_double());
 }
 
+// Returns the stored number as a u32, clamped to MAX_U32.
+// The value must be non-negative (asserted). The clamp is applied to the
+// double itself rather than going through as_i32(), which cannot represent
+// values above the signed 32-bit range.
+u32 Value::as_u32() const
+{
+    ASSERT(as_double() >= 0);
+    return static_cast<u32>(min(as_double(), MAX_U32));
+}
+
 size_t Value::as_size_t() const
 {
     ASSERT(as_double() >= 0);
@@ -494,6 +500,19 @@ i32 Value::to_i32(GlobalObject& global_object) const
     return number.as_i32();
 }
 
+// Converts this value to an unsigned 32-bit integer following ToUint32.
+// Returns INVALID if the numeric conversion leaves an exception on the VM.
+u32 Value::to_u32(GlobalObject& global_object) const
+{
+    // 7.1.7 ToUint32, https://tc39.es/ecma262/#sec-touint32
+    auto number = to_number(global_object);
+    if (global_object.vm().exception())
+        return INVALID;
+    // NaN and +/-Infinity map to 0.
+    if (number.is_nan() || number.is_infinity())
+        return 0;
+    // Truncate toward zero, then reduce modulo 2^32 into [0, 2^32). Negative
+    // inputs wrap around (e.g. -1 becomes 4294967295) instead of becoming 0.
+    // Compiler builtins are used so no additional header is required.
+    constexpr double two_to_the_32 = 4294967296.0;
+    auto int32bit = __builtin_fmod(__builtin_trunc(number.as_double()), two_to_the_32);
+    if (int32bit < 0)
+        int32bit += two_to_the_32;
+    return static_cast<u32>(int32bit);
+}
+
 size_t Value::to_size_t(GlobalObject& global_object) const
 {
     // FIXME: Replace uses of this function with to_length/to_index for correct behaviour and remove this eventually.

+ 4 - 0
Libraries/LibJS/Runtime/Value.h

@@ -36,6 +36,8 @@
 
 // 2 ** 53 - 1
 static constexpr double MAX_ARRAY_LIKE_INDEX = 9007199254740991.0;
+// 2 ** 32 - 1, the largest value representable as an unsigned 32-bit integer
+static constexpr double MAX_U32 = 4294967295.0;
 
 namespace JS {
 
@@ -241,6 +243,7 @@ public:
     Function& as_function();
 
     i32 as_i32() const;
+    u32 as_u32() const;
     size_t as_size_t() const;
 
     String to_string(GlobalObject&, bool legacy_null_to_empty_string = false) const;
@@ -252,6 +255,7 @@ public:
     BigInt* to_bigint(GlobalObject&) const;
     double to_double(GlobalObject&) const;
     i32 to_i32(GlobalObject&) const;
+    u32 to_u32(GlobalObject&) const;
     size_t to_size_t(GlobalObject&) const;
     size_t to_length(GlobalObject&) const;
     size_t to_index(GlobalObject&) const;

+ 35 - 0
Libraries/LibJS/Tests/builtins/String/String.prototype.split.js

@@ -0,0 +1,35 @@
+test("basic functionality", () => {
+    expect(String.prototype.split).toHaveLength(2); // declared arity: separator and limit
+
+    expect("hello friends".split()).toEqual(["hello friends"]); // no separator: whole string is the only element
+    expect("hello friends".split("")).toEqual([ // empty separator: one element per character
+        "h",
+        "e",
+        "l",
+        "l",
+        "o",
+        " ",
+        "f",
+        "r",
+        "i",
+        "e",
+        "n",
+        "d",
+        "s",
+    ]);
+    expect("hello friends".split(" ")).toEqual(["hello", "friends"]);
+
+    expect("a,b,c,d".split(",")).toEqual(["a", "b", "c", "d"]);
+    expect(",a,b,c,d".split(",")).toEqual(["", "a", "b", "c", "d"]); // leading separator yields a leading empty string
+    expect("a,b,c,d,".split(",")).toEqual(["a", "b", "c", "d", ""]); // trailing separator yields a trailing empty string
+    expect("a,b,,c,d".split(",")).toEqual(["a", "b", "", "c", "d"]); // adjacent separators yield an empty string between them
+    expect(",a,b,,c,d,".split(",")).toEqual(["", "a", "b", "", "c", "d", ""]);
+    expect(",a,b,,,c,d,".split(",,")).toEqual([",a,b", ",c,d,"]); // multi-character separator, matched left to right
+});
+
+test("limits", () => {
+    expect("a b c d".split(" ", 0)).toEqual([]); // limit 0: always an empty array
+    expect("a b c d".split(" ", 1)).toEqual(["a"]); // splitting stops once `limit` elements are collected
+    expect("a b c d".split(" ", 3)).toEqual(["a", "b", "c"]);
+    expect("a b c d".split(" ", 100)).toEqual(["a", "b", "c", "d"]); // limit above the number of pieces has no effect
+});