LibJS: Implement RegExp.prototype [ @@split ]

This commit is contained in:
Timothy Flynn 2021-07-09 09:57:07 -04:00 committed by Linus Groh
parent 43918b0104
commit e4124d0218
Notes: sideshowbarker 2024-07-18 09:58:20 +09:00
3 changed files with 155 additions and 0 deletions

View file

@ -10,6 +10,7 @@
#include <LibJS/Runtime/Array.h>
#include <LibJS/Runtime/Error.h>
#include <LibJS/Runtime/GlobalObject.h>
#include <LibJS/Runtime/RegExpConstructor.h>
#include <LibJS/Runtime/RegExpObject.h>
#include <LibJS/Runtime/RegExpPrototype.h>
#include <LibJS/Token.h>
@ -33,6 +34,7 @@ void RegExpPrototype::initialize(GlobalObject& global_object)
define_native_function(*vm.well_known_symbol_match(), symbol_match, 1, attr);
define_native_function(*vm.well_known_symbol_replace(), symbol_replace, 2, attr);
define_native_function(*vm.well_known_symbol_search(), symbol_search, 1, attr);
define_native_function(*vm.well_known_symbol_split(), symbol_split, 2, attr);
define_native_accessor(vm.names.flags, flags, {}, Attribute::Configurable);
define_native_accessor(vm.names.source, source, {}, Attribute::Configurable);
@ -617,4 +619,124 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_search)
return index;
}
// 22.2.5.13 RegExp.prototype [ @@split ] ( string, limit ), https://tc39.es/ecma262/#sec-regexp.prototype-@@split
//
// Splits argument 0 (converted to a string) around the matches of the `this` regular expression and
// returns the pieces, interleaved with each match's capture groups, as a new Array.
//
// Argument 1 is an optional limit on the number of array entries; when it is undefined the limit is
// the largest u32 value, and a limit of 0 yields an empty array.
//
// Returns an empty Value whenever a step leaves an exception pending on the VM.
JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_split)
{
    // NOTE(review): this_object_from() presumably throws when `this` is not an object - confirm against its definition.
    auto* regexp_object = this_object_from(vm, global_object);
    if (!regexp_object)
        return {};

    auto string = vm.argument(0).to_string(global_object);
    if (vm.exception())
        return {};

    // The splitter is built through the species constructor so that RegExp subclasses are honoured.
    auto* constructor = species_constructor(global_object, *regexp_object, *global_object.regexp_constructor());
    if (vm.exception())
        return {};

    auto flags_object = regexp_object->get(vm.names.flags);
    if (vm.exception())
        return {};
    auto flags = flags_object.to_string(global_object);
    if (vm.exception())
        return {};

    // The splitter always carries the sticky flag; 'y' is appended only when it is not already present.
    auto new_flags = flags.find('y').has_value() ? move(flags) : String::formatted("{}y", flags);

    MarkedValueList arguments(vm.heap());
    arguments.append(regexp_object);
    arguments.append(js_string(vm, move(new_flags)));

    auto splitter_value = vm.construct(*constructor, *constructor, move(arguments));
    if (vm.exception())
        return {};
    auto* splitter = splitter_value.to_object(global_object);
    if (!splitter)
        return {};

    auto* array = Array::create(global_object, 0);
    size_t array_length = 0;

    auto limit = NumericLimits<u32>::max();
    if (!vm.argument(1).is_undefined()) {
        limit = vm.argument(1).to_u32(global_object);
        if (vm.exception())
            return {};
    }

    if (limit == 0)
        return array;

    // Empty input: the result is [] when the splitter matches the empty string, and [""] otherwise.
    if (string.is_empty()) {
        auto result = regexp_exec(global_object, *splitter, string);
        // Bail out on a pending exception, exactly as the loop below does after its own regexp_exec()
        // call, instead of interpreting the result of a failed exec as "matched" or "did not match".
        if (vm.exception())
            return {};
        if (!result.is_null())
            return array;

        array->create_data_property_or_throw(0, js_string(vm, string));
        return array;
    }

    size_t last_match_end = 0; // 'p' in the spec.
    size_t next_search_from = 0; // 'q' in the spec.

    while (next_search_from < string.length()) {
        // Try to match exactly at next_search_from by seeding the splitter's lastIndex.
        splitter->set(vm.names.lastIndex, Value(next_search_from), true);
        if (vm.exception())
            return {};

        auto result = regexp_exec(global_object, *splitter, string);
        if (vm.exception())
            return {};
        if (result.is_null()) {
            // No match at this position; retry one position further on.
            // FIXME: Implement AdvanceStringIndex to take Unicode code points into account - https://tc39.es/ecma262/#sec-advancestringindex
            ++next_search_from;
            continue;
        }

        auto last_index_value = splitter->get(vm.names.lastIndex);
        if (vm.exception())
            return {};
        auto last_index = last_index_value.to_length(global_object); // 'e' in the spec.
        if (vm.exception())
            return {};

        // lastIndex is read back from the splitter object, so clamp it to the string length.
        last_index = min(last_index, string.length());

        if (last_index == last_match_end) {
            // The match ends where the previous piece ended, so it would not advance the split; skip ahead.
            // FIXME: Implement AdvanceStringIndex to take Unicode code points into account - https://tc39.es/ecma262/#sec-advancestringindex
            ++next_search_from;
            continue;
        }

        // Emit the text between the end of the previous match and the start of this one.
        auto substring = string.substring(last_match_end, next_search_from - last_match_end);
        array->create_data_property_or_throw(array_length, js_string(vm, move(substring)));
        if (++array_length == limit)
            return array;

        auto* result_object = result.to_object(global_object);
        if (!result_object)
            return {};
        auto number_of_captures = length_of_array_like(global_object, *result_object);
        if (vm.exception())
            return {};
        // Index 0 of the exec result is skipped below, so it is not counted as a capture.
        if (number_of_captures > 0)
            --number_of_captures;

        // Append result entries 1..number_of_captures, still honouring the limit.
        for (size_t i = 1; i <= number_of_captures; ++i) {
            auto next_capture = result_object->get(i);
            if (vm.exception())
                return {};

            array->create_data_property_or_throw(array_length, next_capture);
            if (++array_length == limit)
                return array;
        }

        last_match_end = last_index;
        next_search_from = last_index;
    }

    // Whatever follows the final match becomes the last entry.
    auto substring = string.substring(last_match_end);
    array->create_data_property_or_throw(array_length, js_string(vm, move(substring)));

    return array;
}
}

View file

@ -28,6 +28,7 @@ private:
JS_DECLARE_NATIVE_FUNCTION(symbol_match);
JS_DECLARE_NATIVE_FUNCTION(symbol_replace);
JS_DECLARE_NATIVE_FUNCTION(symbol_search);
JS_DECLARE_NATIVE_FUNCTION(symbol_split);
#define __JS_ENUMERATE(_, flag_name, ...) \
JS_DECLARE_NATIVE_GETTER(flag_name);

View file

@ -33,3 +33,35 @@ test("limits", () => {
expect("a b c d".split(" ", 3)).toEqual(["a", "b", "c"]);
expect("a b c d".split(" ", 100)).toEqual(["a", "b", "c", "d"]);
});
test("regex split", () => {
    // Subclass whose @@split delegates to the built-in implementation and then
    // wraps every returned piece in parentheses.
    class ParenthesizingRegExp extends RegExp {
        [Symbol.split](input, maxPieces) {
            const pieces = RegExp.prototype[Symbol.split].call(this, input, maxPieces);
            return pieces.map(x => `(${x})`);
        }
    }

    // String.prototype.split is expected to go through the overridden @@split
    // for the subclass, and through the built-in one for a plain RegExp.
    const isoDate = "2016-01-02";
    expect(isoDate.split(new ParenthesizingRegExp("-"))).toEqual(["(2016)", "(01)", "(02)"]);
    expect(isoDate.split(new RegExp("-"))).toEqual(["2016", "01", "02"]);

    // Patterns that can match the empty string.
    const lazyPattern = /a*?/;
    const greedyPattern = /a*/;
    expect(lazyPattern[Symbol.split]("ab")).toEqual(["a", "b"]);
    expect(greedyPattern[Symbol.split]("ab")).toEqual(["", "b"]);

    // Capture groups are interleaved with the pieces; a group that did not
    // participate in the match is expected to appear as undefined.
    const tagPattern = /<(\/)?([^<>]+)>/;
    const markup = "A<B>bold</B>and<CODE>coded</CODE>";
    const expectedPieces = [
        "A",
        undefined,
        "B",
        "bold",
        "/",
        "B",
        "and",
        undefined,
        "CODE",
        "coded",
        "/",
        "CODE",
        "",
    ];
    expect(tagPattern[Symbol.split](markup)).toEqual(expectedPieces);
});