RegExpPrototype.cpp 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127
  1. /*
  2. * Copyright (c) 2020, Matthew Olsson <mattco@serenityos.org>
  3. * Copyright (c) 2020-2023, Linus Groh <linusg@serenityos.org>
  4. * Copyright (c) 2021, Tim Flynn <trflynn89@serenityos.org>
  5. *
  6. * SPDX-License-Identifier: BSD-2-Clause
  7. */
  8. #include <AK/CharacterTypes.h>
  9. #include <AK/Function.h>
  10. #include <AK/Utf16View.h>
  11. #include <LibJS/Runtime/AbstractOperations.h>
  12. #include <LibJS/Runtime/Array.h>
  13. #include <LibJS/Runtime/Error.h>
  14. #include <LibJS/Runtime/ErrorTypes.h>
  15. #include <LibJS/Runtime/GlobalObject.h>
  16. #include <LibJS/Runtime/RegExpConstructor.h>
  17. #include <LibJS/Runtime/RegExpObject.h>
  18. #include <LibJS/Runtime/RegExpPrototype.h>
  19. #include <LibJS/Runtime/RegExpStringIterator.h>
  20. #include <LibJS/Runtime/StringPrototype.h>
  21. #include <LibJS/Runtime/ValueInlines.h>
  22. namespace JS {
  23. GC_DEFINE_ALLOCATOR(RegExpPrototype);
  24. RegExpPrototype::RegExpPrototype(Realm& realm)
  25. : PrototypeObject(realm.intrinsics().object_prototype())
  26. {
  27. }
  28. void RegExpPrototype::initialize(Realm& realm)
  29. {
  30. auto& vm = this->vm();
  31. Base::initialize(realm);
  32. u8 attr = Attribute::Writable | Attribute::Configurable;
  33. define_native_function(realm, vm.names.toString, to_string, 0, attr);
  34. define_native_function(realm, vm.names.test, test, 1, attr);
  35. define_native_function(realm, vm.names.exec, exec, 1, attr);
  36. define_native_function(realm, vm.names.compile, compile, 2, attr);
  37. define_native_function(realm, vm.well_known_symbol_match(), symbol_match, 1, attr);
  38. define_native_function(realm, vm.well_known_symbol_match_all(), symbol_match_all, 1, attr);
  39. define_native_function(realm, vm.well_known_symbol_replace(), symbol_replace, 2, attr);
  40. define_native_function(realm, vm.well_known_symbol_search(), symbol_search, 1, attr);
  41. define_native_function(realm, vm.well_known_symbol_split(), symbol_split, 2, attr);
  42. define_native_accessor(realm, vm.names.flags, flags, {}, Attribute::Configurable);
  43. define_native_accessor(realm, vm.names.source, source, {}, Attribute::Configurable);
  44. #define __JS_ENUMERATE(FlagName, flagName, flag_name, flag_char) \
  45. define_native_accessor(realm, vm.names.flagName, flag_name, {}, Attribute::Configurable);
  46. JS_ENUMERATE_REGEXP_FLAGS
  47. #undef __JS_ENUMERATE
  48. }
  49. // Non-standard abstraction around steps used by multiple prototypes.
  50. static ThrowCompletionOr<void> increment_last_index(VM& vm, Object& regexp_object, Utf16View const& string, bool unicode)
  51. {
  52. // Let thisIndex be ℝ(? ToLength(? Get(rx, "lastIndex"))).
  53. auto last_index_value = TRY(regexp_object.get(vm.names.lastIndex));
  54. auto last_index = TRY(last_index_value.to_length(vm));
  55. // Let nextIndex be AdvanceStringIndex(S, thisIndex, fullUnicode).
  56. last_index = advance_string_index(string, last_index, unicode);
  57. // Perform ? Set(rx, "lastIndex", 𝔽(nextIndex), true).
  58. TRY(regexp_object.set(vm.names.lastIndex, Value(last_index), Object::ShouldThrowExceptions::Yes));
  59. return {};
  60. }
  61. // 22.2.7.5 Match Records, https://tc39.es/ecma262/#sec-match-records
  62. struct Match {
  63. static Match create(regex::Match const& match)
  64. {
  65. return { match.global_offset, match.global_offset + match.view.length() };
  66. }
  67. size_t start_index { 0 };
  68. size_t end_index { 0 };
  69. };
  70. // 22.2.7.7 GetMatchIndexPair ( S, match ), https://tc39.es/ecma262/#sec-getmatchindexpair
  71. static Value get_match_index_par(VM& vm, Utf16View const& string, Match const& match)
  72. {
  73. auto& realm = *vm.current_realm();
  74. // 1. Assert: match.[[StartIndex]] is an integer value ≥ 0 and ≤ the length of S.
  75. VERIFY(match.start_index <= string.length_in_code_units());
  76. // 2. Assert: match.[[EndIndex]] is an integer value ≥ match.[[StartIndex]] and ≤ the length of S.
  77. VERIFY(match.end_index >= match.start_index);
  78. VERIFY(match.end_index <= string.length_in_code_units());
  79. // 3. Return CreateArrayFromList(« match.[[StartIndex]], match.[[EndIndex]] »).
  80. return Array::create_from(realm, { Value(match.start_index), Value(match.end_index) });
  81. }
  82. // 22.2.7.8 MakeMatchIndicesIndexPairArray ( S, indices, groupNames, hasGroups ), https://tc39.es/ecma262/#sec-makematchindicesindexpairarray
  83. static Value make_match_indices_index_pair_array(VM& vm, Utf16View const& string, Vector<Optional<Match>> const& indices, HashMap<DeprecatedFlyString, Match> const& group_names, bool has_groups)
  84. {
  85. // Note: This implementation differs from the spec, but has the same behavior.
  86. //
  87. // The spec dictates that [[RegExpMatcher]] results should contain one list of capture groups,
  88. // where each entry holds its group name (if it has one). However, LibRegex stores named capture
  89. // groups in a separate hash map.
  90. //
  91. // The spec further specifies that the group names provided to this abstraction align with the
  92. // provided indices starting at indices[1], where any entry in indices that does not have a group
  93. // name is undefined in the group names list. But, the undefined groups names are then just
  94. // dropped when copying them to the output array.
  95. //
  96. // Therefore, this implementation tracks the group names without the assertion that the group
  97. // names align with the indices. The end result is the same.
  98. auto& realm = *vm.current_realm();
  99. // 1. Let n be the number of elements in indices.
  100. // 2. Assert: n < 2^32-1.
  101. VERIFY(indices.size() < NumericLimits<u32>::max());
  102. // 3. Assert: groupNames is a List with n - 1 elements.
  103. // 4. NOTE: The groupNames List contains elements aligned with the indices List starting at indices[1].
  104. // 5. Set A to ! ArrayCreate(n).
  105. auto array = MUST(Array::create(realm, indices.size()));
  106. // 6. If hasGroups is true, then
  107. // a. Let groups be ! ObjectCreate(null).
  108. // 7. Else,
  109. // a. Let groups be undefined.
  110. auto groups = has_groups ? Object::create(realm, nullptr) : js_undefined();
  111. // 9. For each integer i such that i ≥ 0 and i < n, do
  112. for (size_t i = 0; i < indices.size(); ++i) {
  113. // a. Let matchIndices be indices[i].
  114. auto const& match_indices = indices[i];
  115. // b. If matchIndices is not undefined, then
  116. // i. Let matchIndicesArray be ! GetMatchIndicesArray(S, matchIndices).
  117. // c. Else,
  118. // i. Let matchIndicesArray be undefined.
  119. auto match_indices_array = js_undefined();
  120. if (match_indices.has_value())
  121. match_indices_array = get_match_index_par(vm, string, *match_indices);
  122. // d. Perform ! CreateDataPropertyOrThrow(A, ! ToString(i), matchIndicesArray).
  123. MUST(array->create_data_property_or_throw(i, match_indices_array));
  124. }
  125. for (auto const& entry : group_names) {
  126. auto match_indices_array = get_match_index_par(vm, string, entry.value);
  127. // e. If i > 0 and groupNames[i - 1] is not undefined, then
  128. // i. Assert: groups is not undefined.
  129. // ii. Perform ! CreateDataPropertyOrThrow(groups, groupNames[i - 1], matchIndicesArray).
  130. MUST(groups.as_object().create_data_property_or_throw(entry.key, match_indices_array));
  131. }
  132. // 8. Perform ! CreateDataPropertyOrThrow(A, "groups", groups).
  133. // NOTE: This step must be performed after the above loops in order for groups to be populated.
  134. MUST(array->create_data_property_or_throw(vm.names.groups, groups));
  135. // 10. Return A.
  136. return array;
  137. }
  138. // 22.2.7.2 RegExpBuiltinExec ( R, S ), https://tc39.es/ecma262/#sec-regexpbuiltinexec
  139. // 22.2.7.2 RegExpBuiltInExec ( R, S ), https://github.com/tc39/proposal-regexp-legacy-features#regexpbuiltinexec--r-s-
  140. static ThrowCompletionOr<Value> regexp_builtin_exec(VM& vm, RegExpObject& regexp_object, Utf16String string)
  141. {
  142. auto& realm = *vm.current_realm();
  143. // 1. Let length be the length of S.
  144. // 2. Let lastIndex be ℝ(? ToLength(? Get(R, "lastIndex"))).
  145. auto last_index_value = TRY(regexp_object.get(vm.names.lastIndex));
  146. auto last_index = TRY(last_index_value.to_length(vm));
  147. auto const& regex = regexp_object.regex();
  148. // 3. Let flags be R.[[OriginalFlags]].
  149. // 4. If flags contains "g", let global be true; else let global be false.
  150. bool global = regex.options().has_flag_set(ECMAScriptFlags::Global);
  151. // 5. If flags contains "y", let sticky be true; else let sticky be false.
  152. bool sticky = regex.options().has_flag_set(ECMAScriptFlags::Sticky);
  153. // 6. If flags contains "d", let hasIndices be true, else let hasIndices be false.
  154. bool has_indices = regexp_object.flags().find('d').has_value();
  155. // 7. If global is false and sticky is false, set lastIndex to 0.
  156. if (!global && !sticky)
  157. last_index = 0;
  158. // 8. Let matcher be R.[[RegExpMatcher]].
  159. // 9. If flags contains "u" or flags contains "v", let fullUnicode be true; else let fullUnicode be false.
  160. bool full_unicode = regex.options().has_flag_set(ECMAScriptFlags::Unicode) || regex.options().has_flag_set(ECMAScriptFlags::UnicodeSets);
  161. RegexResult result;
  162. // NOTE: For optimisation purposes, this whole loop is implemented in LibRegex.
  163. // 10. Let matchSucceeded be false.
  164. // 11. If fullUnicode is true, let input be StringToCodePoints(S). Otherwise, let input be a List whose elements are the code units that are the elements of S.
  165. // 12. NOTE: Each element of input is considered to be a character.
  166. // 13. Repeat, while matchSucceeded is false
  167. // a. If lastIndex > length, then
  168. // i. If global is true or sticky is true, then
  169. // 1. Perform ? Set(R, "lastIndex", 0, true).
  170. // ii. Return null.
  171. // b. Let inputIndex be the index into input of the character that was obtained from element lastIndex of S.
  172. // c. Let r be matcher(input, inputIndex).
  173. // d. If r is failure, then
  174. // i. If sticky is true, then
  175. // 1. Perform ? Set(R, "lastIndex", 0, true).
  176. // 2. Return null.
  177. // ii. Set lastIndex to AdvanceStringIndex(S, lastIndex, fullUnicode).
  178. // e. Else,
  179. // i. Assert: r is a State.
  180. // ii. Set matchSucceeded to true.
  181. // 13.b and 13.c
  182. regex.start_offset = full_unicode ? string.view().code_point_offset_of(last_index) : last_index;
  183. result = regex.match(string.view());
  184. // 13.d and 13.a
  185. if (!result.success) {
  186. // 13.d.i, 13.a.i
  187. if (sticky || global)
  188. TRY(regexp_object.set(vm.names.lastIndex, Value(0), Object::ShouldThrowExceptions::Yes));
  189. // 13.a.ii, 13.d.i.2
  190. return js_null();
  191. }
  192. auto& match = result.matches[0];
  193. auto match_index = match.global_offset;
  194. // 14. Let e be r's endIndex value.
  195. // https://tc39.es/ecma262/#sec-notation: The endIndex is one plus the index of the last input character matched so far by the pattern.
  196. auto end_index = match_index + match.view.length();
  197. // 15. If fullUnicode is true, set e to ! GetStringIndex(S, Input, e).
  198. if (full_unicode) {
  199. match_index = string.view().code_unit_offset_of(match.global_offset);
  200. end_index = string.view().code_unit_offset_of(end_index);
  201. }
  202. // 16. If global is true or sticky is true, then
  203. if (global || sticky) {
  204. // a. Perform ? Set(R, "lastIndex", 𝔽(e), true).
  205. TRY(regexp_object.set(vm.names.lastIndex, Value(end_index), Object::ShouldThrowExceptions::Yes));
  206. }
  207. // 17. Let n be the number of elements in r's captures List. (This is the same value as 22.2.2.1's NcapturingParens.)
  208. // 18. Assert: n = R.[[RegExpRecord]].[[CapturingGroupsCount]].
  209. // 19. Assert: n < 2^32 - 1.
  210. VERIFY(result.n_named_capture_groups < NumericLimits<u32>::max());
  211. // 20. Let A be ! ArrayCreate(n + 1).
  212. auto array = MUST(Array::create(realm, result.n_named_capture_groups + 1));
  213. // 21. Assert: The mathematical value of A's "length" property is n + 1.
  214. // 22. Perform ! CreateDataPropertyOrThrow(A, "index", 𝔽(lastIndex)).
  215. MUST(array->create_data_property_or_throw(vm.names.index, Value(match_index)));
  216. // 24. Let match be the Match { [[StartIndex]]: lastIndex, [[EndIndex]]: e }.
  217. auto match_indices = Match::create(match);
  218. // 25. Let indices be a new empty List.
  219. Vector<Optional<Match>> indices;
  220. Vector<Utf16String> captured_values;
  221. // 26. Let groupNames be a new empty List.
  222. HashMap<DeprecatedFlyString, Match> group_names;
  223. // 27. Add match as the last element of indices.
  224. indices.append(move(match_indices));
  225. // 28. Let matchedValue be ! GetMatchString(S, match).
  226. // 29. Perform ! CreateDataPropertyOrThrow(A, "0", matchedValue).
  227. MUST(array->create_data_property_or_throw(0, PrimitiveString::create(vm, Utf16String::create(match.view.u16_view()))));
  228. // 30. If R contains any GroupName, then
  229. // a. Let groups be OrdinaryObjectCreate(null).
  230. // b. Let hasGroups be true.
  231. // 31. Else,
  232. // a. Let groups be undefined.
  233. // b. Let hasGroups be false.
  234. bool has_groups = result.n_named_capture_groups != 0;
  235. auto groups_object = has_groups ? Object::create(realm, nullptr) : GC::Ptr<Object> {};
  236. // 33. For each integer i such that i ≥ 1 and i ≤ n, in ascending order, do
  237. for (size_t i = 1; i <= result.n_capture_groups; ++i) {
  238. // a. Let captureI be ith element of r's captures List.
  239. auto& capture = result.capture_group_matches[0][i];
  240. Value captured_value;
  241. // b. If captureI is undefined, then
  242. if (capture.view.is_null()) {
  243. // i. Let capturedValue be undefined.
  244. captured_value = js_undefined();
  245. // ii. Append undefined to indices.
  246. indices.append({});
  247. // iii. Append capture to indices.
  248. captured_values.append(Utf16String::create());
  249. }
  250. // c. Else,
  251. else {
  252. // i. Let captureStart be captureI's startIndex.
  253. // ii. Let captureEnd be captureI's endIndex.
  254. // iii. If fullUnicode is true, then
  255. // 1. Set captureStart to ! GetStringIndex(S, Input, captureStart).
  256. // 2. Set captureEnd to ! GetStringIndex(S, Input, captureEnd).
  257. // iv. Let capture be the Match { [[StartIndex]]: captureStart, [[EndIndex]: captureEnd }.
  258. // v. Let capturedValue be ! GetMatchString(S, capture).
  259. auto capture_as_utf16_string = Utf16String::create(capture.view.u16_view());
  260. captured_value = PrimitiveString::create(vm, capture_as_utf16_string);
  261. // vi. Append capture to indices.
  262. indices.append(Match::create(capture));
  263. // vii. Append capturedValue to the end of capturedValues.
  264. captured_values.append(capture_as_utf16_string);
  265. }
  266. // d. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(i)), capturedValue).
  267. MUST(array->create_data_property_or_throw(i, captured_value));
  268. // e. If the ith capture of R was defined with a GroupName, then
  269. if (capture.capture_group_name.has_value()) {
  270. // i. Let s be the CapturingGroupName of the corresponding RegExpIdentifierName.
  271. auto group_name = capture.capture_group_name.release_value();
  272. // ii. Perform ! CreateDataPropertyOrThrow(groups, s, capturedValue).
  273. MUST(groups_object->create_data_property_or_throw(group_name, captured_value));
  274. // iii. Append s to groupNames.
  275. group_names.set(move(group_name), Match::create(capture));
  276. }
  277. // f. Else,
  278. else {
  279. // i. Append undefined to groupNames.
  280. // See the note in MakeIndicesArray for why this step is skipped.
  281. }
  282. }
  283. // https://github.com/tc39/proposal-regexp-legacy-features#regexpbuiltinexec--r-s-
  284. // 5. Let thisRealm be the current Realm Record.
  285. auto* this_realm = &realm;
  286. // 6. Let rRealm be the value of R's [[Realm]] internal slot.
  287. auto* regexp_object_realm = &regexp_object.realm();
  288. // 7. If SameValue(thisRealm, rRealm) is true, then
  289. if (this_realm == regexp_object_realm) {
  290. // i. If the value of R’s [[LegacyFeaturesEnabled]] internal slot is true, then
  291. if (regexp_object.legacy_features_enabled()) {
  292. // a. Perform UpdateLegacyRegExpStaticProperties(%RegExp%, S, lastIndex, e, capturedValues).
  293. update_legacy_regexp_static_properties(realm.intrinsics().regexp_constructor(), string, match_indices.start_index, match_indices.end_index, captured_values);
  294. }
  295. // ii. Else,
  296. else {
  297. // a. Perform InvalidateLegacyRegExpStaticProperties(%RegExp%).
  298. invalidate_legacy_regexp_static_properties(realm.intrinsics().regexp_constructor());
  299. }
  300. }
  301. // 32. Perform ! CreateDataPropertyOrThrow(A, "groups", groups).
  302. // NOTE: This step must be performed after the above loop in order for groups to be populated.
  303. Value groups = has_groups ? groups_object : js_undefined();
  304. MUST(array->create_data_property_or_throw(vm.names.groups, groups));
  305. // 34. If hasIndices is true, then
  306. if (has_indices) {
  307. // a. Let indicesArray be MakeMatchIndicesIndexPairArray(S, indices, groupNames, hasGroups).
  308. auto indices_array = make_match_indices_index_pair_array(vm, string.view(), indices, group_names, has_groups);
  309. // b. Perform ! CreateDataProperty(A, "indices", indicesArray).
  310. MUST(array->create_data_property(vm.names.indices, indices_array));
  311. }
  312. // 23. Perform ! CreateDataPropertyOrThrow(A, "input", S).
  313. // NOTE: This step is performed last to allow the string to be moved into the PrimitiveString::create() invocation.
  314. MUST(array->create_data_property_or_throw(vm.names.input, PrimitiveString::create(vm, move(string))));
  315. // 35. Return A.
  316. return array;
  317. }
  318. // 22.2.7.1 RegExpExec ( R, S ), https://tc39.es/ecma262/#sec-regexpexec
  319. ThrowCompletionOr<Value> regexp_exec(VM& vm, Object& regexp_object, Utf16String string)
  320. {
  321. // 1. Let exec be ? Get(R, "exec").
  322. auto exec = TRY(regexp_object.get(vm.names.exec));
  323. // 2. If IsCallable(exec) is true, then
  324. if (exec.is_function()) {
  325. // a. Let result be ? Call(exec, R, « S »).
  326. auto result = TRY(call(vm, exec.as_function(), &regexp_object, PrimitiveString::create(vm, move(string))));
  327. // b. If Type(result) is neither Object nor Null, throw a TypeError exception.
  328. if (!result.is_object() && !result.is_null())
  329. return vm.throw_completion<TypeError>(ErrorType::NotAnObjectOrNull, result.to_string_without_side_effects());
  330. // c. Return result.
  331. return result;
  332. }
  333. // 3. Perform ? RequireInternalSlot(R, [[RegExpMatcher]]).
  334. if (!is<RegExpObject>(regexp_object))
  335. return vm.throw_completion<TypeError>(ErrorType::NotAnObjectOfType, "RegExp");
  336. // 4. Return ? RegExpBuiltinExec(R, S).
  337. return regexp_builtin_exec(vm, static_cast<RegExpObject&>(regexp_object), move(string));
  338. }
  339. // 22.2.7.3 AdvanceStringIndex ( S, index, unicode ), https://tc39.es/ecma262/#sec-advancestringindex
  340. size_t advance_string_index(Utf16View const& string, size_t index, bool unicode)
  341. {
  342. // 1. Assert: index ≤ 2^53 - 1.
  343. // 2. If unicode is false, return index + 1.
  344. if (!unicode)
  345. return index + 1;
  346. // 3. Let length be the length of S.
  347. // 4. If index + 1 ≥ length, return index + 1.
  348. if (index + 1 >= string.length_in_code_units())
  349. return index + 1;
  350. // 5. Let cp be CodePointAt(S, index).
  351. auto code_point = code_point_at(string, index);
  352. // 6. Return index + cp.[[CodeUnitCount]].
  353. return index + code_point.code_unit_count;
  354. }
  355. // 22.2.6.3 get RegExp.prototype.dotAll, https://tc39.es/ecma262/#sec-get-regexp.prototype.dotAll
  356. // 22.2.6.5 get RegExp.prototype.global, https://tc39.es/ecma262/#sec-get-regexp.prototype.global
  357. // 22.2.6.6 get RegExp.prototype.hasIndices, https://tc39.es/ecma262/#sec-get-regexp.prototype.hasIndices
  358. // 22.2.6.7 get RegExp.prototype.ignoreCase, https://tc39.es/ecma262/#sec-get-regexp.prototype.ignorecase
  359. // 22.2.6.10 get RegExp.prototype.multiline, https://tc39.es/ecma262/#sec-get-regexp.prototype.multiline
  360. // 22.2.6.15 get RegExp.prototype.sticky, https://tc39.es/ecma262/#sec-get-regexp.prototype.sticky
  361. // 22.2.6.18 get RegExp.prototype.unicode, https://tc39.es/ecma262/#sec-get-regexp.prototype.unicode
  362. // 22.2.6.19 get RegExp.prototype.unicodeSets, https://tc39.es/ecma262/#sec-get-regexp.prototype.unicodesets
  363. #define __JS_ENUMERATE(FlagName, flagName, flag_name, flag_char) \
  364. JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::flag_name) \
  365. { \
  366. auto& realm = *vm.current_realm(); \
  367. /* 1. If Type(R) is not Object, throw a TypeError exception. */ \
  368. auto regexp_object = TRY(this_object(vm)); \
  369. /* 2. If R does not have an [[OriginalFlags]] internal slot, then */ \
  370. if (!is<RegExpObject>(*regexp_object)) { \
  371. /* a. If SameValue(R, %RegExp.prototype%) is true, return undefined. */ \
  372. if (same_value(regexp_object, realm.intrinsics().regexp_prototype())) \
  373. return js_undefined(); \
  374. /* b. Otherwise, throw a TypeError exception. */ \
  375. return vm.throw_completion<TypeError>(ErrorType::NotAnObjectOfType, "RegExp"); \
  376. } \
  377. /* 3. Let flags be R.[[OriginalFlags]]. */ \
  378. auto flags = static_cast<RegExpObject&>(*regexp_object).flag_bits(); \
  379. /* 4. If flags contains codeUnit, return true. */ \
  380. /* 5. Return false. */ \
  381. return Value(has_flag(flags, RegExpObject::Flags::FlagName)); \
  382. }
  383. JS_ENUMERATE_REGEXP_FLAGS
  384. #undef __JS_ENUMERATE
  385. // 22.2.6.2 RegExp.prototype.exec ( string ), https://tc39.es/ecma262/#sec-regexp.prototype.exec
  386. JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::exec)
  387. {
  388. // 1. Let R be the this value.
  389. // 2. Perform ? RequireInternalSlot(R, [[RegExpMatcher]]).
  390. auto regexp_object = TRY(typed_this_object(vm));
  391. // 3. Let S be ? ToString(string).
  392. auto string = TRY(vm.argument(0).to_utf16_string(vm));
  393. // 4. Return ? RegExpBuiltinExec(R, S).
  394. return TRY(regexp_builtin_exec(vm, regexp_object, move(string)));
  395. }
  396. // 22.2.6.4 get RegExp.prototype.flags, https://tc39.es/ecma262/#sec-get-regexp.prototype.flags
  397. JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::flags)
  398. {
  399. // 1. Let R be the this value.
  400. // 2. If Type(R) is not Object, throw a TypeError exception.
  401. auto regexp_object = TRY(this_object(vm));
  402. // 3. Let result be the empty String.
  403. StringBuilder builder(8);
  404. // 4. Let hasIndices be ToBoolean(? Get(R, "hasIndices")).
  405. // 5. If hasIndices is true, append the code unit 0x0064 (LATIN SMALL LETTER D) as the last code unit of result.
  406. // 6. Let global be ToBoolean(? Get(R, "global")).
  407. // 7. If global is true, append the code unit 0x0067 (LATIN SMALL LETTER G) as the last code unit of result.
  408. // 8. Let ignoreCase be ToBoolean(? Get(R, "ignoreCase")).
  409. // 9. If ignoreCase is true, append the code unit 0x0069 (LATIN SMALL LETTER I) as the last code unit of result.
  410. // 10. Let multiline be ToBoolean(? Get(R, "multiline")).
  411. // 11. If multiline is true, append the code unit 0x006D (LATIN SMALL LETTER M) as the last code unit of result.
  412. // 12. Let dotAll be ToBoolean(? Get(R, "dotAll")).
  413. // 13. If dotAll is true, append the code unit 0x0073 (LATIN SMALL LETTER S) as the last code unit of result.
  414. // 14. Let unicode be ToBoolean(? Get(R, "unicode")).
  415. // 15. If unicode is true, append the code unit 0x0075 (LATIN SMALL LETTER U) as the last code unit of result.
  416. // 16. Let unicodeSets be ! ToBoolean(? Get(R, "unicodeSets")).
  417. // 17. If unicodeSets is true, append the code unit 0x0076 (LATIN SMALL LETTER V) as the last code unit of result.
  418. // 18. Let sticky be ToBoolean(? Get(R, "sticky")).
  419. // 19. If sticky is true, append the code unit 0x0079 (LATIN SMALL LETTER Y) as the last code unit of result.
  420. #define __JS_ENUMERATE(FlagName, flagName, flag_name, flag_char) \
  421. auto flag_##flag_name = TRY(regexp_object->get(vm.names.flagName)); \
  422. if (flag_##flag_name.to_boolean()) \
  423. builder.append(#flag_char##sv);
  424. JS_ENUMERATE_REGEXP_FLAGS
  425. #undef __JS_ENUMERATE
  426. // 20. Return result.
  427. return PrimitiveString::create(vm, builder.to_byte_string());
  428. }
  429. // 22.2.6.8 RegExp.prototype [ @@match ] ( string ), https://tc39.es/ecma262/#sec-regexp.prototype-@@match
  430. JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_match)
  431. {
  432. auto& realm = *vm.current_realm();
  433. // 1. Let rx be the this value.
  434. // 2. If Type(rx) is not Object, throw a TypeError exception.
  435. auto regexp_object = TRY(this_object(vm));
  436. // 3. Let S be ? ToString(string).
  437. auto string = TRY(vm.argument(0).to_utf16_string(vm));
  438. // 4. Let flags be ? ToString(? Get(rx, "flags")).
  439. auto flags_value = TRY(regexp_object->get(vm.names.flags));
  440. auto flags = TRY(flags_value.to_byte_string(vm));
  441. // 5. If flags does not contain "g", then
  442. if (!flags.contains('g')) {
  443. // a. Return ? RegExpExec(rx, S).
  444. return TRY(regexp_exec(vm, regexp_object, move(string)));
  445. }
  446. // 6. Else,
  447. // a. If flags contains "u" or flags contains "v", let fullUnicode be true. Otherwise, let fullUnicode be false.
  448. bool full_unicode = flags.contains('u') || flags.contains('v');
  449. // b. Perform ? Set(rx, "lastIndex", +0𝔽, true).
  450. TRY(regexp_object->set(vm.names.lastIndex, Value(0), Object::ShouldThrowExceptions::Yes));
  451. // c. Let A be ! ArrayCreate(0).
  452. auto array = MUST(Array::create(realm, 0));
  453. // d. Let n be 0.
  454. size_t n = 0;
  455. // e. Repeat,
  456. while (true) {
  457. // i. Let result be ? RegExpExec(rx, S).
  458. auto result_value = TRY(regexp_exec(vm, regexp_object, string));
  459. // ii. If result is null, then
  460. if (result_value.is_null()) {
  461. // 1. If n = 0, return null.
  462. if (n == 0)
  463. return js_null();
  464. // 2. Return A.
  465. return array;
  466. }
  467. VERIFY(result_value.is_object());
  468. auto& result = result_value.as_object();
  469. // iii. Else,
  470. // 1. Let matchStr be ? ToString(? Get(result, "0")).
  471. auto match_value = TRY(result.get(0));
  472. auto match_str = TRY(match_value.to_byte_string(vm));
  473. // 2. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(n)), matchStr).
  474. MUST(array->create_data_property_or_throw(n, PrimitiveString::create(vm, match_str)));
  475. // 3. If matchStr is the empty String, then
  476. if (match_str.is_empty()) {
  477. // Steps 3a-3c are implemented by increment_last_index.
  478. TRY(increment_last_index(vm, regexp_object, string.view(), full_unicode));
  479. }
  480. // 4. Set n to n + 1.
  481. ++n;
  482. }
  483. }
  484. // 22.2.6.9 RegExp.prototype [ @@matchAll ] ( string ), https://tc39.es/ecma262/#sec-regexp-prototype-matchall
  485. JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_match_all)
  486. {
  487. auto& realm = *vm.current_realm();
  488. // 1. Let R be the this value.
  489. // 2. If Type(R) is not Object, throw a TypeError exception.
  490. auto regexp_object = TRY(this_object(vm));
  491. // 3. Let S be ? ToString(string).
  492. auto string = TRY(vm.argument(0).to_utf16_string(vm));
  493. // 4. Let C be ? SpeciesConstructor(R, %RegExp%).
  494. auto* constructor = TRY(species_constructor(vm, regexp_object, realm.intrinsics().regexp_constructor()));
  495. // 5. Let flags be ? ToString(? Get(R, "flags")).
  496. auto flags_value = TRY(regexp_object->get(vm.names.flags));
  497. auto flags = TRY(flags_value.to_byte_string(vm));
  498. // Steps 9-12 are performed early so that flags can be moved.
  499. // 9. If flags contains "g", let global be true.
  500. // 10. Else, let global be false.
  501. bool global = flags.contains('g');
  502. // 11. If flags contains "u" or flags contains "v", let fullUnicode be true.
  503. // 12. Else, let fullUnicode be false.
  504. bool full_unicode = flags.contains('u') || flags.contains('v');
  505. // 6. Let matcher be ? Construct(C, « R, flags »).
  506. auto matcher = TRY(construct(vm, *constructor, regexp_object, PrimitiveString::create(vm, move(flags))));
  507. // 7. Let lastIndex be ? ToLength(? Get(R, "lastIndex")).
  508. auto last_index_value = TRY(regexp_object->get(vm.names.lastIndex));
  509. auto last_index = TRY(last_index_value.to_length(vm));
  510. // 8. Perform ? Set(matcher, "lastIndex", lastIndex, true).
  511. TRY(matcher->set(vm.names.lastIndex, Value(last_index), Object::ShouldThrowExceptions::Yes));
  512. // 13. Return CreateRegExpStringIterator(matcher, S, global, fullUnicode).
  513. return RegExpStringIterator::create(realm, matcher, move(string), global, full_unicode);
  514. }
  515. // 22.2.6.11 RegExp.prototype [ @@replace ] ( string, replaceValue ), https://tc39.es/ecma262/#sec-regexp.prototype-@@replace
  516. JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_replace)
  517. {
  518. auto string_value = vm.argument(0);
  519. auto replace_value = vm.argument(1);
  520. // 1. Let rx be the this value.
  521. // 2. If Type(rx) is not Object, throw a TypeError exception.
  522. auto regexp_object = TRY(this_object(vm));
  523. // 3. Let S be ? ToString(string).
  524. auto string = TRY(string_value.to_utf16_string(vm));
  525. // 4. Let lengthS be the number of code unit elements in S.
  526. // 5. Let functionalReplace be IsCallable(replaceValue).
  527. // 6. If functionalReplace is false, then
  528. if (!replace_value.is_function()) {
  529. // a. Set replaceValue to ? ToString(replaceValue).
  530. auto replace_string = TRY(replace_value.to_byte_string(vm));
  531. replace_value = PrimitiveString::create(vm, move(replace_string));
  532. }
  533. // 7. Let flags be ? ToString(? Get(rx, "flags")).
  534. auto flags_value = TRY(regexp_object->get(vm.names.flags));
  535. auto flags = TRY(flags_value.to_byte_string(vm));
  536. // 8. If flags contains "g", let global be true. Otherwise, let global be false.
  537. bool global = flags.contains('g');
  538. // 9. If global is true, then
  539. if (global) {
  540. // a. Perform ? Set(rx, "lastIndex", +0𝔽, true).
  541. TRY(regexp_object->set(vm.names.lastIndex, Value(0), Object::ShouldThrowExceptions::Yes));
  542. }
  543. // 10. Let results be a new empty List.
  544. GC::RootVector<Object*> results(vm.heap());
  545. // 11. Let done be false.
  546. // 12. Repeat, while done is false,
  547. while (true) {
  548. // a. Let result be ? RegExpExec(rx, S).
  549. auto result = TRY(regexp_exec(vm, regexp_object, string));
  550. // b. If result is null, set done to true.
  551. if (result.is_null())
  552. break;
  553. // c. Else,
  554. // i. Append result to the end of results.
  555. results.append(&result.as_object());
  556. // ii. If global is false, set done to true.
  557. if (!global)
  558. break;
  559. // iii. Else,
  560. // 1. Let matchStr be ? ToString(? Get(result, "0")).
  561. auto match_value = TRY(result.get(vm, 0));
  562. auto match_str = TRY(match_value.to_byte_string(vm));
  563. // 2. If matchStr is the empty String, then
  564. if (match_str.is_empty()) {
  565. // b. If flags contains "u" or flags contains "v", let fullUnicode be true. Otherwise, let fullUnicode be false.
  566. bool full_unicode = flags.contains('u') || flags.contains('v');
  567. // Steps 2a, 2c-2d are implemented by increment_last_index.
  568. TRY(increment_last_index(vm, regexp_object, string.view(), full_unicode));
  569. }
  570. }
  571. // 13. Let accumulatedResult be the empty String.
  572. StringBuilder accumulated_result;
  573. // 14. Let nextSourcePosition be 0.
  574. size_t next_source_position = 0;
  575. // 15. For each element result of results, do
  576. for (auto& result : results) {
  577. // a. Let resultLength be ? LengthOfArrayLike(result).
  578. size_t result_length = TRY(length_of_array_like(vm, *result));
  579. // b. Let nCaptures be max(resultLength - 1, 0).
  580. size_t n_captures = result_length == 0 ? 0 : result_length - 1;
  581. // c. Let matched be ? ToString(? Get(result, "0")).
  582. auto matched_value = TRY(result->get(0));
  583. auto matched = TRY(matched_value.to_utf16_string(vm));
  584. // d. Let matchLength be the length of matched.
  585. auto matched_length = matched.length_in_code_units();
  586. // e. Let position be ? ToIntegerOrInfinity(? Get(result, "index")).
  587. auto position_value = TRY(result->get(vm.names.index));
  588. double position = TRY(position_value.to_integer_or_infinity(vm));
  589. // f. Set position to the result of clamping position between 0 and lengthS.
  590. position = clamp(position, static_cast<double>(0), static_cast<double>(string.length_in_code_units()));
  591. // g. Let captures be a new empty List.
  592. GC::RootVector<Value> captures(vm.heap());
  593. // h. Let n be 1.
  594. // i. Repeat, while n ≤ nCaptures,
  595. for (size_t n = 1; n <= n_captures; ++n) {
  596. // i. Let capN be ? Get(result, ! ToString(𝔽(n))).
  597. auto capture = TRY(result->get(n));
  598. // ii. If capN is not undefined, then
  599. if (!capture.is_undefined()) {
  600. // 1. Set capN to ? ToString(capN).
  601. capture = PrimitiveString::create(vm, TRY(capture.to_byte_string(vm)));
  602. }
  603. // iii. Append capN as the last element of captures.
  604. captures.append(move(capture));
  605. // iv. NOTE: When n = 1, the preceding step puts the first element into captures (at index 0). More generally, the nth capture (the characters captured by the nth set of capturing parentheses) is at captures[n - 1].
  606. // v. Set n to n + 1.
  607. }
  608. // j. Let namedCaptures be ? Get(result, "groups").
  609. auto named_captures = TRY(result->get(vm.names.groups));
  610. String replacement;
  611. // k. If functionalReplace is true, then
  612. if (replace_value.is_function()) {
  613. // i. Let replacerArgs be the list-concatenation of « matched », captures, and « 𝔽(position), S ».
  614. GC::RootVector<Value> replacer_args(vm.heap());
  615. replacer_args.append(PrimitiveString::create(vm, move(matched)));
  616. replacer_args.extend(move(captures));
  617. replacer_args.append(Value(position));
  618. replacer_args.append(PrimitiveString::create(vm, string));
  619. // ii. If namedCaptures is not undefined, then
  620. if (!named_captures.is_undefined()) {
  621. // 1. Append namedCaptures as the last element of replacerArgs.
  622. replacer_args.append(move(named_captures));
  623. }
  624. // iii. Let replValue be ? Call(replaceValue, undefined, replacerArgs).
  625. auto replace_result = TRY(call(vm, replace_value.as_function(), js_undefined(), replacer_args.span()));
  626. // iv. Let replacement be ? ToString(replValue).
  627. replacement = TRY(replace_result.to_string(vm));
  628. }
  629. // l. Else,
  630. else {
  631. /// i. If namedCaptures is not undefined, then
  632. if (!named_captures.is_undefined()) {
  633. // 1. Set namedCaptures to ? ToObject(namedCaptures).
  634. named_captures = TRY(named_captures.to_object(vm));
  635. }
  636. // ii. Let replacement be ? GetSubstitution(matched, S, position, captures, namedCaptures, replaceValue).
  637. replacement = TRY(get_substitution(vm, matched.view(), string.view(), position, captures, named_captures, replace_value));
  638. }
  639. // m. If position ≥ nextSourcePosition, then
  640. if (position >= next_source_position) {
  641. // i. NOTE: position should not normally move backwards. If it does, it is an indication of an ill-behaving RegExp subclass or use of an access triggered side-effect to change the global flag or other characteristics of rx. In such cases, the corresponding substitution is ignored.
  642. // ii. Set accumulatedResult to the string-concatenation of accumulatedResult, the substring of S from nextSourcePosition to position, and replacement.
  643. auto substring = string.substring_view(next_source_position, position - next_source_position);
  644. accumulated_result.append(substring);
  645. accumulated_result.append(replacement);
  646. // iii. Set nextSourcePosition to position + matchLength.
  647. next_source_position = position + matched_length;
  648. }
  649. }
  650. // 16. If nextSourcePosition ≥ lengthS, return accumulatedResult.
  651. if (next_source_position >= string.length_in_code_units())
  652. return PrimitiveString::create(vm, accumulated_result.to_byte_string());
  653. // 17. Return the string-concatenation of accumulatedResult and the substring of S from nextSourcePosition.
  654. auto substring = string.substring_view(next_source_position);
  655. accumulated_result.append(substring);
  656. return PrimitiveString::create(vm, accumulated_result.to_byte_string());
  657. }
  658. // 22.2.6.12 RegExp.prototype [ @@search ] ( string ), https://tc39.es/ecma262/#sec-regexp.prototype-@@search
  659. JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_search)
  660. {
  661. // 1. Let rx be the this value.
  662. // 2. If Type(rx) is not Object, throw a TypeError exception.
  663. auto regexp_object = TRY(this_object(vm));
  664. // 3. Let S be ? ToString(string).
  665. auto string = TRY(vm.argument(0).to_utf16_string(vm));
  666. // 4. Let previousLastIndex be ? Get(rx, "lastIndex").
  667. auto previous_last_index = TRY(regexp_object->get(vm.names.lastIndex));
  668. // 5. If SameValue(previousLastIndex, +0𝔽) is false, then
  669. if (!same_value(previous_last_index, Value(0))) {
  670. // a. Perform ? Set(rx, "lastIndex", +0𝔽, true).
  671. TRY(regexp_object->set(vm.names.lastIndex, Value(0), Object::ShouldThrowExceptions::Yes));
  672. }
  673. // 6. Let result be ? RegExpExec(rx, S).
  674. auto result = TRY(regexp_exec(vm, regexp_object, move(string)));
  675. // 7. Let currentLastIndex be ? Get(rx, "lastIndex").
  676. auto current_last_index = TRY(regexp_object->get(vm.names.lastIndex));
  677. // 8. If SameValue(currentLastIndex, previousLastIndex) is false, then
  678. if (!same_value(current_last_index, previous_last_index)) {
  679. // a. Perform ? Set(rx, "lastIndex", previousLastIndex, true).
  680. TRY(regexp_object->set(vm.names.lastIndex, previous_last_index, Object::ShouldThrowExceptions::Yes));
  681. }
  682. // 9. If result is null, return -1𝔽.
  683. if (result.is_null())
  684. return Value(-1);
  685. // 10. Return ? Get(result, "index").
  686. return TRY(result.get(vm, vm.names.index));
  687. }
  688. // 22.2.6.13 get RegExp.prototype.source, https://tc39.es/ecma262/#sec-get-regexp.prototype.source
  689. JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::source)
  690. {
  691. auto& realm = *vm.current_realm();
  692. // 1. Let R be the this value.
  693. // 2. If Type(R) is not Object, throw a TypeError exception.
  694. auto regexp_object = TRY(this_object(vm));
  695. // 3. If R does not have an [[OriginalSource]] internal slot, then
  696. if (!is<RegExpObject>(*regexp_object)) {
  697. // a. If SameValue(R, %RegExp.prototype%) is true, return "(?:)".
  698. if (same_value(regexp_object, realm.intrinsics().regexp_prototype()))
  699. return PrimitiveString::create(vm, "(?:)"_string);
  700. // b. Otherwise, throw a TypeError exception.
  701. return vm.throw_completion<TypeError>(ErrorType::NotAnObjectOfType, "RegExp");
  702. }
  703. // 4. Assert: R has an [[OriginalFlags]] internal slot.
  704. // 5. Let src be R.[[OriginalSource]].
  705. // 6. Let flags be R.[[OriginalFlags]].
  706. // 7. Return EscapeRegExpPattern(src, flags).
  707. return PrimitiveString::create(vm, static_cast<RegExpObject&>(*regexp_object).escape_regexp_pattern());
  708. }
  709. // 22.2.6.14 RegExp.prototype [ @@split ] ( string, limit ), https://tc39.es/ecma262/#sec-regexp.prototype-@@split
  710. JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_split)
  711. {
  712. auto& realm = *vm.current_realm();
  713. // 1. Let rx be the this value.
  714. // 2. If Type(rx) is not Object, throw a TypeError exception.
  715. auto regexp_object = TRY(this_object(vm));
  716. // 3. Let S be ? ToString(string).
  717. auto string = TRY(vm.argument(0).to_utf16_string(vm));
  718. // 4. Let C be ? SpeciesConstructor(rx, %RegExp%).
  719. auto* constructor = TRY(species_constructor(vm, regexp_object, realm.intrinsics().regexp_constructor()));
  720. // 5. Let flags be ? ToString(? Get(rx, "flags")).
  721. auto flags_value = TRY(regexp_object->get(vm.names.flags));
  722. auto flags = TRY(flags_value.to_byte_string(vm));
  723. // 6. If flags contains "u" or flags contains "v", let unicodeMatching be true.
  724. // 7. Else, let unicodeMatching be false.
  725. bool unicode_matching = flags.contains('u') || flags.contains('v');
  726. // 8. If flags contains "y", let newFlags be flags.
  727. // 9. Else, let newFlags be the string-concatenation of flags and "y".
  728. auto new_flags = flags.find('y').has_value() ? move(flags) : ByteString::formatted("{}y", flags);
  729. // 10. Let splitter be ? Construct(C, « rx, newFlags »).
  730. auto splitter = TRY(construct(vm, *constructor, regexp_object, PrimitiveString::create(vm, move(new_flags))));
  731. // 11. Let A be ! ArrayCreate(0).
  732. auto array = MUST(Array::create(realm, 0));
  733. // 12. Let lengthA be 0.
  734. size_t array_length = 0;
  735. // 13. If limit is undefined, let lim be 2^32 - 1; else let lim be ℝ(? ToUint32(limit)).
  736. auto limit = NumericLimits<u32>::max();
  737. if (!vm.argument(1).is_undefined())
  738. limit = TRY(vm.argument(1).to_u32(vm));
  739. // 14. If lim is 0, return A.
  740. if (limit == 0)
  741. return array;
  742. // 15. If S is the empty String, then
  743. if (string.is_empty()) {
  744. // a. Let z be ? RegExpExec(splitter, S).
  745. auto result = TRY(regexp_exec(vm, splitter, string));
  746. // b. If z is not null, return A.
  747. if (!result.is_null())
  748. return array;
  749. // c. Perform ! CreateDataPropertyOrThrow(A, "0", S).
  750. MUST(array->create_data_property_or_throw(0, PrimitiveString::create(vm, move(string))));
  751. // d. Return A.
  752. return array;
  753. }
  754. // 16. Let size be the length of S.
  755. // 17. Let p be 0.
  756. size_t last_match_end = 0;
  757. // 18. Let q be p.
  758. size_t next_search_from = 0;
  759. // 19. Repeat, while q < size,
  760. while (next_search_from < string.length_in_code_units()) {
  761. // a. Perform ? Set(splitter, "lastIndex", 𝔽(q), SplitBehavior::KeepEmpty).
  762. TRY(splitter->set(vm.names.lastIndex, Value(next_search_from), Object::ShouldThrowExceptions::Yes));
  763. // b. Let z be ? RegExpExec(splitter, S).
  764. auto result = TRY(regexp_exec(vm, splitter, string));
  765. // c. If z is null, set q to AdvanceStringIndex(S, q, unicodeMatching).
  766. if (result.is_null()) {
  767. next_search_from = advance_string_index(string.view(), next_search_from, unicode_matching);
  768. continue;
  769. }
  770. // d. Else,
  771. // i. Let e be ℝ(? ToLength(? Get(splitter, "lastIndex"))).
  772. auto last_index_value = TRY(splitter->get(vm.names.lastIndex));
  773. auto last_index = TRY(last_index_value.to_length(vm));
  774. // ii. Set e to min(e, size).
  775. last_index = min(last_index, string.length_in_code_units());
  776. // iii. If e = p, set q to AdvanceStringIndex(S, q, unicodeMatching).
  777. if (last_index == last_match_end) {
  778. next_search_from = advance_string_index(string.view(), next_search_from, unicode_matching);
  779. continue;
  780. }
  781. // iv. Else,
  782. // 1. Let T be the substring of S from p to q.
  783. auto substring = string.substring_view(last_match_end, next_search_from - last_match_end);
  784. // 2. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(lengthA)), T).
  785. MUST(array->create_data_property_or_throw(array_length, PrimitiveString::create(vm, Utf16String::create(substring))));
  786. // 3. Set lengthA to lengthA + 1.
  787. ++array_length;
  788. // 4. If lengthA = lim, return A.
  789. if (array_length == limit)
  790. return array;
  791. // 5. Set p to e.
  792. last_match_end = last_index;
  793. // 6. Let numberOfCaptures be ? LengthOfArrayLike(z).
  794. auto number_of_captures = TRY(length_of_array_like(vm, result.as_object()));
  795. // 7. Set numberOfCaptures to max(numberOfCaptures - 1, 0).
  796. if (number_of_captures > 0)
  797. --number_of_captures;
  798. // 8. Let i be 1.
  799. // 9. Repeat, while i ≤ numberOfCaptures,
  800. for (size_t i = 1; i <= number_of_captures; ++i) {
  801. // a. Let nextCapture be ? Get(z, ! ToString(𝔽(i))).
  802. auto next_capture = TRY(result.get(vm, i));
  803. // b. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(lengthA)), nextCapture).
  804. MUST(array->create_data_property_or_throw(array_length, next_capture));
  805. // c. Set i to i + 1.
  806. // d. Set lengthA to lengthA + 1.
  807. ++array_length;
  808. // e. If lengthA = lim, return A.
  809. if (array_length == limit)
  810. return array;
  811. }
  812. // 10. Set q to p.
  813. next_search_from = last_match_end;
  814. }
  815. // 20. Let T be the substring of S from p to size.
  816. auto substring = string.substring_view(last_match_end);
  817. // 21. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(lengthA)), T).
  818. MUST(array->create_data_property_or_throw(array_length, PrimitiveString::create(vm, Utf16String::create(substring))));
  819. // 22. Return A.
  820. return array;
  821. }
  822. // 22.2.6.16 RegExp.prototype.test ( S ), https://tc39.es/ecma262/#sec-regexp.prototype.test
  823. JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::test)
  824. {
  825. // 1. Let R be the this value.
  826. // 2. If Type(R) is not Object, throw a TypeError exception.
  827. auto regexp_object = TRY(this_object(vm));
  828. // 3. Let string be ? ToString(S).
  829. auto string = TRY(vm.argument(0).to_utf16_string(vm));
  830. // 4. Let match be ? RegExpExec(R, string).
  831. auto match = TRY(regexp_exec(vm, regexp_object, move(string)));
  832. // 5. If match is not null, return true; else return false.
  833. return Value(!match.is_null());
  834. }
  835. // 22.2.6.17 RegExp.prototype.toString ( ), https://tc39.es/ecma262/#sec-regexp.prototype.tostring
  836. JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::to_string)
  837. {
  838. // 1. Let R be the this value.
  839. // 2. If Type(R) is not Object, throw a TypeError exception.
  840. auto regexp_object = TRY(this_object(vm));
  841. // 3. Let pattern be ? ToString(? Get(R, "source")).
  842. auto source_attr = TRY(regexp_object->get(vm.names.source));
  843. auto pattern = TRY(source_attr.to_byte_string(vm));
  844. // 4. Let flags be ? ToString(? Get(R, "flags")).
  845. auto flags_attr = TRY(regexp_object->get(vm.names.flags));
  846. auto flags = TRY(flags_attr.to_byte_string(vm));
  847. // 5. Let result be the string-concatenation of "/", pattern, "/", and flags.
  848. // 6. Return result.
  849. return PrimitiveString::create(vm, ByteString::formatted("/{}/{}", pattern, flags));
  850. }
  851. // B.2.4.1 RegExp.prototype.compile ( pattern, flags ), https://tc39.es/ecma262/#sec-regexp.prototype.compile
  852. // B.2.4.1 RegExp.prototype.compile ( pattern, flags ), https://github.com/tc39/proposal-regexp-legacy-features#regexpprototypecompile--pattern-flags-
  853. JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::compile)
  854. {
  855. auto pattern = vm.argument(0);
  856. auto flags = vm.argument(1);
  857. // 1. Let O be the this value.
  858. // 2. Perform ? RequireInternalSlot(O, [[RegExpMatcher]]).
  859. auto regexp_object = TRY(typed_this_object(vm));
  860. // 3. Let thisRealm be the current Realm Record.
  861. auto* this_realm = vm.current_realm();
  862. // 4. Let oRealm be the value of O’s [[Realm]] internal slot.
  863. auto* regexp_object_realm = &regexp_object->realm();
  864. // 5. If SameValue(thisRealm, oRealm) is false, throw a TypeError exception.
  865. if (this_realm != regexp_object_realm)
  866. return vm.throw_completion<TypeError>(ErrorType::RegExpCompileError, "thisRealm and oRealm is not same value");
  867. // 6. If the value of R’s [[LegacyFeaturesEnabled]] internal slot is false, throw a TypeError exception.
  868. if (!regexp_object->legacy_features_enabled())
  869. return vm.throw_completion<TypeError>(ErrorType::RegExpCompileError, "legacy features is not enabled");
  870. // 7. If Type(pattern) is Object and pattern has a [[RegExpMatcher]] internal slot, then
  871. if (pattern.is_object() && is<RegExpObject>(pattern.as_object())) {
  872. // a. If flags is not undefined, throw a TypeError exception.
  873. if (!flags.is_undefined())
  874. return vm.throw_completion<TypeError>(ErrorType::NotUndefined, flags.to_string_without_side_effects());
  875. auto& regexp_pattern = static_cast<RegExpObject&>(pattern.as_object());
  876. // b. Let P be pattern.[[OriginalSource]].
  877. pattern = PrimitiveString::create(vm, regexp_pattern.pattern());
  878. // c. Let F be pattern.[[OriginalFlags]].
  879. flags = PrimitiveString::create(vm, regexp_pattern.flags());
  880. }
  881. // 8. Else,
  882. // a. Let P be pattern.
  883. // b. Let F be flags.
  884. // 9. Return ? RegExpInitialize(O, P, F).
  885. return TRY(regexp_object->regexp_initialize(vm, pattern, flags));
  886. }
  887. }