RegExpConstructor.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394
  1. /*
  2. * Copyright (c) 2020, Matthew Olsson <mattco@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/CharacterTypes.h>
  7. #include <AK/Find.h>
  8. #include <LibJS/Lexer.h>
  9. #include <LibJS/Runtime/Error.h>
  10. #include <LibJS/Runtime/GlobalObject.h>
  11. #include <LibJS/Runtime/RegExpConstructor.h>
  12. #include <LibJS/Runtime/RegExpObject.h>
  13. #include <LibJS/Runtime/Value.h>
  14. namespace JS {
  15. GC_DEFINE_ALLOCATOR(RegExpConstructor);
  16. RegExpConstructor::RegExpConstructor(Realm& realm)
  17. : NativeFunction(realm.vm().names.RegExp.as_string(), realm.intrinsics().function_prototype())
  18. {
  19. }
  20. void RegExpConstructor::initialize(Realm& realm)
  21. {
  22. auto& vm = this->vm();
  23. Base::initialize(realm);
  24. // 22.2.5.1 RegExp.prototype, https://tc39.es/ecma262/#sec-regexp.prototype
  25. define_direct_property(vm.names.prototype, realm.intrinsics().regexp_prototype(), 0);
  26. u8 attr = Attribute::Writable | Attribute::Configurable;
  27. define_native_function(realm, vm.names.escape, escape, 1, attr);
  28. define_native_accessor(realm, vm.well_known_symbol_species(), symbol_species_getter, {}, Attribute::Configurable);
  29. define_direct_property(vm.names.length, Value(2), Attribute::Configurable);
  30. // Additional properties of the RegExp constructor, https://github.com/tc39/proposal-regexp-legacy-features#additional-properties-of-the-regexp-constructor
  31. define_native_accessor(realm, vm.names.input, input_getter, input_setter, Attribute::Configurable);
  32. define_native_accessor(realm, vm.names.inputAlias, input_alias_getter, input_alias_setter, Attribute::Configurable);
  33. define_native_accessor(realm, vm.names.lastMatch, last_match_getter, {}, Attribute::Configurable);
  34. define_native_accessor(realm, vm.names.lastMatchAlias, last_match_alias_getter, {}, Attribute::Configurable);
  35. define_native_accessor(realm, vm.names.lastParen, last_paren_getter, {}, Attribute::Configurable);
  36. define_native_accessor(realm, vm.names.lastParenAlias, last_paren_alias_getter, {}, Attribute::Configurable);
  37. define_native_accessor(realm, vm.names.leftContext, left_context_getter, {}, Attribute::Configurable);
  38. define_native_accessor(realm, vm.names.leftContextAlias, left_context_alias_getter, {}, Attribute::Configurable);
  39. define_native_accessor(realm, vm.names.rightContext, right_context_getter, {}, Attribute::Configurable);
  40. define_native_accessor(realm, vm.names.rightContextAlias, right_context_alias_getter, {}, Attribute::Configurable);
  41. define_native_accessor(realm, vm.names.$1, group_1_getter, {}, Attribute::Configurable);
  42. define_native_accessor(realm, vm.names.$2, group_2_getter, {}, Attribute::Configurable);
  43. define_native_accessor(realm, vm.names.$3, group_3_getter, {}, Attribute::Configurable);
  44. define_native_accessor(realm, vm.names.$4, group_4_getter, {}, Attribute::Configurable);
  45. define_native_accessor(realm, vm.names.$5, group_5_getter, {}, Attribute::Configurable);
  46. define_native_accessor(realm, vm.names.$6, group_6_getter, {}, Attribute::Configurable);
  47. define_native_accessor(realm, vm.names.$7, group_7_getter, {}, Attribute::Configurable);
  48. define_native_accessor(realm, vm.names.$8, group_8_getter, {}, Attribute::Configurable);
  49. define_native_accessor(realm, vm.names.$9, group_9_getter, {}, Attribute::Configurable);
  50. }
  51. // 22.2.4.1 RegExp ( pattern, flags ), https://tc39.es/ecma262/#sec-regexp-pattern-flags
  52. ThrowCompletionOr<Value> RegExpConstructor::call()
  53. {
  54. auto& vm = this->vm();
  55. auto pattern = vm.argument(0);
  56. auto flags = vm.argument(1);
  57. // 1. Let patternIsRegExp be ? IsRegExp(pattern).
  58. bool pattern_is_regexp = TRY(pattern.is_regexp(vm));
  59. // 2. If NewTarget is undefined, then
  60. // a. Let newTarget be the active function object.
  61. auto& new_target = *this;
  62. // b. If patternIsRegExp is true and flags is undefined, then
  63. if (pattern_is_regexp && flags.is_undefined()) {
  64. // i. Let patternConstructor be ? Get(pattern, "constructor").
  65. auto pattern_constructor = TRY(pattern.as_object().get(vm.names.constructor));
  66. // ii. If SameValue(newTarget, patternConstructor) is true, return pattern.
  67. if (same_value(&new_target, pattern_constructor))
  68. return pattern;
  69. }
  70. return TRY(construct(new_target));
  71. }
  72. // 22.2.4.1 RegExp ( pattern, flags ), https://tc39.es/ecma262/#sec-regexp-pattern-flags
  73. ThrowCompletionOr<GC::Ref<Object>> RegExpConstructor::construct(FunctionObject& new_target)
  74. {
  75. auto& vm = this->vm();
  76. auto pattern = vm.argument(0);
  77. auto flags = vm.argument(1);
  78. // 1. Let patternIsRegExp be ? IsRegExp(pattern).
  79. bool pattern_is_regexp = TRY(pattern.is_regexp(vm));
  80. // NOTE: Step 2 is handled in call() above.
  81. // 3. Else, let newTarget be NewTarget.
  82. Value pattern_value;
  83. Value flags_value;
  84. // 4. If pattern is an Object and pattern has a [[RegExpMatcher]] internal slot, then
  85. if (pattern.is_object() && is<RegExpObject>(pattern.as_object())) {
  86. // a. Let P be pattern.[[OriginalSource]].
  87. auto& regexp_pattern = static_cast<RegExpObject&>(pattern.as_object());
  88. pattern_value = PrimitiveString::create(vm, regexp_pattern.pattern());
  89. // b. If flags is undefined, let F be pattern.[[OriginalFlags]].
  90. if (flags.is_undefined())
  91. flags_value = PrimitiveString::create(vm, regexp_pattern.flags());
  92. // c. Else, let F be flags.
  93. else
  94. flags_value = flags;
  95. }
  96. // 5. Else if patternIsRegExp is true, then
  97. else if (pattern_is_regexp) {
  98. // a. Let P be ? Get(pattern, "source").
  99. pattern_value = TRY(pattern.as_object().get(vm.names.source));
  100. // b. If flags is undefined, then
  101. if (flags.is_undefined()) {
  102. // i. Let F be ? Get(pattern, "flags").
  103. flags_value = TRY(pattern.as_object().get(vm.names.flags));
  104. }
  105. // c. Else, let F be flags.
  106. else {
  107. flags_value = flags;
  108. }
  109. }
  110. // 6. Else,
  111. else {
  112. // a. Let P be pattern.
  113. pattern_value = pattern;
  114. // b. Let F be flags.
  115. flags_value = flags;
  116. }
  117. // 7. Let O be ? RegExpAlloc(newTarget).
  118. auto regexp_object = TRY(regexp_alloc(vm, new_target));
  119. // 8. Return ? RegExpInitialize(O, P, F).
  120. return TRY(regexp_object->regexp_initialize(vm, pattern_value, flags_value));
  121. }
  122. // 22.2.5.1.1 EncodeForRegExpEscape ( c ), https://tc39.es/proposal-regex-escaping/#sec-encodeforregexpescape
  123. static String encode_for_regexp_escape(u32 code_point)
  124. {
  125. // https://tc39.es/ecma262/#table-controlescape-code-point-values
  126. // Table 63: ControlEscape Code Point Values
  127. struct ControlEscape {
  128. u32 code_point { 0 };
  129. char control_escape { 0 };
  130. };
  131. static constexpr auto control_escapes = to_array<ControlEscape>({
  132. { 0x09, 't' },
  133. { 0x0A, 'n' },
  134. { 0x0B, 'v' },
  135. { 0x0C, 'f' },
  136. { 0x0D, 'r' },
  137. });
  138. // 1. If c is matched by SyntaxCharacter or c is U+002F (SOLIDUS), then
  139. if (JS::is_syntax_character(code_point) || code_point == '/') {
  140. // a. Return the string-concatenation of 0x005C (REVERSE SOLIDUS) and UTF16EncodeCodePoint(c).
  141. return MUST(String::formatted("\\{}", String::from_code_point(code_point)));
  142. }
  143. // 2. Else if c is the code point listed in some cell of the “Code Point” column of Table 63, then
  144. auto it = find_if(control_escapes.begin(), control_escapes.end(), [&](auto const& escape) {
  145. return escape.code_point == code_point;
  146. });
  147. if (it != control_escapes.end()) {
  148. // a. Return the string-concatenation of 0x005C (REVERSE SOLIDUS) and the string in the “ControlEscape” column
  149. // of the row whose “Code Point” column contains c.
  150. return MUST(String::formatted("\\{}", it->control_escape));
  151. }
  152. // 3. Let otherPunctuators be the string-concatenation of ",-=<>#&!%:;@~'`" and the code unit 0x0022 (QUOTATION MARK).
  153. // 4. Let toEscape be StringToCodePoints(otherPunctuators).
  154. static constexpr Utf8View to_escape { ",-=<>#&!%:;@~'`\""sv };
  155. // 5. If toEscape contains c, c is matched by either WhiteSpace or LineTerminator, or c has the same numeric value
  156. // as a leading surrogate or trailing surrogate, then
  157. if (to_escape.contains(code_point) || JS::is_whitespace(code_point) || JS::is_line_terminator(code_point) || is_unicode_surrogate(code_point)) {
  158. // a. Let cNum be the numeric value of c.
  159. // b. If cNum ≤ 0xFF, then
  160. if (code_point <= 0xFF) {
  161. // i. Let hex be Number::toString(𝔽(cNum), 16).
  162. // ii. Return the string-concatenation of the code unit 0x005C (REVERSE SOLIDUS), "x", and
  163. // StringPad(hex, 2, "0", START).
  164. return MUST(String::formatted("\\x{:02x}", code_point));
  165. }
  166. // c. Let escaped be the empty String.
  167. // d. Let codeUnits be UTF16EncodeCodePoint(c).
  168. // e. For each code unit cu of codeUnits, do
  169. // i. Set escaped to the string-concatenation of escaped and UnicodeEscape(cu).
  170. // f. Return escaped.
  171. return MUST(String::formatted("\\u{:04x}", code_point));
  172. }
  173. // 6. Return UTF16EncodeCodePoint(c).
  174. return String::from_code_point(code_point);
  175. }
  176. // 22.2.5.1 RegExp.escape ( S ), https://tc39.es/proposal-regex-escaping/
  177. JS_DEFINE_NATIVE_FUNCTION(RegExpConstructor::escape)
  178. {
  179. auto string = vm.argument(0);
  180. // 1. If S is not a String, throw a TypeError exception.
  181. if (!string.is_string())
  182. return vm.throw_completion<TypeError>(ErrorType::NotAString, string);
  183. // 2. Let escaped be the empty String.
  184. StringBuilder escaped(string.as_string().utf8_string().byte_count());
  185. // 3. Let cpList be StringToCodePoints(S).
  186. auto code_point_list = string.as_string().utf8_string();
  187. // 4. For each code point c of cpList, do
  188. for (auto code_point : code_point_list.code_points()) {
  189. // a. If escaped is the empty String and c is matched by either DecimalDigit or AsciiLetter, then
  190. if (escaped.is_empty() && is_ascii_alphanumeric(code_point)) {
  191. // i. NOTE: Escaping a leading digit ensures that output corresponds with pattern text which may be used
  192. // after a \0 character escape or a DecimalEscape such as \1 and still match S rather than be interpreted
  193. // as an extension of the preceding escape sequence. Escaping a leading ASCII letter does the same for
  194. // the context after \c.
  195. // ii. Let numericValue be the numeric value of c.
  196. // iii. Let hex be Number::toString(𝔽(numericValue), 16).
  197. // iv. Assert: The length of hex is 2.
  198. // v. Set escaped to the string-concatenation of the code unit 0x005C (REVERSE SOLIDUS), "x", and hex.
  199. escaped.appendff("\\x{:02x}", code_point);
  200. }
  201. // b. Else,
  202. else {
  203. // i. Set escaped to the string-concatenation of escaped and EncodeForRegExpEscape(c).
  204. escaped.append(encode_for_regexp_escape(code_point));
  205. }
  206. }
  207. // 5. Return escaped.
  208. return JS::PrimitiveString::create(vm, MUST(escaped.to_string()));
  209. }
  210. // 22.2.5.2 get RegExp [ @@species ], https://tc39.es/ecma262/#sec-get-regexp-@@species
  211. JS_DEFINE_NATIVE_FUNCTION(RegExpConstructor::symbol_species_getter)
  212. {
  213. // 1. Return the this value.
  214. return vm.this_value();
  215. }
  216. // get RegExp.input, https://github.com/tc39/proposal-regexp-legacy-features#get-regexpinput
  217. JS_DEFINE_NATIVE_FUNCTION(RegExpConstructor::input_getter)
  218. {
  219. auto regexp_constructor = vm.current_realm()->intrinsics().regexp_constructor();
  220. // 1. Return ? GetLegacyRegExpStaticProperty(%RegExp%, this value, [[RegExpInput]]).
  221. auto property_getter = &RegExpLegacyStaticProperties::input;
  222. return TRY(get_legacy_regexp_static_property(vm, regexp_constructor, vm.this_value(), property_getter));
  223. }
  224. // get RegExp.$_, https://github.com/tc39/proposal-regexp-legacy-features#get-regexp_
  225. JS_DEFINE_NATIVE_FUNCTION(RegExpConstructor::input_alias_getter)
  226. {
  227. // Keep the same implementation with `get RegExp.input`
  228. return input_getter(vm);
  229. }
  230. // set RegExp.input, https://github.com/tc39/proposal-regexp-legacy-features#set-regexpinput--val
  231. JS_DEFINE_NATIVE_FUNCTION(RegExpConstructor::input_setter)
  232. {
  233. auto regexp_constructor = vm.current_realm()->intrinsics().regexp_constructor();
  234. // 1. Perform ? SetLegacyRegExpStaticProperty(%RegExp%, this value, [[RegExpInput]], val).
  235. auto property_setter = &RegExpLegacyStaticProperties::set_input;
  236. TRY(set_legacy_regexp_static_property(vm, regexp_constructor, vm.this_value(), property_setter, vm.argument(0)));
  237. return js_undefined();
  238. }
  239. // set RegExp.$_, https://github.com/tc39/proposal-regexp-legacy-features#set-regexp_---val
  240. JS_DEFINE_NATIVE_FUNCTION(RegExpConstructor::input_alias_setter)
  241. {
  242. // Keep the same implementation with `set RegExp.input`
  243. return input_setter(vm);
  244. }
  245. // get RegExp.lastMatch, https://github.com/tc39/proposal-regexp-legacy-features#get-regexplastmatch
  246. JS_DEFINE_NATIVE_FUNCTION(RegExpConstructor::last_match_getter)
  247. {
  248. auto regexp_constructor = vm.current_realm()->intrinsics().regexp_constructor();
  249. // 1. Return ? GetLegacyRegExpStaticProperty(%RegExp%, this value, [[RegExpLastMatch]]).
  250. auto property_getter = &RegExpLegacyStaticProperties::last_match;
  251. return TRY(get_legacy_regexp_static_property(vm, regexp_constructor, vm.this_value(), property_getter));
  252. }
  253. // get RegExp.$&, https://github.com/tc39/proposal-regexp-legacy-features#get-regexp
  254. JS_DEFINE_NATIVE_FUNCTION(RegExpConstructor::last_match_alias_getter)
  255. {
  256. // Keep the same implementation with `get RegExp.lastMatch`
  257. return last_match_getter(vm);
  258. }
  259. // get RegExp.lastParen, https://github.com/tc39/proposal-regexp-legacy-features#get-regexplastparen
  260. JS_DEFINE_NATIVE_FUNCTION(RegExpConstructor::last_paren_getter)
  261. {
  262. auto regexp_constructor = vm.current_realm()->intrinsics().regexp_constructor();
  263. // 1. Return ? GetLegacyRegExpStaticProperty(%RegExp%, this value, [[RegExpLastParen]]).
  264. auto property_getter = &RegExpLegacyStaticProperties::last_paren;
  265. return TRY(get_legacy_regexp_static_property(vm, regexp_constructor, vm.this_value(), property_getter));
  266. }
  267. // get RegExp.$+, https://github.com/tc39/proposal-regexp-legacy-features#get-regexp-1
  268. JS_DEFINE_NATIVE_FUNCTION(RegExpConstructor::last_paren_alias_getter)
  269. {
  270. // Keep the same implementation with `get RegExp.lastParen`
  271. return last_paren_getter(vm);
  272. }
  273. // get RegExp.leftContext, https://github.com/tc39/proposal-regexp-legacy-features#get-regexpleftcontext
  274. JS_DEFINE_NATIVE_FUNCTION(RegExpConstructor::left_context_getter)
  275. {
  276. auto regexp_constructor = vm.current_realm()->intrinsics().regexp_constructor();
  277. // 1. Return ? GetLegacyRegExpStaticProperty(%RegExp%, this value, [[RegExpLeftContext]]).
  278. auto property_getter = &RegExpLegacyStaticProperties::left_context;
  279. return TRY(get_legacy_regexp_static_property(vm, regexp_constructor, vm.this_value(), property_getter));
  280. }
  281. // get RegExp.$`, https://github.com/tc39/proposal-regexp-legacy-features#get-regexp-2
  282. JS_DEFINE_NATIVE_FUNCTION(RegExpConstructor::left_context_alias_getter)
  283. {
  284. // Keep the same implementation with `get RegExp.leftContext`
  285. return left_context_getter(vm);
  286. }
  287. // get RegExp.rightContext, https://github.com/tc39/proposal-regexp-legacy-features#get-regexprightcontext
  288. JS_DEFINE_NATIVE_FUNCTION(RegExpConstructor::right_context_getter)
  289. {
  290. auto regexp_constructor = vm.current_realm()->intrinsics().regexp_constructor();
  291. // 1. Return ? GetLegacyRegExpStaticProperty(%RegExp%, this value, [[RegExpRightContext]]).
  292. auto property_getter = &RegExpLegacyStaticProperties::right_context;
  293. return TRY(get_legacy_regexp_static_property(vm, regexp_constructor, vm.this_value(), property_getter));
  294. }
  295. // get RegExp.$', https://github.com/tc39/proposal-regexp-legacy-features#get-regexp-3
  296. JS_DEFINE_NATIVE_FUNCTION(RegExpConstructor::right_context_alias_getter)
  297. {
  298. // Keep the same implementation with `get RegExp.rightContext`
  299. return right_context_getter(vm);
  300. }
  301. #define DEFINE_REGEXP_GROUP_GETTER(n) \
  302. JS_DEFINE_NATIVE_FUNCTION(RegExpConstructor::group_##n##_getter) \
  303. { \
  304. auto regexp_constructor = vm.current_realm()->intrinsics().regexp_constructor(); \
  305. \
  306. /* 1. Return ? GetLegacyRegExpStaticProperty(%RegExp%, this value, [[RegExpParen##n##]]).*/ \
  307. auto property_getter = &RegExpLegacyStaticProperties::$##n; \
  308. return TRY(get_legacy_regexp_static_property(vm, regexp_constructor, vm.this_value(), property_getter)); \
  309. }
  310. // get RegExp.$1, https://github.com/tc39/proposal-regexp-legacy-features#get-regexp1
  311. DEFINE_REGEXP_GROUP_GETTER(1);
  312. // get RegExp.$2, https://github.com/tc39/proposal-regexp-legacy-features#get-regexp2
  313. DEFINE_REGEXP_GROUP_GETTER(2);
  314. // get RegExp.$3, https://github.com/tc39/proposal-regexp-legacy-features#get-regexp3
  315. DEFINE_REGEXP_GROUP_GETTER(3);
  316. // get RegExp.$4, https://github.com/tc39/proposal-regexp-legacy-features#get-regexp4
  317. DEFINE_REGEXP_GROUP_GETTER(4);
  318. // get RegExp.$5, https://github.com/tc39/proposal-regexp-legacy-features#get-regexp5
  319. DEFINE_REGEXP_GROUP_GETTER(5);
  320. // get RegExp.$6, https://github.com/tc39/proposal-regexp-legacy-features#get-regexp6
  321. DEFINE_REGEXP_GROUP_GETTER(6);
  322. // get RegExp.$7, https://github.com/tc39/proposal-regexp-legacy-features#get-regexp7
  323. DEFINE_REGEXP_GROUP_GETTER(7);
  324. // get RegExp.$8, https://github.com/tc39/proposal-regexp-legacy-features#get-regexp8
  325. DEFINE_REGEXP_GROUP_GETTER(8);
  326. // get RegExp.$9, https://github.com/tc39/proposal-regexp-legacy-features#get-regexp9
  327. DEFINE_REGEXP_GROUP_GETTER(9);
  328. #undef DEFINE_REGEXP_GROUP_GETTER
  329. }