RegExpObject.cpp 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. /*
  2. * Copyright (c) 2020, Matthew Olsson <mattco@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Function.h>
  7. #include <LibJS/Runtime/GlobalObject.h>
  8. #include <LibJS/Runtime/PrimitiveString.h>
  9. #include <LibJS/Runtime/RegExpObject.h>
  10. #include <LibJS/Runtime/StringPrototype.h>
  11. #include <LibJS/Runtime/Value.h>
  12. #include <LibJS/Token.h>
  13. namespace JS {
  14. Result<regex::RegexOptions<ECMAScriptFlags>, String> regex_flags_from_string(StringView flags)
  15. {
  16. bool d = false, g = false, i = false, m = false, s = false, u = false, y = false, v = false;
  17. auto options = RegExpObject::default_flags;
  18. for (auto ch : flags) {
  19. switch (ch) {
  20. case 'd':
  21. if (d)
  22. return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch);
  23. d = true;
  24. break;
  25. case 'g':
  26. if (g)
  27. return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch);
  28. g = true;
  29. options |= regex::ECMAScriptFlags::Global;
  30. break;
  31. case 'i':
  32. if (i)
  33. return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch);
  34. i = true;
  35. options |= regex::ECMAScriptFlags::Insensitive;
  36. break;
  37. case 'm':
  38. if (m)
  39. return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch);
  40. m = true;
  41. options |= regex::ECMAScriptFlags::Multiline;
  42. break;
  43. case 's':
  44. if (s)
  45. return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch);
  46. s = true;
  47. options |= regex::ECMAScriptFlags::SingleLine;
  48. break;
  49. case 'u':
  50. if (u)
  51. return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch);
  52. u = true;
  53. options |= regex::ECMAScriptFlags::Unicode;
  54. break;
  55. case 'y':
  56. if (y)
  57. return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch);
  58. y = true;
  59. // Now for the more interesting flag, 'sticky' actually unsets 'global', part of which is the default.
  60. options.reset_flag(regex::ECMAScriptFlags::Global);
  61. // "What's the difference between sticky and global, then", that's simple.
  62. // all the other flags imply 'global', and the "global" flag implies 'stateful';
  63. // however, the "sticky" flag does *not* imply 'global', only 'stateful'.
  64. options |= (regex::ECMAScriptFlags)regex::AllFlags::Internal_Stateful;
  65. options |= regex::ECMAScriptFlags::Sticky;
  66. break;
  67. case 'v':
  68. if (v)
  69. return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch);
  70. v = true;
  71. options |= regex::ECMAScriptFlags::UnicodeSets;
  72. break;
  73. default:
  74. return String::formatted(ErrorType::RegExpObjectBadFlag.message(), ch);
  75. }
  76. }
  77. return options;
  78. }
  79. ErrorOr<String, ParseRegexPatternError> parse_regex_pattern(StringView pattern, bool unicode, bool unicode_sets)
  80. {
  81. if (unicode && unicode_sets)
  82. return ParseRegexPatternError { String::formatted(ErrorType::RegExpObjectIncompatibleFlags.message(), 'u', 'v') };
  83. auto utf16_pattern = AK::utf8_to_utf16(pattern);
  84. Utf16View utf16_pattern_view { utf16_pattern };
  85. StringBuilder builder;
  86. // If the Unicode flag is set, append each code point to the pattern. Otherwise, append each
  87. // code unit. But unlike the spec, multi-byte code units must be escaped for LibRegex to parse.
  88. for (size_t i = 0; i < utf16_pattern_view.length_in_code_units();) {
  89. if (unicode || unicode_sets) {
  90. auto code_point = code_point_at(utf16_pattern_view, i);
  91. builder.append_code_point(code_point.code_point);
  92. i += code_point.code_unit_count;
  93. continue;
  94. }
  95. u16 code_unit = utf16_pattern_view.code_unit_at(i);
  96. ++i;
  97. if (code_unit > 0x7f)
  98. builder.appendff("\\u{:04x}", code_unit);
  99. else
  100. builder.append_code_point(code_unit);
  101. }
  102. return builder.build();
  103. }
  104. ThrowCompletionOr<String> parse_regex_pattern(StringView pattern, VM& vm, GlobalObject& global_object, bool unicode, bool unicode_sets)
  105. {
  106. auto result = parse_regex_pattern(pattern, unicode, unicode_sets);
  107. if (result.is_error())
  108. return vm.throw_completion<JS::SyntaxError>(global_object, result.release_error().error);
  109. return result.release_value();
  110. }
  111. RegExpObject* RegExpObject::create(GlobalObject& global_object)
  112. {
  113. return global_object.heap().allocate<RegExpObject>(global_object, *global_object.regexp_prototype());
  114. }
  115. RegExpObject* RegExpObject::create(GlobalObject& global_object, Regex<ECMA262> regex, String pattern, String flags)
  116. {
  117. return global_object.heap().allocate<RegExpObject>(global_object, move(regex), move(pattern), move(flags), *global_object.regexp_prototype());
  118. }
  119. RegExpObject::RegExpObject(Object& prototype)
  120. : Object(prototype)
  121. {
  122. }
  123. RegExpObject::RegExpObject(Regex<ECMA262> regex, String pattern, String flags, Object& prototype)
  124. : Object(prototype)
  125. , m_pattern(move(pattern))
  126. , m_flags(move(flags))
  127. , m_regex(move(regex))
  128. {
  129. VERIFY(m_regex->parser_result.error == regex::Error::NoError);
  130. }
  131. void RegExpObject::initialize(GlobalObject& global_object)
  132. {
  133. auto& vm = this->vm();
  134. Object::initialize(global_object);
  135. define_direct_property(vm.names.lastIndex, Value(0), Attribute::Writable);
  136. }
  137. // 22.2.3.2.2 RegExpInitialize ( obj, pattern, flags ), https://tc39.es/ecma262/#sec-regexpinitialize
  138. ThrowCompletionOr<RegExpObject*> RegExpObject::regexp_initialize(GlobalObject& global_object, Value pattern, Value flags)
  139. {
  140. auto& vm = global_object.vm();
  141. String f;
  142. if (flags.is_undefined()) {
  143. f = String::empty();
  144. } else {
  145. f = TRY(flags.to_string(global_object));
  146. }
  147. String original_pattern;
  148. String parsed_pattern;
  149. if (pattern.is_undefined()) {
  150. original_pattern = String::empty();
  151. parsed_pattern = String::empty();
  152. } else {
  153. original_pattern = TRY(pattern.to_string(global_object));
  154. bool unicode = f.find('u').has_value();
  155. bool unicode_sets = f.find('v').has_value();
  156. parsed_pattern = TRY(parse_regex_pattern(original_pattern, vm, global_object, unicode, unicode_sets));
  157. }
  158. auto parsed_flags_or_error = regex_flags_from_string(f);
  159. if (parsed_flags_or_error.is_error())
  160. return vm.throw_completion<SyntaxError>(global_object, parsed_flags_or_error.release_error());
  161. Regex<ECMA262> regex(move(parsed_pattern), parsed_flags_or_error.release_value());
  162. if (regex.parser_result.error != regex::Error::NoError)
  163. return vm.throw_completion<SyntaxError>(global_object, ErrorType::RegExpCompileError, regex.error_string());
  164. m_pattern = move(original_pattern);
  165. m_flags = move(f);
  166. m_regex = move(regex);
  167. TRY(set(vm.names.lastIndex, Value(0), Object::ShouldThrowExceptions::Yes));
  168. return this;
  169. }
  170. // 22.2.3.2.5 EscapeRegExpPattern ( P, F ), https://tc39.es/ecma262/#sec-escaperegexppattern
  171. String RegExpObject::escape_regexp_pattern() const
  172. {
  173. if (m_pattern.is_empty())
  174. return "(?:)";
  175. // FIXME: Check the 'u' and 'v' flags and escape accordingly
  176. return m_pattern.replace("\n"sv, "\\n"sv, ReplaceMode::All).replace("\r"sv, "\\r"sv, ReplaceMode::All).replace(LINE_SEPARATOR_STRING, "\\u2028"sv, ReplaceMode::All).replace(PARAGRAPH_SEPARATOR_STRING, "\\u2029"sv, ReplaceMode::All).replace("/"sv, "\\/"sv, ReplaceMode::All);
  177. }
  178. // 22.2.3.2.4 RegExpCreate ( P, F ), https://tc39.es/ecma262/#sec-regexpcreate
  179. ThrowCompletionOr<RegExpObject*> regexp_create(GlobalObject& global_object, Value pattern, Value flags)
  180. {
  181. auto* regexp_object = RegExpObject::create(global_object);
  182. return TRY(regexp_object->regexp_initialize(global_object, pattern, flags));
  183. }
  184. }