RegExpObject.cpp 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. /*
  2. * Copyright (c) 2020, Matthew Olsson <mattco@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Function.h>
  7. #include <LibJS/Runtime/GlobalObject.h>
  8. #include <LibJS/Runtime/PrimitiveString.h>
  9. #include <LibJS/Runtime/RegExpObject.h>
  10. #include <LibJS/Runtime/StringPrototype.h>
  11. #include <LibJS/Runtime/Value.h>
  12. namespace JS {
  13. Result<regex::RegexOptions<ECMAScriptFlags>, String> regex_flags_from_string(StringView flags)
  14. {
  15. bool d = false, g = false, i = false, m = false, s = false, u = false, y = false;
  16. auto options = RegExpObject::default_flags;
  17. for (auto ch : flags) {
  18. switch (ch) {
  19. case 'd':
  20. if (d)
  21. return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch);
  22. d = true;
  23. break;
  24. case 'g':
  25. if (g)
  26. return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch);
  27. g = true;
  28. options |= regex::ECMAScriptFlags::Global;
  29. break;
  30. case 'i':
  31. if (i)
  32. return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch);
  33. i = true;
  34. options |= regex::ECMAScriptFlags::Insensitive;
  35. break;
  36. case 'm':
  37. if (m)
  38. return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch);
  39. m = true;
  40. options |= regex::ECMAScriptFlags::Multiline;
  41. break;
  42. case 's':
  43. if (s)
  44. return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch);
  45. s = true;
  46. options |= regex::ECMAScriptFlags::SingleLine;
  47. break;
  48. case 'u':
  49. if (u)
  50. return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch);
  51. u = true;
  52. options |= regex::ECMAScriptFlags::Unicode;
  53. break;
  54. case 'y':
  55. if (y)
  56. return String::formatted(ErrorType::RegExpObjectRepeatedFlag.message(), ch);
  57. y = true;
  58. // Now for the more interesting flag, 'sticky' actually unsets 'global', part of which is the default.
  59. options.reset_flag(regex::ECMAScriptFlags::Global);
  60. // "What's the difference between sticky and global, then", that's simple.
  61. // all the other flags imply 'global', and the "global" flag implies 'stateful';
  62. // however, the "sticky" flag does *not* imply 'global', only 'stateful'.
  63. options |= (regex::ECMAScriptFlags)regex::AllFlags::Internal_Stateful;
  64. options |= regex::ECMAScriptFlags::Sticky;
  65. break;
  66. default:
  67. return String::formatted(ErrorType::RegExpObjectBadFlag.message(), ch);
  68. }
  69. }
  70. return options;
  71. }
  72. String parse_regex_pattern(StringView pattern, bool unicode)
  73. {
  74. auto utf16_pattern = AK::utf8_to_utf16(pattern);
  75. Utf16View utf16_pattern_view { utf16_pattern };
  76. StringBuilder builder;
  77. // If the Unicode flag is set, append each code point to the pattern. Otherwise, append each
  78. // code unit. But unlike the spec, multi-byte code units must be escaped for LibRegex to parse.
  79. for (size_t i = 0; i < utf16_pattern_view.length_in_code_units();) {
  80. if (unicode) {
  81. auto code_point = code_point_at(utf16_pattern_view, i);
  82. builder.append_code_point(code_point.code_point);
  83. i += code_point.code_unit_count;
  84. continue;
  85. }
  86. u16 code_unit = utf16_pattern_view.code_unit_at(i);
  87. ++i;
  88. if (code_unit > 0x7f)
  89. builder.appendff("\\u{:04x}", code_unit);
  90. else
  91. builder.append_code_point(code_unit);
  92. }
  93. return builder.build();
  94. }
  95. RegExpObject* RegExpObject::create(GlobalObject& global_object, Regex<ECMA262> regex, String pattern, String flags)
  96. {
  97. return global_object.heap().allocate<RegExpObject>(global_object, move(regex), move(pattern), move(flags), *global_object.regexp_prototype());
  98. }
  99. RegExpObject::RegExpObject(Regex<ECMA262> regex, String pattern, String flags, Object& prototype)
  100. : Object(prototype)
  101. , m_pattern(move(pattern))
  102. , m_flags(move(flags))
  103. , m_regex(move(regex))
  104. {
  105. VERIFY(m_regex.parser_result.error == regex::Error::NoError);
  106. }
  107. RegExpObject::~RegExpObject()
  108. {
  109. }
  110. void RegExpObject::initialize(GlobalObject& global_object)
  111. {
  112. auto& vm = this->vm();
  113. Object::initialize(global_object);
  114. define_direct_property(vm.names.lastIndex, Value(0), Attribute::Writable);
  115. }
  116. // 22.2.3.2.4 RegExpCreate ( P, F ), https://tc39.es/ecma262/#sec-regexpcreate
  117. RegExpObject* regexp_create(GlobalObject& global_object, Value pattern, Value flags)
  118. {
  119. auto& vm = global_object.vm();
  120. String f;
  121. if (flags.is_undefined()) {
  122. f = String::empty();
  123. } else {
  124. f = flags.to_string(global_object);
  125. if (vm.exception())
  126. return {};
  127. }
  128. String original_pattern;
  129. String parsed_pattern;
  130. if (pattern.is_undefined()) {
  131. original_pattern = String::empty();
  132. parsed_pattern = String::empty();
  133. } else {
  134. original_pattern = pattern.to_string(global_object);
  135. if (vm.exception())
  136. return {};
  137. bool unicode = f.find('u').has_value();
  138. parsed_pattern = parse_regex_pattern(original_pattern, unicode);
  139. }
  140. auto parsed_flags_or_error = regex_flags_from_string(f);
  141. if (parsed_flags_or_error.is_error()) {
  142. vm.throw_exception(global_object, SyntaxError::create(global_object, parsed_flags_or_error.release_error()));
  143. return {};
  144. }
  145. Regex<ECMA262> regex(move(parsed_pattern), parsed_flags_or_error.release_value());
  146. if (regex.parser_result.error != regex::Error::NoError) {
  147. vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpCompileError, regex.error_string());
  148. return {};
  149. }
  150. auto* object = RegExpObject::create(global_object, move(regex), move(original_pattern), move(f));
  151. object->set(vm.names.lastIndex, Value(0), Object::ShouldThrowExceptions::Yes);
  152. if (vm.exception())
  153. return {};
  154. return object;
  155. }
  156. }