RegExpObject.cpp 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  1. /*
  2. * Copyright (c) 2020, Matthew Olsson <mattco@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Function.h>
  7. #include <LibJS/Heap/Heap.h>
  8. #include <LibJS/Runtime/GlobalObject.h>
  9. #include <LibJS/Runtime/PrimitiveString.h>
  10. #include <LibJS/Runtime/RegExpObject.h>
  11. #include <LibJS/Runtime/StringPrototype.h>
  12. #include <LibJS/Runtime/Value.h>
  13. namespace JS {
  14. static Flags options_from(GlobalObject& global_object, const String& flags)
  15. {
  16. auto& vm = global_object.vm();
  17. bool d = false, g = false, i = false, m = false, s = false, u = false, y = false;
  18. Flags options {
  19. // JS regexps are all 'global' by default as per our definition, but the "global" flag enables "stateful".
  20. // FIXME: Enable 'BrowserExtended' only if in a browser context.
  21. .effective_flags = { (regex::ECMAScriptFlags)regex::AllFlags::Global | (regex::ECMAScriptFlags)regex::AllFlags::SkipTrimEmptyMatches | regex::ECMAScriptFlags::BrowserExtended },
  22. .declared_flags = {},
  23. };
  24. for (auto ch : flags) {
  25. switch (ch) {
  26. case 'd':
  27. if (d)
  28. vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
  29. d = true;
  30. break;
  31. case 'g':
  32. if (g)
  33. vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
  34. g = true;
  35. options.effective_flags |= regex::ECMAScriptFlags::Global;
  36. options.declared_flags |= regex::ECMAScriptFlags::Global;
  37. break;
  38. case 'i':
  39. if (i)
  40. vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
  41. i = true;
  42. options.effective_flags |= regex::ECMAScriptFlags::Insensitive;
  43. options.declared_flags |= regex::ECMAScriptFlags::Insensitive;
  44. break;
  45. case 'm':
  46. if (m)
  47. vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
  48. m = true;
  49. options.effective_flags |= regex::ECMAScriptFlags::Multiline;
  50. options.declared_flags |= regex::ECMAScriptFlags::Multiline;
  51. break;
  52. case 's':
  53. if (s)
  54. vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
  55. s = true;
  56. options.effective_flags |= regex::ECMAScriptFlags::SingleLine;
  57. options.declared_flags |= regex::ECMAScriptFlags::SingleLine;
  58. break;
  59. case 'u':
  60. if (u)
  61. vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
  62. u = true;
  63. options.effective_flags |= regex::ECMAScriptFlags::Unicode;
  64. options.declared_flags |= regex::ECMAScriptFlags::Unicode;
  65. break;
  66. case 'y':
  67. if (y)
  68. vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
  69. y = true;
  70. // Now for the more interesting flag, 'sticky' actually unsets 'global', part of which is the default.
  71. options.effective_flags.reset_flag(regex::ECMAScriptFlags::Global);
  72. // "What's the difference between sticky and global, then", that's simple.
  73. // all the other flags imply 'global', and the "global" flag implies 'stateful';
  74. // however, the "sticky" flag does *not* imply 'global', only 'stateful'.
  75. options.effective_flags |= (regex::ECMAScriptFlags)regex::AllFlags::Internal_Stateful;
  76. options.effective_flags |= regex::ECMAScriptFlags::Sticky;
  77. options.declared_flags |= regex::ECMAScriptFlags::Sticky;
  78. break;
  79. default:
  80. vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectBadFlag, ch);
  81. return options;
  82. }
  83. }
  84. return options;
  85. }
  86. RegExpObject* RegExpObject::create(GlobalObject& global_object, String original_pattern, String parsed_pattern, String flags)
  87. {
  88. return global_object.heap().allocate<RegExpObject>(global_object, move(original_pattern), move(parsed_pattern), move(flags), *global_object.regexp_prototype());
  89. }
  90. RegExpObject::RegExpObject(String original_pattern, String parsed_pattern, String flags, Object& prototype)
  91. : Object(prototype)
  92. , m_original_pattern(move(original_pattern))
  93. , m_parsed_pattern(move(parsed_pattern))
  94. , m_flags(move(flags))
  95. , m_active_flags(options_from(global_object(), m_flags))
  96. , m_regex(m_parsed_pattern, m_active_flags.effective_flags)
  97. {
  98. if (m_regex.parser_result.error != regex::Error::NoError) {
  99. vm().throw_exception<SyntaxError>(global_object(), ErrorType::RegExpCompileError, m_regex.error_string());
  100. }
  101. }
  102. RegExpObject::~RegExpObject()
  103. {
  104. }
  105. void RegExpObject::initialize(GlobalObject& global_object)
  106. {
  107. auto& vm = this->vm();
  108. Object::initialize(global_object);
  109. define_direct_property(vm.names.lastIndex, {}, Attribute::Writable);
  110. }
  111. // 22.2.3.2.4 RegExpCreate ( P, F ), https://tc39.es/ecma262/#sec-regexpcreate
  112. RegExpObject* regexp_create(GlobalObject& global_object, Value pattern, Value flags)
  113. {
  114. auto& vm = global_object.vm();
  115. String f;
  116. if (flags.is_undefined()) {
  117. f = String::empty();
  118. } else {
  119. f = flags.to_string(global_object);
  120. if (vm.exception())
  121. return {};
  122. }
  123. String original_pattern;
  124. String parsed_pattern;
  125. if (pattern.is_undefined()) {
  126. original_pattern = String::empty();
  127. parsed_pattern = String::empty();
  128. } else {
  129. auto utf16_pattern = pattern.to_utf16_string(global_object);
  130. if (vm.exception())
  131. return {};
  132. Utf16View utf16_pattern_view { utf16_pattern };
  133. bool unicode = f.find('u').has_value();
  134. StringBuilder builder;
  135. // If the Unicode flag is set, append each code point to the pattern. Otherwise, append each
  136. // code unit. But unlike the spec, multi-byte code units must be escaped for LibRegex to parse.
  137. for (size_t i = 0; i < utf16_pattern_view.length_in_code_units();) {
  138. if (unicode) {
  139. auto code_point = code_point_at(utf16_pattern_view, i);
  140. builder.append_code_point(code_point.code_point);
  141. i += code_point.code_unit_count;
  142. continue;
  143. }
  144. u16 code_unit = utf16_pattern_view.code_unit_at(i);
  145. ++i;
  146. if (code_unit > 0x7f)
  147. builder.appendff("\\u{:04x}", code_unit);
  148. else
  149. builder.append_code_point(code_unit);
  150. }
  151. original_pattern = utf16_pattern_view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes);
  152. parsed_pattern = builder.build();
  153. }
  154. auto* object = RegExpObject::create(global_object, move(original_pattern), move(parsed_pattern), move(f));
  155. object->set(vm.names.lastIndex, Value(0), Object::ShouldThrowExceptions::Yes);
  156. if (vm.exception())
  157. return {};
  158. return object;
  159. }
  160. }