mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-25 00:50:22 +00:00
LibJS: Hook up Regex<ECMA262> to RegExpObject and implement `test()'
This makes RegExpObject compile and store a Regex<ECMA262>, adds all flag-related properties, and implements `RegExpPrototype.test()` (complete with `lastIndex` support) :^) It should be noted that this only implements `test()` using the builtin `exec()`.
This commit is contained in:
parent
75081b2bdd
commit
8ba273a2f3
Notes:
sideshowbarker
2024-07-19 01:14:31 +09:00
Author: https://github.com/alimpfard Commit: https://github.com/SerenityOS/serenity/commit/8ba273a2f38 Pull-request: https://github.com/SerenityOS/serenity/pull/4103 Reviewed-by: https://github.com/linusg ✅
13 changed files with 396 additions and 12 deletions
|
@ -97,6 +97,7 @@ enum __RegexAllFlags {
|
|||
__Regex_Sticky = __Regex_Global << 11, // Force the pattern to only match consecutive matches from where the previous match ended.
|
||||
__Regex_Multiline = __Regex_Global << 12, // Handle newline characters. Match each line, one by one.
|
||||
__Regex_SkipTrimEmptyMatches = __Regex_Global << 13, // Do not remove empty capture group results.
|
||||
__Regex_Internal_Stateful = __Regex_Global << 14, // Internal flag; enables stateful matches.
|
||||
__Regex_Last = __Regex_SkipTrimEmptyMatches
|
||||
};
|
||||
|
||||
|
|
|
@ -79,4 +79,4 @@ set(SOURCES
|
|||
)
|
||||
|
||||
serenity_lib(LibJS js)
|
||||
target_link_libraries(LibJS LibM LibCore LibCrypto)
|
||||
target_link_libraries(LibJS LibM LibCore LibCrypto LibRegex)
|
||||
|
|
|
@ -86,6 +86,7 @@ namespace JS {
|
|||
P(deleteProperty) \
|
||||
P(description) \
|
||||
P(done) \
|
||||
P(dotAll) \
|
||||
P(entries) \
|
||||
P(enumerable) \
|
||||
P(error) \
|
||||
|
@ -96,6 +97,7 @@ namespace JS {
|
|||
P(filter) \
|
||||
P(find) \
|
||||
P(findIndex) \
|
||||
P(flags) \
|
||||
P(floor) \
|
||||
P(forEach) \
|
||||
P(from) \
|
||||
|
@ -122,9 +124,11 @@ namespace JS {
|
|||
P(getUTCMinutes) \
|
||||
P(getUTCMonth) \
|
||||
P(getUTCSeconds) \
|
||||
P(global) \
|
||||
P(globalThis) \
|
||||
P(has) \
|
||||
P(hasOwnProperty) \
|
||||
P(ignoreCase) \
|
||||
P(includes) \
|
||||
P(indexOf) \
|
||||
P(info) \
|
||||
|
@ -138,6 +142,7 @@ namespace JS {
|
|||
P(join) \
|
||||
P(keyFor) \
|
||||
P(keys) \
|
||||
P(lastIndex) \
|
||||
P(lastIndexOf) \
|
||||
P(length) \
|
||||
P(log) \
|
||||
|
@ -146,6 +151,7 @@ namespace JS {
|
|||
P(max) \
|
||||
P(message) \
|
||||
P(min) \
|
||||
P(multiline) \
|
||||
P(name) \
|
||||
P(next) \
|
||||
P(now) \
|
||||
|
@ -174,12 +180,15 @@ namespace JS {
|
|||
P(sin) \
|
||||
P(slice) \
|
||||
P(some) \
|
||||
P(source) \
|
||||
P(splice) \
|
||||
P(sqrt) \
|
||||
P(startsWith) \
|
||||
P(stringify) \
|
||||
P(sticky) \
|
||||
P(substring) \
|
||||
P(tan) \
|
||||
P(test) \
|
||||
P(toDateString) \
|
||||
P(toISOString) \
|
||||
P(toJSON) \
|
||||
|
@ -196,6 +205,7 @@ namespace JS {
|
|||
P(trimStart) \
|
||||
P(trunc) \
|
||||
P(undefined) \
|
||||
P(unicode) \
|
||||
P(unshift) \
|
||||
P(value) \
|
||||
P(valueOf) \
|
||||
|
|
|
@ -146,6 +146,9 @@
|
|||
M(ReflectBadArgumentsList, "Arguments list must be an object") \
|
||||
M(ReflectBadNewTarget, "Optional third argument of Reflect.construct() must be a constructor") \
|
||||
M(ReflectBadDescriptorArgument, "Descriptor argument is not an object") \
|
||||
M(RegExpCompileError, "RegExp compile error: '{}'") \
|
||||
M(RegExpObjectBadFlag, "Invalid RegExp flag '{}'") \
|
||||
M(RegExpObjectRepeatedFlag, "Repeated RegExp flag '{}'") \
|
||||
M(StringRawCannotConvert, "Cannot convert property 'raw' to object from {}") \
|
||||
M(StringRepeatCountMustBe, "repeat count must be a {} number") \
|
||||
M(ThisHasNotBeenInitialized, "|this| has not been initialized") \
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <AK/Function.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <LibJS/Heap/Heap.h>
|
||||
#include <LibJS/Runtime/GlobalObject.h>
|
||||
|
@ -33,6 +34,73 @@
|
|||
|
||||
namespace JS {
|
||||
|
||||
// Translates a JavaScript RegExp flag string (e.g. "gi") into a Flags pair:
//   - effective_flags: what the regex engine is compiled with (starts out as AllFlags::Global),
//   - declared_flags:  exactly the flags the script wrote (starts out empty).
// A flag seen twice raises SyntaxError(RegExpObjectRepeatedFlag); an unknown
// character raises SyntaxError(RegExpObjectBadFlag) and returns immediately.
// NOTE(review): the repeated-flag branches throw but do not return, unlike the
// default branch, so parsing continues after the exception has been raised — confirm this is intended.
// NOTE(review): the result looks order-dependent for 'g' combined with 'y': 'y' clears
// ECMAScriptFlags::Global from effective_flags, so "gy" and "yg" end with different effective flags — confirm.
static Flags options_from(const String& flags, VM& vm, GlobalObject& global_object)
|
||||
{
|
||||
// One "already seen" marker per supported flag character.
bool g = false, i = false, m = false, s = false, u = false, y = false;
|
||||
Flags options {
|
||||
{ (regex::ECMAScriptFlags)regex::AllFlags::Global }, // JS regexps are all 'global' by default as per our definition, but the "global" flag enables "stateful".
|
||||
{},
|
||||
};
|
||||
|
||||
for (auto ch : flags) {
|
||||
switch (ch) {
|
||||
case 'g':
|
||||
if (g)
|
||||
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
|
||||
g = true;
|
||||
options.effective_flags |= regex::ECMAScriptFlags::Global;
|
||||
options.declared_flags |= regex::ECMAScriptFlags::Global;
|
||||
break;
|
||||
case 'i':
|
||||
if (i)
|
||||
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
|
||||
i = true;
|
||||
options.effective_flags |= regex::ECMAScriptFlags::Insensitive;
|
||||
options.declared_flags |= regex::ECMAScriptFlags::Insensitive;
|
||||
break;
|
||||
case 'm':
|
||||
if (m)
|
||||
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
|
||||
m = true;
|
||||
options.effective_flags |= regex::ECMAScriptFlags::Multiline;
|
||||
options.declared_flags |= regex::ECMAScriptFlags::Multiline;
|
||||
break;
|
||||
case 's':
|
||||
if (s)
|
||||
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
|
||||
s = true;
|
||||
options.effective_flags |= regex::ECMAScriptFlags::SingleLine;
|
||||
options.declared_flags |= regex::ECMAScriptFlags::SingleLine;
|
||||
break;
|
||||
case 'u':
|
||||
if (u)
|
||||
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
|
||||
u = true;
|
||||
options.effective_flags |= regex::ECMAScriptFlags::Unicode;
|
||||
options.declared_flags |= regex::ECMAScriptFlags::Unicode;
|
||||
break;
|
||||
case 'y':
|
||||
if (y)
|
||||
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
|
||||
y = true;
|
||||
// Now for the more interesting flag, 'sticky' actually unsets 'global', part of which is the default.
|
||||
options.effective_flags.reset_flag(regex::ECMAScriptFlags::Global);
|
||||
// "What's the difference between sticky and global, then", that's simple.
|
||||
// all the other flags imply 'global', and the "global" flag implies 'stateful';
|
||||
// however, the "sticky" flag does *not* imply 'global', only 'stateful'.
|
||||
options.effective_flags |= (regex::ECMAScriptFlags)regex::AllFlags::Internal_Stateful;
|
||||
options.effective_flags |= regex::ECMAScriptFlags::Sticky;
|
||||
options.declared_flags |= regex::ECMAScriptFlags::Sticky;
|
||||
break;
|
||||
default:
|
||||
// Unknown flag character: raise the error and stop parsing right away.
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectBadFlag, ch);
|
||||
return options;
|
||||
}
|
||||
}
|
||||
|
||||
return options;
|
||||
}
|
||||
|
||||
RegExpObject* RegExpObject::create(GlobalObject& global_object, String pattern, String flags)
|
||||
{
|
||||
return global_object.heap().allocate<RegExpObject>(global_object, pattern, flags, *global_object.regexp_prototype());
|
||||
|
@ -42,11 +110,61 @@ RegExpObject::RegExpObject(String pattern, String flags, Object& prototype)
|
|||
: Object(prototype)
|
||||
, m_pattern(pattern)
|
||||
, m_flags(flags)
|
||||
, m_active_flags(options_from(m_flags, this->vm(), this->global_object()))
|
||||
, m_regex(pattern, m_active_flags.effective_flags)
|
||||
{
|
||||
if (m_regex.parser_result.error != regex::Error::NoError) {
|
||||
vm().throw_exception<SyntaxError>(global_object(), ErrorType::RegExpCompileError, m_regex.error_string());
|
||||
}
|
||||
}
|
||||
|
||||
// Runs the base Object initialization and then installs the 'lastIndex'
// native property on this object, backed by the last_index getter and the
// set_last_index setter. The only attribute given is Attribute::Writable.
void RegExpObject::initialize(GlobalObject& global_object)
|
||||
{
|
||||
auto& vm = this->vm();
|
||||
Object::initialize(global_object);
|
||||
|
||||
define_native_property(vm.names.lastIndex, last_index, set_last_index, Attribute::Writable);
|
||||
}
|
||||
|
||||
// Out-of-line destructor with an empty body.
RegExpObject::~RegExpObject()
|
||||
{
|
||||
}
|
||||
|
||||
// Resolves the current |this| value to a RegExpObject.
// Returns nullptr when |this| cannot be converted to an object, or when the
// resulting object is not a RegExp; in the latter case a
// TypeError(ErrorType::NotA, "RegExp") is thrown first.
static RegExpObject* regexp_object_from(VM& vm, GlobalObject& global_object)
|
||||
{
|
||||
auto* this_object = vm.this_value(global_object).to_object(global_object);
|
||||
// No exception is thrown here; presumably to_object() already raised one — TODO confirm.
if (!this_object)
|
||||
return nullptr;
|
||||
if (!this_object->is_regexp_object()) {
|
||||
vm.throw_exception<TypeError>(global_object, ErrorType::NotA, "RegExp");
|
||||
return nullptr;
|
||||
}
|
||||
// The is_regexp_object() check above is what justifies the downcast.
return static_cast<RegExpObject*>(this_object);
|
||||
}
|
||||
|
||||
// Getter for 'lastIndex': reports the regex's current start_offset as a number.
// Returns an empty value when |this| is not a RegExp object.
JS_DEFINE_NATIVE_GETTER(RegExpObject::last_index)
|
||||
{
|
||||
auto regexp_object = regexp_object_from(vm, global_object);
|
||||
if (!regexp_object)
|
||||
return {};
|
||||
|
||||
// start_offset is a size_t on the Regex; it is narrowed to unsigned for the JS Value.
return Value((unsigned)regexp_object->regex().start_offset);
|
||||
}
|
||||
|
||||
// Setter for 'lastIndex': converts the assigned value to a 32-bit integer,
// clamps negative results to 0 and stores the result as the regex's start_offset.
// Does nothing when |this| is not a RegExp object or when the conversion throws.
// NOTE(review): the conversion uses to_i32(), while ECMAScript reads lastIndex
// with ToLength — confirm the difference for very large values is acceptable.
JS_DEFINE_NATIVE_SETTER(RegExpObject::set_last_index)
|
||||
{
|
||||
auto regexp_object = regexp_object_from(vm, global_object);
|
||||
if (!regexp_object)
|
||||
return;
|
||||
|
||||
auto index = value.to_i32(global_object);
|
||||
if (vm.exception())
|
||||
return;
|
||||
|
||||
if (index < 0)
|
||||
index = 0;
|
||||
|
||||
// regex() hands out a const reference; the write works because Regex::start_offset is declared mutable.
regexp_object->regex().start_offset = index;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -28,6 +28,12 @@
|
|||
|
||||
#include <LibJS/AST.h>
|
||||
#include <LibJS/Runtime/Object.h>
|
||||
#include <LibRegex/Regex.h>
|
||||
|
||||
// Pair of option sets kept by a RegExpObject.
// Declared at global scope, before `namespace JS` is opened.
struct Flags {
|
||||
// Options the Regex<ECMA262> member is constructed with.
regex::RegexOptions<ECMAScriptFlags> effective_flags;
|
||||
// Flags as declared by the script; exposed through RegExpObject::declared_options().
regex::RegexOptions<ECMAScriptFlags> declared_flags;
|
||||
};
|
||||
|
||||
namespace JS {
|
||||
|
||||
|
@ -38,16 +44,25 @@ public:
|
|||
static RegExpObject* create(GlobalObject&, String pattern, String flags);
|
||||
|
||||
RegExpObject(String pattern, String flags, Object& prototype);
|
||||
virtual void initialize(GlobalObject&) override;
|
||||
virtual ~RegExpObject() override;
|
||||
|
||||
const String& pattern() const { return m_pattern; }
|
||||
const String& flags() const { return m_flags; }
|
||||
const regex::RegexOptions<ECMAScriptFlags>& declared_options() { return m_active_flags.declared_flags; }
|
||||
const Regex<ECMA262>& regex() { return m_regex; }
|
||||
const Regex<ECMA262>& regex() const { return m_regex; }
|
||||
|
||||
private:
|
||||
virtual bool is_regexp_object() const override { return true; }
|
||||
|
||||
JS_DECLARE_NATIVE_GETTER(last_index);
|
||||
JS_DECLARE_NATIVE_SETTER(set_last_index);
|
||||
|
||||
String m_pattern;
|
||||
String m_flags;
|
||||
Flags m_active_flags;
|
||||
Regex<ECMA262> m_regex;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -43,6 +43,17 @@ void RegExpPrototype::initialize(GlobalObject& global_object)
|
|||
Object::initialize(global_object);
|
||||
u8 attr = Attribute::Writable | Attribute::Configurable;
|
||||
define_native_function(vm.names.toString, to_string, 0, attr);
|
||||
define_native_function(vm.names.test, test, 1, attr);
|
||||
|
||||
u8 readable_attr = Attribute::Configurable;
|
||||
define_native_property(vm.names.dotAll, dot_all, nullptr, readable_attr);
|
||||
define_native_property(vm.names.flags, flags, nullptr, readable_attr);
|
||||
define_native_property(vm.names.global, global, nullptr, readable_attr);
|
||||
define_native_property(vm.names.ignoreCase, ignore_case, nullptr, readable_attr);
|
||||
define_native_property(vm.names.multiline, multiline, nullptr, readable_attr);
|
||||
define_native_property(vm.names.source, source, nullptr, readable_attr);
|
||||
define_native_property(vm.names.sticky, sticky, nullptr, readable_attr);
|
||||
define_native_property(vm.names.unicode, unicode, nullptr, readable_attr);
|
||||
}
|
||||
|
||||
RegExpPrototype::~RegExpPrototype()
|
||||
|
@ -61,6 +72,124 @@ static RegExpObject* regexp_object_from(VM& vm, GlobalObject& global_object)
|
|||
return static_cast<RegExpObject*>(this_object);
|
||||
}
|
||||
|
||||
// Getter for 'dotAll': true when the declared flags contain SingleLine (the 's' flag).
// Returns an empty value when |this| is not a RegExp object.
JS_DEFINE_NATIVE_GETTER(RegExpPrototype::dot_all)
|
||||
{
|
||||
auto regexp_object = regexp_object_from(vm, global_object);
|
||||
if (!regexp_object)
|
||||
return {};
|
||||
|
||||
return Value(regexp_object->declared_options().has_flag_set(ECMAScriptFlags::SingleLine));
|
||||
}
|
||||
|
||||
// Getter for 'flags': rebuilds the flag string from the declared flags.
// Characters are always emitted in the fixed order g, i, m, s, u, y,
// regardless of the order in which they were originally written.
// Returns an empty value when |this| is not a RegExp object.
JS_DEFINE_NATIVE_GETTER(RegExpPrototype::flags)
|
||||
{
|
||||
auto regexp_object = regexp_object_from(vm, global_object);
|
||||
if (!regexp_object)
|
||||
return {};
|
||||
|
||||
// Work on a copy of the declared (not the effective) options.
auto flags = regexp_object->declared_options();
|
||||
StringBuilder builder(8);
|
||||
|
||||
if (flags.has_flag_set(ECMAScriptFlags::Global))
|
||||
builder.append('g');
|
||||
if (flags.has_flag_set(ECMAScriptFlags::Insensitive))
|
||||
builder.append('i');
|
||||
if (flags.has_flag_set(ECMAScriptFlags::Multiline))
|
||||
builder.append('m');
|
||||
if (flags.has_flag_set(ECMAScriptFlags::SingleLine))
|
||||
builder.append('s');
|
||||
if (flags.has_flag_set(ECMAScriptFlags::Unicode))
|
||||
builder.append('u');
|
||||
if (flags.has_flag_set(ECMAScriptFlags::Sticky))
|
||||
builder.append('y');
|
||||
|
||||
return js_string(vm, builder.to_string());
|
||||
}
|
||||
|
||||
// Getter for 'global': true when the declared flags contain Global (the 'g' flag).
// Returns an empty value when |this| is not a RegExp object.
JS_DEFINE_NATIVE_GETTER(RegExpPrototype::global)
|
||||
{
|
||||
auto regexp_object = regexp_object_from(vm, global_object);
|
||||
if (!regexp_object)
|
||||
return {};
|
||||
|
||||
return Value(regexp_object->declared_options().has_flag_set(ECMAScriptFlags::Global)); // Note that this "Global" is actually "Global | Stateful"
|
||||
}
|
||||
|
||||
// Getter for 'ignoreCase': true when the declared flags contain Insensitive (the 'i' flag).
// Returns an empty value when |this| is not a RegExp object.
JS_DEFINE_NATIVE_GETTER(RegExpPrototype::ignore_case)
|
||||
{
|
||||
auto regexp_object = regexp_object_from(vm, global_object);
|
||||
if (!regexp_object)
|
||||
return {};
|
||||
|
||||
return Value(regexp_object->declared_options().has_flag_set(ECMAScriptFlags::Insensitive));
|
||||
}
|
||||
|
||||
// Getter for 'multiline': true when the declared flags contain Multiline (the 'm' flag).
// Returns an empty value when |this| is not a RegExp object.
JS_DEFINE_NATIVE_GETTER(RegExpPrototype::multiline)
|
||||
{
|
||||
auto regexp_object = regexp_object_from(vm, global_object);
|
||||
if (!regexp_object)
|
||||
return {};
|
||||
|
||||
return Value(regexp_object->declared_options().has_flag_set(ECMAScriptFlags::Multiline));
|
||||
}
|
||||
|
||||
// Getter for 'source': returns the stored pattern text as a JS string.
// Returns an empty value when |this| is not a RegExp object.
JS_DEFINE_NATIVE_GETTER(RegExpPrototype::source)
|
||||
{
|
||||
auto regexp_object = regexp_object_from(vm, global_object);
|
||||
if (!regexp_object)
|
||||
return {};
|
||||
|
||||
return js_string(vm, regexp_object->pattern());
|
||||
}
|
||||
|
||||
// Getter for 'sticky': true when the declared flags contain Sticky (the 'y' flag).
// Returns an empty value when |this| is not a RegExp object.
JS_DEFINE_NATIVE_GETTER(RegExpPrototype::sticky)
|
||||
{
|
||||
auto regexp_object = regexp_object_from(vm, global_object);
|
||||
if (!regexp_object)
|
||||
return {};
|
||||
|
||||
return Value(regexp_object->declared_options().has_flag_set(ECMAScriptFlags::Sticky));
|
||||
}
|
||||
|
||||
// Getter for 'unicode': true when the declared flags contain Unicode (the 'u' flag).
// Returns an empty value when |this| is not a RegExp object.
JS_DEFINE_NATIVE_GETTER(RegExpPrototype::unicode)
|
||||
{
|
||||
auto regexp_object = regexp_object_from(vm, global_object);
|
||||
if (!regexp_object)
|
||||
return {};
|
||||
|
||||
return Value(regexp_object->declared_options().has_flag_set(ECMAScriptFlags::Unicode));
|
||||
}
|
||||
|
||||
// Runs `re` against `subject` and returns the match result.
// When the match fails and the regex's options have ECMAScriptFlags::Global
// set, the regex's start_offset (exposed to scripts as 'lastIndex') is reset to 0.
// NOTE(review): ECMAScriptFlags::Global is defined as Global | Internal_Stateful and
// has_flag_set() requires every bit of the flag, so a sticky-only regex is not reset
// here on failure — confirm that is the intended 'lastIndex' behaviour.
RegexResult RegExpPrototype::do_match(const Regex<ECMA262>& re, const StringView& subject)
|
||||
{
|
||||
auto result = re.match(subject);
|
||||
// The 'lastIndex' property is reset on failing tests (if 'global')
|
||||
if (!result.success && re.options().has_flag_set(ECMAScriptFlags::Global))
|
||||
// Writing through the const reference works because start_offset is a mutable member.
re.start_offset = 0;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Implements RegExp.prototype.test(string): converts the first argument to a
// string, matches it against the regex and returns whether the match succeeded.
// Returns an empty value when |this| is not a RegExp object or when the
// string conversion throws.
JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::test)
|
||||
{
|
||||
// FIXME: This should try using dynamic properties for 'exec' first,
|
||||
// before falling back to builtin_exec.
|
||||
auto regexp_object = regexp_object_from(vm, global_object);
|
||||
if (!regexp_object)
|
||||
return {};
|
||||
|
||||
auto str = vm.argument(0).to_string(global_object);
|
||||
if (vm.exception())
|
||||
return {};
|
||||
|
||||
// RegExps without "global" and "sticky" always start at offset 0.
|
||||
if (!regexp_object->regex().options().has_flag_set((ECMAScriptFlags)regex::AllFlags::Internal_Stateful))
|
||||
regexp_object->regex().start_offset = 0;
|
||||
|
||||
// do_match() also takes care of resetting the offset after a failed global match.
auto result = do_match(regexp_object->regex(), str);
|
||||
return Value(result.success);
|
||||
}
|
||||
|
||||
JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::to_string)
|
||||
{
|
||||
auto* regexp_object = regexp_object_from(vm, global_object);
|
||||
|
|
|
@ -39,6 +39,18 @@ public:
|
|||
virtual ~RegExpPrototype() override;
|
||||
|
||||
private:
|
||||
static RegexResult do_match(const Regex<ECMA262>&, const StringView&);
|
||||
|
||||
JS_DECLARE_NATIVE_GETTER(dot_all);
|
||||
JS_DECLARE_NATIVE_GETTER(flags);
|
||||
JS_DECLARE_NATIVE_GETTER(global);
|
||||
JS_DECLARE_NATIVE_GETTER(ignore_case);
|
||||
JS_DECLARE_NATIVE_GETTER(multiline);
|
||||
JS_DECLARE_NATIVE_GETTER(source);
|
||||
JS_DECLARE_NATIVE_GETTER(sticky);
|
||||
JS_DECLARE_NATIVE_GETTER(unicode);
|
||||
|
||||
JS_DECLARE_NATIVE_FUNCTION(test);
|
||||
JS_DECLARE_NATIVE_FUNCTION(to_string);
|
||||
};
|
||||
|
||||
|
|
|
@ -0,0 +1,58 @@
|
|||
// RegExp.prototype.test is registered with a declared length of 1.
test("basic functionality", () => {
|
||||
expect(RegExp.prototype.test).toHaveLength(1);
|
||||
});
|
||||
|
||||
// Without 'g' or 'y' the regex is not stateful, so repeating the same call
// is expected to give the same answer every time.
test("simple test", () => {
|
||||
let re = /test/;
|
||||
expect(re.test("test")).toBe(true);
|
||||
expect(re.test("test")).toBe(true);
|
||||
});
|
||||
|
||||
// With 'g' each call continues after the previous match: "testtest" holds two
// matches, the third call is expected to fail (which resets the position),
// and the cycle then starts over.
test("simple global test", () => {
|
||||
let re = /test/g;
|
||||
expect(re.test("testtest")).toBe(true);
|
||||
expect(re.test("testtest")).toBe(true);
|
||||
expect(re.test("testtest")).toBe(false);
|
||||
expect(re.test("testtest")).toBe(true);
|
||||
expect(re.test("testtest")).toBe(true);
|
||||
});
|
||||
|
||||
// Starting from lastIndex = 2 only the second occurrence is still ahead, so
// one success is expected before the first failure; after that failure the
// search is expected to restart from the beginning (two successes, then a failure).
test("global test with offset lastIndex", () => {
|
||||
let re = /test/g;
|
||||
re.lastIndex = 2;
|
||||
expect(re.test("testtest")).toBe(true);
|
||||
expect(re.test("testtest")).toBe(false);
|
||||
expect(re.test("testtest")).toBe(true);
|
||||
expect(re.test("testtest")).toBe(true);
|
||||
expect(re.test("testtest")).toBe(false);
|
||||
});
|
||||
|
||||
// With 'y' and lastIndex = 2 the first call is expected to match "test" inside
// "aatest"; the following calls on the same regex are expected to fail.
test("sticky test with offset lastIndex", () => {
|
||||
let re = /test/y;
|
||||
re.lastIndex = 2;
|
||||
expect(re.test("aatest")).toBe(true);
|
||||
expect(re.test("aatest")).toBe(false);
|
||||
expect(re.test("aatest")).toBe(false);
|
||||
});
|
||||
|
||||
// Covers the flag-related accessors and the flag validation errors.
test("flag and options", () => {
|
||||
// The 'flags' accessor reports the declared flags.
expect(/foo/gi.flags).toBe("gi");
|
||||
expect(/foo/mu.flags).toBe("mu");
|
||||
expect(/foo/gimsuy.flags).toBe("gimsuy");
|
||||
|
||||
// Each boolean accessor reflects whether its flag was declared.
let re = /foo/gim;
|
||||
expect(re.dotAll).toBe(false);
|
||||
expect(re.global).toBe(true);
|
||||
expect(re.ignoreCase).toBe(true);
|
||||
expect(re.multiline).toBe(true);
|
||||
expect(re.sticky).toBe(false);
|
||||
expect(re.unicode).toBe(false);
|
||||
|
||||
// A flag given twice is a SyntaxError.
expect(() => {
|
||||
/foo/gg;
|
||||
}).toThrowWithMessage(SyntaxError, "Repeated RegExp flag 'g'");
|
||||
|
||||
// An unknown flag character is a SyntaxError as well.
expect(() => {
|
||||
/foo/x;
|
||||
}).toThrowWithMessage(SyntaxError, "Invalid RegExp flag 'x'");
|
||||
});
|
|
@ -261,6 +261,7 @@ public:
|
|||
struct MatchInput {
|
||||
RegexStringView view { nullptr };
|
||||
AllOptions regex_options {};
|
||||
size_t start_offset { 0 }; // For Stateful matches, saved and restored from Regex::start_offset.
|
||||
|
||||
size_t match_index { 0 };
|
||||
size_t line { 0 };
|
||||
|
|
|
@ -52,6 +52,15 @@ Regex<Parser>::Regex(StringView pattern, typename ParserTraits<Parser>::OptionsT
|
|||
}
|
||||
}
|
||||
|
||||
// Returns the options the compiled matcher was created with.
// If parsing the pattern reported an error, default-constructed options are
// returned instead and the matcher is not consulted.
// NOTE(review): presumably the matcher is unset when parsing failed, which is why
// it is not dereferenced in that case — confirm against the constructor.
template<class Parser>
|
||||
typename ParserTraits<Parser>::OptionsType Regex<Parser>::options() const
|
||||
{
|
||||
if (parser_result.error != Error::NoError)
|
||||
return {};
|
||||
|
||||
return matcher->options();
|
||||
}
|
||||
|
||||
template<class Parser>
|
||||
String Regex<Parser>::error_string(Optional<String> message) const
|
||||
{
|
||||
|
@ -81,6 +90,10 @@ RegexResult Matcher<Parser>::match(const RegexStringView& view, Optional<typenam
|
|||
template<typename Parser>
|
||||
RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options) const
|
||||
{
|
||||
// If the pattern *itself* isn't stateful, reset any changes to start_offset.
|
||||
if (!((AllFlags)m_regex_options.value() & AllFlags::Internal_Stateful))
|
||||
m_pattern.start_offset = 0;
|
||||
|
||||
size_t match_count { 0 };
|
||||
|
||||
MatchInput input;
|
||||
|
@ -88,8 +101,12 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional
|
|||
MatchOutput output;
|
||||
|
||||
input.regex_options = m_regex_options | regex_options.value_or({}).value();
|
||||
input.start_offset = m_pattern.start_offset;
|
||||
output.operations = 0;
|
||||
|
||||
if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful))
|
||||
ASSERT(views.size() == 1);
|
||||
|
||||
if (c_match_preallocation_count) {
|
||||
output.matches.ensure_capacity(c_match_preallocation_count);
|
||||
output.capture_group_matches.ensure_capacity(c_match_preallocation_count);
|
||||
|
@ -115,7 +132,7 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional
|
|||
output.matches.empend();
|
||||
|
||||
ASSERT(start_position + state.string_position - start_position <= input.view.length());
|
||||
if (input.regex_options & AllFlags::StringCopyMatches) {
|
||||
if (input.regex_options.has_flag_set(AllFlags::StringCopyMatches)) {
|
||||
output.matches.at(input.match_index) = { input.view.substring_view(start_position, state.string_position - start_position).to_string(), input.line, start_position, input.global_offset + start_position };
|
||||
} else { // let the view point to the original string ...
|
||||
output.matches.at(input.match_index) = { input.view.substring_view(start_position, state.string_position - start_position), input.line, start_position, input.global_offset + start_position };
|
||||
|
@ -126,7 +143,9 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional
|
|||
s_regex_dbg.print_header();
|
||||
#endif
|
||||
|
||||
bool continue_search = (input.regex_options & AllFlags::Global) || (input.regex_options & AllFlags::Multiline);
|
||||
bool continue_search = input.regex_options.has_flag_set(AllFlags::Global) || input.regex_options.has_flag_set(AllFlags::Multiline);
|
||||
if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful))
|
||||
continue_search = false;
|
||||
|
||||
for (auto& view : views) {
|
||||
input.view = view;
|
||||
|
@ -135,7 +154,9 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional
|
|||
#endif
|
||||
|
||||
auto view_length = view.length();
|
||||
for (size_t view_index = 0; view_index < view_length; ++view_index) {
|
||||
size_t view_index = m_pattern.start_offset;
|
||||
state.string_position = view_index;
|
||||
for (; view_index < view_length; ++view_index) {
|
||||
auto& match_length_minimum = m_pattern.parser_result.match_length_minimum;
|
||||
// FIXME: More performant would be to know the remaining minimum string
|
||||
// length needed to match from the current position onwards within
|
||||
|
@ -158,12 +179,12 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional
|
|||
|
||||
if (success.value()) {
|
||||
|
||||
if ((input.regex_options & AllFlags::MatchNotEndOfLine) && state.string_position == input.view.length()) {
|
||||
if (input.regex_options.has_flag_set(AllFlags::MatchNotEndOfLine) && state.string_position == input.view.length()) {
|
||||
if (!continue_search)
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
if ((input.regex_options & AllFlags::MatchNotBeginOfLine) && view_index == 0) {
|
||||
if (input.regex_options.has_flag_set(AllFlags::MatchNotBeginOfLine) && view_index == 0) {
|
||||
if (!continue_search)
|
||||
break;
|
||||
continue;
|
||||
|
@ -182,26 +203,34 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional
|
|||
view_index = state.string_position - (has_zero_length ? 0 : 1);
|
||||
continue;
|
||||
|
||||
} else if (!continue_search && state.string_position < view_length)
|
||||
} else if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful)) {
|
||||
append_match(input, state, output, view_index);
|
||||
break;
|
||||
|
||||
} else if (state.string_position < view_length) {
|
||||
return { false, 0, {}, {}, {}, output.operations };
|
||||
}
|
||||
|
||||
append_match(input, state, output, view_index);
|
||||
break;
|
||||
}
|
||||
|
||||
if (!continue_search)
|
||||
if (!continue_search && !input.regex_options.has_flag_set(AllFlags::Internal_Stateful))
|
||||
break;
|
||||
}
|
||||
|
||||
++input.line;
|
||||
input.global_offset += view.length() + 1; // +1 includes the line break character
|
||||
|
||||
if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful))
|
||||
m_pattern.start_offset = state.string_position;
|
||||
}
|
||||
|
||||
MatchOutput output_copy;
|
||||
if (match_count) {
|
||||
auto capture_groups_count = min(output.capture_group_matches.size(), output.matches.size());
|
||||
for (size_t i = 0; i < capture_groups_count; ++i) {
|
||||
if (input.regex_options & AllFlags::SkipTrimEmptyMatches) {
|
||||
if (input.regex_options.has_flag_set(AllFlags::SkipTrimEmptyMatches)) {
|
||||
output_copy.capture_group_matches.append(output.capture_group_matches.at(i));
|
||||
} else {
|
||||
Vector<Match> capture_group_matches;
|
||||
|
|
|
@ -73,6 +73,11 @@ public:
|
|||
RegexResult match(const RegexStringView&, Optional<typename ParserTraits<Parser>::OptionsType> = {}) const;
|
||||
RegexResult match(const Vector<RegexStringView>, Optional<typename ParserTraits<Parser>::OptionsType> = {}) const;
|
||||
|
||||
// Returns a copy of the options stored in m_regex_options.
typename ParserTraits<Parser>::OptionsType options() const
|
||||
{
|
||||
return m_regex_options;
|
||||
}
|
||||
|
||||
private:
|
||||
Optional<bool> execute(const MatchInput& input, MatchState& state, MatchOutput& output, size_t recursion_level) const;
|
||||
ALWAYS_INLINE Optional<bool> execute_low_prio_forks(const MatchInput& input, MatchState& original_state, MatchOutput& output, Vector<MatchState> states, size_t recursion_level) const;
|
||||
|
@ -87,10 +92,12 @@ public:
|
|||
String pattern_value;
|
||||
regex::Parser::Result parser_result;
|
||||
OwnPtr<Matcher<Parser>> matcher { nullptr };
|
||||
mutable size_t start_offset { 0 };
|
||||
|
||||
explicit Regex(StringView pattern, typename ParserTraits<Parser>::OptionsType regex_options = {});
|
||||
~Regex() = default;
|
||||
|
||||
typename ParserTraits<Parser>::OptionsType options() const;
|
||||
void print_bytecode(FILE* f = stdout) const;
|
||||
String error_string(Optional<String> message = {}) const;
|
||||
|
||||
|
|
|
@ -53,7 +53,8 @@ enum class AllFlags {
|
|||
Sticky = __Regex_Sticky, // Force the pattern to only match consecutive matches from where the previous match ended.
|
||||
Multiline = __Regex_Multiline, // Handle newline characters. Match each line, one by one.
|
||||
SkipTrimEmptyMatches = __Regex_SkipTrimEmptyMatches, // Do not remove empty capture group results.
|
||||
Last = SkipTrimEmptyMatches
|
||||
Internal_Stateful = __Regex_Internal_Stateful, // Make global matches match one result at a time, and further match() calls on the same instance continue where the previous one left off.
|
||||
Last = Internal_Stateful,
|
||||
};
|
||||
|
||||
enum class PosixFlags : FlagsUnderlyingType {
|
||||
|
@ -72,7 +73,7 @@ enum class PosixFlags : FlagsUnderlyingType {
|
|||
};
|
||||
|
||||
enum class ECMAScriptFlags : FlagsUnderlyingType {
|
||||
Global = (FlagsUnderlyingType)AllFlags::Global,
|
||||
Global = (FlagsUnderlyingType)AllFlags::Global | (FlagsUnderlyingType)AllFlags::Internal_Stateful, // Note: ECMAScript "Global" creates a stateful regex.
|
||||
Insensitive = (FlagsUnderlyingType)AllFlags::Insensitive,
|
||||
Ungreedy = (FlagsUnderlyingType)AllFlags::Ungreedy,
|
||||
Unicode = (FlagsUnderlyingType)AllFlags::Unicode,
|
||||
|
@ -123,7 +124,7 @@ public:
|
|||
void reset_flags() { m_flags = (T)0; }
|
||||
void reset_flag(T flag) { m_flags = (T)((FlagsUnderlyingType)m_flags & ~(FlagsUnderlyingType)flag); }
|
||||
void set_flag(T flag) { *this |= flag; }
|
||||
bool has_flag_set(T flag) const { return *this & flag; }
|
||||
bool has_flag_set(T flag) const { return (FlagsUnderlyingType)flag == ((FlagsUnderlyingType)m_flags & (FlagsUnderlyingType)flag); }
|
||||
T value() const { return m_flags; }
|
||||
|
||||
private:
|
||||
|
|
Loading…
Reference in a new issue