LibRegex+LibJS: Avoid searching for more than one match in JS RegExps
All of JS's regular expression APIs only want a single match, so avoid trying to produce more (which will be discarded anyway).
This commit is contained in:
parent
4c506f91fe
commit
2b028f6faa
Notes:
sideshowbarker
2024-07-17 19:47:24 +09:00
Author: https://github.com/alimpfard Commit: https://github.com/SerenityOS/serenity/commit/2b028f6faa3 Pull-request: https://github.com/SerenityOS/serenity/pull/12287
5 changed files with 28 additions and 5 deletions
|
@ -990,3 +990,15 @@ TEST_CASE(negative_lookahead)
|
|||
EXPECT_EQ(re.match(":foobar").success, true);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE(single_match_flag)
{
    // Every character of "ABC" falls inside the character class, so this checks that
    // with SingleMatch set (alongside Global) exactly one match, "A", is reported
    // and nothing past it.
    Regex<ECMA262> pattern("[\\u0008-\\uffff]"sv, ECMAScriptFlags::Global | static_cast<ECMAScriptFlags>(regex::AllFlags::SingleMatch));
    auto match_result = pattern.match("ABC");

    EXPECT_EQ(match_result.success, true);
    EXPECT_EQ(match_result.matches.size(), 1u);
    EXPECT_EQ(match_result.matches.first().view.to_string(), "A"sv);
}
|
||||
|
|
|
@ -81,10 +81,11 @@ enum __RegexAllFlags {
|
|||
__Regex_Sticky = __Regex_Global << 11, // Force the pattern to only match consecutive matches from where the previous match ended.
|
||||
__Regex_Multiline = __Regex_Global << 12, // Handle newline characters. Match each line, one by one.
|
||||
__Regex_SkipTrimEmptyMatches = __Regex_Global << 13, // Do not remove empty capture group results.
|
||||
__Regex_Internal_Stateful = __Regex_Global << 14, // Internal flag; enables stateful matches.
|
||||
__Regex_Internal_BrowserExtended = __Regex_Global << 15, // Internal flag; enable browser-specific ECMA262 extensions.
|
||||
__Regex_Internal_ConsiderNewline = __Regex_Global << 16, // Internal flag; allow matchers to consider newlines as line separators.
|
||||
__Regex_Last = __Regex_SkipTrimEmptyMatches
|
||||
__Regex_SingleMatch = __Regex_Global << 14, // Stop after acquiring a single match.
|
||||
__Regex_Internal_Stateful = __Regex_Global << 15, // Internal flag; enables stateful matches.
|
||||
__Regex_Internal_BrowserExtended = __Regex_Global << 16, // Internal flag; enable browser-specific ECMA262 extensions.
|
||||
__Regex_Internal_ConsiderNewline = __Regex_Global << 17, // Internal flag; allow matchers to consider newlines as line separators.
|
||||
__Regex_Last = __Regex_SingleMatch
|
||||
};
|
||||
|
||||
// Values for the cflags parameter to the regcomp() function:
|
||||
|
|
|
@ -25,7 +25,12 @@ class RegExpObject : public Object {
|
|||
public:
|
||||
// JS regexps are all 'global' by default as per our definition, but the "global" flag enables "stateful".
|
||||
// FIXME: Enable 'BrowserExtended' only if in a browser context.
|
||||
static constexpr regex::RegexOptions<ECMAScriptFlags> default_flags { (regex::ECMAScriptFlags)regex::AllFlags::Global | (regex::ECMAScriptFlags)regex::AllFlags::SkipTrimEmptyMatches | regex::ECMAScriptFlags::BrowserExtended };
|
||||
static constexpr regex::RegexOptions<ECMAScriptFlags> default_flags {
|
||||
(regex::ECMAScriptFlags)regex::AllFlags::SingleMatch
|
||||
| (regex::ECMAScriptFlags)regex::AllFlags::Global
|
||||
| (regex::ECMAScriptFlags)regex::AllFlags::SkipTrimEmptyMatches
|
||||
| regex::ECMAScriptFlags::BrowserExtended
|
||||
};
|
||||
|
||||
static RegExpObject* create(GlobalObject&);
|
||||
static RegExpObject* create(GlobalObject&, Regex<ECMA262> regex, String pattern, String flags);
|
||||
|
|
|
@ -183,6 +183,8 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
|
|||
if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful))
|
||||
continue_search = false;
|
||||
|
||||
auto single_match_only = input.regex_options.has_flag_set(AllFlags::SingleMatch);
|
||||
|
||||
for (auto const& view : views) {
|
||||
if (lines_to_skip != 0) {
|
||||
++input.line;
|
||||
|
@ -276,6 +278,8 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
|
|||
|
||||
bool has_zero_length = state.string_position == view_index;
|
||||
view_index = state.string_position - (has_zero_length ? 0 : 1);
|
||||
if (single_match_only)
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful)) {
|
||||
|
|
|
@ -33,6 +33,7 @@ enum class AllFlags {
|
|||
Sticky = __Regex_Sticky, // Force the pattern to only match consecutive matches from where the previous match ended.
|
||||
Multiline = __Regex_Multiline, // Handle newline characters. Match each line, one by one.
|
||||
SkipTrimEmptyMatches = __Regex_SkipTrimEmptyMatches, // Do not remove empty capture group results.
|
||||
SingleMatch = __Regex_SingleMatch, // Stop after acquiring a single match.
|
||||
Internal_Stateful = __Regex_Internal_Stateful, // Make global matches match one result at a time, and further match() calls on the same instance continue where the previous one left off.
|
||||
Internal_BrowserExtended = __Regex_Internal_BrowserExtended, // Only for ECMA262, Enable the behaviors defined in section B.1.4. of the ECMA262 spec.
|
||||
Internal_ConsiderNewline = __Regex_Internal_ConsiderNewline, // Only for ECMA262, Allow multiline matches to consider newlines as line boundaries.
|
||||
|
|
Loading…
Add table
Reference in a new issue