LibJS: Make invalid RegExp flags a SyntaxError at parse time

This patch changes the validation of RegExp flags (checking for
invalid and duplicate values) from a SyntaxError at runtime to a
SyntaxError at parse time - it's not something that's supposed to be
catchable.
As a nice side effect, this simplifies the RegExpObject constructor a
bit, as it can no longer throw an exception and doesn't have to validate
the flags itself.
This commit is contained in:
Linus Groh 2021-05-10 12:01:38 +01:00
parent c93c2dc72c
commit 60064e2049
Notes: sideshowbarker 2024-07-18 18:22:34 +09:00
4 changed files with 22 additions and 33 deletions

View file

@ -6,6 +6,7 @@
*/
#include "Parser.h"
#include <AK/HashTable.h>
#include <AK/ScopeGuard.h>
#include <AK/StdLibExtras.h>
#include <AK/TemporaryChange.h>
@ -686,7 +687,20 @@ NonnullRefPtr<RegExpLiteral> Parser::parse_regexp_literal()
auto pattern = consume().value();
// Remove leading and trailing slash.
pattern = pattern.substring_view(1, pattern.length() - 2);
auto flags = match(TokenType::RegexFlags) ? consume().value() : "";
auto flags = String::empty();
if (match(TokenType::RegexFlags)) {
auto flags_start = position();
flags = consume().value();
HashTable<char> seen_flags;
for (size_t i = 0; i < flags.length(); ++i) {
auto flag = flags.substring_view(i, 1);
if (!flag.is_one_of("g", "i", "m", "s", "u", "y"))
syntax_error(String::formatted("Invalid RegExp flag '{}'", flag), Position { flags_start.line, flags_start.column + i });
if (seen_flags.contains(*flag.characters_without_null_termination()))
syntax_error(String::formatted("Repeated RegExp flag '{}'", flag), Position { flags_start.line, flags_start.column + i });
seen_flags.set(*flag.characters_without_null_termination());
}
}
return create_ast_node<RegExpLiteral>({ m_parser_state.m_current_token.filename(), rule_start.position(), position() }, pattern, flags);
}

View file

@ -138,8 +138,6 @@
M(ReflectBadNewTarget, "Optional third argument of Reflect.construct() must be a constructor") \
M(ReflectBadDescriptorArgument, "Descriptor argument is not an object") \
M(RegExpCompileError, "RegExp compile error: {}") \
M(RegExpObjectBadFlag, "Invalid RegExp flag '{}'") \
M(RegExpObjectRepeatedFlag, "Repeated RegExp flag '{}'") \
M(StringRawCannotConvert, "Cannot convert property 'raw' to object from {}") \
M(StringRepeatCountMustBe, "repeat count must be a {} number") \
M(ThisHasNotBeenInitialized, "|this| has not been initialized") \

View file

@ -13,9 +13,8 @@
namespace JS {
static Flags options_from(const String& flags, VM& vm, GlobalObject& global_object)
static Flags options_from(const String& flags)
{
bool g = false, i = false, m = false, s = false, u = false, y = false;
Flags options {
// JS regexps are all 'global' by default as per our definition, but the "global" flag enables "stateful".
// FIXME: Enable 'BrowserExtended' only if in a browser context.
@ -26,44 +25,26 @@ static Flags options_from(const String& flags, VM& vm, GlobalObject& global_obje
for (auto ch : flags) {
switch (ch) {
case 'g':
if (g)
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
g = true;
options.effective_flags |= regex::ECMAScriptFlags::Global;
options.declared_flags |= regex::ECMAScriptFlags::Global;
break;
case 'i':
if (i)
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
i = true;
options.effective_flags |= regex::ECMAScriptFlags::Insensitive;
options.declared_flags |= regex::ECMAScriptFlags::Insensitive;
break;
case 'm':
if (m)
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
m = true;
options.effective_flags |= regex::ECMAScriptFlags::Multiline;
options.declared_flags |= regex::ECMAScriptFlags::Multiline;
break;
case 's':
if (s)
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
s = true;
options.effective_flags |= regex::ECMAScriptFlags::SingleLine;
options.declared_flags |= regex::ECMAScriptFlags::SingleLine;
break;
case 'u':
if (u)
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
u = true;
options.effective_flags |= regex::ECMAScriptFlags::Unicode;
options.declared_flags |= regex::ECMAScriptFlags::Unicode;
break;
case 'y':
if (y)
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
y = true;
// Now for the more interesting flag, 'sticky' actually unsets 'global', part of which is the default.
options.effective_flags.reset_flag(regex::ECMAScriptFlags::Global);
// "What's the difference between sticky and global, then", that's simple.
@ -74,8 +55,8 @@ static Flags options_from(const String& flags, VM& vm, GlobalObject& global_obje
options.declared_flags |= regex::ECMAScriptFlags::Sticky;
break;
default:
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectBadFlag, ch);
return options;
// RegExp flags must be validated by the parser.
VERIFY_NOT_REACHED();
}
}
@ -91,7 +72,7 @@ RegExpObject::RegExpObject(String pattern, String flags, Object& prototype)
: Object(prototype)
, m_pattern(pattern)
, m_flags(flags)
, m_active_flags(options_from(m_flags, this->vm(), this->global_object()))
, m_active_flags(options_from(m_flags))
, m_regex(pattern, m_active_flags.effective_flags)
{
if (m_regex.parser_result.error != regex::Error::NoError) {
@ -167,11 +148,7 @@ RegExpObject* regexp_create(GlobalObject& global_object, Value pattern, Value fl
if (f.is_null())
return nullptr;
}
// FIXME: This is awkward: the RegExpObject C++ constructor may throw a VM exception.
auto* obj = RegExpObject::create(global_object, move(p), move(f));
if (global_object.vm().exception())
return nullptr;
return obj;
return RegExpObject::create(global_object, move(p), move(f));
}
}

View file

@ -49,10 +49,10 @@ test("flag and options", () => {
expect(re.unicode).toBe(false);
expect(() => {
/foo/gg;
Function("/foo/gg");
}).toThrowWithMessage(SyntaxError, "Repeated RegExp flag 'g'");
expect(() => {
/foo/x;
Function("/foo/x");
}).toThrowWithMessage(SyntaxError, "Invalid RegExp flag 'x'");
});