LibJS: Make invalid RegExp flags a SyntaxError at parse time
This patch changes the validation of RegExp flags (checking for invalid and duplicate values) from a SyntaxError at runtime to a SyntaxError at parse time - it's not something that's supposed to be catchable. As a nice side effect, this simplifies the RegExpObject constructor a bit, as it can no longer throw an exception and doesn't have to validate the flags itself.
This commit is contained in:
parent
c93c2dc72c
commit
60064e2049
Notes:
sideshowbarker
2024-07-18 18:22:34 +09:00
4 changed files with 22 additions and 33 deletions
|
@ -6,6 +6,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "Parser.h"
|
#include "Parser.h"
|
||||||
|
#include <AK/HashTable.h>
|
||||||
#include <AK/ScopeGuard.h>
|
#include <AK/ScopeGuard.h>
|
||||||
#include <AK/StdLibExtras.h>
|
#include <AK/StdLibExtras.h>
|
||||||
#include <AK/TemporaryChange.h>
|
#include <AK/TemporaryChange.h>
|
||||||
|
@ -686,7 +687,20 @@ NonnullRefPtr<RegExpLiteral> Parser::parse_regexp_literal()
|
||||||
auto pattern = consume().value();
|
auto pattern = consume().value();
|
||||||
// Remove leading and trailing slash.
|
// Remove leading and trailing slash.
|
||||||
pattern = pattern.substring_view(1, pattern.length() - 2);
|
pattern = pattern.substring_view(1, pattern.length() - 2);
|
||||||
auto flags = match(TokenType::RegexFlags) ? consume().value() : "";
|
auto flags = String::empty();
|
||||||
|
if (match(TokenType::RegexFlags)) {
|
||||||
|
auto flags_start = position();
|
||||||
|
flags = consume().value();
|
||||||
|
HashTable<char> seen_flags;
|
||||||
|
for (size_t i = 0; i < flags.length(); ++i) {
|
||||||
|
auto flag = flags.substring_view(i, 1);
|
||||||
|
if (!flag.is_one_of("g", "i", "m", "s", "u", "y"))
|
||||||
|
syntax_error(String::formatted("Invalid RegExp flag '{}'", flag), Position { flags_start.line, flags_start.column + i });
|
||||||
|
if (seen_flags.contains(*flag.characters_without_null_termination()))
|
||||||
|
syntax_error(String::formatted("Repeated RegExp flag '{}'", flag), Position { flags_start.line, flags_start.column + i });
|
||||||
|
seen_flags.set(*flag.characters_without_null_termination());
|
||||||
|
}
|
||||||
|
}
|
||||||
return create_ast_node<RegExpLiteral>({ m_parser_state.m_current_token.filename(), rule_start.position(), position() }, pattern, flags);
|
return create_ast_node<RegExpLiteral>({ m_parser_state.m_current_token.filename(), rule_start.position(), position() }, pattern, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -138,8 +138,6 @@
|
||||||
M(ReflectBadNewTarget, "Optional third argument of Reflect.construct() must be a constructor") \
|
M(ReflectBadNewTarget, "Optional third argument of Reflect.construct() must be a constructor") \
|
||||||
M(ReflectBadDescriptorArgument, "Descriptor argument is not an object") \
|
M(ReflectBadDescriptorArgument, "Descriptor argument is not an object") \
|
||||||
M(RegExpCompileError, "RegExp compile error: {}") \
|
M(RegExpCompileError, "RegExp compile error: {}") \
|
||||||
M(RegExpObjectBadFlag, "Invalid RegExp flag '{}'") \
|
|
||||||
M(RegExpObjectRepeatedFlag, "Repeated RegExp flag '{}'") \
|
|
||||||
M(StringRawCannotConvert, "Cannot convert property 'raw' to object from {}") \
|
M(StringRawCannotConvert, "Cannot convert property 'raw' to object from {}") \
|
||||||
M(StringRepeatCountMustBe, "repeat count must be a {} number") \
|
M(StringRepeatCountMustBe, "repeat count must be a {} number") \
|
||||||
M(ThisHasNotBeenInitialized, "|this| has not been initialized") \
|
M(ThisHasNotBeenInitialized, "|this| has not been initialized") \
|
||||||
|
|
|
@ -13,9 +13,8 @@
|
||||||
|
|
||||||
namespace JS {
|
namespace JS {
|
||||||
|
|
||||||
static Flags options_from(const String& flags, VM& vm, GlobalObject& global_object)
|
static Flags options_from(const String& flags)
|
||||||
{
|
{
|
||||||
bool g = false, i = false, m = false, s = false, u = false, y = false;
|
|
||||||
Flags options {
|
Flags options {
|
||||||
// JS regexps are all 'global' by default as per our definition, but the "global" flag enables "stateful".
|
// JS regexps are all 'global' by default as per our definition, but the "global" flag enables "stateful".
|
||||||
// FIXME: Enable 'BrowserExtended' only if in a browser context.
|
// FIXME: Enable 'BrowserExtended' only if in a browser context.
|
||||||
|
@ -26,44 +25,26 @@ static Flags options_from(const String& flags, VM& vm, GlobalObject& global_obje
|
||||||
for (auto ch : flags) {
|
for (auto ch : flags) {
|
||||||
switch (ch) {
|
switch (ch) {
|
||||||
case 'g':
|
case 'g':
|
||||||
if (g)
|
|
||||||
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
|
|
||||||
g = true;
|
|
||||||
options.effective_flags |= regex::ECMAScriptFlags::Global;
|
options.effective_flags |= regex::ECMAScriptFlags::Global;
|
||||||
options.declared_flags |= regex::ECMAScriptFlags::Global;
|
options.declared_flags |= regex::ECMAScriptFlags::Global;
|
||||||
break;
|
break;
|
||||||
case 'i':
|
case 'i':
|
||||||
if (i)
|
|
||||||
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
|
|
||||||
i = true;
|
|
||||||
options.effective_flags |= regex::ECMAScriptFlags::Insensitive;
|
options.effective_flags |= regex::ECMAScriptFlags::Insensitive;
|
||||||
options.declared_flags |= regex::ECMAScriptFlags::Insensitive;
|
options.declared_flags |= regex::ECMAScriptFlags::Insensitive;
|
||||||
break;
|
break;
|
||||||
case 'm':
|
case 'm':
|
||||||
if (m)
|
|
||||||
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
|
|
||||||
m = true;
|
|
||||||
options.effective_flags |= regex::ECMAScriptFlags::Multiline;
|
options.effective_flags |= regex::ECMAScriptFlags::Multiline;
|
||||||
options.declared_flags |= regex::ECMAScriptFlags::Multiline;
|
options.declared_flags |= regex::ECMAScriptFlags::Multiline;
|
||||||
break;
|
break;
|
||||||
case 's':
|
case 's':
|
||||||
if (s)
|
|
||||||
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
|
|
||||||
s = true;
|
|
||||||
options.effective_flags |= regex::ECMAScriptFlags::SingleLine;
|
options.effective_flags |= regex::ECMAScriptFlags::SingleLine;
|
||||||
options.declared_flags |= regex::ECMAScriptFlags::SingleLine;
|
options.declared_flags |= regex::ECMAScriptFlags::SingleLine;
|
||||||
break;
|
break;
|
||||||
case 'u':
|
case 'u':
|
||||||
if (u)
|
|
||||||
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
|
|
||||||
u = true;
|
|
||||||
options.effective_flags |= regex::ECMAScriptFlags::Unicode;
|
options.effective_flags |= regex::ECMAScriptFlags::Unicode;
|
||||||
options.declared_flags |= regex::ECMAScriptFlags::Unicode;
|
options.declared_flags |= regex::ECMAScriptFlags::Unicode;
|
||||||
break;
|
break;
|
||||||
case 'y':
|
case 'y':
|
||||||
if (y)
|
|
||||||
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectRepeatedFlag, ch);
|
|
||||||
y = true;
|
|
||||||
// Now for the more interesting flag, 'sticky' actually unsets 'global', part of which is the default.
|
// Now for the more interesting flag, 'sticky' actually unsets 'global', part of which is the default.
|
||||||
options.effective_flags.reset_flag(regex::ECMAScriptFlags::Global);
|
options.effective_flags.reset_flag(regex::ECMAScriptFlags::Global);
|
||||||
// "What's the difference between sticky and global, then", that's simple.
|
// "What's the difference between sticky and global, then", that's simple.
|
||||||
|
@ -74,8 +55,8 @@ static Flags options_from(const String& flags, VM& vm, GlobalObject& global_obje
|
||||||
options.declared_flags |= regex::ECMAScriptFlags::Sticky;
|
options.declared_flags |= regex::ECMAScriptFlags::Sticky;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
vm.throw_exception<SyntaxError>(global_object, ErrorType::RegExpObjectBadFlag, ch);
|
// RegExp flags must be validated by the parser.
|
||||||
return options;
|
VERIFY_NOT_REACHED();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -91,7 +72,7 @@ RegExpObject::RegExpObject(String pattern, String flags, Object& prototype)
|
||||||
: Object(prototype)
|
: Object(prototype)
|
||||||
, m_pattern(pattern)
|
, m_pattern(pattern)
|
||||||
, m_flags(flags)
|
, m_flags(flags)
|
||||||
, m_active_flags(options_from(m_flags, this->vm(), this->global_object()))
|
, m_active_flags(options_from(m_flags))
|
||||||
, m_regex(pattern, m_active_flags.effective_flags)
|
, m_regex(pattern, m_active_flags.effective_flags)
|
||||||
{
|
{
|
||||||
if (m_regex.parser_result.error != regex::Error::NoError) {
|
if (m_regex.parser_result.error != regex::Error::NoError) {
|
||||||
|
@ -167,11 +148,7 @@ RegExpObject* regexp_create(GlobalObject& global_object, Value pattern, Value fl
|
||||||
if (f.is_null())
|
if (f.is_null())
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
// FIXME: This is awkward: the RegExpObject C++ constructor may throw a VM exception.
|
return RegExpObject::create(global_object, move(p), move(f));
|
||||||
auto* obj = RegExpObject::create(global_object, move(p), move(f));
|
|
||||||
if (global_object.vm().exception())
|
|
||||||
return nullptr;
|
|
||||||
return obj;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -49,10 +49,10 @@ test("flag and options", () => {
|
||||||
expect(re.unicode).toBe(false);
|
expect(re.unicode).toBe(false);
|
||||||
|
|
||||||
expect(() => {
|
expect(() => {
|
||||||
/foo/gg;
|
Function("/foo/gg");
|
||||||
}).toThrowWithMessage(SyntaxError, "Repeated RegExp flag 'g'");
|
}).toThrowWithMessage(SyntaxError, "Repeated RegExp flag 'g'");
|
||||||
|
|
||||||
expect(() => {
|
expect(() => {
|
||||||
/foo/x;
|
Function("/foo/x");
|
||||||
}).toThrowWithMessage(SyntaxError, "Invalid RegExp flag 'x'");
|
}).toThrowWithMessage(SyntaxError, "Invalid RegExp flag 'x'");
|
||||||
});
|
});
|
||||||
|
|
Loading…
Add table
Reference in a new issue