mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-21 23:20:20 +00:00
LibRegex: Fix parsing identity escape sequences
Also fixes the propagation of default options (the previous implementation reset them to zero before parsing...). Partially deals with #4189.
This commit is contained in:
parent
e83e7a03c2
commit
801750b95a
Notes:
sideshowbarker
2024-07-19 01:13:47 +09:00
Author: https://github.com/alimpfard Commit: https://github.com/SerenityOS/serenity/commit/801750b95a6 Pull-request: https://github.com/SerenityOS/serenity/pull/4194 Issue: https://github.com/SerenityOS/serenity/issues/4186 Issue: https://github.com/SerenityOS/serenity/issues/4189
1 changed files with 24 additions and 7 deletions
|
@ -115,7 +115,6 @@ ALWAYS_INLINE void Parser::reset()
|
|||
m_parser_state.current_token = m_parser_state.lexer.next();
|
||||
m_parser_state.error = Error::NoError;
|
||||
m_parser_state.error_token = { TokenType::Eof, 0, StringView(nullptr) };
|
||||
m_parser_state.regex_options = {};
|
||||
}
|
||||
|
||||
Parser::Result Parser::parse(Optional<AllOptions> regex_options)
|
||||
|
@ -621,7 +620,7 @@ bool PosixExtendedParser::parse_root(ByteCode& stack, size_t& match_length_minim
|
|||
|
||||
bool ECMA262Parser::parse_internal(ByteCode& stack, size_t& match_length_minimum)
|
||||
{
|
||||
if (m_parser_state.regex_options & AllFlags::Unicode) {
|
||||
if (m_parser_state.regex_options.has_flag_set(AllFlags::Unicode)) {
|
||||
return parse_pattern(stack, match_length_minimum, true, true);
|
||||
} else {
|
||||
ByteCode new_stack;
|
||||
|
@ -918,6 +917,13 @@ bool ECMA262Parser::parse_quantifier(ByteCode& stack, size_t& match_length_minim
|
|||
|
||||
bool ECMA262Parser::parse_atom(ByteCode& stack, size_t& match_length_minimum, bool unicode, bool named)
|
||||
{
|
||||
if (match(TokenType::EscapeSequence)) {
|
||||
// Also part of AtomEscape.
|
||||
auto token = consume();
|
||||
match_length_minimum += 1;
|
||||
stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)token.value()[0] } });
|
||||
return true;
|
||||
}
|
||||
if (try_skip("\\")) {
|
||||
// AtomEscape.
|
||||
return parse_atom_escape(stack, match_length_minimum, unicode, named);
|
||||
|
@ -1035,11 +1041,20 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini
|
|||
}
|
||||
|
||||
// IdentityEscape
|
||||
if (match(TokenType::EscapeSequence)) {
|
||||
match_length_minimum += 1;
|
||||
auto token = consume().value();
|
||||
stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)token[token.length() - 1] } });
|
||||
return true;
|
||||
for (auto ch : StringView { "^$\\.*+?()[]{}|" }) {
|
||||
if (try_skip({ &ch, 1 })) {
|
||||
match_length_minimum += 1;
|
||||
stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)ch } });
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (unicode) {
|
||||
if (try_skip("/")) {
|
||||
match_length_minimum += 1;
|
||||
stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)'/' } });
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (named && try_skip("k")) {
|
||||
|
@ -1171,6 +1186,8 @@ bool ECMA262Parser::parse_nonempty_class_ranges(Vector<CompareTypeAndValuePair>&
|
|||
return { { .code_point = '\v', .is_character_class = false } };
|
||||
if (try_skip("b"))
|
||||
return { { .code_point = '\b', .is_character_class = false } };
|
||||
if (try_skip("/"))
|
||||
return { { .code_point = '/', .is_character_class = false } };
|
||||
|
||||
// CharacterEscape > ControlLetter
|
||||
if (try_skip("c")) {
|
||||
|
|
Loading…
Reference in a new issue