LibRegex: Use the correct capture group index in ERE bytecode generation

Otherwise the left and right capture instructions of a group wouldn't refer to
the same capture group index if another capture group was nested inside it.
This commit is contained in:
Ali Mohammad Pur 2021-09-07 14:33:06 +04:30 committed by Andreas Kling
parent 44b8afdbc4
commit 7fefb8148b
Notes: sideshowbarker 2024-07-18 04:30:23 +09:00
2 changed files with 22 additions and 9 deletions

View file

@ -485,6 +485,18 @@ TEST_CASE(simple_period_end_benchmark)
EXPECT_EQ(re.search("hello?", m), true);
}
TEST_CASE(posix_extended_nested_capture_group)
{
    // Three nested capture groups, the innermost one named "llo".
    // Expected captures: index 0 -> "hello", index 1 -> "ello", index 2 (named "llo") -> "llo".
    Regex<PosixExtended> pattern("(h(e(?<llo>llo)))");
    auto match_result = pattern.match("hello");
    EXPECT(match_result.success);

    // A single match is expected, carrying one entry per capture group.
    EXPECT_EQ(match_result.capture_group_matches.size(), 1u);
    auto const& groups = match_result.capture_group_matches[0];
    EXPECT_EQ(groups.size(), 3u);

    // Captures are checked outermost group first.
    EXPECT_EQ(groups[0].view, "hello"sv);
    EXPECT_EQ(groups[1].view, "ello"sv);
    EXPECT_EQ(groups[2].view, "llo"sv);
}
TEST_CASE(ECMA262_parse)
{
struct _test {

View file

@ -799,6 +799,7 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si
last_token = consume();
}
capture_group_name = StringView(start_token.value().characters_without_null_termination(), capture_group_name_length);
++m_parser_state.named_capture_groups_count;
} else if (match(TokenType::EqualSign)) { // positive lookahead
consume();
@ -817,8 +818,11 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si
}
}
if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group))
bytecode.insert_bytecode_group_capture_left(m_parser_state.capture_groups_count);
auto current_capture_group = m_parser_state.capture_groups_count;
if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group)) {
bytecode.insert_bytecode_group_capture_left(current_capture_group);
m_parser_state.capture_groups_count++;
}
ByteCode capture_group_bytecode;
@ -846,13 +850,10 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si
consume(TokenType::RightParen, Error::MismatchingParen);
if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group)) {
if (capture_group_name.has_value()) {
bytecode.insert_bytecode_group_capture_right(m_parser_state.capture_groups_count, capture_group_name.value());
++m_parser_state.named_capture_groups_count;
} else {
bytecode.insert_bytecode_group_capture_right(m_parser_state.capture_groups_count);
}
++m_parser_state.capture_groups_count;
if (capture_group_name.has_value())
bytecode.insert_bytecode_group_capture_right(current_capture_group, capture_group_name.value());
else
bytecode.insert_bytecode_group_capture_right(current_capture_group);
}
should_parse_repetition_symbol = true;
break;