From dabd60180f301ae206d6e23e0e0ff17d6c8975af Mon Sep 17 00:00:00 2001 From: Ali Mohammad Pur Date: Tue, 12 Nov 2024 09:18:38 +0100 Subject: [PATCH] LibRegex: Don't ignore references that weren't bound in checked blocks Fixes #2281. --- Libraries/LibRegex/RegexOptimizer.cpp | 18 +----------------- Tests/LibRegex/Regex.cpp | 1 + 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/Libraries/LibRegex/RegexOptimizer.cpp b/Libraries/LibRegex/RegexOptimizer.cpp index a23c9529629..857c4bbaec6 100644 --- a/Libraries/LibRegex/RegexOptimizer.cpp +++ b/Libraries/LibRegex/RegexOptimizer.cpp @@ -510,7 +510,6 @@ enum class AtomicRewritePreconditionResult { static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode, Block const& repeated_block, Block const& following_block) { Vector> repeated_values; - HashTable active_capture_groups; MatchState state; auto has_seen_actionable_opcode = false; for (state.instruction_position = repeated_block.start; state.instruction_position < repeated_block.end;) { @@ -536,12 +535,6 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi case OpCodeId::Restore: case OpCodeId::GoBack: return AtomicRewritePreconditionResult::NotSatisfied; - case OpCodeId::SaveRightCaptureGroup: - active_capture_groups.set(static_cast(opcode).id()); - break; - case OpCodeId::SaveLeftCaptureGroup: - active_capture_groups.set(static_cast(opcode).id()); - break; case OpCodeId::ForkJump: case OpCodeId::ForkReplaceJump: case OpCodeId::JumpNonEmpty: @@ -556,7 +549,6 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi state.instruction_position += opcode.size(); } dbgln_if(REGEX_DEBUG, "Found {} entries in reference", repeated_values.size()); - dbgln_if(REGEX_DEBUG, "Found {} active capture groups", active_capture_groups.size()); bool following_block_has_at_least_one_compare = false; // Find the first compare in the following block, it must NOT match any of the values in `repeated_values'. @@ -565,13 +557,6 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi final_instruction = state.instruction_position; auto& opcode = bytecode.get_opcode(state); switch (opcode.opcode_id()) { - // Note: These have to exist since we're effectively repeating the following block as well - case OpCodeId::SaveRightCaptureGroup: - active_capture_groups.set(static_cast(opcode).id()); - break; - case OpCodeId::SaveLeftCaptureGroup: - active_capture_groups.set(static_cast(opcode).id()); - break; case OpCodeId::Compare: { following_block_has_at_least_one_compare = true; // We found a compare, let's see what it has. @@ -580,8 +565,7 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi break; if (any_of(compares, [&](auto& compare) { - return compare.type == CharacterCompareType::AnyChar - || (compare.type == CharacterCompareType::Reference && active_capture_groups.contains(compare.value)); + return compare.type == CharacterCompareType::AnyChar || compare.type == CharacterCompareType::Reference; })) return AtomicRewritePreconditionResult::NotSatisfied; diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp index 8bd85a2ef43..762d52284ec 100644 --- a/Tests/LibRegex/Regex.cpp +++ b/Tests/LibRegex/Regex.cpp @@ -708,6 +708,7 @@ TEST_CASE(ECMA262_match) { "^\\?((&?category=[0-9]+)?(&?shippable=1)?(&?ad_type=demand)?(&?page=[0-9]+)?(&?locations=(r|d)_[0-9]+)?)+$"sv, "?category=54&shippable=1&baby_age=p,0,1,3"sv, false }, // ladybird#968, ?+ should not loop forever. { "([^\\s]+):\\s*([^;]+);"sv, "font-family: 'Inter';"sv, true }, // optimizer bug, blindly accepting inverted char classes [^x] as atomic rewrite opportunities. + { "(a)(?=a*\\1)"sv, "aaaa"sv, true, global_multiline.value() }, // Optimizer bug, ignoring references that weren't bound in the current or past block, ladybird#2281 }; // clang-format on