Kaynağa Gözat

LibRegex: Don't ignore references that weren't bound in checked blocks

Fixes #2281.
Ali Mohammad Pur 8 ay önce
ebeveyn
işleme
dabd60180f

+ 1 - 17
Libraries/LibRegex/RegexOptimizer.cpp

@@ -510,7 +510,6 @@ enum class AtomicRewritePreconditionResult {
 static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode, Block const& repeated_block, Block const& following_block)
 {
     Vector<Vector<CompareTypeAndValuePair>> repeated_values;
-    HashTable<size_t> active_capture_groups;
     MatchState state;
     auto has_seen_actionable_opcode = false;
     for (state.instruction_position = repeated_block.start; state.instruction_position < repeated_block.end;) {
@@ -536,12 +535,6 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi
         case OpCodeId::Restore:
         case OpCodeId::GoBack:
             return AtomicRewritePreconditionResult::NotSatisfied;
-        case OpCodeId::SaveRightCaptureGroup:
-            active_capture_groups.set(static_cast<OpCode_SaveRightCaptureGroup const&>(opcode).id());
-            break;
-        case OpCodeId::SaveLeftCaptureGroup:
-            active_capture_groups.set(static_cast<OpCode_SaveLeftCaptureGroup const&>(opcode).id());
-            break;
         case OpCodeId::ForkJump:
         case OpCodeId::ForkReplaceJump:
         case OpCodeId::JumpNonEmpty:
@@ -556,7 +549,6 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi
         state.instruction_position += opcode.size();
     }
     dbgln_if(REGEX_DEBUG, "Found {} entries in reference", repeated_values.size());
-    dbgln_if(REGEX_DEBUG, "Found {} active capture groups", active_capture_groups.size());
 
     bool following_block_has_at_least_one_compare = false;
     // Find the first compare in the following block, it must NOT match any of the values in `repeated_values'.
@@ -565,13 +557,6 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi
         final_instruction = state.instruction_position;
         auto& opcode = bytecode.get_opcode(state);
         switch (opcode.opcode_id()) {
-        // Note: These have to exist since we're effectively repeating the following block as well
-        case OpCodeId::SaveRightCaptureGroup:
-            active_capture_groups.set(static_cast<OpCode_SaveRightCaptureGroup const&>(opcode).id());
-            break;
-        case OpCodeId::SaveLeftCaptureGroup:
-            active_capture_groups.set(static_cast<OpCode_SaveLeftCaptureGroup const&>(opcode).id());
-            break;
         case OpCodeId::Compare: {
             following_block_has_at_least_one_compare = true;
             // We found a compare, let's see what it has.
@@ -580,8 +565,7 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi
                 break;
 
             if (any_of(compares, [&](auto& compare) {
-                    return compare.type == CharacterCompareType::AnyChar
-                        || (compare.type == CharacterCompareType::Reference && active_capture_groups.contains(compare.value));
+                    return compare.type == CharacterCompareType::AnyChar || compare.type == CharacterCompareType::Reference;
                 }))
                 return AtomicRewritePreconditionResult::NotSatisfied;
 

+ 1 - 0
Tests/LibRegex/Regex.cpp

@@ -708,6 +708,7 @@ TEST_CASE(ECMA262_match)
         { "^\\?((&?category=[0-9]+)?(&?shippable=1)?(&?ad_type=demand)?(&?page=[0-9]+)?(&?locations=(r|d)_[0-9]+)?)+$"sv,
             "?category=54&shippable=1&baby_age=p,0,1,3"sv, false }, // ladybird#968, ?+ should not loop forever.
         { "([^\\s]+):\\s*([^;]+);"sv, "font-family: 'Inter';"sv, true }, // optimizer bug, blindly accepting inverted char classes [^x] as atomic rewrite opportunities.
+        { "(a)(?=a*\\1)"sv, "aaaa"sv, true, global_multiline.value() }, // Optimizer bug, ignoring references that weren't bound in the current or past block, ladybird#2281
     };
     // clang-format on