LibRegex: Don't ignore references that weren't bound in checked blocks

Fixes #2281.
This commit is contained in:
Ali Mohammad Pur 2024-11-12 09:18:38 +01:00 committed by Jelle Raaijmakers
parent 55b19c3177
commit dabd60180f
Notes: github-actions[bot] 2024-11-12 09:38:57 +00:00
2 changed files with 2 additions and 17 deletions

View file

@ -510,7 +510,6 @@ enum class AtomicRewritePreconditionResult {
static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode, Block const& repeated_block, Block const& following_block) static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode, Block const& repeated_block, Block const& following_block)
{ {
Vector<Vector<CompareTypeAndValuePair>> repeated_values; Vector<Vector<CompareTypeAndValuePair>> repeated_values;
HashTable<size_t> active_capture_groups;
MatchState state; MatchState state;
auto has_seen_actionable_opcode = false; auto has_seen_actionable_opcode = false;
for (state.instruction_position = repeated_block.start; state.instruction_position < repeated_block.end;) { for (state.instruction_position = repeated_block.start; state.instruction_position < repeated_block.end;) {
@ -536,12 +535,6 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi
case OpCodeId::Restore: case OpCodeId::Restore:
case OpCodeId::GoBack: case OpCodeId::GoBack:
return AtomicRewritePreconditionResult::NotSatisfied; return AtomicRewritePreconditionResult::NotSatisfied;
case OpCodeId::SaveRightCaptureGroup:
active_capture_groups.set(static_cast<OpCode_SaveRightCaptureGroup const&>(opcode).id());
break;
case OpCodeId::SaveLeftCaptureGroup:
active_capture_groups.set(static_cast<OpCode_SaveLeftCaptureGroup const&>(opcode).id());
break;
case OpCodeId::ForkJump: case OpCodeId::ForkJump:
case OpCodeId::ForkReplaceJump: case OpCodeId::ForkReplaceJump:
case OpCodeId::JumpNonEmpty: case OpCodeId::JumpNonEmpty:
@ -556,7 +549,6 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi
state.instruction_position += opcode.size(); state.instruction_position += opcode.size();
} }
dbgln_if(REGEX_DEBUG, "Found {} entries in reference", repeated_values.size()); dbgln_if(REGEX_DEBUG, "Found {} entries in reference", repeated_values.size());
dbgln_if(REGEX_DEBUG, "Found {} active capture groups", active_capture_groups.size());
bool following_block_has_at_least_one_compare = false; bool following_block_has_at_least_one_compare = false;
// Find the first compare in the following block, it must NOT match any of the values in `repeated_values'. // Find the first compare in the following block, it must NOT match any of the values in `repeated_values'.
@ -565,13 +557,6 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi
final_instruction = state.instruction_position; final_instruction = state.instruction_position;
auto& opcode = bytecode.get_opcode(state); auto& opcode = bytecode.get_opcode(state);
switch (opcode.opcode_id()) { switch (opcode.opcode_id()) {
// Note: These have to exist since we're effectively repeating the following block as well
case OpCodeId::SaveRightCaptureGroup:
active_capture_groups.set(static_cast<OpCode_SaveRightCaptureGroup const&>(opcode).id());
break;
case OpCodeId::SaveLeftCaptureGroup:
active_capture_groups.set(static_cast<OpCode_SaveLeftCaptureGroup const&>(opcode).id());
break;
case OpCodeId::Compare: { case OpCodeId::Compare: {
following_block_has_at_least_one_compare = true; following_block_has_at_least_one_compare = true;
// We found a compare, let's see what it has. // We found a compare, let's see what it has.
@ -580,8 +565,7 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi
break; break;
if (any_of(compares, [&](auto& compare) { if (any_of(compares, [&](auto& compare) {
return compare.type == CharacterCompareType::AnyChar return compare.type == CharacterCompareType::AnyChar || compare.type == CharacterCompareType::Reference;
|| (compare.type == CharacterCompareType::Reference && active_capture_groups.contains(compare.value));
})) }))
return AtomicRewritePreconditionResult::NotSatisfied; return AtomicRewritePreconditionResult::NotSatisfied;

View file

@ -708,6 +708,7 @@ TEST_CASE(ECMA262_match)
{ "^\\?((&?category=[0-9]+)?(&?shippable=1)?(&?ad_type=demand)?(&?page=[0-9]+)?(&?locations=(r|d)_[0-9]+)?)+$"sv, { "^\\?((&?category=[0-9]+)?(&?shippable=1)?(&?ad_type=demand)?(&?page=[0-9]+)?(&?locations=(r|d)_[0-9]+)?)+$"sv,
"?category=54&shippable=1&baby_age=p,0,1,3"sv, false }, // ladybird#968, ?+ should not loop forever. "?category=54&shippable=1&baby_age=p,0,1,3"sv, false }, // ladybird#968, ?+ should not loop forever.
{ "([^\\s]+):\\s*([^;]+);"sv, "font-family: 'Inter';"sv, true }, // optimizer bug, blindly accepting inverted char classes [^x] as atomic rewrite opportunities. { "([^\\s]+):\\s*([^;]+);"sv, "font-family: 'Inter';"sv, true }, // optimizer bug, blindly accepting inverted char classes [^x] as atomic rewrite opportunities.
{ "(a)(?=a*\\1)"sv, "aaaa"sv, true, global_multiline.value() }, // Optimizer bug, ignoring references that weren't bound in the current or past block, ladybird#2281
}; };
// clang-format on // clang-format on