mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-22 07:30:19 +00:00
LibRegex: Avoid rewriting a+
as a*
as part of atomic rewriting
The initial `ForkStay` is only needed if the looping block has a following block, if there's no following block or the following block does not attempt to match anything, we should not insert the ForkStay, otherwise we would be rewriting `a+` as `a*` by allowing the 'end' to be executed. Fixes #10952.
This commit is contained in:
parent
f10036b7c5
commit
387df06385
Notes:
sideshowbarker
2024-07-18 01:01:24 +09:00
Author: https://github.com/alimpfard Commit: https://github.com/SerenityOS/serenity/commit/387df06385f Pull-request: https://github.com/SerenityOS/serenity/pull/10953 Issue: https://github.com/SerenityOS/serenity/issues/10952
2 changed files with 34 additions and 17 deletions
|
@ -895,6 +895,7 @@ TEST_CASE(optimizer_atomic_groups)
|
|||
// Fork -> ForkReplace
|
||||
Tuple { "a*b"sv, "aaaaa"sv, false },
|
||||
Tuple { "a+b"sv, "aaaaa"sv, false },
|
||||
Tuple { "\\\\(\\d+)"sv, "\\\\"sv, false }, // Rewrite bug turning a+ to a*, see #10952.
|
||||
// Alternative fuse
|
||||
Tuple { "(abcfoo|abcbar|abcbaz).*x"sv, "abcbarx"sv, true },
|
||||
Tuple { "(a|a)"sv, "a"sv, true },
|
||||
|
|
|
@ -103,7 +103,12 @@ typename Regex<Parser>::BasicBlockList Regex<Parser>::split_basic_blocks()
|
|||
return block_boundaries;
|
||||
}
|
||||
|
||||
static bool block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode, Block const& repeated_block, Block const& following_block)
|
||||
enum class AtomicRewritePreconditionResult {
|
||||
SatisfiedWithProperHeader,
|
||||
SatisfiedWithEmptyHeader,
|
||||
NotSatisfied,
|
||||
};
|
||||
static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode, Block const& repeated_block, Block const& following_block)
|
||||
{
|
||||
Vector<Vector<CompareTypeAndValuePair>> repeated_values;
|
||||
HashTable<size_t> active_capture_groups;
|
||||
|
@ -114,21 +119,21 @@ static bool block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode
|
|||
case OpCodeId::Compare: {
|
||||
auto compares = static_cast<OpCode_Compare const&>(opcode).flat_compares();
|
||||
if (repeated_values.is_empty() && any_of(compares, [](auto& compare) { return compare.type == CharacterCompareType::AnyChar; }))
|
||||
return false;
|
||||
return AtomicRewritePreconditionResult::NotSatisfied;
|
||||
repeated_values.append(move(compares));
|
||||
break;
|
||||
}
|
||||
case OpCodeId::CheckBegin:
|
||||
case OpCodeId::CheckEnd:
|
||||
if (repeated_values.is_empty())
|
||||
return true;
|
||||
return AtomicRewritePreconditionResult::SatisfiedWithProperHeader;
|
||||
break;
|
||||
case OpCodeId::CheckBoundary:
|
||||
// FIXME: What should we do with these? for now, let's fail.
|
||||
return false;
|
||||
return AtomicRewritePreconditionResult::NotSatisfied;
|
||||
case OpCodeId::Restore:
|
||||
case OpCodeId::GoBack:
|
||||
return false;
|
||||
return AtomicRewritePreconditionResult::NotSatisfied;
|
||||
case OpCodeId::SaveRightCaptureGroup:
|
||||
active_capture_groups.set(static_cast<OpCode_SaveRightCaptureGroup const&>(opcode).id());
|
||||
break;
|
||||
|
@ -144,6 +149,7 @@ static bool block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode
|
|||
dbgln_if(REGEX_DEBUG, "Found {} entries in reference", repeated_values.size());
|
||||
dbgln_if(REGEX_DEBUG, "Found {} active capture groups", active_capture_groups.size());
|
||||
|
||||
bool following_block_has_at_least_one_compare = false;
|
||||
// Find the first compare in the following block, it must NOT match any of the values in `repeated_values'.
|
||||
for (state.instruction_position = following_block.start; state.instruction_position < following_block.end;) {
|
||||
auto& opcode = bytecode.get_opcode(state);
|
||||
|
@ -156,6 +162,7 @@ static bool block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode
|
|||
active_capture_groups.set(static_cast<OpCode_SaveLeftCaptureGroup const&>(opcode).id());
|
||||
break;
|
||||
case OpCodeId::Compare: {
|
||||
following_block_has_at_least_one_compare = true;
|
||||
// We found a compare, let's see what it has.
|
||||
auto compares = static_cast<OpCode_Compare const&>(opcode).flat_compares();
|
||||
if (compares.is_empty())
|
||||
|
@ -165,27 +172,27 @@ static bool block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode
|
|||
return compare.type == CharacterCompareType::AnyChar
|
||||
|| (compare.type == CharacterCompareType::Reference && active_capture_groups.contains(compare.value));
|
||||
}))
|
||||
return false;
|
||||
return AtomicRewritePreconditionResult::NotSatisfied;
|
||||
|
||||
for (auto& repeated_value : repeated_values) {
|
||||
// FIXME: This is too naive!
|
||||
if (any_of(repeated_value, [](auto& compare) { return compare.type == CharacterCompareType::AnyChar; }))
|
||||
return false;
|
||||
return AtomicRewritePreconditionResult::NotSatisfied;
|
||||
|
||||
for (auto& repeated_compare : repeated_value) {
|
||||
// FIXME: This is too naive! it will miss _tons_ of cases since it doesn't check ranges!
|
||||
if (any_of(compares, [&](auto& compare) { return compare.type == repeated_compare.type && compare.value == repeated_compare.value; }))
|
||||
return false;
|
||||
return AtomicRewritePreconditionResult::NotSatisfied;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
return AtomicRewritePreconditionResult::SatisfiedWithProperHeader;
|
||||
}
|
||||
case OpCodeId::CheckBegin:
|
||||
case OpCodeId::CheckEnd:
|
||||
return true; // Nothing can match the end!
|
||||
return AtomicRewritePreconditionResult::SatisfiedWithProperHeader; // Nothing can match the end!
|
||||
case OpCodeId::CheckBoundary:
|
||||
// FIXME: What should we do with these? For now, consider them a failure.
|
||||
return false;
|
||||
return AtomicRewritePreconditionResult::NotSatisfied;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -193,7 +200,9 @@ static bool block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode
|
|||
state.instruction_position += opcode.size();
|
||||
}
|
||||
|
||||
return true;
|
||||
if (following_block_has_at_least_one_compare)
|
||||
return AtomicRewritePreconditionResult::SatisfiedWithProperHeader;
|
||||
return AtomicRewritePreconditionResult::SatisfiedWithEmptyHeader;
|
||||
}
|
||||
|
||||
template<typename Parser>
|
||||
|
@ -213,7 +222,7 @@ void Regex<Parser>::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&
|
|||
// -------------------------
|
||||
// bb1 | RE1
|
||||
// can be rewritten as:
|
||||
// loop.hdr | ForkStay bb1
|
||||
// loop.hdr | ForkStay bb1 (if RE1 matches _something_, empty otherwise)
|
||||
// -------------------------
|
||||
// bb0 | RE0
|
||||
// | ForkReplaceX bb0
|
||||
|
@ -240,8 +249,9 @@ void Regex<Parser>::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&
|
|||
// bb2 | RE1
|
||||
|
||||
enum class AlternateForm {
|
||||
DirectLoopWithoutHeader, // loop without proper header, a block forking to itself. i.e. the first form.
|
||||
DirectLoopWithHeader, // loop with proper header, i.e. the second form.
|
||||
DirectLoopWithoutHeader, // loop without proper header, a block forking to itself. i.e. the first form.
|
||||
DirectLoopWithoutHeaderAndEmptyFollow, // loop without proper header, a block forking to itself. i.e. the first form but with RE1 being empty.
|
||||
DirectLoopWithHeader, // loop with proper header, i.e. the second form.
|
||||
};
|
||||
struct CandidateBlock {
|
||||
Block forking_block;
|
||||
|
@ -298,10 +308,15 @@ void Regex<Parser>::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&
|
|||
break;
|
||||
}
|
||||
|
||||
if (block_satisfies_atomic_rewrite_precondition(bytecode, forking_block, *fork_fallback_block)) {
|
||||
auto precondition = block_satisfies_atomic_rewrite_precondition(bytecode, forking_block, *fork_fallback_block);
|
||||
if (precondition == AtomicRewritePreconditionResult::SatisfiedWithProperHeader) {
|
||||
candidate_blocks.append({ forking_block, fork_fallback_block, AlternateForm::DirectLoopWithoutHeader });
|
||||
break;
|
||||
}
|
||||
if (precondition == AtomicRewritePreconditionResult::SatisfiedWithEmptyHeader) {
|
||||
candidate_blocks.append({ forking_block, fork_fallback_block, AlternateForm::DirectLoopWithoutHeaderAndEmptyFollow });
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Check if the last instruction in the last block is a direct jump to this block
|
||||
|
@ -316,7 +331,8 @@ void Regex<Parser>::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&
|
|||
Optional<Block> block_following_fork_fallback;
|
||||
if (i + 2 < basic_blocks.size())
|
||||
block_following_fork_fallback = basic_blocks[i + 2];
|
||||
if (!block_following_fork_fallback.has_value() || block_satisfies_atomic_rewrite_precondition(bytecode, *fork_fallback_block, *block_following_fork_fallback)) {
|
||||
if (!block_following_fork_fallback.has_value()
|
||||
|| block_satisfies_atomic_rewrite_precondition(bytecode, *fork_fallback_block, *block_following_fork_fallback) != AtomicRewritePreconditionResult::NotSatisfied) {
|
||||
candidate_blocks.append({ forking_block, {}, AlternateForm::DirectLoopWithHeader });
|
||||
break;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue