LibRegex: Don't ignore empty alternatives in append_alternation()

Ignoring them would cause patterns like `(a|)` to fail to match the empty string.
This commit is contained in:
Ali Mohammad Pur 2021-10-29 16:21:13 +03:30 committed by Andreas Kling
parent 7ee409ef98
commit ac856cb965
Notes: sideshowbarker 2024-07-18 01:48:07 +09:00
2 changed files with 17 additions and 6 deletions

View file

@@ -898,6 +898,7 @@ TEST_CASE(optimizer_atomic_groups)
// Alternative fuse
Tuple { "(abcfoo|abcbar|abcbaz).*x"sv, "abcbarx"sv, true },
Tuple { "(a|a)"sv, "a"sv, true },
Tuple { "(a|)"sv, ""sv, true }, // Ensure that empty alternatives are not outright removed
// ForkReplace shouldn't be applied where it would change the semantics
Tuple { "(1+)\\1"sv, "11"sv, true },
Tuple { "(1+)1"sv, "11"sv, true },

View file

@@ -436,13 +436,23 @@ void Regex<Parser>::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&
void Optimizer::append_alternation(ByteCode& target, ByteCode&& left, ByteCode&& right)
{
if (left.is_empty()) {
target.extend(right);
return;
}
auto left_is_empty = left.is_empty();
auto right_is_empty = right.is_empty();
if (left_is_empty || right_is_empty) {
if (left_is_empty && right_is_empty)
return;
if (right.is_empty()) {
target.extend(left);
// ForkJump right (+ left.size() + 2 + right.size())
// (left)
// Jump end (+ right.size())
// (right)
// LABEL end
target.append(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
target.append(left.size() + 2 + right.size());
target.extend(move(left));
target.append(static_cast<ByteCodeValueType>(OpCodeId::Jump));
target.append(right.size());
target.extend(move(right));
return;
}