LibRegex: Only skip full instructions when optimizing alternations

Skipping half of an instruction would leave the remaining bytecode starting
in the middle of that instruction, so make sure to skip only full instructions.
This commit is contained in:
Ali Mohammad Pur 2022-02-09 23:43:09 +03:30 committed by Linus Groh
parent f0d2489254
commit 6a4c8a66ae
Notes: sideshowbarker 2024-07-17 19:05:25 +09:00
2 changed files with 8 additions and 1 deletions

View file

@ -923,6 +923,8 @@ TEST_CASE(optimizer_atomic_groups)
Tuple { "(a|)"sv, ""sv, true }, // Ensure that empty alternatives are not outright removed Tuple { "(a|)"sv, ""sv, true }, // Ensure that empty alternatives are not outright removed
Tuple { "a{2,3}|a{5,8}"sv, "abc"sv, false }, // Optimizer should not mess up the instruction stream by ignoring inter-insn dependencies, see #11247. Tuple { "a{2,3}|a{5,8}"sv, "abc"sv, false }, // Optimizer should not mess up the instruction stream by ignoring inter-insn dependencies, see #11247.
Tuple { "^(a{2,3}|a{5,8})$"sv, "aaaa"sv, false }, // Optimizer should not mess up the instruction stream by ignoring inter-insn dependencies, see #11247. Tuple { "^(a{2,3}|a{5,8})$"sv, "aaaa"sv, false }, // Optimizer should not mess up the instruction stream by ignoring inter-insn dependencies, see #11247.
// Optimizer should not chop off *half* of an instruction when fusing instructions.
Tuple { "cubic-bezier\\(\\s*(-?\\d+\\.?\\d*|-?\\.\\d+)\\s*,\\s*(-?\\d+\\.?\\d*|-?\\.\\d+)\\s*,\\s*(-?\\d+\\.?\\d*|-?\\.\\d+)\\s*,\\s*(-?\\d+\\.?\\d*|-?\\.\\d+)\\s*\\)"sv, "cubic-bezier(.05, 0, 0, 1)"sv, true },
// ForkReplace shouldn't be applied where it would change the semantics // ForkReplace shouldn't be applied where it would change the semantics
Tuple { "(1+)\\1"sv, "11"sv, true }, Tuple { "(1+)\\1"sv, "11"sv, true },
Tuple { "(1+)1"sv, "11"sv, true }, Tuple { "(1+)1"sv, "11"sv, true },

View file

@ -484,7 +484,12 @@ void Optimizer::append_alternation(ByteCode& target, ByteCode&& left, ByteCode&&
if (left.spans().slice(left_block.start, left_end - left_block.start) != right.spans().slice(right_block.start, right_end - right_block.start)) if (left.spans().slice(left_block.start, left_end - left_block.start) != right.spans().slice(right_block.start, right_end - right_block.start))
break; break;
left_skip = left_end; state.instruction_position = 0;
while (state.instruction_position < left_end) {
auto& opcode = left.get_opcode(state);
left_skip = state.instruction_position;
state.instruction_position += opcode.size();
}
} }
dbgln_if(REGEX_DEBUG, "Skipping {}/{} bytecode entries from {}/{}", left_skip, 0, left.size(), right.size()); dbgln_if(REGEX_DEBUG, "Skipping {}/{} bytecode entries from {}/{}", left_skip, 0, left.size(), right.size());