소스 검색

LibRegex: Do not treat repeats followed by fallthroughs as atomic

Ali Mohammad Pur 2년 전
부모
커밋
b1ca2e5e39
2개의 변경된 파일: 18줄 추가, 1줄 삭제
  1. 3 0
      Tests/LibRegex/Regex.cpp
  2. 15 1
      Userland/Libraries/LibRegex/RegexOptimizer.cpp

+ 3 - 0
Tests/LibRegex/Regex.cpp

@@ -995,6 +995,9 @@ TEST_CASE(optimizer_atomic_groups)
         Tuple { "[^x]+y"sv, "ay"sv, true },
         // .+ should not be rewritten here, as it's followed by something that would be matched by `.`.
         Tuple { ".+(a|b|c)"sv, "xxa"sv, true },
+        // (b+)(b+) produces an intermediate block with no matching ops, the optimiser should ignore that block when looking for following matches and correctly detect the overlap between (b+) and (b+).
+        // note that the second loop may be rewritten to a ForkReplace, but the first loop should not be rewritten.
+        Tuple { "(b+)(b+)"sv, "bbb"sv, true },
     };
 
     for (auto& test : tests) {

+ 15 - 1
Userland/Libraries/LibRegex/RegexOptimizer.cpp

@@ -351,7 +351,9 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi
 
     bool following_block_has_at_least_one_compare = false;
     // Find the first compare in the following block, it must NOT match any of the values in `repeated_values'.
+    auto final_instruction = following_block.start;
     for (state.instruction_position = following_block.start; state.instruction_position < following_block.end;) {
+        final_instruction = state.instruction_position;
         auto& opcode = bytecode.get_opcode(state);
         switch (opcode.opcode_id()) {
         // Note: These have to exist since we're effectively repeating the following block as well
@@ -399,6 +401,18 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi
         state.instruction_position += opcode.size();
     }
 
+    // If the following block falls through, we can't rewrite it.
+    state.instruction_position = final_instruction;
+    switch (bytecode.get_opcode(state).opcode_id()) {
+    case OpCodeId::Jump:
+    case OpCodeId::JumpNonEmpty:
+    case OpCodeId::ForkJump:
+    case OpCodeId::ForkReplaceJump:
+        break;
+    default:
+        return AtomicRewritePreconditionResult::NotSatisfied;
+    }
+
     if (following_block_has_at_least_one_compare)
         return AtomicRewritePreconditionResult::SatisfiedWithProperHeader;
     return AtomicRewritePreconditionResult::SatisfiedWithEmptyHeader;
@@ -717,7 +731,7 @@ void Optimizer::append_alternation(ByteCode& target, Span<ByteCode> alternatives
 
         size_t i = 0;
         for (auto& entry : alternatives) {
-            auto& blocks = basic_blocks[i];
+            auto& blocks = basic_blocks[i++];
             auto& block = blocks[block_index];
             auto end = block_index + 1 == blocks.size() ? block.end : blocks[block_index + 1].start;
             state.instruction_position = block.start;