Browse source code

LibRegex: Make FailForks fail all forks up to the last save point

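Previously FailForks carried a hard-coded count (the lookaround bytecode
always emitted 2). It now takes no argument and instead fails every fork
created since the last Save, using a per-state counter that Save resets.
This makes negative lookarounds with more than one fork behave
correctly.
Fixes #11350.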
Ali Mohammad Pur, 3 years ago
parent
commit
1a35e27490

+ 11 - 0
Tests/LibRegex/Regex.cpp

@@ -973,3 +973,14 @@ TEST_CASE(posix_basic_dollar_is_literal)
         EXPECT_EQ(re.match("123abc$", PosixFlags::Global).success, true);
     }
 }
+
+TEST_CASE(negative_lookahead)
+{
+    {
+        // Negative lookahead with more than 2 forks difference between lookahead init and finish.
+        Regex<ECMA262> re(":(?!\\^\\)|1)", ECMAScriptFlags::Global);
+        EXPECT_EQ(re.match(":^)").success, false);
+        EXPECT_EQ(re.match(":1").success, false);
+        EXPECT_EQ(re.match(":foobar").success, true);
+    }
+}

+ 9 - 4
Userland/Libraries/LibRegex/RegexByteCode.cpp

@@ -139,6 +139,7 @@ static void reverse_string_position(MatchState& state, RegexStringView view, siz
 static void save_string_position(MatchInput const& input, MatchState const& state)
 {
     input.saved_positions.append(state.string_position);
+    input.saved_forks_since_last_save.append(state.forks_since_last_save);
     input.saved_code_unit_positions.append(state.string_position_in_code_units);
 }
 
@@ -149,6 +150,7 @@ static bool restore_string_position(MatchInput const& input, MatchState& state)
 
     state.string_position = input.saved_positions.take_last();
     state.string_position_in_code_units = input.saved_code_unit_positions.take_last();
+    state.forks_since_last_save = input.saved_forks_since_last_save.take_last();
     return true;
 }
 
@@ -207,6 +209,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Exit::execute(MatchInput const& input, Matc
 ALWAYS_INLINE ExecutionResult OpCode_Save::execute(MatchInput const& input, MatchState& state) const
 {
     save_string_position(input, state);
+    state.forks_since_last_save = 0;
     return ExecutionResult::Continue;
 }
 
@@ -226,11 +229,9 @@ ALWAYS_INLINE ExecutionResult OpCode_GoBack::execute(MatchInput const& input, Ma
     return ExecutionResult::Continue;
 }
 
-ALWAYS_INLINE ExecutionResult OpCode_FailForks::execute(MatchInput const& input, MatchState&) const
+ALWAYS_INLINE ExecutionResult OpCode_FailForks::execute(MatchInput const& input, MatchState& state) const
 {
-    VERIFY(count() > 0);
-
-    input.fail_counter += count() - 1;
+    input.fail_counter += state.forks_since_last_save;
     return ExecutionResult::Failed_ExecuteLowPrioForks;
 }
 
@@ -243,6 +244,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Jump::execute(MatchInput const&, MatchState
 ALWAYS_INLINE ExecutionResult OpCode_ForkJump::execute(MatchInput const&, MatchState& state) const
 {
     state.fork_at_position = state.instruction_position + size() + offset();
+    state.forks_since_last_save++;
     return ExecutionResult::Fork_PrioHigh;
 }
 
@@ -250,12 +252,14 @@ ALWAYS_INLINE ExecutionResult OpCode_ForkReplaceJump::execute(MatchInput const&
 {
     state.fork_at_position = state.instruction_position + size() + offset();
     input.fork_to_replace = state.instruction_position;
+    state.forks_since_last_save++;
     return ExecutionResult::Fork_PrioHigh;
 }
 
 ALWAYS_INLINE ExecutionResult OpCode_ForkStay::execute(MatchInput const&, MatchState& state) const
 {
     state.fork_at_position = state.instruction_position + size() + offset();
+    state.forks_since_last_save++;
     return ExecutionResult::Fork_PrioLow;
 }
 
@@ -263,6 +267,7 @@ ALWAYS_INLINE ExecutionResult OpCode_ForkReplaceStay::execute(MatchInput const&
 {
     state.fork_at_position = state.instruction_position + size() + offset();
     input.fork_to_replace = state.instruction_position;
+    state.forks_since_last_save++;
     return ExecutionResult::Fork_PrioLow;
 }
 

+ 8 - 11
Userland/Libraries/LibRegex/RegexByteCode.h

@@ -267,20 +267,19 @@ public:
             // JUMP _A
             // LABEL _L
             // REGEXP BODY
-            // FAIL 2
+            // FAIL
             // LABEL _A
             // SAVE
             // FORKJUMP _L
             // RESTORE
             auto body_length = lookaround_body.size();
             empend((ByteCodeValueType)OpCodeId::Jump);
-            empend((ByteCodeValueType)body_length + 2); // JUMP to label _A
+            empend((ByteCodeValueType)body_length + 1); // JUMP to label _A
             extend(move(lookaround_body));
             empend((ByteCodeValueType)OpCodeId::FailForks);
-            empend((ByteCodeValueType)2); // Fail two forks
             empend((ByteCodeValueType)OpCodeId::Save);
             empend((ByteCodeValueType)OpCodeId::ForkJump);
-            empend((ByteCodeValueType) - (body_length + 5)); // JUMP to label _L
+            empend((ByteCodeValueType) - (body_length + 4)); // JUMP to label _L
             empend((ByteCodeValueType)OpCodeId::Restore);
             return;
         }
@@ -300,22 +299,21 @@ public:
             // LABEL _L
             // GOBACK match_length(BODY)
             // REGEXP BODY
-            // FAIL 2
+            // FAIL
             // LABEL _A
             // SAVE
             // FORKJUMP _L
             // RESTORE
             auto body_length = lookaround_body.size();
             empend((ByteCodeValueType)OpCodeId::Jump);
-            empend((ByteCodeValueType)body_length + 4); // JUMP to label _A
+            empend((ByteCodeValueType)body_length + 3); // JUMP to label _A
             empend((ByteCodeValueType)OpCodeId::GoBack);
             empend((ByteCodeValueType)match_length);
             extend(move(lookaround_body));
             empend((ByteCodeValueType)OpCodeId::FailForks);
-            empend((ByteCodeValueType)2); // Fail two forks
             empend((ByteCodeValueType)OpCodeId::Save);
             empend((ByteCodeValueType)OpCodeId::ForkJump);
-            empend((ByteCodeValueType) - (body_length + 7)); // JUMP to label _L
+            empend((ByteCodeValueType) - (body_length + 6)); // JUMP to label _L
             empend((ByteCodeValueType)OpCodeId::Restore);
             return;
         }
@@ -593,9 +591,8 @@ class OpCode_FailForks final : public OpCode {
 public:
     ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
     ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::FailForks; }
-    ALWAYS_INLINE size_t size() const override { return 2; }
-    ALWAYS_INLINE size_t count() const { return argument(0); }
-    String arguments_string() const override { return String::formatted("count={}", count()); }
+    ALWAYS_INLINE size_t size() const override { return 1; }
+    String arguments_string() const override { return String::empty(); }
 };
 
 class OpCode_Save final : public OpCode {

+ 2 - 0
Userland/Libraries/LibRegex/RegexMatch.h

@@ -514,6 +514,7 @@ struct MatchInput {
     mutable size_t fail_counter { 0 };
     mutable Vector<size_t> saved_positions;
     mutable Vector<size_t> saved_code_unit_positions;
+    mutable Vector<size_t> saved_forks_since_last_save;
     mutable HashMap<u64, u64> checkpoints;
     mutable Optional<size_t> fork_to_replace;
 };
@@ -524,6 +525,7 @@ struct MatchState {
     size_t string_position_in_code_units { 0 };
     size_t instruction_position { 0 };
     size_t fork_at_position { 0 };
+    size_t forks_since_last_save { 0 };
     Optional<size_t> initiating_fork;
     Vector<Match> matches;
     Vector<Vector<Match>> capture_group_matches;