Browse Source

LibRegex: Consider the inverse=true case when finding pattern overlap

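Previously we only checked for overlap when the range was not in
inverse mode, which made us miss overlaps involving negated classes such
as /[^x]+y/ (where [^x] also matches 'y'); this patch takes the inversion
state into account so that such overlaps are detected.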
Ali Mohammad Pur 2 years ago
parent
commit
af441bb939
2 changed files with 5 additions and 3 deletions
  1. 2 0
      Tests/LibRegex/Regex.cpp
  2. 3 3
      Userland/Libraries/LibRegex/RegexOptimizer.cpp

+ 2 - 0
Tests/LibRegex/Regex.cpp

@@ -984,6 +984,8 @@ TEST_CASE(optimizer_atomic_groups)
         Tuple { "(1+)0"sv, "10"sv, true },
         Tuple { "(1+)0"sv, "10"sv, true },
         // Rewrite should not skip over first required iteration of <x>+.
         // Rewrite should not skip over first required iteration of <x>+.
         Tuple { "a+"sv, ""sv, false },
         Tuple { "a+"sv, ""sv, false },
+        // 'y' and [^x] have an overlap ('y'), the loop should not be rewritten here.
+        Tuple { "[^x]+y"sv, "ay"sv, true },
     };
     };
 
 
     for (auto& test : tests) {
     for (auto& test : tests) {

+ 3 - 3
Userland/Libraries/LibRegex/RegexOptimizer.cpp

@@ -251,7 +251,7 @@ static bool has_overlap(Vector<CompareTypeAndValuePair> const& lhs, Vector<Compa
                 return true;
                 return true;
             break;
             break;
         case CharacterCompareType::Char:
         case CharacterCompareType::Char:
-            if (!current_lhs_inversion_state() && range_contains(pair.value))
+            if (current_lhs_inversion_state() ^ range_contains(pair.value))
                 return true;
                 return true;
             break;
             break;
         case CharacterCompareType::String:
         case CharacterCompareType::String:
@@ -259,12 +259,12 @@ static bool has_overlap(Vector<CompareTypeAndValuePair> const& lhs, Vector<Compa
             //        Just bail out to avoid false positives.
             //        Just bail out to avoid false positives.
             return true;
             return true;
         case CharacterCompareType::CharClass:
         case CharacterCompareType::CharClass:
-            if (!current_lhs_inversion_state() && char_class_contains(static_cast<CharClass>(pair.value)))
+            if (current_lhs_inversion_state() ^ char_class_contains(static_cast<CharClass>(pair.value)))
                 return true;
                 return true;
             break;
             break;
         case CharacterCompareType::CharRange: {
         case CharacterCompareType::CharRange: {
             auto range = CharRange(pair.value);
             auto range = CharRange(pair.value);
-            if (!current_lhs_inversion_state() && range_contains(range))
+            if (current_lhs_inversion_state() ^ range_contains(range))
                 return true;
                 return true;
             break;
             break;
         }
         }