瀏覽代碼

LibRegex: Don't blindly accept inverted charclasses for atomic rewrite

Ali Mohammad Pur 9 月之前
父節點
當前提交
00c45243bd
共有 2 個文件被更改，包括 19 次插入和 3 次删除
  1. 1 0
      Tests/LibRegex/Regex.cpp
  2. 18 3
      Userland/Libraries/LibRegex/RegexOptimizer.cpp

+ 1 - 0
Tests/LibRegex/Regex.cpp

@@ -707,6 +707,7 @@ TEST_CASE(ECMA262_match)
         { "^(.*?):[ \\t]*([^\\r\\n]*)$"sv, "content-length: 488\r\ncontent-type: application/json; charset=utf-8\r\n"sv, true, global_multiline.value() },
         { "^\\?((&?category=[0-9]+)?(&?shippable=1)?(&?ad_type=demand)?(&?page=[0-9]+)?(&?locations=(r|d)_[0-9]+)?)+$"sv,
             "?category=54&shippable=1&baby_age=p,0,1,3"sv, false }, // ladybird#968, ?+ should not loop forever.
+        { "([^\\s]+):\\s*([^;]+);"sv, "font-family: 'Inter';"sv, true }, // optimizer bug, blindly accepting inverted char classes [^x] as atomic rewrite opportunities.
     };
     // clang-format on
 

+ 18 - 3
Userland/Libraries/LibRegex/RegexOptimizer.cpp

@@ -198,8 +198,19 @@ static bool has_overlap(Vector<CompareTypeAndValuePair> const& lhs, Vector<Compa
         if (lhs_negated_char_classes.contains(value))
             return false;
 
-        // This char class might match something in the ranges we have, and checking that is far too expensive, so just bail out.
-        return true;
+        if (lhs_ranges.is_empty())
+            return false;
+
+        for (auto it = lhs_ranges.begin(); it != lhs_ranges.end(); ++it) {
+            auto start = it.key();
+            auto end = *it;
+            for (u32 ch = start; ch <= end; ++ch) {
+                if (OpCode_Compare::matches_character_class(value, ch, false))
+                    return true;
+            }
+        }
+
+        return false;
     };
 
     for (auto const& pair : lhs) {
@@ -302,6 +313,10 @@ static bool has_overlap(Vector<CompareTypeAndValuePair> const& lhs, Vector<Compa
             dbgln("  {}..{}", it.key(), *it);
     }
 
+    temporary_inverse = false;
+    reset_temporary_inverse = false;
+    inverse = false;
+
     for (auto const& pair : rhs) {
         if (reset_temporary_inverse) {
             reset_temporary_inverse = false;
@@ -310,7 +325,7 @@ static bool has_overlap(Vector<CompareTypeAndValuePair> const& lhs, Vector<Compa
             reset_temporary_inverse = true;
         }
 
-        dbgln_if(REGEX_DEBUG, "check {} ({})...", character_compare_type_name(pair.type), pair.value);
+        dbgln_if(REGEX_DEBUG, "check {} ({}) [inverted? {}]...", character_compare_type_name(pair.type), pair.value, current_lhs_inversion_state());
 
         switch (pair.type) {
         case CharacterCompareType::Inverse: