Преглед на файлове

LibRegex: Prevent patterns from matching the empty string twice

Previously, if a pattern could match the empty string (e.g. ".*"),
matching it against an empty input produced two empty matches instead of
one. Among other issues, this caused a regex replacement to duplicate its
expected output, since it would replace "both" empty matches.
Eli Youngs преди 2 години
родител
ревизия
87a961534f
променени са 2 файла, в които са добавени 17 реда и са изтрити 0 реда
  1. 12 0
      Tests/LibRegex/Regex.cpp
  2. 5 0
      Userland/Libraries/LibRegex/RegexMatcher.cpp

+ 12 - 0
Tests/LibRegex/Regex.cpp

@@ -1089,6 +1089,18 @@ TEST_CASE(single_match_flag)
     }
 }
 
+TEST_CASE(empty_string_wildcard_match)
+{
+    {
+        // Ensure that the wildcard ".*" matches the empty string exactly once
+        Regex<ECMA262> re(".*"sv, ECMAScriptFlags::Global);
+        auto result = re.match(""sv);
+        EXPECT_EQ(result.success, true);
+        EXPECT_EQ(result.matches.size(), 1u);
+        EXPECT_EQ(result.matches.first().view.to_deprecated_string(), ""sv);
+    }
+}
+
 TEST_CASE(inversion_state_in_char_class)
 {
     {

+ 5 - 0
Userland/Libraries/LibRegex/RegexMatcher.cpp

@@ -222,6 +222,11 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
                     // Nothing was *actually* matched, so append an empty match.
                     append_match(input, state, view_index);
                     ++match_count;
+
+                    // This prevents a regex pattern like ".*" from matching the empty string
+                    // multiple times, once in this block and once in the following for loop.
+                    if (view_index == 0 && view_length == 0)
+                        ++view_index;
                 }
             }
         }