Переглянути джерело

LibRegex: Treat inverted Compare entries as disjunctions

[^XYZ] is not(X | Y | Z); we used to translate this to
not(X) | not(Y) | not(Z). This commit makes LibRegex interpret this
pattern as not(X) & not(Y) & not(Z).
Ali Mohammad Pur 3 роки тому
батько
коміт
d348eaf305
2 змінених файлів з 33 додано та 13 видалено
  1. 18 6
      Tests/LibRegex/Regex.cpp
  2. 15 7
      Userland/Libraries/LibRegex/RegexByteCode.cpp

+ 18 - 6
Tests/LibRegex/Regex.cpp

@@ -1040,11 +1040,23 @@ TEST_CASE(single_match_flag)
 
 TEST_CASE(inversion_state_in_char_class)
 {
-    // #13755, /[\S\s]/.exec("hello") should be [ "h" ], not null.
-    Regex<ECMA262> re("[\\S\\s]", ECMAScriptFlags::Global | (ECMAScriptFlags)regex::AllFlags::SingleMatch);
+    {
+        // #13755, /[\S\s]/.exec("hello") should be [ "h" ], not null.
+        Regex<ECMA262> re("[\\S\\s]", ECMAScriptFlags::Global | (ECMAScriptFlags)regex::AllFlags::SingleMatch);
 
-    auto result = re.match("hello");
-    EXPECT_EQ(result.success, true);
-    EXPECT_EQ(result.matches.size(), 1u);
-    EXPECT_EQ(result.matches.first().view.to_string(), "h"sv);
+        auto result = re.match("hello");
+        EXPECT_EQ(result.success, true);
+        EXPECT_EQ(result.matches.size(), 1u);
+        EXPECT_EQ(result.matches.first().view.to_string(), "h"sv);
+    }
+    {
+        Regex<ECMA262> re("^(?:([^\\s!\"#%-,\\./;->@\\[-\\^`\\{-~]+(?=([=~}\\s/.)|]))))"sv, ECMAScriptFlags::Global);
+
+        auto result = re.match("slideNumbers}}"sv);
+        EXPECT_EQ(result.success, true);
+        EXPECT_EQ(result.matches.size(), 1u);
+        EXPECT_EQ(result.matches.first().view.to_string(), "slideNumbers"sv);
+        EXPECT_EQ(result.capture_group_matches.first()[0].view.to_string(), "slideNumbers"sv);
+        EXPECT_EQ(result.capture_group_matches.first()[1].view.to_string(), "}"sv);
+    }
 }

+ 15 - 7
Userland/Libraries/LibRegex/RegexByteCode.cpp

@@ -486,8 +486,12 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
                 return ExecutionResult::Failed_ExecuteLowPrioForks;
 
             auto input_view = input.view.substring_view(state.string_position, 1)[0];
-            if (input_view != '\n' || (input.regex_options.has_flag_set(AllFlags::SingleLine) && input.regex_options.has_flag_set(AllFlags::Internal_ConsiderNewline)))
-                advance_string_position(state, input.view, input_view);
+            if (input_view != '\n' || (input.regex_options.has_flag_set(AllFlags::SingleLine) && input.regex_options.has_flag_set(AllFlags::Internal_ConsiderNewline))) {
+                if (current_inversion_state())
+                    inverse_matched = true;
+                else
+                    advance_string_position(state, input.view, input_view);
+            }
 
         } else if (compare_type == CharacterCompareType::String) {
             VERIFY(!current_inversion_state());
@@ -507,8 +511,10 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
 
             auto view = input.view.construct_as_same(data, str, utf16);
             offset += length;
-            if (!compare_string(input, state, view, had_zero_length_match))
-                return ExecutionResult::Failed_ExecuteLowPrioForks;
+            if (compare_string(input, state, view, had_zero_length_match)) {
+                if (current_inversion_state())
+                    inverse_matched = true;
+            }
 
         } else if (compare_type == CharacterCompareType::CharClass) {
 
@@ -575,8 +581,10 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
             if (input.view.length() < state.string_position + str.length())
                 return ExecutionResult::Failed_ExecuteLowPrioForks;
 
-            if (!compare_string(input, state, str, had_zero_length_match))
-                return ExecutionResult::Failed_ExecuteLowPrioForks;
+            if (compare_string(input, state, str, had_zero_length_match)) {
+                if (current_inversion_state())
+                    inverse_matched = true;
+            }
 
         } else if (compare_type == CharacterCompareType::Property) {
             auto property = static_cast<Unicode::Property>(m_bytecode->at(offset++));
@@ -600,7 +608,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
             break;
         }
 
-        if (current_inversion_state() && !inverse_matched) {
+        if (current_inversion_state() && !inverse && !inverse_matched) {
             advance_string_position(state, input.view);
             inverse_matched = true;
         }