Quellcode durchsuchen

LibRegex: Fix lookup table-based range checks in Compare

The lowercase version of a range is not required to be a valid range.
Instead of casefolding the range and making it invalid, check twice with
both cases of the input character (which are the same as the input if
the match is not case-insensitive).
This time includes an actual test :^)
Ali Mohammad Pur vor 3 Jahren
Ursprung
Commit
b85666b3d2
2 geänderte Dateien mit 7 neuen und 7 gelöschten Zeilen
  1. 1 0
      Tests/LibRegex/Regex.cpp
  2. 6 7
      Userland/Libraries/LibRegex/RegexByteCode.cpp

+ 1 - 0
Tests/LibRegex/Regex.cpp

@@ -690,6 +690,7 @@ TEST_CASE(ECMA262_match)
         { "a|$"sv, "x"sv, true, (ECMAScriptFlags)regex::AllFlags::Global }, // #11940, Global (not the 'g' flag) regexps should attempt to match the zero-length end of the string too.
         { "foo\nbar"sv, "foo\nbar"sv, true }, // #12126, ECMA262 regexp should match literal newlines without the 's' flag.
         { "foo[^]bar"sv, "foo\nbar"sv, true }, // #12126, ECMA262 regexp should match newline with [^].
+        { "^[_A-Z]+$"sv, "_aA"sv, true, ECMAScriptFlags::Insensitive } // Insensitive lookup table: characters in a range do not necessarily lie in the same range after being converted to lowercase.
     };
     // clang-format on
 

+ 6 - 7
Userland/Libraries/LibRegex/RegexByteCode.cpp

@@ -531,16 +531,15 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
             auto ch = input.view.substring_view(state.string_position, 1)[0];
 
             auto const* matching_range = binary_search(range_data, ch, nullptr, [insensitive = input.regex_options & AllFlags::Insensitive](auto needle, CharRange range) {
-                auto from = range.from;
-                auto to = range.to;
+                auto upper_case_needle = needle;
+                auto lower_case_needle = needle;
                 if (insensitive) {
-                    from = to_ascii_lowercase(from);
-                    to = to_ascii_lowercase(to);
-                    needle = to_ascii_lowercase(needle);
+                    upper_case_needle = to_ascii_uppercase(needle);
+                    lower_case_needle = to_ascii_lowercase(needle);
                 }
-                if (needle > to)
+                if (lower_case_needle > range.to && upper_case_needle > range.to)
                     return 1;
-                if (needle < from)
+                if (lower_case_needle < range.from && upper_case_needle < range.from)
                     return -1;
                 return 0;
             });