From b85666b3d203bb39e75da8d582e428800f4cb26c Mon Sep 17 00:00:00 2001 From: Ali Mohammad Pur Date: Thu, 7 Jul 2022 23:27:51 +0430 Subject: [PATCH] LibRegex: Fix lookup table-based range checks in Compare The lowercase version of a range is not required to be a valid range; instead of casefolding the range and making it invalid, check twice with both cases of the input character (which are the same as the input when matching is not case-insensitive). This time includes an actual test :^) --- Tests/LibRegex/Regex.cpp | 1 + Userland/Libraries/LibRegex/RegexByteCode.cpp | 13 ++++++------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp index eac87090852..237b2f1efd4 100644 --- a/Tests/LibRegex/Regex.cpp +++ b/Tests/LibRegex/Regex.cpp @@ -690,6 +690,7 @@ TEST_CASE(ECMA262_match) { "a|$"sv, "x"sv, true, (ECMAScriptFlags)regex::AllFlags::Global }, // #11940, Global (not the 'g' flag) regexps should attempt to match the zero-length end of the string too. { "foo\nbar"sv, "foo\nbar"sv, true }, // #12126, ECMA262 regexp should match literal newlines without the 's' flag. { "foo[^]bar"sv, "foo\nbar"sv, true }, // #12126, ECMA262 regexp should match newline with [^]. + { "^[_A-Z]+$"sv, "_aA"sv, true, ECMAScriptFlags::Insensitive } // Insensitive lookup table: characters in a range do not necessarily lie in the same range after being converted to lowercase. 
}; // clang-format on diff --git a/Userland/Libraries/LibRegex/RegexByteCode.cpp b/Userland/Libraries/LibRegex/RegexByteCode.cpp index de7ce26444d..cccdb939823 100644 --- a/Userland/Libraries/LibRegex/RegexByteCode.cpp +++ b/Userland/Libraries/LibRegex/RegexByteCode.cpp @@ -531,16 +531,15 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M auto ch = input.view.substring_view(state.string_position, 1)[0]; auto const* matching_range = binary_search(range_data, ch, nullptr, [insensitive = input.regex_options & AllFlags::Insensitive](auto needle, CharRange range) { - auto from = range.from; - auto to = range.to; + auto upper_case_needle = needle; + auto lower_case_needle = needle; if (insensitive) { - from = to_ascii_lowercase(from); - to = to_ascii_lowercase(to); - needle = to_ascii_lowercase(needle); + upper_case_needle = to_ascii_uppercase(needle); + lower_case_needle = to_ascii_lowercase(needle); } - if (needle > to) + if (lower_case_needle > range.to && upper_case_needle > range.to) return 1; - if (needle < from) + if (lower_case_needle < range.from && upper_case_needle < range.from) return -1; return 0; });