mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-22 07:30:19 +00:00
LibRegex: Fix lookup table-based range checks in Compare
The lowercase version of a range is not required to be a valid range. Instead of casefolding the range and potentially making it invalid, check twice with both cases of the input character (which are the same as the input if the match is not case-insensitive). This time includes an actual test :^)
This commit is contained in:
parent
b7c50f7094
commit
b85666b3d2
Notes:
sideshowbarker
2024-07-17 09:35:00 +09:00
Author: https://github.com/alimpfard Commit: https://github.com/SerenityOS/serenity/commit/b85666b3d2 Pull-request: https://github.com/SerenityOS/serenity/pull/14513 Reviewed-by: https://github.com/Lubrsi
2 changed files with 7 additions and 7 deletions
|
@ -690,6 +690,7 @@ TEST_CASE(ECMA262_match)
|
|||
{ "a|$"sv, "x"sv, true, (ECMAScriptFlags)regex::AllFlags::Global }, // #11940, Global (not the 'g' flag) regexps should attempt to match the zero-length end of the string too.
|
||||
{ "foo\nbar"sv, "foo\nbar"sv, true }, // #12126, ECMA262 regexp should match literal newlines without the 's' flag.
|
||||
{ "foo[^]bar"sv, "foo\nbar"sv, true }, // #12126, ECMA262 regexp should match newline with [^].
|
||||
{ "^[_A-Z]+$"sv, "_aA"sv, true, ECMAScriptFlags::Insensitive } // Insensitive lookup table: characters in a range do not necessarily lie in the same range after being converted to lowercase.
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
|
|
|
@ -531,16 +531,15 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
|
|||
auto ch = input.view.substring_view(state.string_position, 1)[0];
|
||||
|
||||
auto const* matching_range = binary_search(range_data, ch, nullptr, [insensitive = input.regex_options & AllFlags::Insensitive](auto needle, CharRange range) {
|
||||
auto from = range.from;
|
||||
auto to = range.to;
|
||||
auto upper_case_needle = needle;
|
||||
auto lower_case_needle = needle;
|
||||
if (insensitive) {
|
||||
from = to_ascii_lowercase(from);
|
||||
to = to_ascii_lowercase(to);
|
||||
needle = to_ascii_lowercase(needle);
|
||||
upper_case_needle = to_ascii_uppercase(needle);
|
||||
lower_case_needle = to_ascii_lowercase(needle);
|
||||
}
|
||||
if (needle > to)
|
||||
if (lower_case_needle > range.to && upper_case_needle > range.to)
|
||||
return 1;
|
||||
if (needle < from)
|
||||
if (lower_case_needle < range.from && upper_case_needle < range.from)
|
||||
return -1;
|
||||
return 0;
|
||||
});
|
||||
|
|
Loading…
Reference in a new issue