LibRegex: Don't blindly accept inverted charclasses for atomic rewrite

This commit is contained in:
Ali Mohammad Pur 2024-10-24 11:39:26 +02:00 committed by Tim Flynn
parent 0cc8ba305d
commit 00c45243bd
Notes: github-actions[bot] 2024-10-24 11:37:46 +00:00
2 changed files with 19 additions and 3 deletions

View file

@ -707,6 +707,7 @@ TEST_CASE(ECMA262_match)
{ "^(.*?):[ \\t]*([^\\r\\n]*)$"sv, "content-length: 488\r\ncontent-type: application/json; charset=utf-8\r\n"sv, true, global_multiline.value() },
{ "^\\?((&?category=[0-9]+)?(&?shippable=1)?(&?ad_type=demand)?(&?page=[0-9]+)?(&?locations=(r|d)_[0-9]+)?)+$"sv,
"?category=54&shippable=1&baby_age=p,0,1,3"sv, false }, // ladybird#968, ?+ should not loop forever.
{ "([^\\s]+):\\s*([^;]+);"sv, "font-family: 'Inter';"sv, true }, // optimizer bug, blindly accepting inverted char classes [^x] as atomic rewrite opportunities.
};
// clang-format on

View file

@ -198,8 +198,19 @@ static bool has_overlap(Vector<CompareTypeAndValuePair> const& lhs, Vector<Compa
if (lhs_negated_char_classes.contains(value))
return false;
// This char class might match something in the ranges we have, and checking that is far too expensive, so just bail out.
return true;
if (lhs_ranges.is_empty())
return false;
for (auto it = lhs_ranges.begin(); it != lhs_ranges.end(); ++it) {
auto start = it.key();
auto end = *it;
for (u32 ch = start; ch <= end; ++ch) {
if (OpCode_Compare::matches_character_class(value, ch, false))
return true;
}
}
return false;
};
for (auto const& pair : lhs) {
@ -302,6 +313,10 @@ static bool has_overlap(Vector<CompareTypeAndValuePair> const& lhs, Vector<Compa
dbgln(" {}..{}", it.key(), *it);
}
temporary_inverse = false;
reset_temporary_inverse = false;
inverse = false;
for (auto const& pair : rhs) {
if (reset_temporary_inverse) {
reset_temporary_inverse = false;
@ -310,7 +325,7 @@ static bool has_overlap(Vector<CompareTypeAndValuePair> const& lhs, Vector<Compa
reset_temporary_inverse = true;
}
dbgln_if(REGEX_DEBUG, "check {} ({})...", character_compare_type_name(pair.type), pair.value);
dbgln_if(REGEX_DEBUG, "check {} ({}) [inverted? {}]...", character_compare_type_name(pair.type), pair.value, current_lhs_inversion_state());
switch (pair.type) {
case CharacterCompareType::Inverse: