mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-22 07:30:19 +00:00
LibRegex: Don't blindly accept inverted charclasses for atomic rewrite
This commit is contained in:
parent
0cc8ba305d
commit
00c45243bd
Notes:
github-actions[bot]
2024-10-24 11:37:46 +00:00
Author: https://github.com/alimpfard
Commit: https://github.com/LadybirdBrowser/ladybird/commit/00c45243bd6
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/1939
2 changed files with 19 additions and 3 deletions
|
@ -707,6 +707,7 @@ TEST_CASE(ECMA262_match)
|
||||||
{ "^(.*?):[ \\t]*([^\\r\\n]*)$"sv, "content-length: 488\r\ncontent-type: application/json; charset=utf-8\r\n"sv, true, global_multiline.value() },
|
{ "^(.*?):[ \\t]*([^\\r\\n]*)$"sv, "content-length: 488\r\ncontent-type: application/json; charset=utf-8\r\n"sv, true, global_multiline.value() },
|
||||||
{ "^\\?((&?category=[0-9]+)?(&?shippable=1)?(&?ad_type=demand)?(&?page=[0-9]+)?(&?locations=(r|d)_[0-9]+)?)+$"sv,
|
{ "^\\?((&?category=[0-9]+)?(&?shippable=1)?(&?ad_type=demand)?(&?page=[0-9]+)?(&?locations=(r|d)_[0-9]+)?)+$"sv,
|
||||||
"?category=54&shippable=1&baby_age=p,0,1,3"sv, false }, // ladybird#968, ?+ should not loop forever.
|
"?category=54&shippable=1&baby_age=p,0,1,3"sv, false }, // ladybird#968, ?+ should not loop forever.
|
||||||
|
{ "([^\\s]+):\\s*([^;]+);"sv, "font-family: 'Inter';"sv, true }, // optimizer bug, blindly accepting inverted char classes [^x] as atomic rewrite opportunities.
|
||||||
};
|
};
|
||||||
// clang-format on
|
// clang-format on
|
||||||
|
|
||||||
|
|
|
@ -198,8 +198,19 @@ static bool has_overlap(Vector<CompareTypeAndValuePair> const& lhs, Vector<Compa
|
||||||
if (lhs_negated_char_classes.contains(value))
|
if (lhs_negated_char_classes.contains(value))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// This char class might match something in the ranges we have, and checking that is far too expensive, so just bail out.
|
if (lhs_ranges.is_empty())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
for (auto it = lhs_ranges.begin(); it != lhs_ranges.end(); ++it) {
|
||||||
|
auto start = it.key();
|
||||||
|
auto end = *it;
|
||||||
|
for (u32 ch = start; ch <= end; ++ch) {
|
||||||
|
if (OpCode_Compare::matches_character_class(value, ch, false))
|
||||||
return true;
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
};
|
};
|
||||||
|
|
||||||
for (auto const& pair : lhs) {
|
for (auto const& pair : lhs) {
|
||||||
|
@ -302,6 +313,10 @@ static bool has_overlap(Vector<CompareTypeAndValuePair> const& lhs, Vector<Compa
|
||||||
dbgln(" {}..{}", it.key(), *it);
|
dbgln(" {}..{}", it.key(), *it);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
temporary_inverse = false;
|
||||||
|
reset_temporary_inverse = false;
|
||||||
|
inverse = false;
|
||||||
|
|
||||||
for (auto const& pair : rhs) {
|
for (auto const& pair : rhs) {
|
||||||
if (reset_temporary_inverse) {
|
if (reset_temporary_inverse) {
|
||||||
reset_temporary_inverse = false;
|
reset_temporary_inverse = false;
|
||||||
|
@ -310,7 +325,7 @@ static bool has_overlap(Vector<CompareTypeAndValuePair> const& lhs, Vector<Compa
|
||||||
reset_temporary_inverse = true;
|
reset_temporary_inverse = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
dbgln_if(REGEX_DEBUG, "check {} ({})...", character_compare_type_name(pair.type), pair.value);
|
dbgln_if(REGEX_DEBUG, "check {} ({}) [inverted? {}]...", character_compare_type_name(pair.type), pair.value, current_lhs_inversion_state());
|
||||||
|
|
||||||
switch (pair.type) {
|
switch (pair.type) {
|
||||||
case CharacterCompareType::Inverse:
|
case CharacterCompareType::Inverse:
|
||||||
|
|
Loading…
Reference in a new issue