LibRegex: Correct And/Or and inversion interplay semantics
This commit also fixes an incorrect test case from very early on; our behaviour now matches the ECMA262 spec in this case. Fixes #21786.
This commit is contained in:
parent
89315787ae
commit
e265d81277
Notes:
sideshowbarker
2024-07-16 23:55:09 +09:00
Author: https://github.com/alimpfard Commit: https://github.com/SerenityOS/serenity/commit/e265d81277 Pull-request: https://github.com/SerenityOS/serenity/pull/22698 Issue: https://github.com/SerenityOS/serenity/issues/21786
3 changed files with 29 additions and 8 deletions
|
@ -648,7 +648,7 @@ TEST_CASE(ECMA262_match)
|
|||
{ "^[\\0-\\x1f]$"sv, "\n"sv },
|
||||
{ .pattern = "\\bhello\\B"sv, .subject = "hello1"sv, .options = ECMAScriptFlags::Global },
|
||||
{ "\\b.*\\b"sv, "hello1"sv },
|
||||
{ "[^\\D\\S]{2}"sv, "1 "sv },
|
||||
{ "[^\\D\\S]{2}"sv, "1 "sv, false },
|
||||
{ "bar(?=f.)foo"sv, "barfoo"sv },
|
||||
{ "bar(?=foo)bar"sv, "barbar"sv, false },
|
||||
{ "bar(?!foo)bar"sv, "barbar"sv, true },
|
||||
|
@ -1174,6 +1174,14 @@ TEST_CASE(inversion_state_in_char_class)
|
|||
EXPECT_EQ(result.capture_group_matches.first()[0].view.to_byte_string(), "slideNumbers"sv);
|
||||
EXPECT_EQ(result.capture_group_matches.first()[1].view.to_byte_string(), "}"sv);
|
||||
}
|
||||
{
|
||||
// #21786, /[^\S\n]/.exec("\n") should be null, not [ "\n" ].
|
||||
// This was a general confusion between the inversion state and the negation state (temp inverse).
|
||||
Regex<ECMA262> re("[^\\S\\n]", ECMAScriptFlags::Global | (ECMAScriptFlags)regex::AllFlags::SingleMatch);
|
||||
|
||||
auto result = re.match("\n"sv);
|
||||
EXPECT_EQ(result.success, false);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE(mismatching_brackets)
|
||||
|
|
|
@ -426,6 +426,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
|
|||
bool active { false };
|
||||
bool is_conjunction { false };
|
||||
bool fail { false };
|
||||
bool inverse_matched { false };
|
||||
size_t initial_position;
|
||||
size_t initial_code_unit_position;
|
||||
Optional<size_t> last_accepted_position {};
|
||||
|
@ -623,8 +624,9 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
|
|||
case CharacterCompareType::And:
|
||||
disjunction_states.append({
|
||||
.active = true,
|
||||
.is_conjunction = false,
|
||||
.fail = false,
|
||||
.is_conjunction = current_inversion_state(),
|
||||
.fail = current_inversion_state(),
|
||||
.inverse_matched = current_inversion_state(),
|
||||
.initial_position = state.string_position,
|
||||
.initial_code_unit_position = state.string_position_in_code_units,
|
||||
});
|
||||
|
@ -632,8 +634,9 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
|
|||
case CharacterCompareType::Or:
|
||||
disjunction_states.append({
|
||||
.active = true,
|
||||
.is_conjunction = true,
|
||||
.fail = true,
|
||||
.is_conjunction = !current_inversion_state(),
|
||||
.fail = !current_inversion_state(),
|
||||
.inverse_matched = !current_inversion_state(),
|
||||
.initial_position = state.string_position,
|
||||
.initial_code_unit_position = state.string_position_in_code_units,
|
||||
});
|
||||
|
@ -644,6 +647,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
|
|||
state.string_position = disjunction_state.last_accepted_position.value_or(disjunction_state.initial_position);
|
||||
state.string_position_in_code_units = disjunction_state.last_accepted_code_unit_position.value_or(disjunction_state.initial_code_unit_position);
|
||||
}
|
||||
inverse_matched = disjunction_state.inverse_matched || disjunction_state.fail;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
@ -664,6 +668,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
|
|||
if (!failed) {
|
||||
new_disjunction_state.last_accepted_position = state.string_position;
|
||||
new_disjunction_state.last_accepted_code_unit_position = state.string_position_in_code_units;
|
||||
new_disjunction_state.inverse_matched |= inverse_matched;
|
||||
}
|
||||
|
||||
if (new_disjunction_state.is_conjunction)
|
||||
|
@ -673,6 +678,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
|
|||
|
||||
state.string_position = new_disjunction_state.initial_position;
|
||||
state.string_position_in_code_units = new_disjunction_state.initial_code_unit_position;
|
||||
inverse_matched = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1777,10 +1777,12 @@ bool ECMA262Parser::parse_character_class(ByteCode& stack, size_t& match_length_
|
|||
|
||||
Vector<CompareTypeAndValuePair> compares;
|
||||
|
||||
auto uses_explicit_or_semantics = false;
|
||||
if (match(TokenType::Circumflex)) {
|
||||
// Negated charclass
|
||||
consume();
|
||||
compares.empend(CompareTypeAndValuePair { CharacterCompareType::Inverse, 0 });
|
||||
uses_explicit_or_semantics = true;
|
||||
}
|
||||
|
||||
// ClassContents :: [empty]
|
||||
|
@ -1800,6 +1802,11 @@ bool ECMA262Parser::parse_character_class(ByteCode& stack, size_t& match_length_
|
|||
if (flags.unicode_sets && !parse_class_set_expression(compares))
|
||||
return false;
|
||||
|
||||
if (uses_explicit_or_semantics && compares.size() > 2) {
|
||||
compares.insert(1, CompareTypeAndValuePair { CharacterCompareType::Or, 0 });
|
||||
compares.empend(CompareTypeAndValuePair { CharacterCompareType::EndAndOr, 0 });
|
||||
}
|
||||
|
||||
match_length_minimum += 1;
|
||||
stack.insert_bytecode_compare_values(move(compares));
|
||||
return true;
|
||||
|
@ -2466,9 +2473,9 @@ DeprecatedFlyString ECMA262Parser::read_capture_group_specifier(bool take_starti
|
|||
{
|
||||
static auto id_start_category = Unicode::property_from_string("ID_Start"sv);
|
||||
static auto id_continue_category = Unicode::property_from_string("ID_Continue"sv);
|
||||
static constexpr const u32 REPLACEMENT_CHARACTER = 0xFFFD;
|
||||
constexpr const u32 ZERO_WIDTH_NON_JOINER { 0x200C };
|
||||
constexpr const u32 ZERO_WIDTH_JOINER { 0x200D };
|
||||
static constexpr u32 const REPLACEMENT_CHARACTER = 0xFFFD;
|
||||
constexpr u32 const ZERO_WIDTH_NON_JOINER { 0x200C };
|
||||
constexpr u32 const ZERO_WIDTH_JOINER { 0x200D };
|
||||
|
||||
if (take_starting_angle_bracket && !consume("<"))
|
||||
return {};
|
||||
|
|
Loading…
Add table
Reference in a new issue