LibRegex: Ensure the GoBack operation decrements the code unit index

This was missed in commit 27d555bab0.
This commit is contained in:
Timothy Flynn 2021-08-16 10:28:26 -04:00 committed by Ali Mohammad Pur
parent c4ee576531
commit 325eabc770
Notes: sideshowbarker 2024-07-18 05:34:00 +09:00
3 changed files with 33 additions and 2 deletions

View file

@ -637,6 +637,8 @@ TEST_CASE(ECMA262_match)
{ "(a{4}){2}"sv, "aaaaaaaa"sv },
{ "(a{4}){2}"sv, "aaaaaabaa"sv, false },
{ "\\u{4}"sv, "uuuu" },
{ "(?<=.{3})f"sv, "abcdef"sv, true, (ECMAScriptFlags)regex::AllFlags::Global },
{ "(?<=.{3})f"sv, "abc😀ef"sv, true, (ECMAScriptFlags)regex::AllFlags::Global },
// ECMA262, B.1.4. Regular Expression Pattern extensions for browsers
{ "{"sv, "{"sv, true, ECMAScriptFlags::BrowserExtended },
{ "\\5"sv, "\5"sv, true, ECMAScriptFlags::BrowserExtended },
@ -694,6 +696,8 @@ TEST_CASE(ECMA262_unicode_match)
{ "\\u{1f600}"sv, "😀"sv, true, ECMAScriptFlags::Unicode },
{ "\\ud83d\\ud83d"sv, "\xed\xa0\xbd\xed\xa0\xbd"sv, true },
{ "\\ud83d\\ud83d"sv, "\xed\xa0\xbd\xed\xa0\xbd"sv, true, ECMAScriptFlags::Unicode },
{ "(?<=.{3})f"sv, "abcdef"sv, true, ECMAScriptFlags::Unicode },
{ "(?<=.{3})f"sv, "abc😀ef"sv, true, ECMAScriptFlags::Unicode },
};
for (auto& test : tests) {

View file

@ -102,6 +102,17 @@ static void advance_string_position(MatchState& state, RegexStringView const& vi
}
}
// Moves the current match position backwards by `amount` and keeps
// `string_position_in_code_units` in step with the new `string_position`.
//
// state:  match state whose position fields are updated in place.
// view:   the input being matched; consulted for its unicode() mode and, in
//         that mode, for the code unit offset of the new position.
// amount: how far to step back; must not exceed state.string_position.
static void reverse_string_position(MatchState& state, RegexStringView const& view, size_t amount)
{
    // Stepping back past the start of the input is treated as a programming error.
    VERIFY(state.string_position >= amount);
    state.string_position -= amount;

    if (!view.unicode()) {
        // Outside unicode mode both counters are reduced by the same amount.
        state.string_position_in_code_units -= amount;
        return;
    }

    // In unicode mode the code unit index is looked up from the new position
    // instead of being decremented (presumably because one code point may
    // span more than one code unit).
    state.string_position_in_code_units = view.code_unit_offset_of(state.string_position);
}
static void save_string_position(MatchInput const& input, MatchState const& state)
{
input.saved_positions.append(state.string_position);
@ -226,12 +237,12 @@ ALWAYS_INLINE ExecutionResult OpCode_Restore::execute(MatchInput const& input, M
return ExecutionResult::Continue;
}
// Executes the GoBack operation: steps the match position back by count(),
// or fails the current path when fewer than count() positions precede it.
//
// Returns ExecutionResult::Failed_ExecuteLowPrioForks when stepping back is
// not possible, ExecutionResult::Continue otherwise.
//
// NOTE(review): this span appears to be a rendered diff hunk with its +/-
// markers stripped. The first signature line and the bare
// `state.string_position -= count();` statement look like the pre-change
// lines, each immediately followed by its replacement - confirm against the
// real file before treating both as live code.
ALWAYS_INLINE ExecutionResult OpCode_GoBack::execute(MatchInput const&, MatchState& state) const
ALWAYS_INLINE ExecutionResult OpCode_GoBack::execute(MatchInput const& input, MatchState& state) const
{
// Guard first: the position is unsigned, so subtracting a larger count would wrap around.
if (count() > state.string_position)
return ExecutionResult::Failed_ExecuteLowPrioForks;
state.string_position -= count();
// Delegates the step back to the helper, which also adjusts the code unit index.
reverse_string_position(state, input.view, count());
return ExecutionResult::Continue;
}

View file

@ -274,6 +274,22 @@ public:
});
}
size_t code_unit_offset_of(size_t code_point_index) const
{
return m_view.visit(
[&](StringView const& view) -> u32 {
Utf8View utf8_view { view };
return utf8_view.byte_offset_of(code_point_index);
},
[&](Utf32View const&) -> u32 { return code_point_index; },
[&](Utf16View const& view) -> u32 {
return view.code_unit_offset_of(code_point_index);
},
[&](Utf8View const& view) -> u32 {
return view.byte_offset_of(code_point_index);
});
}
bool operator==(char const* cstring) const
{
return m_view.visit(