From 74d76528d6db4d3291465496d7c3bf26936f514f Mon Sep 17 00:00:00 2001 From: sin-ack Date: Wed, 16 Jun 2021 10:14:12 +0000 Subject: [PATCH] LibRegex: Display correct position for Compare in REGEX_DEBUG When REGEX_DEBUG is enabled, LibRegex dumps a table of information regarding the state of the regex bytecode execution. The Compare opcode manipulates state.string_position directly, so the string_position value cannot be used to display where the comparison started; therefore, this patch introduces a new variable to keep track of where we were before the comparison happened. --- Userland/Libraries/LibRegex/RegexByteCode.cpp | 16 +++++++++------- Userland/Libraries/LibRegex/RegexMatch.h | 1 + 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/Userland/Libraries/LibRegex/RegexByteCode.cpp b/Userland/Libraries/LibRegex/RegexByteCode.cpp index 5d831867084..56dd43da1b0 100644 --- a/Userland/Libraries/LibRegex/RegexByteCode.cpp +++ b/Userland/Libraries/LibRegex/RegexByteCode.cpp @@ -383,6 +383,8 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(const MatchInput& input, M bool inverse_matched { false }; bool had_zero_length_match { false }; + state.string_position_before_match = state.string_position; + size_t offset { state.instruction_position + 3 }; for (size_t i = 0; i < arguments_count(); ++i) { if (state.string_position > string_position) @@ -686,7 +688,7 @@ const Vector OpCode_Compare::variable_arguments_to_string(Optionalat(offset++); result.empend(String::formatted("type={} [{}]", (size_t)compare_type, character_compare_type_name(compare_type))); - auto compared_against_string_start_offset = state().string_position > 0 ? state().string_position - 1 : state().string_position; + auto string_start_offset = state().string_position_before_match; if (compare_type == CharacterCompareType::Char) { auto ch = m_bytecode->at(offset++); @@ -696,13 +698,13 @@ const Vector OpCode_Compare::variable_arguments_to_string(Optional state().string_position) { + if (!view.is_null() && view.length() > string_start_offset) { if (is_ascii) { result.empend(String::formatted( "compare against: '{}'", - view.substring_view(compared_against_string_start_offset, state().string_position > view.length() ? 0 : 1).to_string())); + view.substring_view(string_start_offset, string_start_offset > view.length() ? 0 : 1).to_string())); } else { - auto str = view.substring_view(compared_against_string_start_offset, state().string_position > view.length() ? 0 : 1).to_string(); + auto str = view.substring_view(string_start_offset, string_start_offset > view.length() ? 0 : 1).to_string(); u8 buf[8] { 0 }; __builtin_memcpy(buf, str.characters(), min(str.length(), sizeof(buf))); result.empend(String::formatted("compare against: {:x},{:x},{:x},{:x},{:x},{:x},{:x},{:x}", @@ -725,21 +727,21 @@ const Vector OpCode_Compare::variable_arguments_to_string(Optional state().string_position) result.empend(String::formatted( "compare against: \"{}\"", - input.value().view.substring_view(compared_against_string_start_offset, compared_against_string_start_offset + length > view.length() ? 0 : length).to_string())); + input.value().view.substring_view(string_start_offset, string_start_offset + length > view.length() ? 0 : length).to_string())); } else if (compare_type == CharacterCompareType::CharClass) { auto character_class = (CharClass)m_bytecode->at(offset++); result.empend(String::formatted("ch_class={} [{}]", (size_t)character_class, character_class_name(character_class))); if (!view.is_null() && view.length() > state().string_position) result.empend(String::formatted( "compare against: '{}'", - input.value().view.substring_view(compared_against_string_start_offset, state().string_position > view.length() ? 0 : 1).to_string())); + input.value().view.substring_view(string_start_offset, state().string_position > view.length() ? 0 : 1).to_string())); } else if (compare_type == CharacterCompareType::CharRange) { auto value = (CharRange)m_bytecode->at(offset++); result.empend(String::formatted("ch_range='{:c}'-'{:c}'", value.from, value.to)); if (!view.is_null() && view.length() > state().string_position) result.empend(String::formatted( "compare against: '{}'", - input.value().view.substring_view(compared_against_string_start_offset, state().string_position > view.length() ? 0 : 1).to_string())); + input.value().view.substring_view(string_start_offset, state().string_position > view.length() ? 0 : 1).to_string())); } } return result; diff --git a/Userland/Libraries/LibRegex/RegexMatch.h b/Userland/Libraries/LibRegex/RegexMatch.h index 5529047223e..b1da9a454b8 100644 --- a/Userland/Libraries/LibRegex/RegexMatch.h +++ b/Userland/Libraries/LibRegex/RegexMatch.h @@ -256,6 +256,7 @@ struct MatchInput { }; struct MatchState { + size_t string_position_before_match { 0 }; size_t string_position { 0 }; size_t instruction_position { 0 }; size_t fork_at_position { 0 };