LibRegex: Change bytecode value type to a 64-bit value

This allows Unicode ranges to be stored compactly. The wider type is not
utilised at the moment, but changing it later would have been significantly
more difficult.
Also fixes a few debug logs.
This commit is contained in:
AnotherTest 2020-11-19 18:45:36 +03:30 committed by Andreas Kling
parent 92ea9ed4a5
commit 3db8ced4c7
Notes: sideshowbarker 2024-07-19 01:14:48 +09:00
5 changed files with 19 additions and 20 deletions

View file

@ -275,7 +275,7 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveRightNamedCaptureGroup::execute(const M
auto& map = output.named_capture_group_matches.at(input.match_index);
#ifdef REGEX_DEBUG
ASSERT(start_position + length < input.view.length());
ASSERT(start_position + length <= input.view.length());
dbg() << "Save named capture group with name=" << capture_group_name << " and content: " << input.view.substring_view(start_position, length).to_string();
#endif
@ -415,7 +415,7 @@ ALWAYS_INLINE bool OpCode_Compare::compare_string(const MatchInput& input, Match
return false;
}
ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& input, MatchState& state, CharClass character_class, char ch, bool inverse, bool& inverse_matched)
ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& input, MatchState& state, CharClass character_class, u32 ch, bool inverse, bool& inverse_matched)
{
switch (character_class) {
case CharClass::Alnum:
@ -513,7 +513,7 @@ ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& inp
}
}
ALWAYS_INLINE void OpCode_Compare::compare_character_range(const MatchInput& input, MatchState& state, char from, char to, char ch, bool inverse, bool& inverse_matched)
ALWAYS_INLINE void OpCode_Compare::compare_character_range(const MatchInput& input, MatchState& state, u32 from, u32 to, u32 ch, bool inverse, bool& inverse_matched)
{
if (input.regex_options & AllFlags::Insensitive) {
from = tolower(from);
@ -553,7 +553,7 @@ const Vector<String> OpCode_Compare::variable_arguments_to_string(Optional<Match
} else if (compare_type == CharacterCompareType::String) {
char* str = reinterpret_cast<char*>(m_bytecode->at(offset++));
auto& length = m_bytecode->at(offset++);
result.empend(String::format("value=\"%s\"", String { str, length }.characters()));
result.empend(String::format("value=\"%.*s\"", length, str));
if (!view.is_null())
result.empend(String::format("compare against: \"%s\"", input.value().view.substring_view(state().string_position, state().string_position + length > view.length() ? 0 : length).to_string().characters()));
} else if (compare_type == CharacterCompareType::CharClass) {

View file

@ -39,7 +39,7 @@
namespace regex {
using ByteCodeValueType = size_t;
using ByteCodeValueType = u64;
#define ENUMERATE_OPCODES \
__ENUMERATE_OPCODE(Compare) \
@ -102,22 +102,22 @@ enum class CharClass : ByteCodeValueType {
};
struct CharRange {
const char from;
const char to;
const u32 from;
const u32 to;
CharRange(size_t value)
: from(value >> 8)
, to(value & 0xFF)
CharRange(u64 value)
: from(value >> 32)
, to(value & 0xffffffff)
{
}
CharRange(char from, char to)
CharRange(u32 from, u32 to)
: from(from)
, to(to)
{
}
operator ByteCodeValueType() const { return (from << 8) | to; }
operator ByteCodeValueType() const { return ((u64)from << 32) | to; }
};
struct CompareTypeAndValuePair {
@ -568,8 +568,8 @@ public:
private:
ALWAYS_INLINE static void compare_char(const MatchInput& input, MatchState& state, u32 ch1, bool inverse, bool& inverse_matched);
ALWAYS_INLINE static bool compare_string(const MatchInput& input, MatchState& state, const char* str, size_t length);
ALWAYS_INLINE static void compare_character_class(const MatchInput& input, MatchState& state, CharClass character_class, char ch, bool inverse, bool& inverse_matched);
ALWAYS_INLINE static void compare_character_range(const MatchInput& input, MatchState& state, char from, char to, char ch, bool inverse, bool& inverse_matched);
ALWAYS_INLINE static void compare_character_class(const MatchInput& input, MatchState& state, CharClass character_class, u32 ch, bool inverse, bool& inverse_matched);
ALWAYS_INLINE static void compare_character_range(const MatchInput& input, MatchState& state, u32 from, u32 to, u32 ch, bool inverse, bool& inverse_matched);
};
template<typename T>

View file

@ -131,7 +131,7 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional
for (auto& view : views) {
input.view = view;
#ifdef REGEX_DEBUG
dbg() << "[match] Starting match with view (" << view.length() << "): _" << view << "_";
dbg() << "[match] Starting match with view (" << view.length() << "): _" << view.to_string() << "_";
#endif
auto view_length = view.length();
@ -171,7 +171,7 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional
#ifdef REGEX_DEBUG
dbg() << "state.string_position: " << state.string_position << " view_index: " << view_index;
dbg() << "[match] Found a match (length = " << state.string_position - view_index << "): " << input.view.substring_view(view_index, state.string_position - view_index);
dbg() << "[match] Found a match (length = " << state.string_position - view_index << "): " << input.view.substring_view(view_index, state.string_position - view_index).to_string();
#endif
++match_count;

View file

@ -365,7 +365,7 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_bracket_expression(ByteCode& stack
values.take_last(); // RangeExpressionDummy
auto value1 = values.take_last();
values.append({ CharacterCompareType::CharRange, static_cast<ByteCodeValueType>(CharRange { (char)value1.value, (char)value2.value }) });
values.append({ CharacterCompareType::CharRange, static_cast<ByteCodeValueType>(CharRange { (u32)value1.value, (u32)value2.value }) });
}
}

View file

@ -1,10 +1,9 @@
file(GLOB TEST_SOURCES CONFIGURE_DEPENDS "*.cpp")
file(GLOB REGEX_SOURCES CONFIGURE_DEPENDS "../*.cpp")
file(GLOB C_REGEX_SOURCES CONFIGURE_DEPENDS "../C/*.cpp")
file(GLOB REGEX_SOURCES CONFIGURE_DEPENDS "../*.cpp" "../C/*.cpp")
foreach(source ${TEST_SOURCES})
get_filename_component(name ${source} NAME_WE)
add_executable(${name} ${source} ${REGEX_SOURCES} ${C_REGEX_SOURCES})
add_executable(${name} ${source} ${REGEX_SOURCES})
target_link_libraries(${name} LagomCore)
add_test(
NAME ${name}