mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-21 23:20:20 +00:00
LibRegex: Change bytecode value type to a 64-bit value
This allows Unicode ranges to be stored compactly. The wider value type is not utilised at the moment, but changing it later would have been significantly more difficult. Also fixes a few debug logs.
This commit is contained in:
parent
92ea9ed4a5
commit
3db8ced4c7
Notes:
sideshowbarker
2024-07-19 01:14:48 +09:00
Author: https://github.com/alimpfard Commit: https://github.com/SerenityOS/serenity/commit/3db8ced4c7a Pull-request: https://github.com/SerenityOS/serenity/pull/4103 Reviewed-by: https://github.com/linusg ✅
5 changed files with 19 additions and 20 deletions
|
@ -275,7 +275,7 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveRightNamedCaptureGroup::execute(const M
|
|||
auto& map = output.named_capture_group_matches.at(input.match_index);
|
||||
|
||||
#ifdef REGEX_DEBUG
|
||||
ASSERT(start_position + length < input.view.length());
|
||||
ASSERT(start_position + length <= input.view.length());
|
||||
dbg() << "Save named capture group with name=" << capture_group_name << " and content: " << input.view.substring_view(start_position, length).to_string();
|
||||
#endif
|
||||
|
||||
|
@ -415,7 +415,7 @@ ALWAYS_INLINE bool OpCode_Compare::compare_string(const MatchInput& input, Match
|
|||
return false;
|
||||
}
|
||||
|
||||
ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& input, MatchState& state, CharClass character_class, char ch, bool inverse, bool& inverse_matched)
|
||||
ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& input, MatchState& state, CharClass character_class, u32 ch, bool inverse, bool& inverse_matched)
|
||||
{
|
||||
switch (character_class) {
|
||||
case CharClass::Alnum:
|
||||
|
@ -513,7 +513,7 @@ ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& inp
|
|||
}
|
||||
}
|
||||
|
||||
ALWAYS_INLINE void OpCode_Compare::compare_character_range(const MatchInput& input, MatchState& state, char from, char to, char ch, bool inverse, bool& inverse_matched)
|
||||
ALWAYS_INLINE void OpCode_Compare::compare_character_range(const MatchInput& input, MatchState& state, u32 from, u32 to, u32 ch, bool inverse, bool& inverse_matched)
|
||||
{
|
||||
if (input.regex_options & AllFlags::Insensitive) {
|
||||
from = tolower(from);
|
||||
|
@ -553,7 +553,7 @@ const Vector<String> OpCode_Compare::variable_arguments_to_string(Optional<Match
|
|||
} else if (compare_type == CharacterCompareType::String) {
|
||||
char* str = reinterpret_cast<char*>(m_bytecode->at(offset++));
|
||||
auto& length = m_bytecode->at(offset++);
|
||||
result.empend(String::format("value=\"%s\"", String { str, length }.characters()));
|
||||
result.empend(String::format("value=\"%.*s\"", length, str));
|
||||
if (!view.is_null())
|
||||
result.empend(String::format("compare against: \"%s\"", input.value().view.substring_view(state().string_position, state().string_position + length > view.length() ? 0 : length).to_string().characters()));
|
||||
} else if (compare_type == CharacterCompareType::CharClass) {
|
||||
|
|
|
@ -39,7 +39,7 @@
|
|||
|
||||
namespace regex {
|
||||
|
||||
using ByteCodeValueType = size_t;
|
||||
using ByteCodeValueType = u64;
|
||||
|
||||
#define ENUMERATE_OPCODES \
|
||||
__ENUMERATE_OPCODE(Compare) \
|
||||
|
@ -102,22 +102,22 @@ enum class CharClass : ByteCodeValueType {
|
|||
};
|
||||
|
||||
struct CharRange {
|
||||
const char from;
|
||||
const char to;
|
||||
const u32 from;
|
||||
const u32 to;
|
||||
|
||||
CharRange(size_t value)
|
||||
: from(value >> 8)
|
||||
, to(value & 0xFF)
|
||||
CharRange(u64 value)
|
||||
: from(value >> 32)
|
||||
, to(value & 0xffffffff)
|
||||
{
|
||||
}
|
||||
|
||||
CharRange(char from, char to)
|
||||
CharRange(u32 from, u32 to)
|
||||
: from(from)
|
||||
, to(to)
|
||||
{
|
||||
}
|
||||
|
||||
operator ByteCodeValueType() const { return (from << 8) | to; }
|
||||
operator ByteCodeValueType() const { return ((u64)from << 32) | to; }
|
||||
};
|
||||
|
||||
struct CompareTypeAndValuePair {
|
||||
|
@ -568,8 +568,8 @@ public:
|
|||
private:
|
||||
ALWAYS_INLINE static void compare_char(const MatchInput& input, MatchState& state, u32 ch1, bool inverse, bool& inverse_matched);
|
||||
ALWAYS_INLINE static bool compare_string(const MatchInput& input, MatchState& state, const char* str, size_t length);
|
||||
ALWAYS_INLINE static void compare_character_class(const MatchInput& input, MatchState& state, CharClass character_class, char ch, bool inverse, bool& inverse_matched);
|
||||
ALWAYS_INLINE static void compare_character_range(const MatchInput& input, MatchState& state, char from, char to, char ch, bool inverse, bool& inverse_matched);
|
||||
ALWAYS_INLINE static void compare_character_class(const MatchInput& input, MatchState& state, CharClass character_class, u32 ch, bool inverse, bool& inverse_matched);
|
||||
ALWAYS_INLINE static void compare_character_range(const MatchInput& input, MatchState& state, u32 from, u32 to, u32 ch, bool inverse, bool& inverse_matched);
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
|
|
|
@ -131,7 +131,7 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional
|
|||
for (auto& view : views) {
|
||||
input.view = view;
|
||||
#ifdef REGEX_DEBUG
|
||||
dbg() << "[match] Starting match with view (" << view.length() << "): _" << view << "_";
|
||||
dbg() << "[match] Starting match with view (" << view.length() << "): _" << view.to_string() << "_";
|
||||
#endif
|
||||
|
||||
auto view_length = view.length();
|
||||
|
@ -171,7 +171,7 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional
|
|||
|
||||
#ifdef REGEX_DEBUG
|
||||
dbg() << "state.string_position: " << state.string_position << " view_index: " << view_index;
|
||||
dbg() << "[match] Found a match (length = " << state.string_position - view_index << "): " << input.view.substring_view(view_index, state.string_position - view_index);
|
||||
dbg() << "[match] Found a match (length = " << state.string_position - view_index << "): " << input.view.substring_view(view_index, state.string_position - view_index).to_string();
|
||||
#endif
|
||||
++match_count;
|
||||
|
||||
|
|
|
@ -365,7 +365,7 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_bracket_expression(ByteCode& stack
|
|||
values.take_last(); // RangeExpressionDummy
|
||||
auto value1 = values.take_last();
|
||||
|
||||
values.append({ CharacterCompareType::CharRange, static_cast<ByteCodeValueType>(CharRange { (char)value1.value, (char)value2.value }) });
|
||||
values.append({ CharacterCompareType::CharRange, static_cast<ByteCodeValueType>(CharRange { (u32)value1.value, (u32)value2.value }) });
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,10 +1,9 @@
|
|||
file(GLOB TEST_SOURCES CONFIGURE_DEPENDS "*.cpp")
|
||||
file(GLOB REGEX_SOURCES CONFIGURE_DEPENDS "../*.cpp")
|
||||
file(GLOB C_REGEX_SOURCES CONFIGURE_DEPENDS "../C/*.cpp")
|
||||
file(GLOB REGEX_SOURCES CONFIGURE_DEPENDS "../*.cpp" "../C/*.cpp")
|
||||
|
||||
foreach(source ${TEST_SOURCES})
|
||||
get_filename_component(name ${source} NAME_WE)
|
||||
add_executable(${name} ${source} ${REGEX_SOURCES} ${C_REGEX_SOURCES})
|
||||
add_executable(${name} ${source} ${REGEX_SOURCES})
|
||||
target_link_libraries(${name} LagomCore)
|
||||
add_test(
|
||||
NAME ${name}
|
||||
|
|
Loading…
Reference in a new issue