LibRegex: Store 'String' matches inside the bytecode

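Also removes the redundant 'length' argument from insert_bytecode_compare_string() and insert_bytecode_compare_named_reference(), since a StringView already carries its own length.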
This commit is contained in:
AnotherTest 2020-12-06 17:02:03 +03:30 committed by Andreas Kling
parent 6b4281c3aa
commit 19bf7734a4
Notes: sideshowbarker 2024-07-19 01:02:01 +09:00
3 changed files with 22 additions and 12 deletions

View file

@ -444,14 +444,16 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(const MatchInput& input, M
} else if (compare_type == CharacterCompareType::String) {
ASSERT(!current_inversion_state());
char* str = reinterpret_cast<char*>(m_bytecode->at(offset++));
auto& length = m_bytecode->at(offset++);
const auto& length = m_bytecode->at(offset++);
StringBuilder str_builder;
for (size_t i = 0; i < length; ++i)
str_builder.append(m_bytecode->at(offset++));
// Fail early if the string to compare is longer than what remains of the input
if (input.view.length() - state.string_position < length)
return ExecutionResult::Failed_ExecuteLowPrioForks;
if (!compare_string(input, state, str, length))
if (!compare_string(input, state, str_builder.string_view().characters_without_null_termination(), length))
return ExecutionResult::Failed_ExecuteLowPrioForks;
} else if (compare_type == CharacterCompareType::CharClass) {
@ -717,9 +719,11 @@ const Vector<String> OpCode_Compare::variable_arguments_to_string(Optional<Match
auto ref = m_bytecode->at(offset++);
result.empend(String::format("number=%lu", ref));
} else if (compare_type == CharacterCompareType::String) {
char* str = reinterpret_cast<char*>(m_bytecode->at(offset++));
auto& length = m_bytecode->at(offset++);
result.empend(String::format("value=\"%.*s\"", length, str));
StringBuilder str_builder;
for (size_t i = 0; i < length; ++i)
str_builder.append(m_bytecode->at(offset++));
result.empend(String::format("value=\"%.*s\"", length, str_builder.string_view().characters_without_null_termination()));
if (!view.is_null() && view.length() > state().string_position)
result.empend(String::format(
"compare against: \"%s\"",

View file

@ -186,7 +186,7 @@ public:
append(move(bytecode));
}
void insert_bytecode_compare_string(StringView view, size_t length)
void insert_bytecode_compare_string(StringView view)
{
ByteCode bytecode;
@ -196,8 +196,7 @@ public:
ByteCode arguments;
arguments.empend(static_cast<ByteCodeValueType>(CharacterCompareType::String));
arguments.empend(reinterpret_cast<ByteCodeValueType>(view.characters_without_null_termination()));
arguments.empend(length);
arguments.insert_string(view);
bytecode.empend(arguments.size()); // size of arguments
bytecode.append(move(arguments));
@ -205,7 +204,7 @@ public:
append(move(bytecode));
}
void insert_bytecode_compare_named_reference(StringView name, size_t length)
void insert_bytecode_compare_named_reference(StringView name)
{
ByteCode bytecode;
@ -216,7 +215,7 @@ public:
arguments.empend(static_cast<ByteCodeValueType>(CharacterCompareType::NamedReference));
arguments.empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination()));
arguments.empend(length);
arguments.empend(name.length());
bytecode.empend(arguments.size()); // size of arguments
bytecode.append(move(arguments));
@ -458,6 +457,13 @@ public:
OpCode* get_opcode(MatchState& state) const;
private:
void insert_string(const StringView& view)
{
empend((ByteCodeValueType)view.length());
for (size_t i = 0; i < view.length(); ++i)
empend((ByteCodeValueType)view[i]);
}
ALWAYS_INLINE OpCode* get_opcode_by_id(OpCodeId id) const;
static HashMap<u32, OwnPtr<OpCode>> s_opcodes;
};

View file

@ -430,7 +430,7 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si
if (length > 1) {
// last character is inserted into 'bytecode' for duplication symbol handling
auto new_length = length - ((match_repetition_symbol() && length > 1) ? 1 : 0);
stack.insert_bytecode_compare_string(start_token.value(), new_length);
stack.insert_bytecode_compare_string({ start_token.value().characters_without_null_termination(), new_length });
}
if ((match_repetition_symbol() && length > 1) || length == 1) // Create own compare opcode for last character before duplication symbol
@ -1099,7 +1099,7 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini
}
match_length_minimum += maybe_length.value();
stack.insert_bytecode_compare_named_reference(name, name.length());
stack.insert_bytecode_compare_named_reference(name);
return true;
}