|
@@ -27,6 +27,7 @@ using ByteCodeValueType = u64;
|
|
#define ENUMERATE_OPCODES \
|
|
#define ENUMERATE_OPCODES \
|
|
__ENUMERATE_OPCODE(Compare) \
|
|
__ENUMERATE_OPCODE(Compare) \
|
|
__ENUMERATE_OPCODE(Jump) \
|
|
__ENUMERATE_OPCODE(Jump) \
|
|
|
|
+ __ENUMERATE_OPCODE(JumpNonEmpty) \
|
|
__ENUMERATE_OPCODE(ForkJump) \
|
|
__ENUMERATE_OPCODE(ForkJump) \
|
|
__ENUMERATE_OPCODE(ForkStay) \
|
|
__ENUMERATE_OPCODE(ForkStay) \
|
|
__ENUMERATE_OPCODE(FailForks) \
|
|
__ENUMERATE_OPCODE(FailForks) \
|
|
@@ -42,6 +43,7 @@ using ByteCodeValueType = u64;
|
|
__ENUMERATE_OPCODE(ClearCaptureGroup) \
|
|
__ENUMERATE_OPCODE(ClearCaptureGroup) \
|
|
__ENUMERATE_OPCODE(Repeat) \
|
|
__ENUMERATE_OPCODE(Repeat) \
|
|
__ENUMERATE_OPCODE(ResetRepeat) \
|
|
__ENUMERATE_OPCODE(ResetRepeat) \
|
|
|
|
+ __ENUMERATE_OPCODE(Checkpoint) \
|
|
__ENUMERATE_OPCODE(Exit)
|
|
__ENUMERATE_OPCODE(Exit)
|
|
|
|
|
|
// clang-format off
|
|
// clang-format off
|
|
@@ -319,16 +321,14 @@ public:
|
|
empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
|
|
empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
|
|
empend(right.size() + 2); // Jump to the _ALT label
|
|
empend(right.size() + 2); // Jump to the _ALT label
|
|
|
|
|
|
- for (auto& op : right)
|
|
|
|
- append(move(op));
|
|
|
|
|
|
+ extend(right);
|
|
|
|
|
|
empend(static_cast<ByteCodeValueType>(OpCodeId::Jump));
|
|
empend(static_cast<ByteCodeValueType>(OpCodeId::Jump));
|
|
empend(left.size()); // Jump to the _END label
|
|
empend(left.size()); // Jump to the _END label
|
|
|
|
|
|
// LABEL _ALT = bytecode.size() + 2
|
|
// LABEL _ALT = bytecode.size() + 2
|
|
|
|
|
|
- for (auto& op : left)
|
|
|
|
- append(move(op));
|
|
|
|
|
|
+ extend(left);
|
|
|
|
|
|
// LABEL _END = alterantive_bytecode.size
|
|
// LABEL _END = alterantive_bytecode.size
|
|
}
|
|
}
|
|
@@ -376,10 +376,21 @@ public:
|
|
new_bytecode[pre_loop_fork_jump_index - 1] = (ByteCodeValueType)(fork_jump_address - pre_loop_fork_jump_index);
|
|
new_bytecode[pre_loop_fork_jump_index - 1] = (ByteCodeValueType)(fork_jump_address - pre_loop_fork_jump_index);
|
|
}
|
|
}
|
|
} else {
|
|
} else {
|
|
- // no maximum value set, repeat finding if possible
|
|
|
|
|
|
+ // no maximum value set, repeat finding if possible:
|
|
|
|
+ // (REPEAT REGEXP MIN)
|
|
|
|
+ // LABEL _START
|
|
|
|
+ // CHECKPOINT _C
|
|
|
|
+ // REGEXP
|
|
|
|
+ // JUMP_NONEMPTY _C _START FORK
|
|
|
|
+
|
|
|
|
+ // Note: This is only safe because REPEAT will leave one iteration outside (see repetition_n)
|
|
|
|
+ new_bytecode.insert(new_bytecode.size() - bytecode_to_repeat.size(), (ByteCodeValueType)OpCodeId::Checkpoint);
|
|
|
|
+
|
|
auto jump_kind = static_cast<ByteCodeValueType>(greedy ? OpCodeId::ForkJump : OpCodeId::ForkStay);
|
|
auto jump_kind = static_cast<ByteCodeValueType>(greedy ? OpCodeId::ForkJump : OpCodeId::ForkStay);
|
|
|
|
+ new_bytecode.empend((ByteCodeValueType)OpCodeId::JumpNonEmpty);
|
|
|
|
+ new_bytecode.empend(-bytecode_to_repeat.size() - 4 - 1); // Jump to the last iteration
|
|
|
|
+ new_bytecode.empend(-bytecode_to_repeat.size() - 4 - 1); // if _C is not empty.
|
|
new_bytecode.empend(jump_kind);
|
|
new_bytecode.empend(jump_kind);
|
|
- new_bytecode.empend(-bytecode_to_repeat.size() - 2); // Jump to the last iteration
|
|
|
|
}
|
|
}
|
|
|
|
|
|
bytecode_to_repeat = move(new_bytecode);
|
|
bytecode_to_repeat = move(new_bytecode);
|
|
@@ -412,23 +423,29 @@ public:
|
|
static void transform_bytecode_repetition_min_one(ByteCode& bytecode_to_repeat, bool greedy)
|
|
static void transform_bytecode_repetition_min_one(ByteCode& bytecode_to_repeat, bool greedy)
|
|
{
|
|
{
|
|
// LABEL _START = -bytecode_to_repeat.size()
|
|
// LABEL _START = -bytecode_to_repeat.size()
|
|
|
|
+ // CHECKPOINT _C
|
|
// REGEXP
|
|
// REGEXP
|
|
- // FORKSTAY _START (FORKJUMP -> Greedy)
|
|
|
|
|
|
+ // JUMP_NONEMPTY _C _START FORKSTAY (FORKJUMP -> Greedy)
|
|
|
|
+
|
|
|
|
+ bytecode_to_repeat.prepend((ByteCodeValueType)OpCodeId::Checkpoint);
|
|
|
|
+
|
|
|
|
+ bytecode_to_repeat.empend((ByteCodeValueType)OpCodeId::JumpNonEmpty);
|
|
|
|
+ bytecode_to_repeat.empend(-bytecode_to_repeat.size() - 3); // Jump to the _START label...
|
|
|
|
+ bytecode_to_repeat.empend(-bytecode_to_repeat.size() - 2); // ...if _C is not empty
|
|
|
|
|
|
if (greedy)
|
|
if (greedy)
|
|
bytecode_to_repeat.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
|
|
bytecode_to_repeat.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
|
|
else
|
|
else
|
|
bytecode_to_repeat.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
|
|
bytecode_to_repeat.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
|
|
-
|
|
|
|
- bytecode_to_repeat.empend(-(bytecode_to_repeat.size() + 1)); // Jump to the _START label
|
|
|
|
}
|
|
}
|
|
|
|
|
|
static void transform_bytecode_repetition_any(ByteCode& bytecode_to_repeat, bool greedy)
|
|
static void transform_bytecode_repetition_any(ByteCode& bytecode_to_repeat, bool greedy)
|
|
{
|
|
{
|
|
// LABEL _START
|
|
// LABEL _START
|
|
// FORKJUMP _END (FORKSTAY -> Greedy)
|
|
// FORKJUMP _END (FORKSTAY -> Greedy)
|
|
|
|
+ // CHECKPOINT _C
|
|
// REGEXP
|
|
// REGEXP
|
|
- // JUMP _START
|
|
|
|
|
|
+ // JUMP_NONEMPTY _C _START JUMP
|
|
// LABEL _END
|
|
// LABEL _END
|
|
|
|
|
|
// LABEL _START = m_bytes.size();
|
|
// LABEL _START = m_bytes.size();
|
|
@@ -439,13 +456,17 @@ public:
|
|
else
|
|
else
|
|
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
|
|
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
|
|
|
|
|
|
- bytecode.empend(bytecode_to_repeat.size() + 2); // Jump to the _END label
|
|
|
|
|
|
+ bytecode.empend(bytecode_to_repeat.size() + 1 + 4); // Jump to the _END label
|
|
|
|
|
|
- for (auto& op : bytecode_to_repeat)
|
|
|
|
- bytecode.append(move(op));
|
|
|
|
|
|
+ auto c_label = bytecode.size();
|
|
|
|
+ bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::Checkpoint));
|
|
|
|
+
|
|
|
|
+ bytecode.extend(bytecode_to_repeat);
|
|
|
|
|
|
- bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::Jump));
|
|
|
|
- bytecode.empend(-bytecode.size() - 1); // Jump to the _START label
|
|
|
|
|
|
+ bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::JumpNonEmpty));
|
|
|
|
+ bytecode.empend(-bytecode.size() - 3); // Jump(...) to the _START label...
|
|
|
|
+ bytecode.empend(c_label - bytecode.size() - 2); // ...only if _C passes.
|
|
|
|
+ bytecode.empend((ByteCodeValueType)OpCodeId::Jump);
|
|
// LABEL _END = bytecode.size()
|
|
// LABEL _END = bytecode.size()
|
|
|
|
|
|
bytecode_to_repeat = move(bytecode);
|
|
bytecode_to_repeat = move(bytecode);
|
|
@@ -744,6 +765,31 @@ public:
|
|
}
|
|
}
|
|
};
|
|
};
|
|
|
|
|
|
|
|
+class OpCode_Checkpoint final : public OpCode {
|
|
|
|
+public:
|
|
|
|
+ ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
|
|
|
|
+ ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Checkpoint; }
|
|
|
|
+ ALWAYS_INLINE size_t size() const override { return 1; }
|
|
|
|
+ String const arguments_string() const override { return ""; }
|
|
|
|
+};
|
|
|
|
+
|
|
|
|
+class OpCode_JumpNonEmpty final : public OpCode {
|
|
|
|
+public:
|
|
|
|
+ ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
|
|
|
|
+ ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::JumpNonEmpty; }
|
|
|
|
+ ALWAYS_INLINE size_t size() const override { return 4; }
|
|
|
|
+ ALWAYS_INLINE ssize_t offset() const { return argument(0); }
|
|
|
|
+ ALWAYS_INLINE ssize_t checkpoint() const { return argument(1); }
|
|
|
|
+ ALWAYS_INLINE OpCodeId form() const { return (OpCodeId)argument(2); }
|
|
|
|
+ String const arguments_string() const override
|
|
|
|
+ {
|
|
|
|
+ return String::formatted("{} offset={} [&{}], cp={} [&{}]",
|
|
|
|
+ opcode_id_name(form()),
|
|
|
|
+ offset(), state().instruction_position + size() + offset(),
|
|
|
|
+ checkpoint(), state().instruction_position + size() + checkpoint());
|
|
|
|
+ }
|
|
|
|
+};
|
|
|
|
+
|
|
template<typename T>
|
|
template<typename T>
|
|
bool is(OpCode const&);
|
|
bool is(OpCode const&);
|
|
|
|
|