RegexByteCode.h 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747
  1. /*
  2. * Copyright (c) 2020, Emanuel Sprung <emanuel.sprung@gmail.com>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #pragma once
  7. #include "RegexMatch.h"
  8. #include "RegexOptions.h"
  9. #include <AK/Format.h>
  10. #include <AK/Forward.h>
  11. #include <AK/HashMap.h>
  12. #include <AK/NonnullOwnPtr.h>
  13. #include <AK/OwnPtr.h>
  14. #include <AK/Traits.h>
  15. #include <AK/TypeCasts.h>
  16. #include <AK/Types.h>
  17. #include <AK/Vector.h>
  18. namespace regex {
  19. using ByteCodeValueType = u64;
  20. #define ENUMERATE_OPCODES \
  21. __ENUMERATE_OPCODE(Compare) \
  22. __ENUMERATE_OPCODE(Jump) \
  23. __ENUMERATE_OPCODE(ForkJump) \
  24. __ENUMERATE_OPCODE(ForkStay) \
  25. __ENUMERATE_OPCODE(FailForks) \
  26. __ENUMERATE_OPCODE(SaveLeftCaptureGroup) \
  27. __ENUMERATE_OPCODE(SaveRightCaptureGroup) \
  28. __ENUMERATE_OPCODE(SaveLeftNamedCaptureGroup) \
  29. __ENUMERATE_OPCODE(SaveRightNamedCaptureGroup) \
  30. __ENUMERATE_OPCODE(CheckBegin) \
  31. __ENUMERATE_OPCODE(CheckEnd) \
  32. __ENUMERATE_OPCODE(CheckBoundary) \
  33. __ENUMERATE_OPCODE(Save) \
  34. __ENUMERATE_OPCODE(Restore) \
  35. __ENUMERATE_OPCODE(GoBack) \
  36. __ENUMERATE_OPCODE(Exit)
  37. // clang-format off
  38. enum class OpCodeId : ByteCodeValueType {
  39. #define __ENUMERATE_OPCODE(x) x,
  40. ENUMERATE_OPCODES
  41. #undef __ENUMERATE_OPCODE
  42. First = Compare,
  43. Last = Exit,
  44. };
  45. // clang-format on
  46. #define ENUMERATE_CHARACTER_COMPARE_TYPES \
  47. __ENUMERATE_CHARACTER_COMPARE_TYPE(Undefined) \
  48. __ENUMERATE_CHARACTER_COMPARE_TYPE(Inverse) \
  49. __ENUMERATE_CHARACTER_COMPARE_TYPE(TemporaryInverse) \
  50. __ENUMERATE_CHARACTER_COMPARE_TYPE(AnyChar) \
  51. __ENUMERATE_CHARACTER_COMPARE_TYPE(Char) \
  52. __ENUMERATE_CHARACTER_COMPARE_TYPE(String) \
  53. __ENUMERATE_CHARACTER_COMPARE_TYPE(CharClass) \
  54. __ENUMERATE_CHARACTER_COMPARE_TYPE(CharRange) \
  55. __ENUMERATE_CHARACTER_COMPARE_TYPE(Reference) \
  56. __ENUMERATE_CHARACTER_COMPARE_TYPE(NamedReference) \
  57. __ENUMERATE_CHARACTER_COMPARE_TYPE(RangeExpressionDummy)
  58. enum class CharacterCompareType : ByteCodeValueType {
  59. #define __ENUMERATE_CHARACTER_COMPARE_TYPE(x) x,
  60. ENUMERATE_CHARACTER_COMPARE_TYPES
  61. #undef __ENUMERATE_CHARACTER_COMPARE_TYPE
  62. };
  63. #define ENUMERATE_CHARACTER_CLASSES \
  64. __ENUMERATE_CHARACTER_CLASS(Alnum) \
  65. __ENUMERATE_CHARACTER_CLASS(Cntrl) \
  66. __ENUMERATE_CHARACTER_CLASS(Lower) \
  67. __ENUMERATE_CHARACTER_CLASS(Space) \
  68. __ENUMERATE_CHARACTER_CLASS(Alpha) \
  69. __ENUMERATE_CHARACTER_CLASS(Digit) \
  70. __ENUMERATE_CHARACTER_CLASS(Print) \
  71. __ENUMERATE_CHARACTER_CLASS(Upper) \
  72. __ENUMERATE_CHARACTER_CLASS(Blank) \
  73. __ENUMERATE_CHARACTER_CLASS(Graph) \
  74. __ENUMERATE_CHARACTER_CLASS(Punct) \
  75. __ENUMERATE_CHARACTER_CLASS(Word) \
  76. __ENUMERATE_CHARACTER_CLASS(Xdigit)
  77. enum class CharClass : ByteCodeValueType {
  78. #define __ENUMERATE_CHARACTER_CLASS(x) x,
  79. ENUMERATE_CHARACTER_CLASSES
  80. #undef __ENUMERATE_CHARACTER_CLASS
  81. };
  82. #define ENUMERATE_BOUNDARY_CHECK_TYPES \
  83. __ENUMERATE_BOUNDARY_CHECK_TYPE(Word) \
  84. __ENUMERATE_BOUNDARY_CHECK_TYPE(NonWord)
  85. enum class BoundaryCheckType : ByteCodeValueType {
  86. #define __ENUMERATE_BOUNDARY_CHECK_TYPE(x) x,
  87. ENUMERATE_BOUNDARY_CHECK_TYPES
  88. #undef __ENUMERATE_BOUNDARY_CHECK_TYPE
  89. };
  90. struct CharRange {
  91. const u32 from;
  92. const u32 to;
  93. CharRange(u64 value)
  94. : from(value >> 32)
  95. , to(value & 0xffffffff)
  96. {
  97. }
  98. CharRange(u32 from, u32 to)
  99. : from(from)
  100. , to(to)
  101. {
  102. }
  103. operator ByteCodeValueType() const { return ((u64)from << 32) | to; }
  104. };
  105. struct CompareTypeAndValuePair {
  106. CharacterCompareType type;
  107. ByteCodeValueType value;
  108. };
  109. class OpCode;
  110. class ByteCode : public Vector<ByteCodeValueType> {
  111. public:
  112. ByteCode()
  113. {
  114. ensure_opcodes_initialized();
  115. }
  116. ByteCode(const ByteCode&) = default;
  117. virtual ~ByteCode() = default;
  118. ByteCode& operator=(ByteCode&&) = default;
  119. void insert_bytecode_compare_values(Vector<CompareTypeAndValuePair>&& pairs)
  120. {
  121. ByteCode bytecode;
  122. bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::Compare));
  123. bytecode.empend(pairs.size()); // number of arguments
  124. ByteCode arguments;
  125. for (auto& value : pairs) {
  126. VERIFY(value.type != CharacterCompareType::RangeExpressionDummy);
  127. VERIFY(value.type != CharacterCompareType::Undefined);
  128. VERIFY(value.type != CharacterCompareType::String);
  129. VERIFY(value.type != CharacterCompareType::NamedReference);
  130. arguments.append((ByteCodeValueType)value.type);
  131. if (value.type != CharacterCompareType::Inverse && value.type != CharacterCompareType::AnyChar && value.type != CharacterCompareType::TemporaryInverse)
  132. arguments.append(move(value.value));
  133. }
  134. bytecode.empend(arguments.size()); // size of arguments
  135. bytecode.extend(move(arguments));
  136. extend(move(bytecode));
  137. }
  138. void insert_bytecode_check_boundary(BoundaryCheckType type)
  139. {
  140. ByteCode bytecode;
  141. bytecode.empend((ByteCodeValueType)OpCodeId::CheckBoundary);
  142. bytecode.empend((ByteCodeValueType)type);
  143. extend(move(bytecode));
  144. }
  145. void insert_bytecode_compare_string(StringView view)
  146. {
  147. ByteCode bytecode;
  148. bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::Compare));
  149. bytecode.empend(static_cast<u64>(1)); // number of arguments
  150. ByteCode arguments;
  151. arguments.empend(static_cast<ByteCodeValueType>(CharacterCompareType::String));
  152. arguments.insert_string(view);
  153. bytecode.empend(arguments.size()); // size of arguments
  154. bytecode.extend(move(arguments));
  155. extend(move(bytecode));
  156. }
  157. void insert_bytecode_compare_named_reference(StringView name)
  158. {
  159. ByteCode bytecode;
  160. bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::Compare));
  161. bytecode.empend(static_cast<u64>(1)); // number of arguments
  162. ByteCode arguments;
  163. arguments.empend(static_cast<ByteCodeValueType>(CharacterCompareType::NamedReference));
  164. arguments.empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination()));
  165. arguments.empend(name.length());
  166. bytecode.empend(arguments.size()); // size of arguments
  167. bytecode.extend(move(arguments));
  168. extend(move(bytecode));
  169. }
  170. void insert_bytecode_group_capture_left(size_t capture_groups_count)
  171. {
  172. empend(static_cast<ByteCodeValueType>(OpCodeId::SaveLeftCaptureGroup));
  173. empend(capture_groups_count);
  174. }
  175. void insert_bytecode_group_capture_left(const StringView& name)
  176. {
  177. empend(static_cast<ByteCodeValueType>(OpCodeId::SaveLeftNamedCaptureGroup));
  178. empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination()));
  179. empend(name.length());
  180. }
  181. void insert_bytecode_group_capture_right(size_t capture_groups_count)
  182. {
  183. empend(static_cast<ByteCodeValueType>(OpCodeId::SaveRightCaptureGroup));
  184. empend(capture_groups_count);
  185. }
  186. void insert_bytecode_group_capture_right(const StringView& name)
  187. {
  188. empend(static_cast<ByteCodeValueType>(OpCodeId::SaveRightNamedCaptureGroup));
  189. empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination()));
  190. empend(name.length());
  191. }
  192. enum class LookAroundType {
  193. LookAhead,
  194. LookBehind,
  195. NegatedLookAhead,
  196. NegatedLookBehind,
  197. };
  198. void insert_bytecode_lookaround(ByteCode&& lookaround_body, LookAroundType type, size_t match_length = 0)
  199. {
  200. // FIXME: The save stack will grow infinitely with repeated failures
  201. // as we do not discard that on failure (we don't necessarily know how many to pop with the current architecture).
  202. switch (type) {
  203. case LookAroundType::LookAhead: {
  204. // SAVE
  205. // REGEXP BODY
  206. // RESTORE
  207. empend((ByteCodeValueType)OpCodeId::Save);
  208. extend(move(lookaround_body));
  209. empend((ByteCodeValueType)OpCodeId::Restore);
  210. return;
  211. }
  212. case LookAroundType::NegatedLookAhead: {
  213. // JUMP _A
  214. // LABEL _L
  215. // REGEXP BODY
  216. // FAIL 2
  217. // LABEL _A
  218. // SAVE
  219. // FORKJUMP _L
  220. // RESTORE
  221. auto body_length = lookaround_body.size();
  222. empend((ByteCodeValueType)OpCodeId::Jump);
  223. empend((ByteCodeValueType)body_length + 2); // JUMP to label _A
  224. extend(move(lookaround_body));
  225. empend((ByteCodeValueType)OpCodeId::FailForks);
  226. empend((ByteCodeValueType)2); // Fail two forks
  227. empend((ByteCodeValueType)OpCodeId::Save);
  228. empend((ByteCodeValueType)OpCodeId::ForkJump);
  229. empend((ByteCodeValueType) - (body_length + 5)); // JUMP to label _L
  230. empend((ByteCodeValueType)OpCodeId::Restore);
  231. return;
  232. }
  233. case LookAroundType::LookBehind:
  234. // SAVE
  235. // GOBACK match_length(BODY)
  236. // REGEXP BODY
  237. // RESTORE
  238. empend((ByteCodeValueType)OpCodeId::Save);
  239. empend((ByteCodeValueType)OpCodeId::GoBack);
  240. empend((ByteCodeValueType)match_length);
  241. extend(move(lookaround_body));
  242. empend((ByteCodeValueType)OpCodeId::Restore);
  243. return;
  244. case LookAroundType::NegatedLookBehind: {
  245. // JUMP _A
  246. // LABEL _L
  247. // GOBACK match_length(BODY)
  248. // REGEXP BODY
  249. // FAIL 2
  250. // LABEL _A
  251. // SAVE
  252. // FORKJUMP _L
  253. // RESTORE
  254. auto body_length = lookaround_body.size();
  255. empend((ByteCodeValueType)OpCodeId::Jump);
  256. empend((ByteCodeValueType)body_length + 4); // JUMP to label _A
  257. empend((ByteCodeValueType)OpCodeId::GoBack);
  258. empend((ByteCodeValueType)match_length);
  259. extend(move(lookaround_body));
  260. empend((ByteCodeValueType)OpCodeId::FailForks);
  261. empend((ByteCodeValueType)2); // Fail two forks
  262. empend((ByteCodeValueType)OpCodeId::Save);
  263. empend((ByteCodeValueType)OpCodeId::ForkJump);
  264. empend((ByteCodeValueType) - (body_length + 7)); // JUMP to label _L
  265. empend((ByteCodeValueType)OpCodeId::Restore);
  266. return;
  267. }
  268. }
  269. VERIFY_NOT_REACHED();
  270. }
  271. void insert_bytecode_alternation(ByteCode&& left, ByteCode&& right)
  272. {
  273. // FORKJUMP _ALT
  274. // REGEXP ALT2
  275. // JUMP _END
  276. // LABEL _ALT
  277. // REGEXP ALT1
  278. // LABEL _END
  279. ByteCode byte_code;
  280. empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
  281. empend(right.size() + 2); // Jump to the _ALT label
  282. for (auto& op : right)
  283. append(move(op));
  284. empend(static_cast<ByteCodeValueType>(OpCodeId::Jump));
  285. empend(left.size()); // Jump to the _END label
  286. // LABEL _ALT = bytecode.size() + 2
  287. for (auto& op : left)
  288. append(move(op));
  289. // LABEL _END = alterantive_bytecode.size
  290. }
  291. static void transform_bytecode_repetition_min_max(ByteCode& bytecode_to_repeat, size_t minimum, Optional<size_t> maximum, bool greedy = true)
  292. {
  293. ByteCode new_bytecode;
  294. new_bytecode.insert_bytecode_repetition_n(bytecode_to_repeat, minimum);
  295. if (maximum.has_value()) {
  296. auto jump_kind = static_cast<ByteCodeValueType>(greedy ? OpCodeId::ForkStay : OpCodeId::ForkJump);
  297. if (maximum.value() > minimum) {
  298. auto diff = maximum.value() - minimum;
  299. new_bytecode.empend(jump_kind);
  300. new_bytecode.empend(diff * (bytecode_to_repeat.size() + 2)); // Jump to the _END label
  301. for (size_t i = 0; i < diff; ++i) {
  302. new_bytecode.extend(bytecode_to_repeat);
  303. new_bytecode.empend(jump_kind);
  304. new_bytecode.empend((diff - i - 1) * (bytecode_to_repeat.size() + 2)); // Jump to the _END label
  305. }
  306. }
  307. } else {
  308. // no maximum value set, repeat finding if possible
  309. auto jump_kind = static_cast<ByteCodeValueType>(greedy ? OpCodeId::ForkJump : OpCodeId::ForkStay);
  310. new_bytecode.empend(jump_kind);
  311. new_bytecode.empend(-bytecode_to_repeat.size() - 2); // Jump to the last iteration
  312. }
  313. bytecode_to_repeat = move(new_bytecode);
  314. }
  315. void insert_bytecode_repetition_n(ByteCode& bytecode_to_repeat, size_t n)
  316. {
  317. for (size_t i = 0; i < n; ++i)
  318. extend(bytecode_to_repeat);
  319. }
  320. static void transform_bytecode_repetition_min_one(ByteCode& bytecode_to_repeat, bool greedy)
  321. {
  322. // LABEL _START = -bytecode_to_repeat.size()
  323. // REGEXP
  324. // FORKSTAY _START (FORKJUMP -> Greedy)
  325. if (greedy)
  326. bytecode_to_repeat.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
  327. else
  328. bytecode_to_repeat.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
  329. bytecode_to_repeat.empend(-(bytecode_to_repeat.size() + 1)); // Jump to the _START label
  330. }
  331. static void transform_bytecode_repetition_any(ByteCode& bytecode_to_repeat, bool greedy)
  332. {
  333. // LABEL _START
  334. // FORKJUMP _END (FORKSTAY -> Greedy)
  335. // REGEXP
  336. // JUMP _START
  337. // LABEL _END
  338. // LABEL _START = m_bytes.size();
  339. ByteCode bytecode;
  340. if (greedy)
  341. bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
  342. else
  343. bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
  344. bytecode.empend(bytecode_to_repeat.size() + 2); // Jump to the _END label
  345. for (auto& op : bytecode_to_repeat)
  346. bytecode.append(move(op));
  347. bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::Jump));
  348. bytecode.empend(-bytecode.size() - 1); // Jump to the _START label
  349. // LABEL _END = bytecode.size()
  350. bytecode_to_repeat = move(bytecode);
  351. }
  352. static void transform_bytecode_repetition_zero_or_one(ByteCode& bytecode_to_repeat, bool greedy)
  353. {
  354. // FORKJUMP _END (FORKSTAY -> Greedy)
  355. // REGEXP
  356. // LABEL _END
  357. ByteCode bytecode;
  358. if (greedy)
  359. bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
  360. else
  361. bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
  362. bytecode.empend(bytecode_to_repeat.size()); // Jump to the _END label
  363. for (auto& op : bytecode_to_repeat)
  364. bytecode.append(move(op));
  365. // LABEL _END = bytecode.size()
  366. bytecode_to_repeat = move(bytecode);
  367. }
  368. OpCode& get_opcode(MatchState& state) const;
  369. private:
  370. void insert_string(const StringView& view)
  371. {
  372. empend((ByteCodeValueType)view.length());
  373. for (size_t i = 0; i < view.length(); ++i)
  374. empend((ByteCodeValueType)view[i]);
  375. }
  376. void ensure_opcodes_initialized();
  377. ALWAYS_INLINE OpCode& get_opcode_by_id(OpCodeId id) const;
  378. static OwnPtr<OpCode> s_opcodes[(size_t)OpCodeId::Last + 1];
  379. static bool s_opcodes_initialized;
  380. };
  381. #define ENUMERATE_EXECUTION_RESULTS \
  382. __ENUMERATE_EXECUTION_RESULT(Continue) \
  383. __ENUMERATE_EXECUTION_RESULT(Fork_PrioHigh) \
  384. __ENUMERATE_EXECUTION_RESULT(Fork_PrioLow) \
  385. __ENUMERATE_EXECUTION_RESULT(Failed) \
  386. __ENUMERATE_EXECUTION_RESULT(Failed_ExecuteLowPrioForks) \
  387. __ENUMERATE_EXECUTION_RESULT(Succeeded)
  388. enum class ExecutionResult : u8 {
  389. #define __ENUMERATE_EXECUTION_RESULT(x) x,
  390. ENUMERATE_EXECUTION_RESULTS
  391. #undef __ENUMERATE_EXECUTION_RESULT
  392. };
  393. const char* execution_result_name(ExecutionResult result);
  394. const char* opcode_id_name(OpCodeId opcode_id);
  395. const char* boundary_check_type_name(BoundaryCheckType);
  396. const char* character_compare_type_name(CharacterCompareType result);
  397. const char* execution_result_name(ExecutionResult result);
  398. class OpCode {
  399. public:
  400. OpCode() = default;
  401. virtual ~OpCode() = default;
  402. virtual OpCodeId opcode_id() const = 0;
  403. virtual size_t size() const = 0;
  404. virtual ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const = 0;
  405. ALWAYS_INLINE ByteCodeValueType argument(size_t offset) const
  406. {
  407. VERIFY(state().instruction_position + offset <= m_bytecode->size());
  408. return m_bytecode->at(state().instruction_position + 1 + offset);
  409. }
  410. ALWAYS_INLINE const char* name() const;
  411. static const char* name(const OpCodeId);
  412. ALWAYS_INLINE void set_state(MatchState& state) { m_state = &state; }
  413. ALWAYS_INLINE void set_bytecode(ByteCode& bytecode) { m_bytecode = &bytecode; }
  414. ALWAYS_INLINE const MatchState& state() const
  415. {
  416. VERIFY(m_state);
  417. return *m_state;
  418. }
  419. const String to_string() const
  420. {
  421. return String::formatted("[{:#02X}] {}", (int)opcode_id(), name(opcode_id()));
  422. }
  423. virtual const String arguments_string() const = 0;
  424. ALWAYS_INLINE const ByteCode& bytecode() const { return *m_bytecode; }
  425. protected:
  426. ByteCode* m_bytecode { nullptr };
  427. MatchState* m_state { nullptr };
  428. };
  429. class OpCode_Exit final : public OpCode {
  430. public:
  431. ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const override;
  432. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Exit; }
  433. ALWAYS_INLINE size_t size() const override { return 1; }
  434. const String arguments_string() const override { return ""; }
  435. };
  436. class OpCode_FailForks final : public OpCode {
  437. public:
  438. ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const override;
  439. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::FailForks; }
  440. ALWAYS_INLINE size_t size() const override { return 2; }
  441. ALWAYS_INLINE size_t count() const { return argument(0); }
  442. const String arguments_string() const override { return String::formatted("count={}", count()); }
  443. };
  444. class OpCode_Save final : public OpCode {
  445. public:
  446. ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const override;
  447. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Save; }
  448. ALWAYS_INLINE size_t size() const override { return 1; }
  449. const String arguments_string() const override { return ""; }
  450. };
  451. class OpCode_Restore final : public OpCode {
  452. public:
  453. ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const override;
  454. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Restore; }
  455. ALWAYS_INLINE size_t size() const override { return 1; }
  456. const String arguments_string() const override { return ""; }
  457. };
  458. class OpCode_GoBack final : public OpCode {
  459. public:
  460. ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const override;
  461. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::GoBack; }
  462. ALWAYS_INLINE size_t size() const override { return 2; }
  463. ALWAYS_INLINE size_t count() const { return argument(0); }
  464. const String arguments_string() const override { return String::formatted("count={}", count()); }
  465. };
  466. class OpCode_Jump final : public OpCode {
  467. public:
  468. ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const override;
  469. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Jump; }
  470. ALWAYS_INLINE size_t size() const override { return 2; }
  471. ALWAYS_INLINE ssize_t offset() const { return argument(0); }
  472. const String arguments_string() const override
  473. {
  474. return String::formatted("offset={} [&{}]", offset(), state().instruction_position + size() + offset());
  475. }
  476. };
  477. class OpCode_ForkJump final : public OpCode {
  478. public:
  479. ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const override;
  480. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ForkJump; }
  481. ALWAYS_INLINE size_t size() const override { return 2; }
  482. ALWAYS_INLINE ssize_t offset() const { return argument(0); }
  483. const String arguments_string() const override
  484. {
  485. return String::formatted("offset={} [&{}], sp: {}", offset(), state().instruction_position + size() + offset(), state().string_position);
  486. }
  487. };
  488. class OpCode_ForkStay final : public OpCode {
  489. public:
  490. ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const override;
  491. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ForkStay; }
  492. ALWAYS_INLINE size_t size() const override { return 2; }
  493. ALWAYS_INLINE ssize_t offset() const { return argument(0); }
  494. const String arguments_string() const override
  495. {
  496. return String::formatted("offset={} [&{}], sp: {}", offset(), state().instruction_position + size() + offset(), state().string_position);
  497. }
  498. };
  499. class OpCode_CheckBegin final : public OpCode {
  500. public:
  501. ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const override;
  502. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::CheckBegin; }
  503. ALWAYS_INLINE size_t size() const override { return 1; }
  504. const String arguments_string() const override { return ""; }
  505. };
  506. class OpCode_CheckEnd final : public OpCode {
  507. public:
  508. ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const override;
  509. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::CheckEnd; }
  510. ALWAYS_INLINE size_t size() const override { return 1; }
  511. const String arguments_string() const override { return ""; }
  512. };
  513. class OpCode_CheckBoundary final : public OpCode {
  514. public:
  515. ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const override;
  516. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::CheckBoundary; }
  517. ALWAYS_INLINE size_t size() const override { return 2; }
  518. ALWAYS_INLINE size_t arguments_count() const { return 1; }
  519. ALWAYS_INLINE BoundaryCheckType type() const { return static_cast<BoundaryCheckType>(argument(0)); }
  520. const String arguments_string() const override { return String::formatted("kind={} ({})", (long unsigned int)argument(0), boundary_check_type_name(type())); }
  521. };
  522. class OpCode_SaveLeftCaptureGroup final : public OpCode {
  523. public:
  524. ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const override;
  525. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveLeftCaptureGroup; }
  526. ALWAYS_INLINE size_t size() const override { return 2; }
  527. ALWAYS_INLINE size_t id() const { return argument(0); }
  528. const String arguments_string() const override { return String::formatted("id={}", id()); }
  529. };
  530. class OpCode_SaveRightCaptureGroup final : public OpCode {
  531. public:
  532. ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const override;
  533. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveRightCaptureGroup; }
  534. ALWAYS_INLINE size_t size() const override { return 2; }
  535. ALWAYS_INLINE size_t id() const { return argument(0); }
  536. const String arguments_string() const override { return String::formatted("id={}", id()); }
  537. };
  538. class OpCode_SaveLeftNamedCaptureGroup final : public OpCode {
  539. public:
  540. ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const override;
  541. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveLeftNamedCaptureGroup; }
  542. ALWAYS_INLINE size_t size() const override { return 3; }
  543. ALWAYS_INLINE StringView name() const { return { reinterpret_cast<char*>(argument(0)), length() }; }
  544. ALWAYS_INLINE size_t length() const { return argument(1); }
  545. const String arguments_string() const override
  546. {
  547. return String::formatted("name={}, length={}", name(), length());
  548. }
  549. };
  550. class OpCode_SaveRightNamedCaptureGroup final : public OpCode {
  551. public:
  552. ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const override;
  553. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveRightNamedCaptureGroup; }
  554. ALWAYS_INLINE size_t size() const override { return 3; }
  555. ALWAYS_INLINE StringView name() const { return { reinterpret_cast<char*>(argument(0)), length() }; }
  556. ALWAYS_INLINE size_t length() const { return argument(1); }
  557. const String arguments_string() const override
  558. {
  559. return String::formatted("name={}, length={}", name(), length());
  560. }
  561. };
  562. class OpCode_Compare final : public OpCode {
  563. public:
  564. ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const override;
  565. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Compare; }
  566. ALWAYS_INLINE size_t size() const override { return arguments_size() + 3; }
  567. ALWAYS_INLINE size_t arguments_count() const { return argument(0); }
  568. ALWAYS_INLINE size_t arguments_size() const { return argument(1); }
  569. const String arguments_string() const override;
  570. const Vector<String> variable_arguments_to_string(Optional<MatchInput> input = {}) const;
  571. private:
  572. ALWAYS_INLINE static void compare_char(const MatchInput& input, MatchState& state, u32 ch1, bool inverse, bool& inverse_matched);
  573. ALWAYS_INLINE static bool compare_string(const MatchInput& input, MatchState& state, RegexStringView const& str, bool& had_zero_length_match);
  574. ALWAYS_INLINE static void compare_character_class(const MatchInput& input, MatchState& state, CharClass character_class, u32 ch, bool inverse, bool& inverse_matched);
  575. ALWAYS_INLINE static void compare_character_range(const MatchInput& input, MatchState& state, u32 from, u32 to, u32 ch, bool inverse, bool& inverse_matched);
  576. };
  577. template<typename T>
  578. bool is(const OpCode&);
  579. template<typename T>
  580. ALWAYS_INLINE bool is(const OpCode&)
  581. {
  582. return false;
  583. }
  584. template<typename T>
  585. ALWAYS_INLINE bool is(const OpCode* opcode)
  586. {
  587. return is<T>(*opcode);
  588. }
  589. template<>
  590. ALWAYS_INLINE bool is<OpCode_ForkStay>(const OpCode& opcode)
  591. {
  592. return opcode.opcode_id() == OpCodeId::ForkStay;
  593. }
  594. template<>
  595. ALWAYS_INLINE bool is<OpCode_Exit>(const OpCode& opcode)
  596. {
  597. return opcode.opcode_id() == OpCodeId::Exit;
  598. }
  599. template<>
  600. ALWAYS_INLINE bool is<OpCode_Compare>(const OpCode& opcode)
  601. {
  602. return opcode.opcode_id() == OpCodeId::Compare;
  603. }
  604. template<typename T>
  605. ALWAYS_INLINE T const& to(OpCode const& opcode)
  606. {
  607. return verify_cast<T>(opcode);
  608. }
  609. template<typename T>
  610. ALWAYS_INLINE T* to(OpCode* opcode)
  611. {
  612. return verify_cast<T>(opcode);
  613. }
  614. template<typename T>
  615. ALWAYS_INLINE T const* to(OpCode const* opcode)
  616. {
  617. return verify_cast<T>(opcode);
  618. }
  619. template<typename T>
  620. ALWAYS_INLINE T& to(OpCode& opcode)
  621. {
  622. return verify_cast<T>(opcode);
  623. }
  624. }