RegexByteCode.h 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787
  1. /*
  2. * Copyright (c) 2020, Emanuel Sprung <emanuel.sprung@gmail.com>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #pragma once
  7. #include "RegexMatch.h"
  8. #include "RegexOptions.h"
  9. #include <AK/Format.h>
  10. #include <AK/Forward.h>
  11. #include <AK/HashMap.h>
  12. #include <AK/NonnullOwnPtr.h>
  13. #include <AK/OwnPtr.h>
  14. #include <AK/Traits.h>
  15. #include <AK/TypeCasts.h>
  16. #include <AK/Types.h>
  17. #include <AK/Vector.h>
  18. #include <LibUnicode/Forward.h>
  19. namespace regex {
  20. using ByteCodeValueType = u64;
  21. #define ENUMERATE_OPCODES \
  22. __ENUMERATE_OPCODE(Compare) \
  23. __ENUMERATE_OPCODE(Jump) \
  24. __ENUMERATE_OPCODE(ForkJump) \
  25. __ENUMERATE_OPCODE(ForkStay) \
  26. __ENUMERATE_OPCODE(FailForks) \
  27. __ENUMERATE_OPCODE(SaveLeftCaptureGroup) \
  28. __ENUMERATE_OPCODE(SaveRightCaptureGroup) \
  29. __ENUMERATE_OPCODE(SaveLeftNamedCaptureGroup) \
  30. __ENUMERATE_OPCODE(SaveRightNamedCaptureGroup) \
  31. __ENUMERATE_OPCODE(CheckBegin) \
  32. __ENUMERATE_OPCODE(CheckEnd) \
  33. __ENUMERATE_OPCODE(CheckBoundary) \
  34. __ENUMERATE_OPCODE(Save) \
  35. __ENUMERATE_OPCODE(Restore) \
  36. __ENUMERATE_OPCODE(GoBack) \
  37. __ENUMERATE_OPCODE(ClearCaptureGroup) \
  38. __ENUMERATE_OPCODE(ClearNamedCaptureGroup) \
  39. __ENUMERATE_OPCODE(Exit)
  40. // clang-format off
  41. enum class OpCodeId : ByteCodeValueType {
  42. #define __ENUMERATE_OPCODE(x) x,
  43. ENUMERATE_OPCODES
  44. #undef __ENUMERATE_OPCODE
  45. First = Compare,
  46. Last = Exit,
  47. };
  48. // clang-format on
  49. #define ENUMERATE_CHARACTER_COMPARE_TYPES \
  50. __ENUMERATE_CHARACTER_COMPARE_TYPE(Undefined) \
  51. __ENUMERATE_CHARACTER_COMPARE_TYPE(Inverse) \
  52. __ENUMERATE_CHARACTER_COMPARE_TYPE(TemporaryInverse) \
  53. __ENUMERATE_CHARACTER_COMPARE_TYPE(AnyChar) \
  54. __ENUMERATE_CHARACTER_COMPARE_TYPE(Char) \
  55. __ENUMERATE_CHARACTER_COMPARE_TYPE(String) \
  56. __ENUMERATE_CHARACTER_COMPARE_TYPE(CharClass) \
  57. __ENUMERATE_CHARACTER_COMPARE_TYPE(CharRange) \
  58. __ENUMERATE_CHARACTER_COMPARE_TYPE(Reference) \
  59. __ENUMERATE_CHARACTER_COMPARE_TYPE(NamedReference) \
  60. __ENUMERATE_CHARACTER_COMPARE_TYPE(Property) \
  61. __ENUMERATE_CHARACTER_COMPARE_TYPE(RangeExpressionDummy)
  62. enum class CharacterCompareType : ByteCodeValueType {
  63. #define __ENUMERATE_CHARACTER_COMPARE_TYPE(x) x,
  64. ENUMERATE_CHARACTER_COMPARE_TYPES
  65. #undef __ENUMERATE_CHARACTER_COMPARE_TYPE
  66. };
  67. #define ENUMERATE_CHARACTER_CLASSES \
  68. __ENUMERATE_CHARACTER_CLASS(Alnum) \
  69. __ENUMERATE_CHARACTER_CLASS(Cntrl) \
  70. __ENUMERATE_CHARACTER_CLASS(Lower) \
  71. __ENUMERATE_CHARACTER_CLASS(Space) \
  72. __ENUMERATE_CHARACTER_CLASS(Alpha) \
  73. __ENUMERATE_CHARACTER_CLASS(Digit) \
  74. __ENUMERATE_CHARACTER_CLASS(Print) \
  75. __ENUMERATE_CHARACTER_CLASS(Upper) \
  76. __ENUMERATE_CHARACTER_CLASS(Blank) \
  77. __ENUMERATE_CHARACTER_CLASS(Graph) \
  78. __ENUMERATE_CHARACTER_CLASS(Punct) \
  79. __ENUMERATE_CHARACTER_CLASS(Word) \
  80. __ENUMERATE_CHARACTER_CLASS(Xdigit)
  81. enum class CharClass : ByteCodeValueType {
  82. #define __ENUMERATE_CHARACTER_CLASS(x) x,
  83. ENUMERATE_CHARACTER_CLASSES
  84. #undef __ENUMERATE_CHARACTER_CLASS
  85. };
  86. #define ENUMERATE_BOUNDARY_CHECK_TYPES \
  87. __ENUMERATE_BOUNDARY_CHECK_TYPE(Word) \
  88. __ENUMERATE_BOUNDARY_CHECK_TYPE(NonWord)
  89. enum class BoundaryCheckType : ByteCodeValueType {
  90. #define __ENUMERATE_BOUNDARY_CHECK_TYPE(x) x,
  91. ENUMERATE_BOUNDARY_CHECK_TYPES
  92. #undef __ENUMERATE_BOUNDARY_CHECK_TYPE
  93. };
  94. struct CharRange {
  95. u32 const from;
  96. u32 const to;
  97. CharRange(u64 value)
  98. : from(value >> 32)
  99. , to(value & 0xffffffff)
  100. {
  101. }
  102. CharRange(u32 from, u32 to)
  103. : from(from)
  104. , to(to)
  105. {
  106. }
  107. operator ByteCodeValueType() const { return ((u64)from << 32) | to; }
  108. };
  109. struct CompareTypeAndValuePair {
  110. CharacterCompareType type;
  111. ByteCodeValueType value;
  112. };
  113. class OpCode;
  114. class ByteCode : public Vector<ByteCodeValueType> {
  115. public:
  116. ByteCode()
  117. {
  118. ensure_opcodes_initialized();
  119. }
  120. ByteCode(ByteCode const&) = default;
  121. virtual ~ByteCode() = default;
  122. ByteCode& operator=(ByteCode&&) = default;
  123. void insert_bytecode_compare_values(Vector<CompareTypeAndValuePair>&& pairs)
  124. {
  125. ByteCode bytecode;
  126. bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::Compare));
  127. bytecode.empend(pairs.size()); // number of arguments
  128. ByteCode arguments;
  129. for (auto& value : pairs) {
  130. VERIFY(value.type != CharacterCompareType::RangeExpressionDummy);
  131. VERIFY(value.type != CharacterCompareType::Undefined);
  132. VERIFY(value.type != CharacterCompareType::String);
  133. VERIFY(value.type != CharacterCompareType::NamedReference);
  134. arguments.append((ByteCodeValueType)value.type);
  135. if (value.type != CharacterCompareType::Inverse && value.type != CharacterCompareType::AnyChar && value.type != CharacterCompareType::TemporaryInverse)
  136. arguments.append(move(value.value));
  137. }
  138. bytecode.empend(arguments.size()); // size of arguments
  139. bytecode.extend(move(arguments));
  140. extend(move(bytecode));
  141. }
  142. void insert_bytecode_check_boundary(BoundaryCheckType type)
  143. {
  144. ByteCode bytecode;
  145. bytecode.empend((ByteCodeValueType)OpCodeId::CheckBoundary);
  146. bytecode.empend((ByteCodeValueType)type);
  147. extend(move(bytecode));
  148. }
  149. void insert_bytecode_clear_capture_group(size_t index)
  150. {
  151. empend(static_cast<ByteCodeValueType>(OpCodeId::ClearCaptureGroup));
  152. empend(index);
  153. }
  154. void insert_bytecode_clear_named_capture_group(StringView name)
  155. {
  156. empend(static_cast<ByteCodeValueType>(OpCodeId::ClearNamedCaptureGroup));
  157. empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination()));
  158. empend(name.length());
  159. }
  160. void insert_bytecode_compare_string(StringView view)
  161. {
  162. ByteCode bytecode;
  163. bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::Compare));
  164. bytecode.empend(static_cast<u64>(1)); // number of arguments
  165. ByteCode arguments;
  166. arguments.empend(static_cast<ByteCodeValueType>(CharacterCompareType::String));
  167. arguments.insert_string(view);
  168. bytecode.empend(arguments.size()); // size of arguments
  169. bytecode.extend(move(arguments));
  170. extend(move(bytecode));
  171. }
  172. void insert_bytecode_compare_named_reference(StringView name)
  173. {
  174. ByteCode bytecode;
  175. bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::Compare));
  176. bytecode.empend(static_cast<u64>(1)); // number of arguments
  177. ByteCode arguments;
  178. arguments.empend(static_cast<ByteCodeValueType>(CharacterCompareType::NamedReference));
  179. arguments.empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination()));
  180. arguments.empend(name.length());
  181. bytecode.empend(arguments.size()); // size of arguments
  182. bytecode.extend(move(arguments));
  183. extend(move(bytecode));
  184. }
  185. void insert_bytecode_group_capture_left(size_t capture_groups_count)
  186. {
  187. empend(static_cast<ByteCodeValueType>(OpCodeId::SaveLeftCaptureGroup));
  188. empend(capture_groups_count);
  189. }
  190. void insert_bytecode_group_capture_left(StringView const& name)
  191. {
  192. empend(static_cast<ByteCodeValueType>(OpCodeId::SaveLeftNamedCaptureGroup));
  193. empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination()));
  194. empend(name.length());
  195. }
  196. void insert_bytecode_group_capture_right(size_t capture_groups_count)
  197. {
  198. empend(static_cast<ByteCodeValueType>(OpCodeId::SaveRightCaptureGroup));
  199. empend(capture_groups_count);
  200. }
  201. void insert_bytecode_group_capture_right(StringView const& name)
  202. {
  203. empend(static_cast<ByteCodeValueType>(OpCodeId::SaveRightNamedCaptureGroup));
  204. empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination()));
  205. empend(name.length());
  206. }
  207. enum class LookAroundType {
  208. LookAhead,
  209. LookBehind,
  210. NegatedLookAhead,
  211. NegatedLookBehind,
  212. };
  213. void insert_bytecode_lookaround(ByteCode&& lookaround_body, LookAroundType type, size_t match_length = 0)
  214. {
  215. // FIXME: The save stack will grow infinitely with repeated failures
  216. // as we do not discard that on failure (we don't necessarily know how many to pop with the current architecture).
  217. switch (type) {
  218. case LookAroundType::LookAhead: {
  219. // SAVE
  220. // REGEXP BODY
  221. // RESTORE
  222. empend((ByteCodeValueType)OpCodeId::Save);
  223. extend(move(lookaround_body));
  224. empend((ByteCodeValueType)OpCodeId::Restore);
  225. return;
  226. }
  227. case LookAroundType::NegatedLookAhead: {
  228. // JUMP _A
  229. // LABEL _L
  230. // REGEXP BODY
  231. // FAIL 2
  232. // LABEL _A
  233. // SAVE
  234. // FORKJUMP _L
  235. // RESTORE
  236. auto body_length = lookaround_body.size();
  237. empend((ByteCodeValueType)OpCodeId::Jump);
  238. empend((ByteCodeValueType)body_length + 2); // JUMP to label _A
  239. extend(move(lookaround_body));
  240. empend((ByteCodeValueType)OpCodeId::FailForks);
  241. empend((ByteCodeValueType)2); // Fail two forks
  242. empend((ByteCodeValueType)OpCodeId::Save);
  243. empend((ByteCodeValueType)OpCodeId::ForkJump);
  244. empend((ByteCodeValueType) - (body_length + 5)); // JUMP to label _L
  245. empend((ByteCodeValueType)OpCodeId::Restore);
  246. return;
  247. }
  248. case LookAroundType::LookBehind:
  249. // SAVE
  250. // GOBACK match_length(BODY)
  251. // REGEXP BODY
  252. // RESTORE
  253. empend((ByteCodeValueType)OpCodeId::Save);
  254. empend((ByteCodeValueType)OpCodeId::GoBack);
  255. empend((ByteCodeValueType)match_length);
  256. extend(move(lookaround_body));
  257. empend((ByteCodeValueType)OpCodeId::Restore);
  258. return;
  259. case LookAroundType::NegatedLookBehind: {
  260. // JUMP _A
  261. // LABEL _L
  262. // GOBACK match_length(BODY)
  263. // REGEXP BODY
  264. // FAIL 2
  265. // LABEL _A
  266. // SAVE
  267. // FORKJUMP _L
  268. // RESTORE
  269. auto body_length = lookaround_body.size();
  270. empend((ByteCodeValueType)OpCodeId::Jump);
  271. empend((ByteCodeValueType)body_length + 4); // JUMP to label _A
  272. empend((ByteCodeValueType)OpCodeId::GoBack);
  273. empend((ByteCodeValueType)match_length);
  274. extend(move(lookaround_body));
  275. empend((ByteCodeValueType)OpCodeId::FailForks);
  276. empend((ByteCodeValueType)2); // Fail two forks
  277. empend((ByteCodeValueType)OpCodeId::Save);
  278. empend((ByteCodeValueType)OpCodeId::ForkJump);
  279. empend((ByteCodeValueType) - (body_length + 7)); // JUMP to label _L
  280. empend((ByteCodeValueType)OpCodeId::Restore);
  281. return;
  282. }
  283. }
  284. VERIFY_NOT_REACHED();
  285. }
  286. void insert_bytecode_alternation(ByteCode&& left, ByteCode&& right)
  287. {
  288. // FORKJUMP _ALT
  289. // REGEXP ALT2
  290. // JUMP _END
  291. // LABEL _ALT
  292. // REGEXP ALT1
  293. // LABEL _END
  294. ByteCode byte_code;
  295. empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
  296. empend(right.size() + 2); // Jump to the _ALT label
  297. for (auto& op : right)
  298. append(move(op));
  299. empend(static_cast<ByteCodeValueType>(OpCodeId::Jump));
  300. empend(left.size()); // Jump to the _END label
  301. // LABEL _ALT = bytecode.size() + 2
  302. for (auto& op : left)
  303. append(move(op));
  304. // LABEL _END = alterantive_bytecode.size
  305. }
  306. static void transform_bytecode_repetition_min_max(ByteCode& bytecode_to_repeat, size_t minimum, Optional<size_t> maximum, bool greedy = true)
  307. {
  308. ByteCode new_bytecode;
  309. new_bytecode.insert_bytecode_repetition_n(bytecode_to_repeat, minimum);
  310. if (maximum.has_value()) {
  311. auto jump_kind = static_cast<ByteCodeValueType>(greedy ? OpCodeId::ForkStay : OpCodeId::ForkJump);
  312. if (maximum.value() > minimum) {
  313. auto diff = maximum.value() - minimum;
  314. new_bytecode.empend(jump_kind);
  315. new_bytecode.empend(diff * (bytecode_to_repeat.size() + 2)); // Jump to the _END label
  316. for (size_t i = 0; i < diff; ++i) {
  317. new_bytecode.extend(bytecode_to_repeat);
  318. new_bytecode.empend(jump_kind);
  319. new_bytecode.empend((diff - i - 1) * (bytecode_to_repeat.size() + 2)); // Jump to the _END label
  320. }
  321. }
  322. } else {
  323. // no maximum value set, repeat finding if possible
  324. auto jump_kind = static_cast<ByteCodeValueType>(greedy ? OpCodeId::ForkJump : OpCodeId::ForkStay);
  325. new_bytecode.empend(jump_kind);
  326. new_bytecode.empend(-bytecode_to_repeat.size() - 2); // Jump to the last iteration
  327. }
  328. bytecode_to_repeat = move(new_bytecode);
  329. }
  330. void insert_bytecode_repetition_n(ByteCode& bytecode_to_repeat, size_t n)
  331. {
  332. for (size_t i = 0; i < n; ++i)
  333. extend(bytecode_to_repeat);
  334. }
  335. static void transform_bytecode_repetition_min_one(ByteCode& bytecode_to_repeat, bool greedy)
  336. {
  337. // LABEL _START = -bytecode_to_repeat.size()
  338. // REGEXP
  339. // FORKSTAY _START (FORKJUMP -> Greedy)
  340. if (greedy)
  341. bytecode_to_repeat.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
  342. else
  343. bytecode_to_repeat.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
  344. bytecode_to_repeat.empend(-(bytecode_to_repeat.size() + 1)); // Jump to the _START label
  345. }
  346. static void transform_bytecode_repetition_any(ByteCode& bytecode_to_repeat, bool greedy)
  347. {
  348. // LABEL _START
  349. // FORKJUMP _END (FORKSTAY -> Greedy)
  350. // REGEXP
  351. // JUMP _START
  352. // LABEL _END
  353. // LABEL _START = m_bytes.size();
  354. ByteCode bytecode;
  355. if (greedy)
  356. bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
  357. else
  358. bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
  359. bytecode.empend(bytecode_to_repeat.size() + 2); // Jump to the _END label
  360. for (auto& op : bytecode_to_repeat)
  361. bytecode.append(move(op));
  362. bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::Jump));
  363. bytecode.empend(-bytecode.size() - 1); // Jump to the _START label
  364. // LABEL _END = bytecode.size()
  365. bytecode_to_repeat = move(bytecode);
  366. }
  367. static void transform_bytecode_repetition_zero_or_one(ByteCode& bytecode_to_repeat, bool greedy)
  368. {
  369. // FORKJUMP _END (FORKSTAY -> Greedy)
  370. // REGEXP
  371. // LABEL _END
  372. ByteCode bytecode;
  373. if (greedy)
  374. bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
  375. else
  376. bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
  377. bytecode.empend(bytecode_to_repeat.size()); // Jump to the _END label
  378. for (auto& op : bytecode_to_repeat)
  379. bytecode.append(move(op));
  380. // LABEL _END = bytecode.size()
  381. bytecode_to_repeat = move(bytecode);
  382. }
  383. OpCode& get_opcode(MatchState& state) const;
  384. private:
  385. void insert_string(StringView const& view)
  386. {
  387. empend((ByteCodeValueType)view.length());
  388. for (size_t i = 0; i < view.length(); ++i)
  389. empend((ByteCodeValueType)view[i]);
  390. }
  391. void ensure_opcodes_initialized();
  392. ALWAYS_INLINE OpCode& get_opcode_by_id(OpCodeId id) const;
  393. static OwnPtr<OpCode> s_opcodes[(size_t)OpCodeId::Last + 1];
  394. static bool s_opcodes_initialized;
  395. };
  396. #define ENUMERATE_EXECUTION_RESULTS \
  397. __ENUMERATE_EXECUTION_RESULT(Continue) \
  398. __ENUMERATE_EXECUTION_RESULT(Fork_PrioHigh) \
  399. __ENUMERATE_EXECUTION_RESULT(Fork_PrioLow) \
  400. __ENUMERATE_EXECUTION_RESULT(Failed) \
  401. __ENUMERATE_EXECUTION_RESULT(Failed_ExecuteLowPrioForks) \
  402. __ENUMERATE_EXECUTION_RESULT(Succeeded)
  403. enum class ExecutionResult : u8 {
  404. #define __ENUMERATE_EXECUTION_RESULT(x) x,
  405. ENUMERATE_EXECUTION_RESULTS
  406. #undef __ENUMERATE_EXECUTION_RESULT
  407. };
  408. char const* execution_result_name(ExecutionResult result);
  409. char const* opcode_id_name(OpCodeId opcode_id);
  410. char const* boundary_check_type_name(BoundaryCheckType);
  411. char const* character_compare_type_name(CharacterCompareType result);
  412. char const* execution_result_name(ExecutionResult result);
  413. class OpCode {
  414. public:
  415. OpCode() = default;
  416. virtual ~OpCode() = default;
  417. virtual OpCodeId opcode_id() const = 0;
  418. virtual size_t size() const = 0;
  419. virtual ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const = 0;
  420. ALWAYS_INLINE ByteCodeValueType argument(size_t offset) const
  421. {
  422. VERIFY(state().instruction_position + offset <= m_bytecode->size());
  423. return m_bytecode->at(state().instruction_position + 1 + offset);
  424. }
  425. ALWAYS_INLINE char const* name() const;
  426. static char const* name(OpCodeId const);
  427. ALWAYS_INLINE void set_state(MatchState& state) { m_state = &state; }
  428. ALWAYS_INLINE void set_bytecode(ByteCode& bytecode) { m_bytecode = &bytecode; }
  429. ALWAYS_INLINE MatchState const& state() const
  430. {
  431. VERIFY(m_state);
  432. return *m_state;
  433. }
  434. String const to_string() const
  435. {
  436. return String::formatted("[{:#02X}] {}", (int)opcode_id(), name(opcode_id()));
  437. }
  438. virtual String const arguments_string() const = 0;
  439. ALWAYS_INLINE ByteCode const& bytecode() const { return *m_bytecode; }
  440. protected:
  441. ByteCode* m_bytecode { nullptr };
  442. MatchState* m_state { nullptr };
  443. };
  444. class OpCode_Exit final : public OpCode {
  445. public:
  446. ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
  447. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Exit; }
  448. ALWAYS_INLINE size_t size() const override { return 1; }
  449. String const arguments_string() const override { return ""; }
  450. };
  451. class OpCode_FailForks final : public OpCode {
  452. public:
  453. ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
  454. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::FailForks; }
  455. ALWAYS_INLINE size_t size() const override { return 2; }
  456. ALWAYS_INLINE size_t count() const { return argument(0); }
  457. String const arguments_string() const override { return String::formatted("count={}", count()); }
  458. };
  459. class OpCode_Save final : public OpCode {
  460. public:
  461. ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
  462. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Save; }
  463. ALWAYS_INLINE size_t size() const override { return 1; }
  464. String const arguments_string() const override { return ""; }
  465. };
  466. class OpCode_Restore final : public OpCode {
  467. public:
  468. ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
  469. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Restore; }
  470. ALWAYS_INLINE size_t size() const override { return 1; }
  471. String const arguments_string() const override { return ""; }
  472. };
  473. class OpCode_GoBack final : public OpCode {
  474. public:
  475. ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
  476. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::GoBack; }
  477. ALWAYS_INLINE size_t size() const override { return 2; }
  478. ALWAYS_INLINE size_t count() const { return argument(0); }
  479. String const arguments_string() const override { return String::formatted("count={}", count()); }
  480. };
  481. class OpCode_Jump final : public OpCode {
  482. public:
  483. ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
  484. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Jump; }
  485. ALWAYS_INLINE size_t size() const override { return 2; }
  486. ALWAYS_INLINE ssize_t offset() const { return argument(0); }
  487. String const arguments_string() const override
  488. {
  489. return String::formatted("offset={} [&{}]", offset(), state().instruction_position + size() + offset());
  490. }
  491. };
  492. class OpCode_ForkJump final : public OpCode {
  493. public:
  494. ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
  495. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ForkJump; }
  496. ALWAYS_INLINE size_t size() const override { return 2; }
  497. ALWAYS_INLINE ssize_t offset() const { return argument(0); }
  498. String const arguments_string() const override
  499. {
  500. return String::formatted("offset={} [&{}], sp: {}", offset(), state().instruction_position + size() + offset(), state().string_position);
  501. }
  502. };
  503. class OpCode_ForkStay final : public OpCode {
  504. public:
  505. ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
  506. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ForkStay; }
  507. ALWAYS_INLINE size_t size() const override { return 2; }
  508. ALWAYS_INLINE ssize_t offset() const { return argument(0); }
  509. String const arguments_string() const override
  510. {
  511. return String::formatted("offset={} [&{}], sp: {}", offset(), state().instruction_position + size() + offset(), state().string_position);
  512. }
  513. };
  514. class OpCode_CheckBegin final : public OpCode {
  515. public:
  516. ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
  517. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::CheckBegin; }
  518. ALWAYS_INLINE size_t size() const override { return 1; }
  519. String const arguments_string() const override { return ""; }
  520. };
  521. class OpCode_CheckEnd final : public OpCode {
  522. public:
  523. ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
  524. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::CheckEnd; }
  525. ALWAYS_INLINE size_t size() const override { return 1; }
  526. String const arguments_string() const override { return ""; }
  527. };
  528. class OpCode_CheckBoundary final : public OpCode {
  529. public:
  530. ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
  531. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::CheckBoundary; }
  532. ALWAYS_INLINE size_t size() const override { return 2; }
  533. ALWAYS_INLINE size_t arguments_count() const { return 1; }
  534. ALWAYS_INLINE BoundaryCheckType type() const { return static_cast<BoundaryCheckType>(argument(0)); }
  535. String const arguments_string() const override { return String::formatted("kind={} ({})", (long unsigned int)argument(0), boundary_check_type_name(type())); }
  536. };
  537. class OpCode_ClearCaptureGroup final : public OpCode {
  538. public:
  539. ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
  540. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ClearCaptureGroup; }
  541. ALWAYS_INLINE size_t size() const override { return 2; }
  542. ALWAYS_INLINE size_t id() const { return argument(0); }
  543. String const arguments_string() const override { return String::formatted("id={}", id()); }
  544. };
  545. class OpCode_ClearNamedCaptureGroup final : public OpCode {
  546. public:
  547. ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
  548. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ClearNamedCaptureGroup; }
  549. ALWAYS_INLINE size_t size() const override { return 3; }
  550. ALWAYS_INLINE StringView name() const { return { reinterpret_cast<char*>(argument(0)), length() }; }
  551. ALWAYS_INLINE size_t length() const { return argument(1); }
  552. String const arguments_string() const override
  553. {
  554. return String::formatted("name={}, length={}", name(), length());
  555. }
  556. };
  557. class OpCode_SaveLeftCaptureGroup final : public OpCode {
  558. public:
  559. ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
  560. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveLeftCaptureGroup; }
  561. ALWAYS_INLINE size_t size() const override { return 2; }
  562. ALWAYS_INLINE size_t id() const { return argument(0); }
  563. String const arguments_string() const override { return String::formatted("id={}", id()); }
  564. };
  565. class OpCode_SaveRightCaptureGroup final : public OpCode {
  566. public:
  567. ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
  568. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveRightCaptureGroup; }
  569. ALWAYS_INLINE size_t size() const override { return 2; }
  570. ALWAYS_INLINE size_t id() const { return argument(0); }
  571. String const arguments_string() const override { return String::formatted("id={}", id()); }
  572. };
  573. class OpCode_SaveLeftNamedCaptureGroup final : public OpCode {
  574. public:
  575. ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
  576. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveLeftNamedCaptureGroup; }
  577. ALWAYS_INLINE size_t size() const override { return 3; }
  578. ALWAYS_INLINE StringView name() const { return { reinterpret_cast<char*>(argument(0)), length() }; }
  579. ALWAYS_INLINE size_t length() const { return argument(1); }
  580. String const arguments_string() const override
  581. {
  582. return String::formatted("name={}, length={}", name(), length());
  583. }
  584. };
  585. class OpCode_SaveRightNamedCaptureGroup final : public OpCode {
  586. public:
  587. ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
  588. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveRightNamedCaptureGroup; }
  589. ALWAYS_INLINE size_t size() const override { return 3; }
  590. ALWAYS_INLINE StringView name() const { return { reinterpret_cast<char*>(argument(0)), length() }; }
  591. ALWAYS_INLINE size_t length() const { return argument(1); }
  592. String const arguments_string() const override
  593. {
  594. return String::formatted("name={}, length={}", name(), length());
  595. }
  596. };
  597. class OpCode_Compare final : public OpCode {
  598. public:
  599. ExecutionResult execute(MatchInput const& input, MatchState& state, MatchOutput& output) const override;
  600. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Compare; }
  601. ALWAYS_INLINE size_t size() const override { return arguments_size() + 3; }
  602. ALWAYS_INLINE size_t arguments_count() const { return argument(0); }
  603. ALWAYS_INLINE size_t arguments_size() const { return argument(1); }
  604. String const arguments_string() const override;
  605. Vector<String> const variable_arguments_to_string(Optional<MatchInput> input = {}) const;
  606. private:
  607. ALWAYS_INLINE static void compare_char(MatchInput const& input, MatchState& state, u32 ch1, bool inverse, bool& inverse_matched);
  608. ALWAYS_INLINE static bool compare_string(MatchInput const& input, MatchState& state, RegexStringView const& str, bool& had_zero_length_match);
  609. ALWAYS_INLINE static void compare_character_class(MatchInput const& input, MatchState& state, CharClass character_class, u32 ch, bool inverse, bool& inverse_matched);
  610. ALWAYS_INLINE static void compare_character_range(MatchInput const& input, MatchState& state, u32 from, u32 to, u32 ch, bool inverse, bool& inverse_matched);
  611. ALWAYS_INLINE static void compare_property(MatchInput const& input, MatchState& state, Unicode::Property property, bool inverse, bool& inverse_matched);
  612. };
  613. template<typename T>
  614. bool is(OpCode const&);
  615. template<typename T>
  616. ALWAYS_INLINE bool is(OpCode const&)
  617. {
  618. return false;
  619. }
  620. template<typename T>
  621. ALWAYS_INLINE bool is(OpCode const* opcode)
  622. {
  623. return is<T>(*opcode);
  624. }
  625. template<>
  626. ALWAYS_INLINE bool is<OpCode_ForkStay>(OpCode const& opcode)
  627. {
  628. return opcode.opcode_id() == OpCodeId::ForkStay;
  629. }
  630. template<>
  631. ALWAYS_INLINE bool is<OpCode_Exit>(OpCode const& opcode)
  632. {
  633. return opcode.opcode_id() == OpCodeId::Exit;
  634. }
  635. template<>
  636. ALWAYS_INLINE bool is<OpCode_Compare>(OpCode const& opcode)
  637. {
  638. return opcode.opcode_id() == OpCodeId::Compare;
  639. }
  640. template<typename T>
  641. ALWAYS_INLINE T const& to(OpCode const& opcode)
  642. {
  643. return verify_cast<T>(opcode);
  644. }
  645. template<typename T>
  646. ALWAYS_INLINE T* to(OpCode* opcode)
  647. {
  648. return verify_cast<T>(opcode);
  649. }
  650. template<typename T>
  651. ALWAYS_INLINE T const* to(OpCode const* opcode)
  652. {
  653. return verify_cast<T>(opcode);
  654. }
  655. template<typename T>
  656. ALWAYS_INLINE T& to(OpCode& opcode)
  657. {
  658. return verify_cast<T>(opcode);
  659. }
  660. }