RegexByteCode.h 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636
  1. /*
  2. * Copyright (c) 2020, Emanuel Sprung <emanuel.sprung@gmail.com>
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. *
  8. * 1. Redistributions of source code must retain the above copyright notice, this
  9. * list of conditions and the following disclaimer.
  10. *
  11. * 2. Redistributions in binary form must reproduce the above copyright notice,
  12. * this list of conditions and the following disclaimer in the documentation
  13. * and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  18. * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  19. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  21. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  22. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  23. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #pragma once
  27. #include "RegexMatch.h"
  28. #include "RegexOptions.h"
  29. #include <AK/Forward.h>
  30. #include <AK/HashMap.h>
  31. #include <AK/NonnullOwnPtr.h>
  32. #include <AK/OwnPtr.h>
  33. #include <AK/Traits.h>
  34. #include <AK/Types.h>
  35. #include <AK/Vector.h>
  36. namespace regex {
  37. using ByteCodeValueType = u64;
  38. #define ENUMERATE_OPCODES \
  39. __ENUMERATE_OPCODE(Compare) \
  40. __ENUMERATE_OPCODE(Jump) \
  41. __ENUMERATE_OPCODE(ForkJump) \
  42. __ENUMERATE_OPCODE(ForkStay) \
  43. __ENUMERATE_OPCODE(SaveLeftCaptureGroup) \
  44. __ENUMERATE_OPCODE(SaveRightCaptureGroup) \
  45. __ENUMERATE_OPCODE(SaveLeftNamedCaptureGroup) \
  46. __ENUMERATE_OPCODE(SaveRightNamedCaptureGroup) \
  47. __ENUMERATE_OPCODE(CheckBegin) \
  48. __ENUMERATE_OPCODE(CheckEnd) \
  49. __ENUMERATE_OPCODE(Exit)
  50. enum class OpCodeId : ByteCodeValueType {
  51. #define __ENUMERATE_OPCODE(x) x,
  52. ENUMERATE_OPCODES
  53. #undef __ENUMERATE_OPCODE
  54. First
  55. = Compare,
  56. Last
  57. = Exit,
  58. };
  59. #define ENUMERATE_CHARACTER_COMPARE_TYPES \
  60. __ENUMERATE_CHARACTER_COMPARE_TYPE(Undefined) \
  61. __ENUMERATE_CHARACTER_COMPARE_TYPE(Inverse) \
  62. __ENUMERATE_CHARACTER_COMPARE_TYPE(AnyChar) \
  63. __ENUMERATE_CHARACTER_COMPARE_TYPE(Char) \
  64. __ENUMERATE_CHARACTER_COMPARE_TYPE(String) \
  65. __ENUMERATE_CHARACTER_COMPARE_TYPE(CharClass) \
  66. __ENUMERATE_CHARACTER_COMPARE_TYPE(CharRange) \
  67. __ENUMERATE_CHARACTER_COMPARE_TYPE(RangeExpressionDummy)
  68. enum class CharacterCompareType : ByteCodeValueType {
  69. #define __ENUMERATE_CHARACTER_COMPARE_TYPE(x) x,
  70. ENUMERATE_CHARACTER_COMPARE_TYPES
  71. #undef __ENUMERATE_CHARACTER_COMPARE_TYPE
  72. };
  73. #define ENUMERATE_CHARACTER_CLASSES \
  74. __ENUMERATE_CHARACTER_CLASS(Alnum) \
  75. __ENUMERATE_CHARACTER_CLASS(Cntrl) \
  76. __ENUMERATE_CHARACTER_CLASS(Lower) \
  77. __ENUMERATE_CHARACTER_CLASS(Space) \
  78. __ENUMERATE_CHARACTER_CLASS(Alpha) \
  79. __ENUMERATE_CHARACTER_CLASS(Digit) \
  80. __ENUMERATE_CHARACTER_CLASS(Print) \
  81. __ENUMERATE_CHARACTER_CLASS(Upper) \
  82. __ENUMERATE_CHARACTER_CLASS(Blank) \
  83. __ENUMERATE_CHARACTER_CLASS(Graph) \
  84. __ENUMERATE_CHARACTER_CLASS(Punct) \
  85. __ENUMERATE_CHARACTER_CLASS(Xdigit)
  86. enum class CharClass : ByteCodeValueType {
  87. #define __ENUMERATE_CHARACTER_CLASS(x) x,
  88. ENUMERATE_CHARACTER_CLASSES
  89. #undef __ENUMERATE_CHARACTER_CLASS
  90. };
  91. struct CharRange {
  92. const u32 from;
  93. const u32 to;
  94. CharRange(u64 value)
  95. : from(value >> 32)
  96. , to(value & 0xffffffff)
  97. {
  98. }
  99. CharRange(u32 from, u32 to)
  100. : from(from)
  101. , to(to)
  102. {
  103. }
  104. operator ByteCodeValueType() const { return ((u64)from << 32) | to; }
  105. };
  106. struct CompareTypeAndValuePair {
  107. CharacterCompareType type;
  108. ByteCodeValueType value;
  109. };
  110. class OpCode;
  111. class ByteCode : public Vector<ByteCodeValueType> {
  112. public:
  113. ByteCode() = default;
  114. virtual ~ByteCode() = default;
  115. void insert_bytecode_compare_values(Vector<CompareTypeAndValuePair>&& pairs)
  116. {
  117. ByteCode bytecode;
  118. bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::Compare));
  119. bytecode.empend(pairs.size()); // number of arguments
  120. ByteCode arguments;
  121. for (auto& value : pairs) {
  122. ASSERT(value.type != CharacterCompareType::RangeExpressionDummy);
  123. ASSERT(value.type != CharacterCompareType::Undefined);
  124. ASSERT(value.type != CharacterCompareType::String);
  125. arguments.append((ByteCodeValueType)value.type);
  126. if (value.type != CharacterCompareType::Inverse && value.type != CharacterCompareType::AnyChar)
  127. arguments.append(move(value.value));
  128. }
  129. bytecode.empend(arguments.size()); // size of arguments
  130. bytecode.append(move(arguments));
  131. append(move(bytecode));
  132. }
  133. void insert_bytecode_compare_string(StringView view, size_t length)
  134. {
  135. ByteCode bytecode;
  136. bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::Compare));
  137. bytecode.empend(1); // number of arguments
  138. ByteCode arguments;
  139. arguments.empend(static_cast<ByteCodeValueType>(CharacterCompareType::String));
  140. arguments.empend(reinterpret_cast<ByteCodeValueType>(view.characters_without_null_termination()));
  141. arguments.empend(length);
  142. bytecode.empend(arguments.size()); // size of arguments
  143. bytecode.append(move(arguments));
  144. append(move(bytecode));
  145. }
  146. void insert_bytecode_group_capture_left(size_t capture_groups_count)
  147. {
  148. empend(static_cast<ByteCodeValueType>(OpCodeId::SaveLeftCaptureGroup));
  149. empend(capture_groups_count);
  150. }
  151. void insert_bytecode_group_capture_left(const StringView& name)
  152. {
  153. empend(static_cast<ByteCodeValueType>(OpCodeId::SaveLeftNamedCaptureGroup));
  154. empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination()));
  155. empend(name.length());
  156. }
  157. void insert_bytecode_group_capture_right(size_t capture_groups_count)
  158. {
  159. empend(static_cast<ByteCodeValueType>(OpCodeId::SaveRightCaptureGroup));
  160. empend(capture_groups_count);
  161. }
  162. void insert_bytecode_group_capture_right(const StringView& name)
  163. {
  164. empend(static_cast<ByteCodeValueType>(OpCodeId::SaveRightNamedCaptureGroup));
  165. empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination()));
  166. empend(name.length());
  167. }
  168. void insert_bytecode_alternation(ByteCode&& left, ByteCode&& right)
  169. {
  170. // FORKJUMP _ALT
  171. // REGEXP ALT1
  172. // JUMP _END
  173. // LABEL _ALT
  174. // REGEXP ALT2
  175. // LABEL _END
  176. ByteCode byte_code;
  177. empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
  178. empend(left.size() + 2); // Jump to the _ALT label
  179. for (auto& op : left)
  180. append(move(op));
  181. empend(static_cast<ByteCodeValueType>(OpCodeId::Jump));
  182. empend(right.size()); // Jump to the _END label
  183. // LABEL _ALT = bytecode.size() + 2
  184. for (auto& op : right)
  185. append(move(op));
  186. // LABEL _END = alterantive_bytecode.size
  187. }
  188. void insert_bytecode_repetition_min_max(ByteCode& bytecode_to_repeat, size_t minimum, Optional<size_t> maximum)
  189. {
  190. ByteCode new_bytecode;
  191. new_bytecode.insert_bytecode_repetition_n(bytecode_to_repeat, minimum);
  192. if (maximum.has_value()) {
  193. if (maximum.value() > minimum) {
  194. auto diff = maximum.value() - minimum;
  195. new_bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
  196. new_bytecode.empend(diff * (bytecode_to_repeat.size() + 2)); // Jump to the _END label
  197. for (size_t i = 0; i < diff; ++i) {
  198. new_bytecode.append(bytecode_to_repeat);
  199. new_bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
  200. new_bytecode.empend((diff - i - 1) * (bytecode_to_repeat.size() + 2)); // Jump to the _END label
  201. }
  202. }
  203. } else {
  204. // no maximum value set, repeat finding if possible
  205. new_bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
  206. new_bytecode.empend(-bytecode_to_repeat.size() - 2); // Jump to the last iteration
  207. }
  208. bytecode_to_repeat = move(new_bytecode);
  209. }
  210. void insert_bytecode_repetition_n(ByteCode& bytecode_to_repeat, size_t n)
  211. {
  212. for (size_t i = 0; i < n; ++i)
  213. append(bytecode_to_repeat);
  214. }
  215. void insert_bytecode_repetition_min_one(ByteCode& bytecode_to_repeat, bool greedy)
  216. {
  217. // LABEL _START = -bytecode_to_repeat.size()
  218. // REGEXP
  219. // FORKSTAY _START (FORKJUMP -> Greedy)
  220. if (greedy)
  221. bytecode_to_repeat.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
  222. else
  223. bytecode_to_repeat.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
  224. bytecode_to_repeat.empend(-(bytecode_to_repeat.size() + 1)); // Jump to the _START label
  225. }
  226. void insert_bytecode_repetition_any(ByteCode& bytecode_to_repeat, bool greedy)
  227. {
  228. // LABEL _START
  229. // FORKJUMP _END (FORKSTAY -> Greedy)
  230. // REGEXP
  231. // JUMP _START
  232. // LABEL _END
  233. // LABEL _START = m_bytes.size();
  234. ByteCode bytecode;
  235. if (greedy)
  236. bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
  237. else
  238. bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
  239. bytecode.empend(bytecode_to_repeat.size() + 2); // Jump to the _END label
  240. for (auto& op : bytecode_to_repeat)
  241. bytecode.append(move(op));
  242. bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::Jump));
  243. bytecode.empend(-bytecode.size() - 1); // Jump to the _START label
  244. // LABEL _END = bytecode.size()
  245. bytecode_to_repeat = move(bytecode);
  246. }
  247. void insert_bytecode_repetition_zero_or_one(ByteCode& bytecode_to_repeat, bool greedy)
  248. {
  249. // FORKJUMP _END (FORKSTAY -> Greedy)
  250. // REGEXP
  251. // LABEL _END
  252. ByteCode bytecode;
  253. if (greedy)
  254. bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
  255. else
  256. bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
  257. bytecode.empend(bytecode_to_repeat.size()); // Jump to the _END label
  258. for (auto& op : bytecode_to_repeat)
  259. bytecode.append(move(op));
  260. // LABEL _END = bytecode.size()
  261. bytecode_to_repeat = move(bytecode);
  262. }
  263. OpCode* get_opcode(MatchState& state) const;
  264. private:
  265. ALWAYS_INLINE OpCode* get_opcode_by_id(OpCodeId id) const;
  266. static HashMap<u32, OwnPtr<OpCode>> s_opcodes;
  267. };
  268. #define ENUMERATE_EXECUTION_RESULTS \
  269. __ENUMERATE_EXECUTION_RESULT(Continue) \
  270. __ENUMERATE_EXECUTION_RESULT(Fork_PrioHigh) \
  271. __ENUMERATE_EXECUTION_RESULT(Fork_PrioLow) \
  272. __ENUMERATE_EXECUTION_RESULT(Failed) \
  273. __ENUMERATE_EXECUTION_RESULT(Failed_ExecuteLowPrioForks) \
  274. __ENUMERATE_EXECUTION_RESULT(Succeeded)
  275. enum class ExecutionResult : u8 {
  276. #define __ENUMERATE_EXECUTION_RESULT(x) x,
  277. ENUMERATE_EXECUTION_RESULTS
  278. #undef __ENUMERATE_EXECUTION_RESULT
  279. };
  280. const char* execution_result_name(ExecutionResult result);
  281. const char* opcode_id_name(OpCodeId opcode_id);
  282. const char* character_compare_type_name(CharacterCompareType result);
  283. const char* execution_result_name(ExecutionResult result);
  284. class OpCode {
  285. public:
  286. OpCode(ByteCode& bytecode)
  287. : m_bytecode(&bytecode)
  288. {
  289. }
  290. virtual ~OpCode() = default;
  291. virtual OpCodeId opcode_id() const = 0;
  292. virtual size_t size() const = 0;
  293. virtual ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const = 0;
  294. ALWAYS_INLINE ByteCodeValueType argument(size_t offset) const
  295. {
  296. ASSERT(state().instruction_position + offset <= m_bytecode->size());
  297. return m_bytecode->at(state().instruction_position + 1 + offset);
  298. }
  299. ALWAYS_INLINE const char* name() const;
  300. static const char* name(const OpCodeId);
  301. ALWAYS_INLINE OpCode* set_state(MatchState& state)
  302. {
  303. m_state = &state;
  304. return this;
  305. }
  306. ALWAYS_INLINE OpCode* set_bytecode(ByteCode& bytecode)
  307. {
  308. m_bytecode = &bytecode;
  309. return this;
  310. }
  311. ALWAYS_INLINE void reset_state() { m_state.clear(); }
  312. ALWAYS_INLINE const MatchState& state() const
  313. {
  314. ASSERT(m_state.has_value());
  315. return *m_state.value();
  316. }
  317. const String to_string() const
  318. {
  319. return String::format("[0x%02X] %s", opcode_id(), name(opcode_id()));
  320. }
  321. virtual const String arguments_string() const = 0;
  322. ALWAYS_INLINE const ByteCode& bytecode() const { return *m_bytecode; }
  323. protected:
  324. ByteCode* m_bytecode;
  325. Optional<MatchState*> m_state;
  326. };
  327. class OpCode_Exit final : public OpCode {
  328. public:
  329. OpCode_Exit(ByteCode& bytecode)
  330. : OpCode(bytecode)
  331. {
  332. }
  333. ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const override;
  334. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Exit; }
  335. ALWAYS_INLINE size_t size() const override { return 1; }
  336. const String arguments_string() const override { return ""; }
  337. };
  338. class OpCode_Jump final : public OpCode {
  339. public:
  340. OpCode_Jump(ByteCode& bytecode)
  341. : OpCode(bytecode)
  342. {
  343. }
  344. ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const override;
  345. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Jump; }
  346. ALWAYS_INLINE size_t size() const override { return 2; }
  347. ALWAYS_INLINE ssize_t offset() const { return argument(0); }
  348. const String arguments_string() const override
  349. {
  350. return String::format("offset=%i [&%lu]", offset(), state().instruction_position + size() + offset());
  351. }
  352. };
  353. class OpCode_ForkJump final : public OpCode {
  354. public:
  355. OpCode_ForkJump(ByteCode& bytecode)
  356. : OpCode(bytecode)
  357. {
  358. }
  359. ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const override;
  360. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ForkJump; }
  361. ALWAYS_INLINE size_t size() const override { return 2; }
  362. ALWAYS_INLINE ssize_t offset() const { return argument(0); }
  363. const String arguments_string() const override
  364. {
  365. return String::format("offset=%i [&%lu], sp: %lu", offset(), state().instruction_position + size() + offset(), state().string_position);
  366. }
  367. };
  368. class OpCode_ForkStay final : public OpCode {
  369. public:
  370. OpCode_ForkStay(ByteCode& bytecode)
  371. : OpCode(bytecode)
  372. {
  373. }
  374. ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const override;
  375. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ForkStay; }
  376. ALWAYS_INLINE size_t size() const override { return 2; }
  377. ALWAYS_INLINE ssize_t offset() const { return argument(0); }
  378. const String arguments_string() const override
  379. {
  380. return String::format("offset=%i [&%lu], sp: %lu", offset(), state().instruction_position + size() + offset(), state().string_position);
  381. }
  382. };
  383. class OpCode_CheckBegin final : public OpCode {
  384. public:
  385. OpCode_CheckBegin(ByteCode& bytecode)
  386. : OpCode(bytecode)
  387. {
  388. }
  389. ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const override;
  390. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::CheckBegin; }
  391. ALWAYS_INLINE size_t size() const override { return 1; }
  392. const String arguments_string() const override { return ""; }
  393. };
  394. class OpCode_CheckEnd final : public OpCode {
  395. public:
  396. OpCode_CheckEnd(ByteCode& bytecode)
  397. : OpCode(bytecode)
  398. {
  399. }
  400. ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const override;
  401. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::CheckEnd; }
  402. ALWAYS_INLINE size_t size() const override { return 1; }
  403. const String arguments_string() const override { return ""; }
  404. };
  405. class OpCode_SaveLeftCaptureGroup final : public OpCode {
  406. public:
  407. OpCode_SaveLeftCaptureGroup(ByteCode& bytecode)
  408. : OpCode(bytecode)
  409. {
  410. }
  411. ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const override;
  412. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveLeftCaptureGroup; }
  413. ALWAYS_INLINE size_t size() const override { return 2; }
  414. ALWAYS_INLINE size_t id() const { return argument(0); }
  415. const String arguments_string() const override { return String::format("id=%lu", id()); }
  416. };
  417. class OpCode_SaveRightCaptureGroup final : public OpCode {
  418. public:
  419. OpCode_SaveRightCaptureGroup(ByteCode& bytecode)
  420. : OpCode(bytecode)
  421. {
  422. }
  423. ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const override;
  424. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveRightCaptureGroup; }
  425. ALWAYS_INLINE size_t size() const override { return 2; }
  426. ALWAYS_INLINE size_t id() const { return argument(0); }
  427. const String arguments_string() const override { return String::format("id=%lu", id()); }
  428. };
  429. class OpCode_SaveLeftNamedCaptureGroup final : public OpCode {
  430. public:
  431. OpCode_SaveLeftNamedCaptureGroup(ByteCode& bytecode)
  432. : OpCode(bytecode)
  433. {
  434. }
  435. ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const override;
  436. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveLeftNamedCaptureGroup; }
  437. ALWAYS_INLINE size_t size() const override { return 3; }
  438. ALWAYS_INLINE StringView name() const { return { reinterpret_cast<char*>(argument(0)), length() }; }
  439. ALWAYS_INLINE size_t length() const { return argument(1); }
  440. const String arguments_string() const override
  441. {
  442. return String::format("name=%s, length=%lu", name().to_string().characters(), length());
  443. }
  444. };
  445. class OpCode_SaveRightNamedCaptureGroup final : public OpCode {
  446. public:
  447. OpCode_SaveRightNamedCaptureGroup(ByteCode& bytecode)
  448. : OpCode(bytecode)
  449. {
  450. }
  451. ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const override;
  452. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveRightNamedCaptureGroup; }
  453. ALWAYS_INLINE size_t size() const override { return 3; }
  454. ALWAYS_INLINE StringView name() const { return { reinterpret_cast<char*>(argument(0)), length() }; }
  455. ALWAYS_INLINE size_t length() const { return argument(1); }
  456. const String arguments_string() const override
  457. {
  458. return String::format("name=%s, length=%lu", name().to_string().characters(), length());
  459. }
  460. };
  461. class OpCode_Compare final : public OpCode {
  462. public:
  463. OpCode_Compare(ByteCode& bytecode)
  464. : OpCode(bytecode)
  465. {
  466. }
  467. ExecutionResult execute(const MatchInput& input, MatchState& state, MatchOutput& output) const override;
  468. ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Compare; }
  469. ALWAYS_INLINE size_t size() const override { return arguments_size() + 3; }
  470. ALWAYS_INLINE size_t arguments_count() const { return argument(0); }
  471. ALWAYS_INLINE size_t arguments_size() const { return argument(1); }
  472. const String arguments_string() const override;
  473. const Vector<String> variable_arguments_to_string(Optional<MatchInput> input = {}) const;
  474. private:
  475. ALWAYS_INLINE static void compare_char(const MatchInput& input, MatchState& state, u32 ch1, bool inverse, bool& inverse_matched);
  476. ALWAYS_INLINE static bool compare_string(const MatchInput& input, MatchState& state, const char* str, size_t length);
  477. ALWAYS_INLINE static void compare_character_class(const MatchInput& input, MatchState& state, CharClass character_class, u32 ch, bool inverse, bool& inverse_matched);
  478. ALWAYS_INLINE static void compare_character_range(const MatchInput& input, MatchState& state, u32 from, u32 to, u32 ch, bool inverse, bool& inverse_matched);
  479. };
  480. template<typename T>
  481. bool is(const OpCode&);
  482. template<typename T>
  483. ALWAYS_INLINE bool is(const OpCode&)
  484. {
  485. return false;
  486. }
  487. template<typename T>
  488. ALWAYS_INLINE bool is(const OpCode* opcode)
  489. {
  490. return is<T>(*opcode);
  491. }
  492. template<>
  493. ALWAYS_INLINE bool is<OpCode_ForkStay>(const OpCode& opcode)
  494. {
  495. return opcode.opcode_id() == OpCodeId::ForkStay;
  496. }
  497. template<>
  498. ALWAYS_INLINE bool is<OpCode_Exit>(const OpCode& opcode)
  499. {
  500. return opcode.opcode_id() == OpCodeId::Exit;
  501. }
  502. template<>
  503. ALWAYS_INLINE bool is<OpCode_Compare>(const OpCode& opcode)
  504. {
  505. return opcode.opcode_id() == OpCodeId::Compare;
  506. }
  507. template<typename T>
  508. ALWAYS_INLINE const T& to(const OpCode& opcode)
  509. {
  510. ASSERT(is<T>(opcode));
  511. return static_cast<const T&>(opcode);
  512. }
  513. template<typename T>
  514. ALWAYS_INLINE T* to(OpCode* opcode)
  515. {
  516. ASSERT(is<T>(opcode));
  517. return static_cast<T*>(opcode);
  518. }
  519. template<typename T>
  520. ALWAYS_INLINE const T* to(const OpCode* opcode)
  521. {
  522. ASSERT(is<T>(opcode));
  523. return static_cast<const T*>(opcode);
  524. }
  525. template<typename T>
  526. ALWAYS_INLINE T& to(OpCode& opcode)
  527. {
  528. ASSERT(is<T>(opcode));
  529. return static_cast<T&>(opcode);
  530. }
  531. }