Instruction.h 35 KB


  1. /*
  2. * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
  3. * Copyright (c) 2022, the SerenityOS developers.
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #pragma once
  8. #include <AK/DeprecatedString.h>
  9. #include <AK/Optional.h>
  10. #include <AK/StdLibExtras.h>
  11. #include <AK/Types.h>
  12. #include <stdio.h>
  13. namespace X86 {
  // Forward declarations so that the handler type below can be named before
  // either class is defined.
  class Instruction;
  class Interpreter;

  // Pointer to an Interpreter member function that takes a decoded Instruction.
  using InstructionHandler = void (Interpreter::*)(Instruction const&);
  // Abstract interface consulted when formatting instructions (see
  // Instruction::to_deprecated_string, which accepts a SymbolProvider const*).
  class SymbolProvider {
  public:
      // Returns a string for the given address. The optional out-parameter
      // `offset` may be null.
      // NOTE(review): presumably `offset` receives the distance from the start
      // of the returned symbol -- confirm against an implementation.
      virtual DeprecatedString symbolicate(FlatPtr, u32* offset = nullptr) const = 0;

  protected:
      // Protected: implementations cannot be destroyed through a
      // SymbolProvider pointer.
      virtual ~SymbolProvider() = default;
  };
  // Compile-time facts about the integer type T, used by sign_extended_to().
  template<typename T>
  struct TypeTrivia {
      // Width of T in bits.
      static constexpr size_t bits = sizeof(T) * 8;
      // Only the most significant bit of T set. The shift is carried out on a
      // 64-bit unsigned one; shifting a plain `int` 1 by 63 (for 64-bit T) would
      // be undefined behaviour and is rejected in a constant expression.
      static constexpr T sign_bit = static_cast<T>(1ull << (bits - 1));
      // Every bit of T set.
      static constexpr T mask = static_cast<T>(-1);
  };

  // Sign-extends `value` from the width of U to the width of T.
  //
  // If the top bit of U is clear the value is returned as-is (converted to T).
  // Otherwise every bit of T above U's width is set and the low bits are kept.
  // U is expected to be an unsigned type no wider than T.
  template<typename T, typename U>
  constexpr T sign_extended_to(U value)
  {
      if (!(value & TypeTrivia<U>::sign_bit))
          return value;
      // Widen U's mask to T *before* inverting it. Inverting at U's (promoted)
      // width yields 0 when U is 32 bits wide, which would leave the upper half
      // of a 64-bit result unset.
      constexpr T high_bits = static_cast<T>(TypeTrivia<T>::mask & ~static_cast<T>(TypeTrivia<U>::mask));
      return static_cast<T>(high_bits | value);
  }
  36. enum class OperandSize : u8 {
  37. Size16,
  38. Size32,
  39. Size64,
  40. };
  41. enum class AddressSize : u8 {
  42. Size16,
  43. Size32,
  44. Size64,
  45. };
  46. enum class ProcessorMode : u8 {
  47. Protected,
  48. Long,
  49. };
  // Value stored in InstructionDescriptor::lock_prefix_allowed.
  enum IsLockPrefixAllowed {
      LockPrefixNotAllowed = 0,
      LockPrefixAllowed = 1
  };
  // Operand layout of an instruction, stored in InstructionDescriptor::format.
  // Enumerator order (and therefore value) is unchanged; only the layout of
  // this list differs.
  enum InstructionFormat {
      InvalidFormat = 0,
      MultibyteWithSlash,
      InstructionPrefix,

      // Marker. NOTE(review): presumably every format between this and
      // __EndFormatsWithRMByte carries a ModR/M byte -- confirm in the table builder.
      __BeginFormatsWithRMByte,

      OP_RM16_reg16, OP_reg8_RM8, OP_reg16_RM16,
      OP_RM16_seg, OP_RM32_seg,
      OP_RM8_imm8, OP_RM16_imm16, OP_RM16_imm8, OP_RM32_imm8,
      OP_RM8, OP_RM16, OP_RM32,
      OP_FPU, OP_FPU_reg, OP_FPU_mem, OP_FPU_AX16,
      OP_FPU_RM16, OP_FPU_RM32, OP_FPU_RM64, OP_FPU_M80,
      OP_RM8_reg8, OP_RM32_reg32, OP_reg32_RM32, OP_RM32_imm32,
      OP_reg16_RM16_imm8, OP_reg32_RM32_imm8, OP_reg16_RM16_imm16, OP_reg32_RM32_imm32,
      OP_reg16_mem16, OP_reg32_mem32,
      OP_seg_RM16, OP_seg_RM32,
      OP_RM8_1, OP_RM16_1, OP_RM32_1,
      OP_FAR_mem16, OP_FAR_mem32,
      OP_RM8_CL, OP_RM16_CL, OP_RM32_CL,
      OP_reg32_CR, OP_CR_reg32, OP_reg32_DR, OP_DR_reg32,
      OP_reg16_RM8, OP_reg32_RM8, OP_reg32_RM16,
      OP_RM16_reg16_imm8, OP_RM32_reg32_imm8, OP_RM16_reg16_CL, OP_RM32_reg32_CL,
      OP_reg,
      OP_m64,

      // SSE instructions mutate on some prefixes, so we have to mark them
      // for further parsing
      __SSE,

      OP_mm1_rm32, OP_rm32_mm2, OP_mm1_mm2m64, OP_mm1_mm2m32,
      OP_mm1_mm2m64_imm8, OP_mm1_imm8, OP_mm1m64_mm2,
      OP_reg_mm1, OP_reg_mm1_imm8, OP_mm1_r32m16_imm8,
      OP_xmm1_imm8, OP_xmm1_xmm2m32, OP_xmm1_xmm2m64, OP_xmm1_xmm2m128,
      OP_xmm1_xmm2m32_imm8, OP_xmm1_xmm2m128_imm8,
      OP_xmm1m32_xmm2, OP_xmm1m64_xmm2, OP_xmm1m128_xmm2,
      OP_reg_xmm1, OP_reg_xmm1_imm8,
      OP_r32_xmm2m32, OP_r32_xmm2m64,
      OP_rm32_xmm2, OP_xmm1_rm32, OP_xmm1_m64, OP_m64_xmm2,
      OP_rm8_xmm2m32,
      OP_xmm_mm, OP_xmm1_mm2m64, OP_mm1m64_xmm2, OP_mm_xmm,
      OP_mm1_xmm2m64, OP_mm1_xmm2m128,
      OP_xmm1_r32m16_imm8,

      // Marker closing the range opened by __BeginFormatsWithRMByte.
      __EndFormatsWithRMByte,

      OP_reg32_imm32, OP_regW_immW,
      OP_AL_imm8, OP_AX_imm16, OP_EAX_imm32,
      OP_CS, OP_DS, OP_ES, OP_SS, OP_FS, OP_GS,
      OP,
      OP_reg16,
      OP_imm16, OP_relimm16, OP_relimm32, OP_imm8,
      OP_imm16_imm16, OP_imm16_imm32,
      OP_AX_reg16, OP_EAX_reg32,
      OP_AL_moff8, OP_AX_moff16, OP_EAX_moff32,
      OP_moff8_AL, OP_moff16_AX, OP_moff32_EAX,
      OP_reg8_imm8, OP_reg16_imm16,
      OP_3,
      OP_AX_imm8, OP_EAX_imm8,
      OP_short_imm8,
      OP_AL_DX, OP_AX_DX, OP_EAX_DX,
      OP_DX_AL, OP_DX_AX, OP_DX_EAX,
      OP_imm8_AL, OP_imm8_AX, OP_imm8_EAX,
      OP_reg8_CL,
      OP_reg32,
      OP_imm32,
      OP_imm16_imm8,
      OP_NEAR_imm,
  };
  199. static constexpr unsigned CurrentAddressSize = 0xB33FBABE;
  200. static constexpr unsigned CurrentOperandSize = 0xB33FB00F;
  201. struct InstructionDescriptor {
  202. InstructionHandler handler { nullptr };
  203. bool opcode_has_register_index { false };
  204. char const* mnemonic { nullptr };
  205. InstructionFormat format { InvalidFormat };
  206. bool has_rm { false };
  207. unsigned imm1_bytes { 0 };
  208. unsigned imm2_bytes { 0 };
  209. bool long_mode_default_64 { false };
  210. bool long_mode_force_64 { false };
  211. // Addressed by the 3 REG bits in the MOD-REG-R/M byte.
  212. // Some slash instructions have further subgroups when MOD is 11,
  213. // in that case the InstructionDescriptors in slashes have themselves
  214. // a non-null slashes member that's indexed by the three R/M bits.
  215. InstructionDescriptor* slashes { nullptr };
  216. unsigned imm1_bytes_for(AddressSize address_size, OperandSize operand_size) const
  217. {
  218. if (imm1_bytes == CurrentAddressSize) {
  219. switch (address_size) {
  220. case AddressSize::Size64:
  221. return 8;
  222. case AddressSize::Size32:
  223. return 4;
  224. case AddressSize::Size16:
  225. return 2;
  226. }
  227. VERIFY_NOT_REACHED();
  228. }
  229. if (imm1_bytes == CurrentOperandSize) {
  230. switch (operand_size) {
  231. case OperandSize::Size64:
  232. return 8;
  233. case OperandSize::Size32:
  234. return 4;
  235. case OperandSize::Size16:
  236. return 2;
  237. }
  238. VERIFY_NOT_REACHED();
  239. }
  240. return imm1_bytes;
  241. }
  242. unsigned imm2_bytes_for(AddressSize address_size, OperandSize operand_size) const
  243. {
  244. if (imm2_bytes == CurrentAddressSize) {
  245. switch (address_size) {
  246. case AddressSize::Size64:
  247. return 8;
  248. case AddressSize::Size32:
  249. return 4;
  250. case AddressSize::Size16:
  251. return 2;
  252. }
  253. VERIFY_NOT_REACHED();
  254. }
  255. if (imm2_bytes == CurrentOperandSize) {
  256. switch (operand_size) {
  257. case OperandSize::Size64:
  258. return 8;
  259. case OperandSize::Size32:
  260. return 4;
  261. case OperandSize::Size16:
  262. return 2;
  263. }
  264. VERIFY_NOT_REACHED();
  265. }
  266. return imm2_bytes;
  267. }
  268. IsLockPrefixAllowed lock_prefix_allowed { LockPrefixNotAllowed };
  269. };
  // Opcode tables, defined elsewhere.
  // s_table / s_0f_table: first index is derived from the operand size, second
  // index is the opcode byte (s_table) or the byte following 0x0f (s_0f_table).
  extern InstructionDescriptor s_table[3][256];
  extern InstructionDescriptor s_0f_table[3][256];
  // Consulted when a descriptor's format is __SSE; indexed by the byte
  // following 0x0f. The decoder picks _f3 for an 0xF3 prefix, _66 for an
  // operand-size override prefix, and _np otherwise.
  extern InstructionDescriptor s_sse_table_np[256];
  extern InstructionDescriptor s_sse_table_66[256];
  extern InstructionDescriptor s_sse_table_f3[256];
  // Byte values of the instruction prefixes recognised by the decoder.
  struct Prefix {
      enum Op {
          // A byte b is a REX prefix when (b & REX_Mask) == REX_Base.
          REX_Mask = 0xf0,
          REX_Base = 0x40,

          OperandSizeOverride = 0x66,
          AddressSizeOverride = 0x67,

          // REP and REPZ are the same byte.
          REP = 0xf3,
          REPZ = REP,
          REPNZ = 0xf2,

          LOCK = 0xf0,
      };
  };
  // Segment register numbers.
  enum class SegmentRegister {
      ES = 0,
      CS = 1,
      SS = 2,
      DS = 3,
      FS = 4,
      GS = 5,
      SegR6 = 6,
      SegR7 = 7,
  };
  // Indices of the 8-bit general purpose registers.
  enum RegisterIndex8 {
      RegisterAL = 0,
      RegisterCL = 1,
      RegisterDL = 2,
      RegisterBL = 3,
      RegisterAH = 4,
      RegisterCH = 5,
      RegisterDH = 6,
      RegisterBH = 7,
      RegisterR8B = 8,
      RegisterR9B = 9,
      RegisterR10B = 10,
      RegisterR11B = 11,
      RegisterR12B = 12,
      RegisterR13B = 13,
      RegisterR14B = 14,
      RegisterR15B = 15,
  };
  // Indices of the 16-bit general purpose registers.
  enum RegisterIndex16 {
      RegisterAX = 0,
      RegisterCX = 1,
      RegisterDX = 2,
      RegisterBX = 3,
      RegisterSP = 4,
      RegisterBP = 5,
      RegisterSI = 6,
      RegisterDI = 7,
      RegisterR8W = 8,
      RegisterR9W = 9,
      RegisterR10W = 10,
      RegisterR11W = 11,
      RegisterR12W = 12,
      RegisterR13W = 13,
      RegisterR14W = 14,
      RegisterR15W = 15,
  };
  // Indices of the 32-bit general purpose registers.
  enum RegisterIndex32 {
      RegisterEAX = 0,
      RegisterECX = 1,
      RegisterEDX = 2,
      RegisterEBX = 3,
      RegisterESP = 4,
      RegisterEBP = 5,
      RegisterESI = 6,
      RegisterEDI = 7,
      RegisterR8D = 8,
      RegisterR9D = 9,
      RegisterR10D = 10,
      RegisterR11D = 11,
      RegisterR12D = 12,
      RegisterR13D = 13,
      RegisterR14D = 14,
      RegisterR15D = 15,
  };
  // Indices of the 64-bit general purpose registers.
  enum RegisterIndex64 {
      RegisterRAX = 0,
      RegisterRCX = 1,
      RegisterRDX = 2,
      RegisterRBX = 3,
      RegisterRSP = 4,
      RegisterRBP = 5,
      RegisterRSI = 6,
      RegisterRDI = 7,
      RegisterR8 = 8,
      RegisterR9 = 9,
      RegisterR10 = 10,
      RegisterR11 = 11,
      RegisterR12 = 12,
      RegisterR13 = 13,
      RegisterR14 = 14,
      RegisterR15 = 15,
  };
  // Indices of the FPU stack registers.
  enum FpuRegisterIndex {
      ST0 = 0,
      ST1 = 1,
      ST2 = 2,
      ST3 = 3,
      ST4 = 4,
      ST5 = 5,
      ST6 = 6,
      ST7 = 7
  };
  // Indices of the MMX registers.
  enum MMXRegisterIndex {
      RegisterMM0 = 0,
      RegisterMM1 = 1,
      RegisterMM2 = 2,
      RegisterMM3 = 3,
      RegisterMM4 = 4,
      RegisterMM5 = 5,
      RegisterMM6 = 6,
      RegisterMM7 = 7
  };
  // Indices of the XMM registers.
  enum XMMRegisterIndex {
      RegisterXMM0 = 0,
      RegisterXMM1 = 1,
      RegisterXMM2 = 2,
      RegisterXMM3 = 3,
      RegisterXMM4 = 4,
      RegisterXMM5 = 5,
      RegisterXMM6 = 6,
      RegisterXMM7 = 7,
      RegisterXMM8 = 8,
      RegisterXMM9 = 9,
      RegisterXMM10 = 10,
      RegisterXMM11 = 11,
      RegisterXMM12 = 12,
      RegisterXMM13 = 13,
      RegisterXMM14 = 14,
      RegisterXMM15 = 15,
  };
  407. class LogicalAddress {
  408. public:
  409. LogicalAddress() = default;
  410. LogicalAddress(u16 selector, FlatPtr offset)
  411. : m_selector(selector)
  412. , m_offset(offset)
  413. {
  414. }
  415. u16 selector() const { return m_selector; }
  416. FlatPtr offset() const { return m_offset; }
  417. void set_selector(u16 selector) { m_selector = selector; }
  418. void set_offset(FlatPtr offset) { m_offset = offset; }
  419. private:
  420. u16 m_selector { 0 };
  421. FlatPtr m_offset { 0 };
  422. };
  // Abstract byte source for the decoder. SimpleInstructionStream is the
  // implementation provided in this header.
  class InstructionStream {
  public:
      // Whether more data is available.
      virtual bool can_read() = 0;
      // Fixed-width reads; each returns the value read from the stream.
      virtual u8 read8() = 0;
      virtual u16 read16() = 0;
      virtual u32 read32() = 0;
      virtual u64 read64() = 0;

  protected:
      // Protected: streams cannot be destroyed through an InstructionStream pointer.
      virtual ~InstructionStream() = default;
  };
  433. class SimpleInstructionStream final : public InstructionStream {
  434. public:
  435. SimpleInstructionStream(u8 const* data, size_t size)
  436. : m_data(data)
  437. , m_size(size)
  438. {
  439. }
  440. virtual bool can_read() override { return m_offset < m_size; }
  441. virtual u8 read8() override
  442. {
  443. if (!can_read())
  444. return 0;
  445. return m_data[m_offset++];
  446. }
  447. virtual u16 read16() override
  448. {
  449. u8 lsb = read8();
  450. u8 msb = read8();
  451. return ((u16)msb << 8) | (u16)lsb;
  452. }
  453. virtual u32 read32() override
  454. {
  455. u16 lsw = read16();
  456. u16 msw = read16();
  457. return ((u32)msw << 16) | (u32)lsw;
  458. }
  459. virtual u64 read64() override
  460. {
  461. u32 lsw = read32();
  462. u32 msw = read32();
  463. return ((u64)msw << 32) | (u64)lsw;
  464. }
  465. size_t offset() const { return m_offset; }
  466. private:
  467. u8 const* m_data { nullptr };
  468. size_t m_offset { 0 };
  469. size_t m_size { 0 };
  470. };
  // The decoded ModR/M operand of an instruction: either a register or a
  // memory reference (optionally with SIB byte and displacement). Instances
  // are created and decoded only by Instruction (private constructor + friend).
  class MemoryOrRegisterReference {
      friend class Instruction;

  public:
      // String renderings of the operand for various operand kinds/widths.
      DeprecatedString to_deprecated_string_o8(Instruction const&) const;
      DeprecatedString to_deprecated_string_o16(Instruction const&) const;
      DeprecatedString to_deprecated_string_o32(Instruction const&) const;
      DeprecatedString to_deprecated_string_o64(Instruction const&) const;
      DeprecatedString to_deprecated_string_fpu_reg() const;
      DeprecatedString to_deprecated_string_fpu_mem(Instruction const&) const;
      DeprecatedString to_deprecated_string_fpu_ax16() const;
      DeprecatedString to_deprecated_string_fpu16(Instruction const&) const;
      DeprecatedString to_deprecated_string_fpu32(Instruction const&) const;
      DeprecatedString to_deprecated_string_fpu64(Instruction const&) const;
      DeprecatedString to_deprecated_string_fpu80(Instruction const&) const;
      DeprecatedString to_deprecated_string_mm(Instruction const&) const;
      DeprecatedString to_deprecated_string_xmm(Instruction const&) const;
      DeprecatedString sib_to_deprecated_string(ProcessorMode) const;

      // m_register_index keeps its initial value 0x7f when the operand is not
      // a register.
      bool is_register() const { return m_register_index != 0x7f; }
      unsigned register_index() const { return m_register_index; }
      // register_index() reinterpreted as a register of the given width.
      RegisterIndex64 reg64() const { return static_cast<RegisterIndex64>(register_index()); }
      RegisterIndex32 reg32() const { return static_cast<RegisterIndex32>(register_index()); }
      RegisterIndex16 reg16() const { return static_cast<RegisterIndex16>(register_index()); }
      RegisterIndex8 reg8() const { return static_cast<RegisterIndex8>(register_index()); }
      FpuRegisterIndex reg_fpu() const { return static_cast<FpuRegisterIndex>(register_index()); }

      // helpers to get the parts by name as in the spec
      u8 mod() const { return m_mod; }
      u8 reg() const { return m_reg; }
      u8 rm() const { return m_rm; }
      // Rebuilds the 8-bit ModR/M byte; only the low three bits of reg and rm
      // are used (the fields themselves are four bits wide).
      u8 modrm_byte() const { return (m_mod << 6) | ((m_reg & 7) << 3) | (m_rm & 7); }

      // Store `value` to the operand. write8/16/32 write the register when
      // is_register(), otherwise the resolved memory address. write64/128/256
      // require a memory operand (they VERIFY(!is_register())).
      template<typename CPU, typename T>
      void write8(CPU&, Instruction const&, T);
      template<typename CPU, typename T>
      void write16(CPU&, Instruction const&, T);
      template<typename CPU, typename T>
      void write32(CPU&, Instruction const&, T);
      template<typename CPU, typename T>
      void write64(CPU&, Instruction const&, T);
      template<typename CPU, typename T>
      void write128(CPU&, Instruction const&, T);
      template<typename CPU, typename T>
      void write256(CPU&, Instruction const&, T);

      // Load the operand; same register/memory rules as the write family.
      template<typename CPU>
      typename CPU::ValueWithShadowType8 read8(CPU&, Instruction const&);
      template<typename CPU>
      typename CPU::ValueWithShadowType16 read16(CPU&, Instruction const&);
      template<typename CPU>
      typename CPU::ValueWithShadowType32 read32(CPU&, Instruction const&);
      template<typename CPU>
      typename CPU::ValueWithShadowType64 read64(CPU&, Instruction const&);
      template<typename CPU>
      typename CPU::ValueWithShadowType128 read128(CPU&, Instruction const&);
      template<typename CPU>
      typename CPU::ValueWithShadowType256 read256(CPU&, Instruction const&);

      // Computes the logical address of a memory operand.
      template<typename CPU>
      LogicalAddress resolve(const CPU&, Instruction const&);

  private:
      MemoryOrRegisterReference() = default;

      DeprecatedString to_deprecated_string(Instruction const&) const;
      DeprecatedString to_deprecated_string_a16() const;
      DeprecatedString to_deprecated_string_a32() const;
      DeprecatedString to_deprecated_string_a64() const;

      // Decoding entry points, called by Instruction while consuming the stream.
      template<typename InstructionStreamType>
      void decode(InstructionStreamType&, AddressSize, bool has_rex_r, bool has_rex_x, bool has_rex_b);
      template<typename InstructionStreamType>
      void decode16(InstructionStreamType&);
      template<typename InstructionStreamType>
      void decode32(InstructionStreamType&, bool has_rex_r, bool has_rex_x, bool has_rex_b);

      // Address computation for 16-bit and 32-bit addressing forms.
      template<typename CPU>
      LogicalAddress resolve16(const CPU&, Optional<SegmentRegister>);
      template<typename CPU>
      LogicalAddress resolve32(const CPU&, Optional<SegmentRegister>);

      template<typename CPU>
      u32 evaluate_sib(const CPU&, SegmentRegister& default_segment) const;

      // Displacement; the 16-bit view is used by resolve16, the 32-bit view by
      // resolve32/evaluate_sib.
      union {
          u32 m_displacement32 { 0 };
          u16 m_displacement16;
      };

      // NOTE(review): reg/rm/index/base are 4 bits wide, presumably so a REX
      // extension bit fits -- confirm in decode32().
      u8 m_mod : 2 { 0 };
      u8 m_reg : 4 { 0 };
      u8 m_rm : 4 { 0 };
      u8 m_sib_scale : 2 { 0 };
      u8 m_sib_index : 4 { 0 };
      u8 m_sib_base : 4 { 0 };
      u8 m_displacement_bytes { 0 };
      // 0x7f means "not a register" (see is_register()).
      u8 m_register_index : 7 { 0x7f };
      bool m_has_sib : 1 { false };
  };
  // A single decoded instruction. Instances are produced by from_stream();
  // the constructor is private.
  class Instruction {
  public:
      // Decodes one instruction from `stream` for the given processor mode.
      template<typename InstructionStreamType>
      static Instruction from_stream(InstructionStreamType&, ProcessorMode);
      ~Instruction() = default;

      // Returns a mutable reference even on a const Instruction (m_modrm is
      // declared mutable).
      ALWAYS_INLINE MemoryOrRegisterReference& modrm() const { return m_modrm; }

      // Dereferences m_descriptor, which is null for an invalid instruction;
      // check is_valid() first.
      ALWAYS_INLINE InstructionHandler handler() const { return m_descriptor->handler; }

      // m_segment_prefix holds 0xff when no segment override prefix was seen.
      bool has_segment_prefix() const { return m_segment_prefix != 0xff; }
      ALWAYS_INLINE Optional<SegmentRegister> segment_prefix() const
      {
          if (has_segment_prefix())
              return static_cast<SegmentRegister>(m_segment_prefix);
          return {};
      }

      bool has_address_size_override_prefix() const { return m_has_address_size_override_prefix; }
      bool has_operand_size_override_prefix() const { return m_has_operand_size_override_prefix; }
      bool has_lock_prefix() const { return m_has_lock_prefix; }
      // m_rep_prefix stores the prefix byte itself (0 when absent).
      bool has_rep_prefix() const { return m_rep_prefix; }
      u8 rep_prefix() const { return m_rep_prefix; }

      // False when the decoder did not understand the opcode.
      bool is_valid() const { return m_descriptor; }

      // Encoded length in bytes.
      unsigned length() const;

      DeprecatedString mnemonic() const;

      u8 op() const { return m_op; }
      u8 modrm_byte() const { return m_modrm.modrm_byte(); }
      // The low three bits of the ModR/M reg field.
      u8 slash() const { return m_modrm.reg() & 7; }

      // Views of the first immediate, truncated to the requested width.
      u8 imm8() const { return m_imm1; }
      u16 imm16() const { return m_imm1; }
      u32 imm32() const { return m_imm1; }
      u64 imm64() const { return m_imm1; }

      // Same, with explicit _1 (first immediate) / _2 (second immediate) naming.
      u8 imm8_1() const { return imm8(); }
      u8 imm8_2() const { return m_imm2; }
      u16 imm16_1() const { return imm16(); }
      u16 imm16_2() const { return m_imm2; }
      u32 imm32_1() const { return imm32(); }
      u32 imm32_2() const { return m_imm2; }
      u64 imm64_1() const { return imm64(); }
      u64 imm64_2() const { return m_imm2; }

      // First immediate interpreted at the current address size.
      // NOTE(review): the return type is u32, so the AddressSize::Size64 case
      // truncates imm64() to 32 bits. Widening the return type would fix this
      // but changes the interface -- confirm with callers before changing.
      u32 imm_address() const
      {
          switch (m_address_size) {
          case AddressSize::Size64:
              return imm64();
          case AddressSize::Size32:
              return imm32();
          case AddressSize::Size16:
              return imm16();
          }
          VERIFY_NOT_REACHED();
      }

      // Builds a LogicalAddress with the first immediate as selector and the
      // second immediate as offset.
      LogicalAddress imm_address16_16() const { return LogicalAddress(imm16_1(), imm16_2()); }
      LogicalAddress imm_address16_32() const { return LogicalAddress(imm16_1(), imm32_2()); }

      // True for two-byte opcodes introduced by 0x0f.
      bool has_sub_op() const
      {
          return m_op == 0x0f;
      }

      unsigned register_index() const { return m_register_index; }
      RegisterIndex32 reg32() const { return static_cast<RegisterIndex32>(register_index()); }
      RegisterIndex16 reg16() const { return static_cast<RegisterIndex16>(register_index()); }
      RegisterIndex8 reg8() const { return static_cast<RegisterIndex8>(register_index()); }
      SegmentRegister segment_register() const { return static_cast<SegmentRegister>(register_index()); }

      // Low four bits of the (sub-)opcode.
      u8 cc() const { return has_sub_op() ? m_sub_op & 0xf : m_op & 0xf; }

      AddressSize address_size() const { return m_address_size; }
      OperandSize operand_size() const { return m_operand_size; }
      ProcessorMode mode() const { return m_mode; }

      DeprecatedString to_deprecated_string(u32 origin, SymbolProvider const* = nullptr, bool x32 = true) const;

  private:
      template<typename InstructionStreamType>
      Instruction(InstructionStreamType&, ProcessorMode);

      void to_deprecated_string_internal(StringBuilder&, u32 origin, SymbolProvider const*, bool x32) const;

      StringView reg8_name() const;
      StringView reg16_name() const;
      StringView reg32_name() const;
      StringView reg64_name() const;

      // Null when the instruction was not understood.
      InstructionDescriptor* m_descriptor { nullptr };
      mutable MemoryOrRegisterReference m_modrm;
      u64 m_imm1 { 0 };
      u64 m_imm2 { 0 };
      // 0xff = no segment override.
      u8 m_segment_prefix { 0xff };
      u8 m_register_index { 0xff };
      u8 m_op { 0 };
      u8 m_sub_op { 0 };
      // Bytes counted by length() beyond opcode/ModR/M/SIB/displacement.
      u8 m_extra_bytes { 0 };
      u8 m_rep_prefix { 0 };
      OperandSize m_operand_size { OperandSize::Size16 };
      AddressSize m_address_size { AddressSize::Size16 };
      ProcessorMode m_mode { ProcessorMode::Protected };
      bool m_has_lock_prefix : 1 { false };
      bool m_has_operand_size_override_prefix : 1 { false };
      bool m_has_address_size_override_prefix : 1 { false };
      bool m_has_rex_w : 1 { false };
      bool m_has_rex_r : 1 { false };
      bool m_has_rex_x : 1 { false };
      bool m_has_rex_b : 1 { false };
  };
  652. template<typename CPU>
  653. ALWAYS_INLINE LogicalAddress MemoryOrRegisterReference::resolve16(const CPU& cpu, Optional<SegmentRegister> segment_prefix)
  654. {
  655. auto default_segment = SegmentRegister::DS;
  656. u16 offset = 0;
  657. switch (rm()) {
  658. case 0:
  659. offset = cpu.bx().value() + cpu.si().value() + m_displacement16;
  660. break;
  661. case 1:
  662. offset = cpu.bx().value() + cpu.di().value() + m_displacement16;
  663. break;
  664. case 2:
  665. default_segment = SegmentRegister::SS;
  666. offset = cpu.bp().value() + cpu.si().value() + m_displacement16;
  667. break;
  668. case 3:
  669. default_segment = SegmentRegister::SS;
  670. offset = cpu.bp().value() + cpu.di().value() + m_displacement16;
  671. break;
  672. case 4:
  673. offset = cpu.si().value() + m_displacement16;
  674. break;
  675. case 5:
  676. offset = cpu.di().value() + m_displacement16;
  677. break;
  678. case 6:
  679. if (mod() == 0)
  680. offset = m_displacement16;
  681. else {
  682. default_segment = SegmentRegister::SS;
  683. offset = cpu.bp().value() + m_displacement16;
  684. }
  685. break;
  686. default:
  687. offset = cpu.bx().value() + m_displacement16;
  688. break;
  689. }
  690. u16 segment = cpu.segment(segment_prefix.value_or(default_segment));
  691. return { segment, offset };
  692. }
  693. template<typename CPU>
  694. ALWAYS_INLINE LogicalAddress MemoryOrRegisterReference::resolve32(const CPU& cpu, Optional<SegmentRegister> segment_prefix)
  695. {
  696. auto default_segment = SegmentRegister::DS;
  697. u32 offset = 0;
  698. switch (rm()) {
  699. case 0 ... 3:
  700. case 6 ... 7:
  701. offset = cpu.const_gpr32((RegisterIndex32)(rm())).value() + m_displacement32;
  702. break;
  703. case 4:
  704. offset = evaluate_sib(cpu, default_segment);
  705. break;
  706. default: // 5
  707. if (mod() == 0) {
  708. offset = m_displacement32;
  709. break;
  710. } else {
  711. default_segment = SegmentRegister::SS;
  712. offset = cpu.ebp().value() + m_displacement32;
  713. break;
  714. }
  715. break;
  716. }
  717. u16 segment = cpu.segment(segment_prefix.value_or(default_segment));
  718. return { segment, offset };
  719. }
  720. template<typename CPU>
  721. ALWAYS_INLINE u32 MemoryOrRegisterReference::evaluate_sib(const CPU& cpu, SegmentRegister& default_segment) const
  722. {
  723. u32 scale_shift = m_sib_scale;
  724. u32 index = 0;
  725. switch (m_sib_index) {
  726. case 0 ... 3:
  727. case 5 ... 15:
  728. index = cpu.const_gpr32((RegisterIndex32)m_sib_index).value();
  729. break;
  730. case 4:
  731. index = 0;
  732. break;
  733. }
  734. u32 base = m_displacement32;
  735. switch (m_sib_base) {
  736. case 0 ... 3:
  737. case 6 ... 15:
  738. base += cpu.const_gpr32((RegisterIndex32)m_sib_base).value();
  739. break;
  740. case 4:
  741. default_segment = SegmentRegister::SS;
  742. base += cpu.esp().value();
  743. break;
  744. default: // 5
  745. switch (mod()) {
  746. case 0:
  747. break;
  748. case 1:
  749. case 2:
  750. default_segment = SegmentRegister::SS;
  751. base += cpu.ebp().value();
  752. break;
  753. default:
  754. VERIFY_NOT_REACHED();
  755. }
  756. break;
  757. }
  758. return (index << scale_shift) + base;
  759. }
  760. template<typename CPU, typename T>
  761. ALWAYS_INLINE void MemoryOrRegisterReference::write8(CPU& cpu, Instruction const& insn, T value)
  762. {
  763. if (is_register()) {
  764. cpu.gpr8(reg8()) = value;
  765. return;
  766. }
  767. auto address = resolve(cpu, insn);
  768. cpu.write_memory8(address, value);
  769. }
  770. template<typename CPU, typename T>
  771. ALWAYS_INLINE void MemoryOrRegisterReference::write16(CPU& cpu, Instruction const& insn, T value)
  772. {
  773. if (is_register()) {
  774. cpu.gpr16(reg16()) = value;
  775. return;
  776. }
  777. auto address = resolve(cpu, insn);
  778. cpu.write_memory16(address, value);
  779. }
  780. template<typename CPU, typename T>
  781. ALWAYS_INLINE void MemoryOrRegisterReference::write32(CPU& cpu, Instruction const& insn, T value)
  782. {
  783. if (is_register()) {
  784. cpu.gpr32(reg32()) = value;
  785. return;
  786. }
  787. auto address = resolve(cpu, insn);
  788. cpu.write_memory32(address, value);
  789. }
  790. template<typename CPU, typename T>
  791. ALWAYS_INLINE void MemoryOrRegisterReference::write64(CPU& cpu, Instruction const& insn, T value)
  792. {
  793. VERIFY(!is_register());
  794. auto address = resolve(cpu, insn);
  795. cpu.write_memory64(address, value);
  796. }
  797. template<typename CPU, typename T>
  798. ALWAYS_INLINE void MemoryOrRegisterReference::write128(CPU& cpu, Instruction const& insn, T value)
  799. {
  800. VERIFY(!is_register());
  801. auto address = resolve(cpu, insn);
  802. cpu.write_memory128(address, value);
  803. }
  804. template<typename CPU, typename T>
  805. ALWAYS_INLINE void MemoryOrRegisterReference::write256(CPU& cpu, Instruction const& insn, T value)
  806. {
  807. VERIFY(!is_register());
  808. auto address = resolve(cpu, insn);
  809. cpu.write_memory256(address, value);
  810. }
  811. template<typename CPU>
  812. ALWAYS_INLINE typename CPU::ValueWithShadowType8 MemoryOrRegisterReference::read8(CPU& cpu, Instruction const& insn)
  813. {
  814. if (is_register())
  815. return cpu.const_gpr8(reg8());
  816. auto address = resolve(cpu, insn);
  817. return cpu.read_memory8(address);
  818. }
  819. template<typename CPU>
  820. ALWAYS_INLINE typename CPU::ValueWithShadowType16 MemoryOrRegisterReference::read16(CPU& cpu, Instruction const& insn)
  821. {
  822. if (is_register())
  823. return cpu.const_gpr16(reg16());
  824. auto address = resolve(cpu, insn);
  825. return cpu.read_memory16(address);
  826. }
  827. template<typename CPU>
  828. ALWAYS_INLINE typename CPU::ValueWithShadowType32 MemoryOrRegisterReference::read32(CPU& cpu, Instruction const& insn)
  829. {
  830. if (is_register())
  831. return cpu.const_gpr32(reg32());
  832. auto address = resolve(cpu, insn);
  833. return cpu.read_memory32(address);
  834. }
  835. template<typename CPU>
  836. ALWAYS_INLINE typename CPU::ValueWithShadowType64 MemoryOrRegisterReference::read64(CPU& cpu, Instruction const& insn)
  837. {
  838. VERIFY(!is_register());
  839. auto address = resolve(cpu, insn);
  840. return cpu.read_memory64(address);
  841. }
  842. template<typename CPU>
  843. ALWAYS_INLINE typename CPU::ValueWithShadowType128 MemoryOrRegisterReference::read128(CPU& cpu, Instruction const& insn)
  844. {
  845. VERIFY(!is_register());
  846. auto address = resolve(cpu, insn);
  847. return cpu.read_memory128(address);
  848. }
  849. template<typename CPU>
  850. ALWAYS_INLINE typename CPU::ValueWithShadowType256 MemoryOrRegisterReference::read256(CPU& cpu, Instruction const& insn)
  851. {
  852. VERIFY(!is_register());
  853. auto address = resolve(cpu, insn);
  854. return cpu.read_memory256(address);
  855. }
  856. template<typename InstructionStreamType>
  857. ALWAYS_INLINE Instruction Instruction::from_stream(InstructionStreamType& stream, ProcessorMode mode)
  858. {
  859. return Instruction(stream, mode);
  860. }
  861. ALWAYS_INLINE unsigned Instruction::length() const
  862. {
  863. unsigned len = 1;
  864. if (has_sub_op())
  865. ++len;
  866. if (m_descriptor && m_descriptor->has_rm) {
  867. ++len;
  868. if (m_modrm.m_has_sib)
  869. ++len;
  870. len += m_modrm.m_displacement_bytes;
  871. }
  872. len += m_extra_bytes;
  873. return len;
  874. }
  875. ALWAYS_INLINE Optional<SegmentRegister> to_segment_prefix(u8 op)
  876. {
  877. switch (op) {
  878. case 0x26:
  879. return SegmentRegister::ES;
  880. case 0x2e:
  881. return SegmentRegister::CS;
  882. case 0x36:
  883. return SegmentRegister::SS;
  884. case 0x3e:
  885. return SegmentRegister::DS;
  886. case 0x64:
  887. return SegmentRegister::FS;
  888. case 0x65:
  889. return SegmentRegister::GS;
  890. default:
  891. return {};
  892. }
  893. }
  894. template<typename InstructionStreamType>
  895. ALWAYS_INLINE Instruction::Instruction(InstructionStreamType& stream, ProcessorMode mode)
  896. : m_mode(mode)
  897. {
  898. m_operand_size = OperandSize::Size32;
  899. // m_address_size refers to the default size of displacements/immediates, which is 32 even in long mode (2.2.1.3 Displacement, 2.2.1.5 Immediates),
  900. // with the exception of moffset (see below).
  901. m_address_size = AddressSize::Size32;
  902. u8 prefix_bytes = 0;
  903. for (;; ++prefix_bytes) {
  904. u8 opbyte = stream.read8();
  905. if (opbyte == Prefix::OperandSizeOverride) {
  906. if (m_operand_size == OperandSize::Size32)
  907. m_operand_size = OperandSize::Size16;
  908. else if (m_operand_size == OperandSize::Size16)
  909. m_operand_size = OperandSize::Size32;
  910. m_has_operand_size_override_prefix = true;
  911. continue;
  912. }
  913. if (opbyte == Prefix::AddressSizeOverride) {
  914. if (m_address_size == AddressSize::Size32)
  915. m_address_size = AddressSize::Size16;
  916. else if (m_address_size == AddressSize::Size16)
  917. m_address_size = AddressSize::Size32;
  918. m_has_address_size_override_prefix = true;
  919. continue;
  920. }
  921. if (opbyte == Prefix::REPZ || opbyte == Prefix::REPNZ) {
  922. m_rep_prefix = opbyte;
  923. continue;
  924. }
  925. if (opbyte == Prefix::LOCK) {
  926. m_has_lock_prefix = true;
  927. continue;
  928. }
  929. if (m_mode == ProcessorMode::Long && (opbyte & Prefix::REX_Mask) == Prefix::REX_Base) {
  930. m_has_rex_w = opbyte & 8;
  931. if (m_has_rex_w)
  932. m_operand_size = OperandSize::Size64;
  933. m_has_rex_r = opbyte & 4;
  934. m_has_rex_x = opbyte & 2;
  935. m_has_rex_b = opbyte & 1;
  936. continue;
  937. }
  938. auto segment_prefix = to_segment_prefix(opbyte);
  939. if (segment_prefix.has_value()) {
  940. m_segment_prefix = (u8)segment_prefix.value();
  941. continue;
  942. }
  943. m_op = opbyte;
  944. break;
  945. }
  946. u8 table_index = to_underlying(m_operand_size);
  947. if (m_mode == ProcessorMode::Long && m_operand_size == OperandSize::Size32)
  948. table_index = to_underlying(OperandSize::Size64);
  949. if (m_op == 0x0f) {
  950. m_sub_op = stream.read8();
  951. m_descriptor = &s_0f_table[table_index][m_sub_op];
  952. } else {
  953. m_descriptor = &s_table[table_index][m_op];
  954. }
  955. if (m_descriptor->format == __SSE) {
  956. if (m_rep_prefix == 0xF3) {
  957. m_descriptor = &s_sse_table_f3[m_sub_op];
  958. } else if (m_has_operand_size_override_prefix) {
  959. // This was unset while parsing the prefix initially
  960. m_operand_size = OperandSize::Size32;
  961. m_descriptor = &s_sse_table_66[m_sub_op];
  962. } else {
  963. m_descriptor = &s_sse_table_np[m_sub_op];
  964. }
  965. }
  966. if (m_descriptor->has_rm) {
  967. // Consume ModR/M (may include SIB and displacement.)
  968. m_modrm.decode(stream, m_address_size, m_has_rex_r, m_has_rex_x, m_has_rex_b);
  969. m_register_index = m_modrm.reg();
  970. } else {
  971. if (has_sub_op())
  972. m_register_index = m_sub_op & 7;
  973. else
  974. m_register_index = m_op & 7;
  975. if (m_has_rex_b)
  976. m_register_index |= 8;
  977. }
  978. if (m_mode == ProcessorMode::Long && (m_descriptor->long_mode_force_64 || m_descriptor->long_mode_default_64)) {
  979. m_operand_size = OperandSize::Size64;
  980. if (!m_descriptor->long_mode_force_64 && m_has_operand_size_override_prefix)
  981. m_operand_size = OperandSize::Size32;
  982. }
  983. bool has_slash = m_descriptor->format == MultibyteWithSlash;
  984. if (has_slash) {
  985. m_descriptor = &m_descriptor->slashes[slash()];
  986. if ((modrm_byte() & 0xc0) == 0xc0 && m_descriptor->slashes)
  987. m_descriptor = &m_descriptor->slashes[modrm_byte() & 7];
  988. }
  989. if (!m_descriptor->mnemonic) {
  990. if (has_sub_op()) {
  991. if (has_slash)
  992. warnln("Instruction {:02X} {:02X} /{} not understood", m_op, m_sub_op, slash());
  993. else
  994. warnln("Instruction {:02X} {:02X} not understood", m_op, m_sub_op);
  995. } else {
  996. if (has_slash)
  997. warnln("Instruction {:02X} /{} not understood", m_op, slash());
  998. else
  999. warnln("Instruction {:02X} not understood", m_op);
  1000. }
  1001. m_descriptor = nullptr;
  1002. m_extra_bytes = prefix_bytes;
  1003. return;
  1004. }
  1005. // 2.2.1.4 Direct Memory-Offset MOVs
  1006. auto effective_address_size = m_address_size;
  1007. if (m_mode == ProcessorMode::Long) {
  1008. switch (m_descriptor->format) {
  1009. case OP_AL_moff8: // A0 MOV AL, moffset
  1010. case OP_EAX_moff32: // A1 MOV EAX, moffset
  1011. case OP_moff8_AL: // A2 MOV moffset, AL
  1012. case OP_moff32_EAX: // A3 MOV moffset, EAX
  1013. effective_address_size = AddressSize::Size64;
  1014. break;
  1015. default:
  1016. break;
  1017. }
  1018. }
  1019. auto imm1_bytes = m_descriptor->imm1_bytes_for(effective_address_size, m_operand_size);
  1020. auto imm2_bytes = m_descriptor->imm2_bytes_for(effective_address_size, m_operand_size);
  1021. // Consume immediates if present.
  1022. switch (imm2_bytes) {
  1023. case 1:
  1024. m_imm2 = stream.read8();
  1025. break;
  1026. case 2:
  1027. m_imm2 = stream.read16();
  1028. break;
  1029. case 4:
  1030. m_imm2 = stream.read32();
  1031. break;
  1032. case 8:
  1033. m_imm2 = stream.read64();
  1034. break;
  1035. default:
  1036. VERIFY(imm2_bytes == 0);
  1037. break;
  1038. }
  1039. switch (imm1_bytes) {
  1040. case 1:
  1041. m_imm1 = stream.read8();
  1042. break;
  1043. case 2:
  1044. m_imm1 = stream.read16();
  1045. break;
  1046. case 4:
  1047. m_imm1 = stream.read32();
  1048. break;
  1049. case 8:
  1050. m_imm1 = stream.read64();
  1051. break;
  1052. default:
  1053. VERIFY(imm1_bytes == 0);
  1054. break;
  1055. }
  1056. m_extra_bytes = prefix_bytes + imm1_bytes + imm2_bytes;
  1057. #ifdef DISALLOW_INVALID_LOCK_PREFIX
  1058. if (m_has_lock_prefix && !m_descriptor->lock_prefix_allowed) {
  1059. warnln("Instruction not allowed with LOCK prefix, this will raise #UD");
  1060. m_descriptor = nullptr;
  1061. }
  1062. #endif
  1063. }
  1064. template<typename InstructionStreamType>
  1065. ALWAYS_INLINE void MemoryOrRegisterReference::decode(InstructionStreamType& stream, AddressSize address_size, bool has_rex_r, bool has_rex_x, bool has_rex_b)
  1066. {
  1067. u8 mod_rm_byte = stream.read8();
  1068. m_mod = mod_rm_byte >> 6;
  1069. m_reg = (mod_rm_byte >> 3) & 7;
  1070. m_rm = mod_rm_byte & 7;
  1071. if (address_size == AddressSize::Size32) {
  1072. decode32(stream, has_rex_r, has_rex_x, has_rex_b);
  1073. switch (m_displacement_bytes) {
  1074. case 0:
  1075. break;
  1076. case 1:
  1077. m_displacement32 = sign_extended_to<u32>(stream.read8());
  1078. break;
  1079. case 4:
  1080. m_displacement32 = stream.read32();
  1081. break;
  1082. default:
  1083. VERIFY_NOT_REACHED();
  1084. }
  1085. } else if (address_size == AddressSize::Size16) {
  1086. decode16(stream);
  1087. switch (m_displacement_bytes) {
  1088. case 0:
  1089. break;
  1090. case 1:
  1091. m_displacement16 = sign_extended_to<u16>(stream.read8());
  1092. break;
  1093. case 2:
  1094. m_displacement16 = stream.read16();
  1095. break;
  1096. default:
  1097. VERIFY_NOT_REACHED();
  1098. }
  1099. } else {
  1100. VERIFY_NOT_REACHED();
  1101. }
  1102. }
  1103. template<typename InstructionStreamType>
  1104. ALWAYS_INLINE void MemoryOrRegisterReference::decode16(InstructionStreamType&)
  1105. {
  1106. switch (mod()) {
  1107. case 0b00:
  1108. if (rm() == 6)
  1109. m_displacement_bytes = 2;
  1110. else
  1111. VERIFY(m_displacement_bytes == 0);
  1112. break;
  1113. case 0b01:
  1114. m_displacement_bytes = 1;
  1115. break;
  1116. case 0b10:
  1117. m_displacement_bytes = 2;
  1118. break;
  1119. case 0b11:
  1120. m_register_index = rm();
  1121. break;
  1122. }
  1123. }
  1124. template<typename InstructionStreamType>
  1125. ALWAYS_INLINE void MemoryOrRegisterReference::decode32(InstructionStreamType& stream, bool has_rex_r, bool has_rex_x, bool has_rex_b)
  1126. {
  1127. m_reg |= has_rex_r << 3;
  1128. switch (m_mod) {
  1129. case 0b00:
  1130. if (m_rm == 5) {
  1131. m_displacement_bytes = 4;
  1132. return;
  1133. }
  1134. break;
  1135. case 0b01:
  1136. m_displacement_bytes = 1;
  1137. break;
  1138. case 0b10:
  1139. m_displacement_bytes = 4;
  1140. break;
  1141. case 0b11:
  1142. m_rm |= has_rex_b << 3;
  1143. m_register_index = rm();
  1144. return;
  1145. }
  1146. m_has_sib = m_rm == 4;
  1147. if (m_has_sib) {
  1148. u8 sib_byte = stream.read8();
  1149. m_sib_scale = sib_byte >> 6;
  1150. m_sib_index = (has_rex_x << 3) | ((sib_byte >> 3) & 7);
  1151. m_sib_base = (has_rex_b << 3) | (sib_byte & 7);
  1152. if (m_sib_base == 5) {
  1153. switch (mod()) {
  1154. case 0b00:
  1155. m_displacement_bytes = 4;
  1156. break;
  1157. case 0b01:
  1158. m_displacement_bytes = 1;
  1159. break;
  1160. case 0b10:
  1161. m_displacement_bytes = 4;
  1162. break;
  1163. default:
  1164. VERIFY_NOT_REACHED();
  1165. }
  1166. }
  1167. } else {
  1168. m_rm |= has_rex_b << 3;
  1169. }
  1170. }
  1171. template<typename CPU>
  1172. ALWAYS_INLINE LogicalAddress MemoryOrRegisterReference::resolve(const CPU& cpu, Instruction const& insn)
  1173. {
  1174. switch (insn.address_size()) {
  1175. case AddressSize::Size16:
  1176. return resolve16(cpu, insn.segment_prefix());
  1177. case AddressSize::Size32:
  1178. return resolve32(cpu, insn.segment_prefix());
  1179. default:
  1180. VERIFY_NOT_REACHED();
  1181. }
  1182. }
  1183. }