Instruction.h 35 KB


  1. /*
  2. * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
  3. * Copyright (c) 2022, the SerenityOS developers.
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #pragma once
  8. #include <AK/ByteString.h>
  9. #include <AK/Optional.h>
  10. #include <AK/StdLibExtras.h>
  11. #include <AK/Types.h>
  12. #include <stdio.h>
  13. namespace X86 {
  // Forward declarations; only the names are needed to spell the handler type below.
  class Instruction;
  class Interpreter;

  // Pointer to an Interpreter member function that receives the decoded instruction.
  using InstructionHandler = void (Interpreter::*)(Instruction const&);
  // Abstract interface used to turn an address into a symbolic name.
  // Only the pure-virtual contract is declared here; implementations live elsewhere.
  17. class SymbolProvider {
  18. public:
  // Returns a ByteString for the given address. The optional `offset` out-parameter
  // presumably receives the distance from the start of the symbol -- TODO confirm with implementers.
  19. virtual ByteString symbolicate(FlatPtr, u32* offset = nullptr) const = 0;
  20. protected:
  // Protected: outside code cannot destroy an object through a SymbolProvider pointer.
  21. virtual ~SymbolProvider() = default;
  22. };
  // Compile-time facts about an integer type T:
  //   bits     - width of T in bits
  //   sign_bit - value with only the most significant bit of T set
  //   mask     - value with every bit of T set
  template<typename T>
  struct TypeTrivia {
      static constexpr size_t bits = sizeof(T) * 8;
      // Shift in a 64-bit unsigned type. A plain `1 << (bits - 1)` shifts an int,
      // which overflows for 32-bit T and is undefined for 64-bit T.
      static constexpr T sign_bit = static_cast<T>(1ULL << (bits - 1));
      // Converting -1 to T yields the all-ones bit pattern for every integer width.
      static constexpr T mask = static_cast<T>(-1);
  };

  // Sign-extends `value` (of type U) to the wider type T.
  // If the top bit of U is clear the value is returned unchanged; otherwise all
  // bits of T above U's width are set.
  template<typename T, typename U>
  constexpr T sign_extended_to(U value)
  {
      if (!(value & TypeTrivia<U>::sign_bit))
          return value;
      // Widen U's mask to T *before* inverting. Inverting at U's (promoted) width
      // loses the upper bits when U is 32 bits and T is 64 bits.
      return (TypeTrivia<T>::mask & ~static_cast<T>(TypeTrivia<U>::mask)) | value;
  }
  36. enum class OperandSize : u8 {
  37. Size16,
  38. Size32,
  39. Size64,
  40. };
  41. enum class AddressSize : u8 {
  42. Size16,
  43. Size32,
  44. Size64,
  45. };
  46. enum class ProcessorMode : u8 {
  47. Protected,
  48. Long,
  49. };
  // Per-descriptor flag stating whether a LOCK prefix is allowed.
  enum IsLockPrefixAllowed {
      LockPrefixNotAllowed = 0,
      LockPrefixAllowed = 1,
  };
  // Operand layouts an InstructionDescriptor can declare.
  // Enumerator order is significant: values are assigned sequentially from 0 and the
  // __Begin/__End markers bracket a contiguous range (presumably the formats that
  // carry a ModR/M byte -- confirm against the table builder).
  enum InstructionFormat {
      InvalidFormat = 0,
      MultibyteWithSlash,
      InstructionPrefix,

      __BeginFormatsWithRMByte,
      OP_RM16_reg16, OP_reg8_RM8, OP_reg16_RM16,
      OP_RM16_seg, OP_RM32_seg,
      OP_RM8_imm8, OP_RM16_imm16, OP_RM16_imm8, OP_RM32_imm8,
      OP_RM8, OP_RM16, OP_RM32,
      OP_FPU, OP_FPU_reg, OP_FPU_mem, OP_FPU_AX16,
      OP_FPU_RM16, OP_FPU_RM32, OP_FPU_RM64, OP_FPU_M80,
      OP_RM8_reg8, OP_RM32_reg32, OP_reg32_RM32, OP_RM32_imm32,
      OP_reg16_RM16_imm8, OP_reg32_RM32_imm8, OP_reg16_RM16_imm16, OP_reg32_RM32_imm32,
      OP_reg16_mem16, OP_reg32_mem32,
      OP_seg_RM16, OP_seg_RM32,
      OP_RM8_1, OP_RM16_1, OP_RM32_1,
      OP_FAR_mem16, OP_FAR_mem32,
      OP_RM8_CL, OP_RM16_CL, OP_RM32_CL,
      OP_reg32_CR, OP_CR_reg32, OP_reg32_DR, OP_DR_reg32,
      OP_reg16_RM8, OP_reg32_RM8, OP_reg32_RM16,
      OP_RM16_reg16_imm8, OP_RM32_reg32_imm8, OP_RM16_reg16_CL, OP_RM32_reg32_CL,
      OP_reg, OP_m64,

      // SSE instructions change meaning depending on their prefixes, so they are
      // tagged with this marker and resolved in a second decoding step.
      __SSE,
      OP_mm1_rm32, OP_rm32_mm2, OP_mm1_mm2m64, OP_mm1_mm2m32, OP_mm1_mm2m64_imm8,
      OP_mm1_imm8, OP_mm1m64_mm2, OP_reg_mm1, OP_reg_mm1_imm8, OP_mm1_r32m16_imm8,
      OP_xmm1_imm8, OP_xmm1_xmm2m32, OP_xmm1_xmm2m64, OP_xmm1_xmm2m128,
      OP_xmm1_xmm2m32_imm8, OP_xmm1_xmm2m128_imm8,
      OP_xmm1m32_xmm2, OP_xmm1m64_xmm2, OP_xmm1m128_xmm2,
      OP_reg_xmm1, OP_reg_xmm1_imm8,
      OP_r32_xmm2m32, OP_r32_xmm2m64,
      OP_rm32_xmm2, OP_xmm1_rm32, OP_xmm1_m64, OP_m64_xmm2, OP_rm8_xmm2m32,
      OP_xmm_mm, OP_xmm1_mm2m64, OP_mm1m64_xmm2, OP_mm_xmm, OP_mm1_xmm2m64, OP_mm1_xmm2m128,
      OP_xmm1_r32m16_imm8,
      __EndFormatsWithRMByte,

      OP_reg32_imm32, OP_regW_immW,
      OP_AL_imm8, OP_AX_imm16, OP_EAX_imm32,
      OP_CS, OP_DS, OP_ES, OP_SS, OP_FS, OP_GS,
      OP,
      OP_reg16, OP_imm16, OP_relimm16, OP_relimm32, OP_imm8,
      OP_imm16_imm16, OP_imm16_imm32,
      OP_AX_reg16, OP_EAX_reg32,
      OP_AL_moff8, OP_AX_moff16, OP_EAX_moff32,
      OP_moff8_AL, OP_moff16_AX, OP_moff32_EAX,
      OP_reg8_imm8, OP_reg16_imm16,
      OP_3,
      OP_AX_imm8, OP_EAX_imm8, OP_short_imm8,
      OP_AL_DX, OP_AX_DX, OP_EAX_DX,
      OP_DX_AL, OP_DX_AX, OP_DX_EAX,
      OP_imm8_AL, OP_imm8_AX, OP_imm8_EAX,
      OP_reg8_CL, OP_reg32, OP_imm32, OP_imm16_imm8, OP_NEAR_imm,
  };
  199. static constexpr unsigned CurrentAddressSize = 0xB33FBABE;
  200. static constexpr unsigned CurrentOperandSize = 0xB33FB00F;
  201. struct InstructionDescriptor {
  202. InstructionHandler handler { nullptr };
  203. bool opcode_has_register_index { false };
  204. char const* mnemonic { nullptr };
  205. InstructionFormat format { InvalidFormat };
  206. bool has_rm { false };
  207. unsigned imm1_bytes { 0 };
  208. unsigned imm2_bytes { 0 };
  209. bool long_mode_default_64 { false };
  210. bool long_mode_force_64 { false };
  211. // Addressed by the 3 REG bits in the MOD-REG-R/M byte.
  212. // Some slash instructions have further subgroups when MOD is 11,
  213. // in that case the InstructionDescriptors in slashes have themselves
  214. // a non-null slashes member that's indexed by the three R/M bits.
  215. InstructionDescriptor* slashes { nullptr };
  216. unsigned imm1_bytes_for(AddressSize address_size, OperandSize operand_size) const
  217. {
  218. if (imm1_bytes == CurrentAddressSize) {
  219. switch (address_size) {
  220. case AddressSize::Size64:
  221. return 8;
  222. case AddressSize::Size32:
  223. return 4;
  224. case AddressSize::Size16:
  225. return 2;
  226. }
  227. VERIFY_NOT_REACHED();
  228. }
  229. if (imm1_bytes == CurrentOperandSize) {
  230. switch (operand_size) {
  231. case OperandSize::Size64:
  232. return 8;
  233. case OperandSize::Size32:
  234. return 4;
  235. case OperandSize::Size16:
  236. return 2;
  237. }
  238. VERIFY_NOT_REACHED();
  239. }
  240. return imm1_bytes;
  241. }
  242. unsigned imm2_bytes_for(AddressSize address_size, OperandSize operand_size) const
  243. {
  244. if (imm2_bytes == CurrentAddressSize) {
  245. switch (address_size) {
  246. case AddressSize::Size64:
  247. return 8;
  248. case AddressSize::Size32:
  249. return 4;
  250. case AddressSize::Size16:
  251. return 2;
  252. }
  253. VERIFY_NOT_REACHED();
  254. }
  255. if (imm2_bytes == CurrentOperandSize) {
  256. switch (operand_size) {
  257. case OperandSize::Size64:
  258. return 8;
  259. case OperandSize::Size32:
  260. return 4;
  261. case OperandSize::Size16:
  262. return 2;
  263. }
  264. VERIFY_NOT_REACHED();
  265. }
  266. return imm2_bytes;
  267. }
  268. IsLockPrefixAllowed lock_prefix_allowed { LockPrefixNotAllowed };
  269. };
  // Opcode tables, defined in another translation unit.
  // The decoder indexes s_table / s_0f_table as [table index derived from the operand size][opcode byte];
  // s_0f_table is used for opcodes that follow the 0x0f escape byte.
  270. extern InstructionDescriptor s_table[3][256];
  271. extern InstructionDescriptor s_0f_table[3][256];
  // SSE tables, indexed by the second opcode byte. The decoder picks one depending on
  // the prefix seen: none (np), 0x66, 0xF2 or 0xF3.
  272. extern InstructionDescriptor s_sse_table_np[256];
  273. extern InstructionDescriptor s_sse_table_66[256];
  274. extern InstructionDescriptor s_sse_table_f2[256];
  275. extern InstructionDescriptor s_sse_table_f3[256];
  // Byte values of the instruction prefixes recognised by the decoder.
  struct Prefix {
      enum Op {
          // Size overrides.
          OperandSizeOverride = 0x66,
          AddressSizeOverride = 0x67,

          // LOCK and repeat prefixes (REP and REPZ share one encoding).
          LOCK = 0xf0,
          REPNZ = 0xf2,
          REPZ = 0xf3,
          REP = REPZ,

          // A byte is treated as a REX prefix when (byte & REX_Mask) == REX_Base.
          REX_Base = 0x40,
          REX_Mask = 0xf0,
      };
  };
  // Segment registers; the numeric value is the register index used by the decoder.
  enum class SegmentRegister {
      ES = 0,
      CS = 1,
      SS = 2,
      DS = 3,
      FS = 4,
      GS = 5,
      SegR6 = 6,
      SegR7 = 7,
  };
  // Indices of the 8-bit general purpose registers.
  enum RegisterIndex8 {
      RegisterAL = 0,
      RegisterCL = 1,
      RegisterDL = 2,
      RegisterBL = 3,
      RegisterAH = 4,
      RegisterCH = 5,
      RegisterDH = 6,
      RegisterBH = 7,
      RegisterR8B = 8,
      RegisterR9B = 9,
      RegisterR10B = 10,
      RegisterR11B = 11,
      RegisterR12B = 12,
      RegisterR13B = 13,
      RegisterR14B = 14,
      RegisterR15B = 15,
  };
  // Indices of the 16-bit general purpose registers.
  enum RegisterIndex16 {
      RegisterAX = 0,
      RegisterCX = 1,
      RegisterDX = 2,
      RegisterBX = 3,
      RegisterSP = 4,
      RegisterBP = 5,
      RegisterSI = 6,
      RegisterDI = 7,
      RegisterR8W = 8,
      RegisterR9W = 9,
      RegisterR10W = 10,
      RegisterR11W = 11,
      RegisterR12W = 12,
      RegisterR13W = 13,
      RegisterR14W = 14,
      RegisterR15W = 15,
  };
  // Indices of the 32-bit general purpose registers.
  enum RegisterIndex32 {
      RegisterEAX = 0,
      RegisterECX = 1,
      RegisterEDX = 2,
      RegisterEBX = 3,
      RegisterESP = 4,
      RegisterEBP = 5,
      RegisterESI = 6,
      RegisterEDI = 7,
      RegisterR8D = 8,
      RegisterR9D = 9,
      RegisterR10D = 10,
      RegisterR11D = 11,
      RegisterR12D = 12,
      RegisterR13D = 13,
      RegisterR14D = 14,
      RegisterR15D = 15,
  };
  // Indices of the 64-bit general purpose registers.
  enum RegisterIndex64 {
      RegisterRAX = 0,
      RegisterRCX = 1,
      RegisterRDX = 2,
      RegisterRBX = 3,
      RegisterRSP = 4,
      RegisterRBP = 5,
      RegisterRSI = 6,
      RegisterRDI = 7,
      RegisterR8 = 8,
      RegisterR9 = 9,
      RegisterR10 = 10,
      RegisterR11 = 11,
      RegisterR12 = 12,
      RegisterR13 = 13,
      RegisterR14 = 14,
      RegisterR15 = 15,
  };
  // Indices of the FPU stack registers.
  enum FpuRegisterIndex {
      ST0 = 0,
      ST1 = 1,
      ST2 = 2,
      ST3 = 3,
      ST4 = 4,
      ST5 = 5,
      ST6 = 6,
      ST7 = 7,
  };
  // Indices of the MMX registers.
  enum MMXRegisterIndex {
      RegisterMM0 = 0,
      RegisterMM1 = 1,
      RegisterMM2 = 2,
      RegisterMM3 = 3,
      RegisterMM4 = 4,
      RegisterMM5 = 5,
      RegisterMM6 = 6,
      RegisterMM7 = 7,
  };
  // Indices of the XMM registers.
  enum XMMRegisterIndex {
      RegisterXMM0 = 0,
      RegisterXMM1 = 1,
      RegisterXMM2 = 2,
      RegisterXMM3 = 3,
      RegisterXMM4 = 4,
      RegisterXMM5 = 5,
      RegisterXMM6 = 6,
      RegisterXMM7 = 7,
      RegisterXMM8 = 8,
      RegisterXMM9 = 9,
      RegisterXMM10 = 10,
      RegisterXMM11 = 11,
      RegisterXMM12 = 12,
      RegisterXMM13 = 13,
      RegisterXMM14 = 14,
      RegisterXMM15 = 15,
  };
  408. class LogicalAddress {
  409. public:
  410. LogicalAddress() = default;
  411. LogicalAddress(u16 selector, FlatPtr offset)
  412. : m_selector(selector)
  413. , m_offset(offset)
  414. {
  415. }
  416. u16 selector() const { return m_selector; }
  417. FlatPtr offset() const { return m_offset; }
  418. void set_selector(u16 selector) { m_selector = selector; }
  419. void set_offset(FlatPtr offset) { m_offset = offset; }
  420. private:
  421. u16 m_selector { 0 };
  422. FlatPtr m_offset { 0 };
  423. };
  // Abstract source of instruction bytes for the decoder.
  // Implementations provide a readiness check and reads of 1, 2, 4 and 8 bytes.
  424. class InstructionStream {
  425. public:
  426. virtual bool can_read() = 0;
  427. virtual u8 read8() = 0;
  428. virtual u16 read16() = 0;
  429. virtual u32 read32() = 0;
  430. virtual u64 read64() = 0;
  431. protected:
  // Protected: outside code cannot destroy a stream through an InstructionStream pointer.
  432. virtual ~InstructionStream() = default;
  433. };
  434. class SimpleInstructionStream final : public InstructionStream {
  435. public:
  436. SimpleInstructionStream(u8 const* data, size_t size)
  437. : m_data(data)
  438. , m_size(size)
  439. {
  440. }
  441. virtual bool can_read() override { return m_offset < m_size; }
  442. virtual u8 read8() override
  443. {
  444. if (!can_read())
  445. return 0;
  446. return m_data[m_offset++];
  447. }
  448. virtual u16 read16() override
  449. {
  450. u8 lsb = read8();
  451. u8 msb = read8();
  452. return ((u16)msb << 8) | (u16)lsb;
  453. }
  454. virtual u32 read32() override
  455. {
  456. u16 lsw = read16();
  457. u16 msw = read16();
  458. return ((u32)msw << 16) | (u32)lsw;
  459. }
  460. virtual u64 read64() override
  461. {
  462. u32 lsw = read32();
  463. u32 msw = read32();
  464. return ((u64)msw << 32) | (u64)lsw;
  465. }
  466. size_t offset() const { return m_offset; }
  467. private:
  468. u8 const* m_data { nullptr };
  469. size_t m_offset { 0 };
  470. size_t m_size { 0 };
  471. };
  // Decoded ModR/M operand: refers either to a register or to a memory location.
  // Only Instruction (a friend) can construct and decode one; everybody else reads it
  // through the accessors below.
  472. class MemoryOrRegisterReference {
  473. friend class Instruction;
  474. public:
  // Textual renderings of the operand; the suffix names the operand kind being printed.
  // Implemented outside this header.
  475. ByteString to_byte_string_o8(Instruction const&) const;
  476. ByteString to_byte_string_o16(Instruction const&) const;
  477. ByteString to_byte_string_o32(Instruction const&) const;
  478. ByteString to_byte_string_o64(Instruction const&) const;
  479. ByteString to_byte_string_fpu_reg() const;
  480. ByteString to_byte_string_fpu_mem(Instruction const&) const;
  481. ByteString to_byte_string_fpu_ax16() const;
  482. ByteString to_byte_string_fpu16(Instruction const&) const;
  483. ByteString to_byte_string_fpu32(Instruction const&) const;
  484. ByteString to_byte_string_fpu64(Instruction const&) const;
  485. ByteString to_byte_string_fpu80(Instruction const&) const;
  486. ByteString to_byte_string_mm(Instruction const&) const;
  487. ByteString to_byte_string_xmm(Instruction const&) const;
  488. ByteString sib_to_byte_string(ProcessorMode) const;
  // 0x7f is the initial value of m_register_index and marks "not a register".
  489. bool is_register() const { return m_register_index != 0x7f; }
  490. unsigned register_index() const { return m_register_index; }
  // The same register index, cast to the enum of the requested register class.
  491. RegisterIndex64 reg64() const { return static_cast<RegisterIndex64>(register_index()); }
  492. RegisterIndex32 reg32() const { return static_cast<RegisterIndex32>(register_index()); }
  493. RegisterIndex16 reg16() const { return static_cast<RegisterIndex16>(register_index()); }
  494. RegisterIndex8 reg8() const { return static_cast<RegisterIndex8>(register_index()); }
  495. FpuRegisterIndex reg_fpu() const { return static_cast<FpuRegisterIndex>(register_index()); }
  496. // helpers to get the parts by name as in the spec
  497. u8 mod() const { return m_mod; }
  498. u8 reg() const { return m_reg; }
  499. u8 rm() const { return m_rm; }
  // Reassembles a ModR/M byte from mod and the low three bits of reg and rm.
  500. u8 modrm_byte() const { return (m_mod << 6) | ((m_reg & 7) << 3) | (m_rm & 7); }
  // Operand accessors, defined later in this header. The 8/16/32-bit variants use the
  // register when is_register() is true and otherwise the resolved memory address;
  // the 64/128/256-bit variants VERIFY that the operand is not a register.
  501. template<typename CPU, typename T>
  502. void write8(CPU&, Instruction const&, T);
  503. template<typename CPU, typename T>
  504. void write16(CPU&, Instruction const&, T);
  505. template<typename CPU, typename T>
  506. void write32(CPU&, Instruction const&, T);
  507. template<typename CPU, typename T>
  508. void write64(CPU&, Instruction const&, T);
  509. template<typename CPU, typename T>
  510. void write128(CPU&, Instruction const&, T);
  511. template<typename CPU, typename T>
  512. void write256(CPU&, Instruction const&, T);
  513. template<typename CPU>
  514. typename CPU::ValueWithShadowType8 read8(CPU&, Instruction const&);
  515. template<typename CPU>
  516. typename CPU::ValueWithShadowType16 read16(CPU&, Instruction const&);
  517. template<typename CPU>
  518. typename CPU::ValueWithShadowType32 read32(CPU&, Instruction const&);
  519. template<typename CPU>
  520. typename CPU::ValueWithShadowType64 read64(CPU&, Instruction const&);
  521. template<typename CPU>
  522. typename CPU::ValueWithShadowType128 read128(CPU&, Instruction const&);
  523. template<typename CPU>
  524. typename CPU::ValueWithShadowType256 read256(CPU&, Instruction const&);
  // Computes the logical address of a memory operand; its definition is not in the visible part of this header.
  525. template<typename CPU>
  526. LogicalAddress resolve(const CPU&, Instruction const&);
  527. private:
  528. MemoryOrRegisterReference() = default;
  529. ByteString to_byte_string(Instruction const&) const;
  530. ByteString to_byte_string_a16() const;
  531. ByteString to_byte_string_a32() const;
  532. ByteString to_byte_string_a64() const;
  // Decoding entry points called by Instruction while consuming the byte stream.
  533. template<typename InstructionStreamType>
  534. void decode(InstructionStreamType&, AddressSize, bool has_rex_r, bool has_rex_x, bool has_rex_b);
  535. template<typename InstructionStreamType>
  536. void decode16(InstructionStreamType&);
  537. template<typename InstructionStreamType>
  538. void decode32(InstructionStreamType&, bool has_rex_r, bool has_rex_x, bool has_rex_b);
  539. template<typename CPU>
  540. LogicalAddress resolve16(const CPU&, Optional<SegmentRegister>);
  541. template<typename CPU>
  542. LogicalAddress resolve32(const CPU&, Optional<SegmentRegister>);
  543. template<typename CPU>
  544. u32 evaluate_sib(const CPU&, SegmentRegister& default_segment) const;
  // The 16-bit and 32-bit displacement share storage.
  545. union {
  546. u32 m_displacement32 { 0 };
  547. u16 m_displacement16;
  548. };
  // Bit-field layout: do not reorder. reg, rm and the SIB index/base are 4 bits wide so
  // they can hold indices 0-15 (presumably the 3 encoded bits plus a REX extension bit -- confirm in decode32()).
  549. u8 m_mod : 2 { 0 };
  550. u8 m_reg : 4 { 0 };
  551. u8 : 2;
  552. u8 m_rm : 4 { 0 };
  553. u8 m_sib_scale : 2 { 0 };
  554. u8 : 2;
  555. u8 m_sib_index : 4 { 0 };
  556. u8 m_sib_base : 4 { 0 };
  // Counted by Instruction::length() when the descriptor has a ModR/M byte.
  557. u8 m_displacement_bytes { 0 };
  // 0x7f means "no register", see is_register().
  558. u8 m_register_index : 7 { 0x7f };
  559. bool m_has_sib : 1 { false };
  560. };
  // A single decoded x86 instruction: prefixes, opcode, ModR/M operand and immediates.
  // Instances are created through from_stream(); the constructor is private.
  561. class Instruction {
  562. public:
  // Decodes one instruction from `stream` for the given processor mode.
  563. template<typename InstructionStreamType>
  564. static Instruction from_stream(InstructionStreamType&, ProcessorMode);
  565. ~Instruction() = default;
  // Returns a mutable reference from a const method; m_modrm is declared `mutable`.
  566. ALWAYS_INLINE MemoryOrRegisterReference& modrm() const { return m_modrm; }
  // Dereferences m_descriptor without a null check; only call when is_valid() is true.
  567. ALWAYS_INLINE InstructionHandler handler() const { return m_descriptor->handler; }
  // 0xff in m_segment_prefix means that no segment override prefix was seen.
  568. bool has_segment_prefix() const { return m_segment_prefix != 0xff; }
  569. ALWAYS_INLINE Optional<SegmentRegister> segment_prefix() const
  570. {
  571. if (has_segment_prefix())
  572. return static_cast<SegmentRegister>(m_segment_prefix);
  573. return {};
  574. }
  575. bool has_address_size_override_prefix() const { return m_has_address_size_override_prefix; }
  576. bool has_operand_size_override_prefix() const { return m_has_operand_size_override_prefix; }
  577. bool has_lock_prefix() const { return m_has_lock_prefix; }
  // m_rep_prefix holds the raw REPZ/REPNZ prefix byte, or 0 when there was none.
  578. bool has_rep_prefix() const { return m_rep_prefix; }
  579. u8 rep_prefix() const { return m_rep_prefix; }
  // False when decoding failed: the constructor resets m_descriptor to null for unknown opcodes.
  580. bool is_valid() const { return m_descriptor; }
  581. unsigned length() const;
  582. ByteString mnemonic() const;
  583. u8 op() const { return m_op; }
  584. u8 modrm_byte() const { return m_modrm.modrm_byte(); }
  // The /digit of a slash instruction: the low three bits of the ModR/M reg field.
  585. u8 slash() const { return m_modrm.reg() & 7; }
  // Immediate accessors: the stored 64-bit immediate truncated to the requested width.
  // The unsuffixed and _1 variants read the first immediate, _2 reads the second.
  586. u8 imm8() const { return m_imm1; }
  587. u16 imm16() const { return m_imm1; }
  588. u32 imm32() const { return m_imm1; }
  589. u64 imm64() const { return m_imm1; }
  590. u8 imm8_1() const { return imm8(); }
  591. u8 imm8_2() const { return m_imm2; }
  592. u16 imm16_1() const { return imm16(); }
  593. u16 imm16_2() const { return m_imm2; }
  594. u32 imm32_1() const { return imm32(); }
  595. u32 imm32_2() const { return m_imm2; }
  596. u64 imm64_1() const { return imm64(); }
  597. u64 imm64_2() const { return m_imm2; }
  // First immediate, read at the width of the current address size.
  // NOTE(review): the return type is u32, so the Size64 case truncates imm64() to
  // 32 bits. Widening the return type would fix this but needs a check of all callers.
  598. u32 imm_address() const
  599. {
  600. switch (m_address_size) {
  601. case AddressSize::Size64:
  602. return imm64();
  603. case AddressSize::Size32:
  604. return imm32();
  605. case AddressSize::Size16:
  606. return imm16();
  607. }
  608. VERIFY_NOT_REACHED();
  609. }
  // Far pointers built from the two immediates: the first is the selector, the second the offset.
  610. LogicalAddress imm_address16_16() const { return LogicalAddress(imm16_1(), imm16_2()); }
  611. LogicalAddress imm_address16_32() const { return LogicalAddress(imm16_1(), imm32_2()); }
  // True for opcodes introduced by the 0x0f escape byte; the second byte is m_sub_op.
  612. bool has_sub_op() const
  613. {
  614. return m_op == 0x0f;
  615. }
  616. unsigned register_index() const { return m_register_index; }
  617. RegisterIndex32 reg32() const { return static_cast<RegisterIndex32>(register_index()); }
  618. RegisterIndex16 reg16() const { return static_cast<RegisterIndex16>(register_index()); }
  619. RegisterIndex8 reg8() const { return static_cast<RegisterIndex8>(register_index()); }
  620. SegmentRegister segment_register() const { return static_cast<SegmentRegister>(register_index()); }
  // Low four bits of the effective opcode byte (presumably the condition code of Jcc/SETcc/CMOVcc -- confirm with callers).
  621. u8 cc() const { return has_sub_op() ? m_sub_op & 0xf : m_op & 0xf; }
  622. AddressSize address_size() const { return m_address_size; }
  623. OperandSize operand_size() const { return m_operand_size; }
  624. ProcessorMode mode() const { return m_mode; }
  625. ByteString to_byte_string(u32 origin, SymbolProvider const* = nullptr, bool x32 = true) const;
  626. private:
  627. template<typename InstructionStreamType>
  628. Instruction(InstructionStreamType&, ProcessorMode);
  629. void to_byte_string_internal(StringBuilder&, u32 origin, SymbolProvider const*, bool x32) const;
  630. StringView reg8_name() const;
  631. StringView reg16_name() const;
  632. StringView reg32_name() const;
  633. StringView reg64_name() const;
  // Null when the instruction could not be decoded.
  634. InstructionDescriptor* m_descriptor { nullptr };
  635. mutable MemoryOrRegisterReference m_modrm;
  636. u64 m_imm1 { 0 };
  637. u64 m_imm2 { 0 };
  // 0xff means "no segment override", see has_segment_prefix().
  638. u8 m_segment_prefix { 0xff };
  639. u8 m_register_index { 0xff };
  640. u8 m_op { 0 };
  641. u8 m_sub_op { 0 };
  // Added on top of opcode/ModR/M/SIB/displacement bytes by length().
  642. u8 m_extra_bytes { 0 };
  643. u8 m_rep_prefix { 0 };
  644. OperandSize m_operand_size { OperandSize::Size16 };
  645. AddressSize m_address_size { AddressSize::Size16 };
  646. ProcessorMode m_mode { ProcessorMode::Protected };
  647. bool m_has_lock_prefix : 1 { false };
  648. bool m_has_operand_size_override_prefix : 1 { false };
  649. bool m_has_address_size_override_prefix : 1 { false };
  // Bits W, R, X and B of a REX prefix (only set in long mode).
  650. bool m_has_rex_w : 1 { false };
  651. bool m_has_rex_r : 1 { false };
  652. bool m_has_rex_x : 1 { false };
  653. bool m_has_rex_b : 1 { false };
  654. };
  655. template<typename CPU>
  656. ALWAYS_INLINE LogicalAddress MemoryOrRegisterReference::resolve16(const CPU& cpu, Optional<SegmentRegister> segment_prefix)
  657. {
  658. auto default_segment = SegmentRegister::DS;
  659. u16 offset = 0;
  660. switch (rm()) {
  661. case 0:
  662. offset = cpu.bx().value() + cpu.si().value() + m_displacement16;
  663. break;
  664. case 1:
  665. offset = cpu.bx().value() + cpu.di().value() + m_displacement16;
  666. break;
  667. case 2:
  668. default_segment = SegmentRegister::SS;
  669. offset = cpu.bp().value() + cpu.si().value() + m_displacement16;
  670. break;
  671. case 3:
  672. default_segment = SegmentRegister::SS;
  673. offset = cpu.bp().value() + cpu.di().value() + m_displacement16;
  674. break;
  675. case 4:
  676. offset = cpu.si().value() + m_displacement16;
  677. break;
  678. case 5:
  679. offset = cpu.di().value() + m_displacement16;
  680. break;
  681. case 6:
  682. if (mod() == 0)
  683. offset = m_displacement16;
  684. else {
  685. default_segment = SegmentRegister::SS;
  686. offset = cpu.bp().value() + m_displacement16;
  687. }
  688. break;
  689. default:
  690. offset = cpu.bx().value() + m_displacement16;
  691. break;
  692. }
  693. u16 segment = cpu.segment(segment_prefix.value_or(default_segment));
  694. return { segment, offset };
  695. }
  696. template<typename CPU>
  697. ALWAYS_INLINE LogicalAddress MemoryOrRegisterReference::resolve32(const CPU& cpu, Optional<SegmentRegister> segment_prefix)
  698. {
  699. auto default_segment = SegmentRegister::DS;
  700. u32 offset = 0;
  701. switch (rm()) {
  702. case 0 ... 3:
  703. case 6 ... 7:
  704. offset = cpu.const_gpr32((RegisterIndex32)(rm())).value() + m_displacement32;
  705. break;
  706. case 4:
  707. offset = evaluate_sib(cpu, default_segment);
  708. break;
  709. default: // 5
  710. if (mod() == 0) {
  711. offset = m_displacement32;
  712. break;
  713. } else {
  714. default_segment = SegmentRegister::SS;
  715. offset = cpu.ebp().value() + m_displacement32;
  716. break;
  717. }
  718. break;
  719. }
  720. u16 segment = cpu.segment(segment_prefix.value_or(default_segment));
  721. return { segment, offset };
  722. }
  723. template<typename CPU>
  724. ALWAYS_INLINE u32 MemoryOrRegisterReference::evaluate_sib(const CPU& cpu, SegmentRegister& default_segment) const
  725. {
  726. u32 scale_shift = m_sib_scale;
  727. u32 index = 0;
  728. switch (m_sib_index) {
  729. case 0 ... 3:
  730. case 5 ... 15:
  731. index = cpu.const_gpr32((RegisterIndex32)m_sib_index).value();
  732. break;
  733. case 4:
  734. index = 0;
  735. break;
  736. }
  737. u32 base = m_displacement32;
  738. switch (m_sib_base) {
  739. case 0 ... 3:
  740. case 6 ... 15:
  741. base += cpu.const_gpr32((RegisterIndex32)m_sib_base).value();
  742. break;
  743. case 4:
  744. default_segment = SegmentRegister::SS;
  745. base += cpu.esp().value();
  746. break;
  747. default: // 5
  748. switch (mod()) {
  749. case 0:
  750. break;
  751. case 1:
  752. case 2:
  753. default_segment = SegmentRegister::SS;
  754. base += cpu.ebp().value();
  755. break;
  756. default:
  757. VERIFY_NOT_REACHED();
  758. }
  759. break;
  760. }
  761. return (index << scale_shift) + base;
  762. }
  763. template<typename CPU, typename T>
  764. ALWAYS_INLINE void MemoryOrRegisterReference::write8(CPU& cpu, Instruction const& insn, T value)
  765. {
  766. if (is_register()) {
  767. cpu.gpr8(reg8()) = value;
  768. return;
  769. }
  770. auto address = resolve(cpu, insn);
  771. cpu.write_memory8(address, value);
  772. }
  773. template<typename CPU, typename T>
  774. ALWAYS_INLINE void MemoryOrRegisterReference::write16(CPU& cpu, Instruction const& insn, T value)
  775. {
  776. if (is_register()) {
  777. cpu.gpr16(reg16()) = value;
  778. return;
  779. }
  780. auto address = resolve(cpu, insn);
  781. cpu.write_memory16(address, value);
  782. }
  783. template<typename CPU, typename T>
  784. ALWAYS_INLINE void MemoryOrRegisterReference::write32(CPU& cpu, Instruction const& insn, T value)
  785. {
  786. if (is_register()) {
  787. cpu.gpr32(reg32()) = value;
  788. return;
  789. }
  790. auto address = resolve(cpu, insn);
  791. cpu.write_memory32(address, value);
  792. }
  793. template<typename CPU, typename T>
  794. ALWAYS_INLINE void MemoryOrRegisterReference::write64(CPU& cpu, Instruction const& insn, T value)
  795. {
  796. VERIFY(!is_register());
  797. auto address = resolve(cpu, insn);
  798. cpu.write_memory64(address, value);
  799. }
  800. template<typename CPU, typename T>
  801. ALWAYS_INLINE void MemoryOrRegisterReference::write128(CPU& cpu, Instruction const& insn, T value)
  802. {
  803. VERIFY(!is_register());
  804. auto address = resolve(cpu, insn);
  805. cpu.write_memory128(address, value);
  806. }
  807. template<typename CPU, typename T>
  808. ALWAYS_INLINE void MemoryOrRegisterReference::write256(CPU& cpu, Instruction const& insn, T value)
  809. {
  810. VERIFY(!is_register());
  811. auto address = resolve(cpu, insn);
  812. cpu.write_memory256(address, value);
  813. }
  814. template<typename CPU>
  815. ALWAYS_INLINE typename CPU::ValueWithShadowType8 MemoryOrRegisterReference::read8(CPU& cpu, Instruction const& insn)
  816. {
  817. if (is_register())
  818. return cpu.const_gpr8(reg8());
  819. auto address = resolve(cpu, insn);
  820. return cpu.read_memory8(address);
  821. }
  822. template<typename CPU>
  823. ALWAYS_INLINE typename CPU::ValueWithShadowType16 MemoryOrRegisterReference::read16(CPU& cpu, Instruction const& insn)
  824. {
  825. if (is_register())
  826. return cpu.const_gpr16(reg16());
  827. auto address = resolve(cpu, insn);
  828. return cpu.read_memory16(address);
  829. }
  830. template<typename CPU>
  831. ALWAYS_INLINE typename CPU::ValueWithShadowType32 MemoryOrRegisterReference::read32(CPU& cpu, Instruction const& insn)
  832. {
  833. if (is_register())
  834. return cpu.const_gpr32(reg32());
  835. auto address = resolve(cpu, insn);
  836. return cpu.read_memory32(address);
  837. }
  838. template<typename CPU>
  839. ALWAYS_INLINE typename CPU::ValueWithShadowType64 MemoryOrRegisterReference::read64(CPU& cpu, Instruction const& insn)
  840. {
  841. VERIFY(!is_register());
  842. auto address = resolve(cpu, insn);
  843. return cpu.read_memory64(address);
  844. }
  845. template<typename CPU>
  846. ALWAYS_INLINE typename CPU::ValueWithShadowType128 MemoryOrRegisterReference::read128(CPU& cpu, Instruction const& insn)
  847. {
  848. VERIFY(!is_register());
  849. auto address = resolve(cpu, insn);
  850. return cpu.read_memory128(address);
  851. }
  852. template<typename CPU>
  853. ALWAYS_INLINE typename CPU::ValueWithShadowType256 MemoryOrRegisterReference::read256(CPU& cpu, Instruction const& insn)
  854. {
  855. VERIFY(!is_register());
  856. auto address = resolve(cpu, insn);
  857. return cpu.read_memory256(address);
  858. }
  859. template<typename InstructionStreamType>
  860. ALWAYS_INLINE Instruction Instruction::from_stream(InstructionStreamType& stream, ProcessorMode mode)
  861. {
  862. return Instruction(stream, mode);
  863. }
  864. ALWAYS_INLINE unsigned Instruction::length() const
  865. {
  866. unsigned len = 1;
  867. if (has_sub_op())
  868. ++len;
  869. if (m_descriptor && m_descriptor->has_rm) {
  870. ++len;
  871. if (m_modrm.m_has_sib)
  872. ++len;
  873. len += m_modrm.m_displacement_bytes;
  874. }
  875. len += m_extra_bytes;
  876. return len;
  877. }
  878. ALWAYS_INLINE Optional<SegmentRegister> to_segment_prefix(u8 op)
  879. {
  880. switch (op) {
  881. case 0x26:
  882. return SegmentRegister::ES;
  883. case 0x2e:
  884. return SegmentRegister::CS;
  885. case 0x36:
  886. return SegmentRegister::SS;
  887. case 0x3e:
  888. return SegmentRegister::DS;
  889. case 0x64:
  890. return SegmentRegister::FS;
  891. case 0x65:
  892. return SegmentRegister::GS;
  893. default:
  894. return {};
  895. }
  896. }
  897. template<typename InstructionStreamType>
  898. ALWAYS_INLINE Instruction::Instruction(InstructionStreamType& stream, ProcessorMode mode)
  899. : m_mode(mode)
  900. {
  901. m_operand_size = OperandSize::Size32;
  902. // m_address_size refers to the default size of displacements/immediates, which is 32 even in long mode (2.2.1.3 Displacement, 2.2.1.5 Immediates),
  903. // with the exception of moffset (see below).
  904. m_address_size = AddressSize::Size32;
  905. u8 prefix_bytes = 0;
  906. for (;; ++prefix_bytes) {
  907. u8 opbyte = stream.read8();
  908. if (opbyte == Prefix::OperandSizeOverride) {
  909. if (m_operand_size == OperandSize::Size32)
  910. m_operand_size = OperandSize::Size16;
  911. else if (m_operand_size == OperandSize::Size16)
  912. m_operand_size = OperandSize::Size32;
  913. m_has_operand_size_override_prefix = true;
  914. continue;
  915. }
  916. if (opbyte == Prefix::AddressSizeOverride) {
  917. if (m_address_size == AddressSize::Size32)
  918. m_address_size = AddressSize::Size16;
  919. else if (m_address_size == AddressSize::Size16)
  920. m_address_size = AddressSize::Size32;
  921. m_has_address_size_override_prefix = true;
  922. continue;
  923. }
  924. if (opbyte == Prefix::REPZ || opbyte == Prefix::REPNZ) {
  925. m_rep_prefix = opbyte;
  926. continue;
  927. }
  928. if (opbyte == Prefix::LOCK) {
  929. m_has_lock_prefix = true;
  930. continue;
  931. }
  932. if (m_mode == ProcessorMode::Long && (opbyte & Prefix::REX_Mask) == Prefix::REX_Base) {
  933. m_has_rex_w = opbyte & 8;
  934. if (m_has_rex_w)
  935. m_operand_size = OperandSize::Size64;
  936. m_has_rex_r = opbyte & 4;
  937. m_has_rex_x = opbyte & 2;
  938. m_has_rex_b = opbyte & 1;
  939. continue;
  940. }
  941. auto segment_prefix = to_segment_prefix(opbyte);
  942. if (segment_prefix.has_value()) {
  943. m_segment_prefix = (u8)segment_prefix.value();
  944. continue;
  945. }
  946. m_op = opbyte;
  947. break;
  948. }
  949. u8 table_index = to_underlying(m_operand_size);
  950. if (m_mode == ProcessorMode::Long && m_operand_size == OperandSize::Size32)
  951. table_index = to_underlying(OperandSize::Size64);
  952. if (m_op == 0x0f) {
  953. m_sub_op = stream.read8();
  954. m_descriptor = &s_0f_table[table_index][m_sub_op];
  955. } else {
  956. m_descriptor = &s_table[table_index][m_op];
  957. }
  958. if (m_descriptor->format == __SSE) {
  959. if (m_rep_prefix == 0xF2) {
  960. m_descriptor = &s_sse_table_f2[m_sub_op];
  961. } else if (m_rep_prefix == 0xF3) {
  962. m_descriptor = &s_sse_table_f3[m_sub_op];
  963. } else if (m_has_operand_size_override_prefix) {
  964. // This was unset while parsing the prefix initially
  965. m_operand_size = OperandSize::Size32;
  966. m_descriptor = &s_sse_table_66[m_sub_op];
  967. } else {
  968. m_descriptor = &s_sse_table_np[m_sub_op];
  969. }
  970. }
  971. if (m_descriptor->has_rm) {
  972. // Consume ModR/M (may include SIB and displacement.)
  973. m_modrm.decode(stream, m_address_size, m_has_rex_r, m_has_rex_x, m_has_rex_b);
  974. m_register_index = m_modrm.reg();
  975. } else {
  976. if (has_sub_op())
  977. m_register_index = m_sub_op & 7;
  978. else
  979. m_register_index = m_op & 7;
  980. if (m_has_rex_b)
  981. m_register_index |= 8;
  982. }
  983. if (m_mode == ProcessorMode::Long && (m_descriptor->long_mode_force_64 || m_descriptor->long_mode_default_64)) {
  984. m_operand_size = OperandSize::Size64;
  985. if (!m_descriptor->long_mode_force_64 && m_has_operand_size_override_prefix)
  986. m_operand_size = OperandSize::Size32;
  987. }
  988. bool has_slash = m_descriptor->format == MultibyteWithSlash;
  989. if (has_slash) {
  990. m_descriptor = &m_descriptor->slashes[slash()];
  991. if ((modrm_byte() & 0xc0) == 0xc0 && m_descriptor->slashes)
  992. m_descriptor = &m_descriptor->slashes[modrm_byte() & 7];
  993. }
  994. if (!m_descriptor->mnemonic) {
  995. if (has_sub_op()) {
  996. if (has_slash)
  997. warnln("Instruction {:02X} {:02X} /{} not understood", m_op, m_sub_op, slash());
  998. else
  999. warnln("Instruction {:02X} {:02X} not understood", m_op, m_sub_op);
  1000. } else {
  1001. if (has_slash)
  1002. warnln("Instruction {:02X} /{} not understood", m_op, slash());
  1003. else
  1004. warnln("Instruction {:02X} not understood", m_op);
  1005. }
  1006. m_descriptor = nullptr;
  1007. m_extra_bytes = prefix_bytes;
  1008. return;
  1009. }
  1010. // 2.2.1.4 Direct Memory-Offset MOVs
  1011. auto effective_address_size = m_address_size;
  1012. if (m_mode == ProcessorMode::Long) {
  1013. switch (m_descriptor->format) {
  1014. case OP_AL_moff8: // A0 MOV AL, moffset
  1015. case OP_EAX_moff32: // A1 MOV EAX, moffset
  1016. case OP_moff8_AL: // A2 MOV moffset, AL
  1017. case OP_moff32_EAX: // A3 MOV moffset, EAX
  1018. effective_address_size = AddressSize::Size64;
  1019. break;
  1020. default:
  1021. break;
  1022. }
  1023. }
  1024. auto imm1_bytes = m_descriptor->imm1_bytes_for(effective_address_size, m_operand_size);
  1025. auto imm2_bytes = m_descriptor->imm2_bytes_for(effective_address_size, m_operand_size);
  1026. // Consume immediates if present.
  1027. switch (imm2_bytes) {
  1028. case 1:
  1029. m_imm2 = stream.read8();
  1030. break;
  1031. case 2:
  1032. m_imm2 = stream.read16();
  1033. break;
  1034. case 4:
  1035. m_imm2 = stream.read32();
  1036. break;
  1037. case 8:
  1038. m_imm2 = stream.read64();
  1039. break;
  1040. default:
  1041. VERIFY(imm2_bytes == 0);
  1042. break;
  1043. }
  1044. switch (imm1_bytes) {
  1045. case 1:
  1046. m_imm1 = stream.read8();
  1047. break;
  1048. case 2:
  1049. m_imm1 = stream.read16();
  1050. break;
  1051. case 4:
  1052. m_imm1 = stream.read32();
  1053. break;
  1054. case 8:
  1055. m_imm1 = stream.read64();
  1056. break;
  1057. default:
  1058. VERIFY(imm1_bytes == 0);
  1059. break;
  1060. }
  1061. m_extra_bytes = prefix_bytes + imm1_bytes + imm2_bytes;
  1062. #ifdef DISALLOW_INVALID_LOCK_PREFIX
  1063. if (m_has_lock_prefix && !m_descriptor->lock_prefix_allowed) {
  1064. warnln("Instruction not allowed with LOCK prefix, this will raise #UD");
  1065. m_descriptor = nullptr;
  1066. }
  1067. #endif
  1068. }
  1069. template<typename InstructionStreamType>
  1070. ALWAYS_INLINE void MemoryOrRegisterReference::decode(InstructionStreamType& stream, AddressSize address_size, bool has_rex_r, bool has_rex_x, bool has_rex_b)
  1071. {
  1072. u8 mod_rm_byte = stream.read8();
  1073. m_mod = mod_rm_byte >> 6;
  1074. m_reg = (mod_rm_byte >> 3) & 7;
  1075. m_rm = mod_rm_byte & 7;
  1076. if (address_size == AddressSize::Size32) {
  1077. decode32(stream, has_rex_r, has_rex_x, has_rex_b);
  1078. switch (m_displacement_bytes) {
  1079. case 0:
  1080. break;
  1081. case 1:
  1082. m_displacement32 = sign_extended_to<u32>(stream.read8());
  1083. break;
  1084. case 4:
  1085. m_displacement32 = stream.read32();
  1086. break;
  1087. default:
  1088. VERIFY_NOT_REACHED();
  1089. }
  1090. } else if (address_size == AddressSize::Size16) {
  1091. decode16(stream);
  1092. switch (m_displacement_bytes) {
  1093. case 0:
  1094. break;
  1095. case 1:
  1096. m_displacement16 = sign_extended_to<u16>(stream.read8());
  1097. break;
  1098. case 2:
  1099. m_displacement16 = stream.read16();
  1100. break;
  1101. default:
  1102. VERIFY_NOT_REACHED();
  1103. }
  1104. } else {
  1105. VERIFY_NOT_REACHED();
  1106. }
  1107. }
  1108. template<typename InstructionStreamType>
  1109. ALWAYS_INLINE void MemoryOrRegisterReference::decode16(InstructionStreamType&)
  1110. {
  1111. switch (mod()) {
  1112. case 0b00:
  1113. if (rm() == 6)
  1114. m_displacement_bytes = 2;
  1115. else
  1116. VERIFY(m_displacement_bytes == 0);
  1117. break;
  1118. case 0b01:
  1119. m_displacement_bytes = 1;
  1120. break;
  1121. case 0b10:
  1122. m_displacement_bytes = 2;
  1123. break;
  1124. case 0b11:
  1125. m_register_index = rm();
  1126. break;
  1127. }
  1128. }
  1129. template<typename InstructionStreamType>
  1130. ALWAYS_INLINE void MemoryOrRegisterReference::decode32(InstructionStreamType& stream, bool has_rex_r, bool has_rex_x, bool has_rex_b)
  1131. {
  1132. m_reg |= has_rex_r << 3;
  1133. switch (m_mod) {
  1134. case 0b00:
  1135. if (m_rm == 5) {
  1136. m_displacement_bytes = 4;
  1137. return;
  1138. }
  1139. break;
  1140. case 0b01:
  1141. m_displacement_bytes = 1;
  1142. break;
  1143. case 0b10:
  1144. m_displacement_bytes = 4;
  1145. break;
  1146. case 0b11:
  1147. m_rm |= has_rex_b << 3;
  1148. m_register_index = rm();
  1149. return;
  1150. }
  1151. m_has_sib = m_rm == 4;
  1152. if (m_has_sib) {
  1153. u8 sib_byte = stream.read8();
  1154. m_sib_scale = sib_byte >> 6;
  1155. m_sib_index = (has_rex_x << 3) | ((sib_byte >> 3) & 7);
  1156. m_sib_base = (has_rex_b << 3) | (sib_byte & 7);
  1157. if (m_sib_base == 5) {
  1158. switch (mod()) {
  1159. case 0b00:
  1160. m_displacement_bytes = 4;
  1161. break;
  1162. case 0b01:
  1163. m_displacement_bytes = 1;
  1164. break;
  1165. case 0b10:
  1166. m_displacement_bytes = 4;
  1167. break;
  1168. default:
  1169. VERIFY_NOT_REACHED();
  1170. }
  1171. }
  1172. } else {
  1173. m_rm |= has_rex_b << 3;
  1174. }
  1175. }
  1176. template<typename CPU>
  1177. ALWAYS_INLINE LogicalAddress MemoryOrRegisterReference::resolve(const CPU& cpu, Instruction const& insn)
  1178. {
  1179. switch (insn.address_size()) {
  1180. case AddressSize::Size16:
  1181. return resolve16(cpu, insn.segment_prefix());
  1182. case AddressSize::Size32:
  1183. return resolve32(cpu, insn.segment_prefix());
  1184. default:
  1185. VERIFY_NOT_REACHED();
  1186. }
  1187. }
  1188. }