Instruction.h 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103
  1. /*
  2. * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
  3. * Copyright (c) 2022, the SerenityOS developers.
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #pragma once
  8. #include <AK/Optional.h>
  9. #include <AK/StdLibExtras.h>
  10. #include <AK/String.h>
  11. #include <AK/Types.h>
  12. #include <stdio.h>
  13. namespace X86 {
  // Forward declarations: Instruction is defined further down in this header,
  // Interpreter is only named here.
  class Instruction;
  class Interpreter;

  // Signature of a per-instruction handler: an Interpreter member function
  // that receives the Instruction it is invoked for.
  using InstructionHandler = void (Interpreter::*)(Instruction const&);
  // Interface for turning an address into a symbolic name.
  class SymbolProvider {
  public:
      // Returns a string for the given address. `offset` is an optional
      // out-parameter (defaults to nullptr); presumably it receives the distance
      // from the start of the symbol — confirm against the implementations.
      virtual String symbolicate(FlatPtr, u32* offset = nullptr) const = 0;

  protected:
      // Protected: a provider cannot be destroyed through this interface.
      virtual ~SymbolProvider() = default;
  };
  // Compile-time facts about the bit layout of an integer type T.
  // T is expected to be a built-in integer type no wider than 64 bits.
  template<typename T>
  struct TypeTrivia {
      // Width of T in bits.
      static constexpr size_t bits = sizeof(T) * 8;
      // Only the most significant bit of T set. The shift is done on a 64-bit
      // unsigned value: shifting a plain `int` literal is not a valid constant
      // expression once T is 64 bits wide.
      static constexpr T sign_bit = static_cast<T>(1ull << (bits - 1));
      // Every bit of T set.
      static constexpr T mask = static_cast<T>(~static_cast<T>(0));
  };

  // Widens `value` from U to T, copying U's most significant bit into every
  // bit of T that lies above U's width.
  template<typename T, typename U>
  constexpr T sign_extended_to(U value)
  {
      if (!(value & TypeTrivia<U>::sign_bit))
          return value;
      // Widen U's mask to T *before* inverting it. Inverting it in U's own
      // (promoted) type yields zero whenever U is at least as wide as `int`,
      // which silently dropped the extension for e.g. u32 -> u64.
      constexpr T upper_bits = static_cast<T>(TypeTrivia<T>::mask & ~static_cast<T>(TypeTrivia<U>::mask));
      return static_cast<T>(upper_bits | value);
  }
  // Whether an instruction may carry a LOCK prefix; stored per descriptor in
  // InstructionDescriptor::lock_prefix_allowed.
  enum IsLockPrefixAllowed {
      LockPrefixNotAllowed = 0,
      LockPrefixAllowed = 1,
  };
  // Operand layout of an instruction, stored in InstructionDescriptor::format.
  // Enumerator order is significant: the two __Begin/__End markers bracket a
  // contiguous range of values.
  enum InstructionFormat {
      InvalidFormat,
      // The descriptor is resolved further through its `slashes` table
      // (see the Instruction constructor).
      MultibyteWithSlash,
      InstructionPrefix,

      // Range marker. Going by the name, every format up to
      // __EndFormatsWithRMByte is followed by a ModR/M byte; the markers are
      // not referenced in this header — confirm against the table builders.
      __BeginFormatsWithRMByte,
      OP_RM16_reg16,
      OP_reg8_RM8,
      OP_reg16_RM16,
      OP_RM16_seg,
      OP_RM32_seg,
      OP_RM8_imm8,
      OP_RM16_imm16,
      OP_RM16_imm8,
      OP_RM32_imm8,
      OP_RM8,
      OP_RM16,
      OP_RM32,
      OP_FPU,
      OP_FPU_reg,
      OP_FPU_mem,
      OP_FPU_AX16,
      OP_FPU_RM16,
      OP_FPU_RM32,
      OP_FPU_RM64,
      OP_FPU_M80,
      OP_RM8_reg8,
      OP_RM32_reg32,
      OP_reg32_RM32,
      OP_RM32_imm32,
      OP_reg16_RM16_imm8,
      OP_reg32_RM32_imm8,
      OP_reg16_RM16_imm16,
      OP_reg32_RM32_imm32,
      OP_reg16_mem16,
      OP_reg32_mem32,
      OP_seg_RM16,
      OP_seg_RM32,
      OP_RM8_1,
      OP_RM16_1,
      OP_RM32_1,
      OP_FAR_mem16,
      OP_FAR_mem32,
      OP_RM8_CL,
      OP_RM16_CL,
      OP_RM32_CL,
      OP_reg32_CR,
      OP_CR_reg32,
      OP_reg32_DR,
      OP_DR_reg32,
      OP_reg16_RM8,
      OP_reg32_RM8,
      OP_reg32_RM16,
      OP_RM16_reg16_imm8,
      OP_RM32_reg32_imm8,
      OP_RM16_reg16_CL,
      OP_RM32_reg32_CL,
      OP_reg,
      OP_m64,

      // SSE instructions change meaning depending on their prefixes, so they
      // are marked with this placeholder format; the Instruction constructor
      // then picks the real descriptor from one of the s_sse_table_* tables.
      __SSE,
      OP_mm1_rm32,
      OP_rm32_mm2,
      OP_mm1_mm2m64,
      OP_mm1_mm2m32,
      OP_mm1_mm2m64_imm8,
      OP_mm1_imm8,
      OP_mm1m64_mm2,
      OP_reg_mm1,
      OP_reg_mm1_imm8,
      OP_mm1_r32m16_imm8,
      OP_xmm1_imm8,
      OP_xmm1_xmm2m32,
      OP_xmm1_xmm2m64,
      OP_xmm1_xmm2m128,
      OP_xmm1_xmm2m32_imm8,
      OP_xmm1_xmm2m128_imm8,
      OP_xmm1m32_xmm2,
      OP_xmm1m64_xmm2,
      OP_xmm1m128_xmm2,
      OP_reg_xmm1,
      OP_reg_xmm1_imm8,
      OP_r32_xmm2m32,
      OP_r32_xmm2m64,
      OP_rm32_xmm2,
      OP_xmm1_rm32,
      OP_xmm1_m64,
      OP_m64_xmm2,
      OP_rm8_xmm2m32,
      OP_xmm_mm,
      OP_xmm1_mm2m64,
      OP_mm1m64_xmm2,
      OP_mm_xmm,
      OP_mm1_xmm2m64,
      OP_mm1_xmm2m128,
      OP_xmm1_r32m16_imm8,
      // End of the range opened by __BeginFormatsWithRMByte.
      __EndFormatsWithRMByte,

      OP_reg32_imm32,
      OP_AL_imm8,
      OP_AX_imm16,
      OP_EAX_imm32,
      OP_CS,
      OP_DS,
      OP_ES,
      OP_SS,
      OP_FS,
      OP_GS,
      OP,
      OP_reg16,
      OP_imm16,
      OP_relimm16,
      OP_relimm32,
      OP_imm8,
      OP_imm16_imm16,
      OP_imm16_imm32,
      OP_AX_reg16,
      OP_EAX_reg32,
      OP_AL_moff8,
      OP_AX_moff16,
      OP_EAX_moff32,
      OP_moff8_AL,
      OP_moff16_AX,
      OP_moff32_EAX,
      OP_reg8_imm8,
      OP_reg16_imm16,
      OP_3,
      OP_AX_imm8,
      OP_EAX_imm8,
      OP_short_imm8,
      OP_AL_DX,
      OP_AX_DX,
      OP_EAX_DX,
      OP_DX_AL,
      OP_DX_AX,
      OP_DX_EAX,
      OP_imm8_AL,
      OP_imm8_AX,
      OP_imm8_EAX,
      OP_reg8_CL,
      OP_reg32,
      OP_imm32,
      OP_imm16_imm8,
      OP_NEAR_imm,
  };
  184. static constexpr unsigned CurrentAddressSize = 0xB33FBABE;
  185. struct InstructionDescriptor {
  186. InstructionHandler handler { nullptr };
  187. bool opcode_has_register_index { false };
  188. char const* mnemonic { nullptr };
  189. InstructionFormat format { InvalidFormat };
  190. bool has_rm { false };
  191. unsigned imm1_bytes { 0 };
  192. unsigned imm2_bytes { 0 };
  193. // Addressed by the 3 REG bits in the MOD-REG-R/M byte.
  194. // Some slash instructions have further subgroups when MOD is 11,
  195. // in that case the InstructionDescriptors in slashes have themselves
  196. // a non-null slashes member that's indexed by the three R/M bits.
  197. InstructionDescriptor* slashes { nullptr };
  198. unsigned imm1_bytes_for_address_size(bool a32) const
  199. {
  200. if (imm1_bytes == CurrentAddressSize)
  201. return a32 ? 4 : 2;
  202. return imm1_bytes;
  203. }
  204. unsigned imm2_bytes_for_address_size(bool a32) const
  205. {
  206. if (imm2_bytes == CurrentAddressSize)
  207. return a32 ? 4 : 2;
  208. return imm2_bytes;
  209. }
  210. IsLockPrefixAllowed lock_prefix_allowed { LockPrefixNotAllowed };
  211. };
  // Opcode tables, defined in another translation unit. Each is indexed by an
  // opcode byte; the Instruction constructor selects the table as follows.

  // One-byte opcodes, chosen by the effective operand size (16- or 32-bit).
  extern InstructionDescriptor s_table16[256];
  extern InstructionDescriptor s_table32[256];
  // Two-byte opcodes (first byte 0x0f), indexed by the second opcode byte.
  extern InstructionDescriptor s_0f_table16[256];
  extern InstructionDescriptor s_0f_table32[256];
  // Replacements for descriptors whose format is __SSE, indexed by the second
  // opcode byte: `f3` with an 0xF3 prefix, `66` with an operand-size override
  // prefix, `np` otherwise.
  extern InstructionDescriptor s_sse_table_np[256];
  extern InstructionDescriptor s_sse_table_66[256];
  extern InstructionDescriptor s_sse_table_f3[256];
  // Byte values of the instruction prefixes recognised by the decoder
  // (segment-override prefixes are handled by to_segment_prefix()).
  struct Prefix {
      enum Op {
          OperandSizeOverride = 0x66,
          AddressSizeOverride = 0x67,
          REP = 0xf3,
          REPZ = REP, // same byte as REP
          REPNZ = 0xf2,
          LOCK = 0xf0,
      };
  };
  // Segment registers by number. Instruction::segment_register() casts a
  // register index straight to this type; SegR6 and SegR7 presumably just
  // name the two remaining values of a 3-bit field.
  enum class SegmentRegister {
      ES = 0,
      CS = 1,
      SS = 2,
      DS = 3,
      FS = 4,
      GS = 5,
      SegR6 = 6,
      SegR7 = 7,
  };
  // 8-bit general-purpose registers. The values are the register numbers that
  // reg8() on Instruction / MemoryOrRegisterReference casts to this type.
  enum RegisterIndex8 {
      RegisterAL = 0,
      RegisterCL = 1,
      RegisterDL = 2,
      RegisterBL = 3,
      RegisterAH = 4,
      RegisterCH = 5,
      RegisterDH = 6,
      RegisterBH = 7,
  };
  // 16-bit general-purpose registers. The values are the register numbers
  // that reg16() on Instruction / MemoryOrRegisterReference casts to this type.
  enum RegisterIndex16 {
      RegisterAX = 0,
      RegisterCX = 1,
      RegisterDX = 2,
      RegisterBX = 3,
      RegisterSP = 4,
      RegisterBP = 5,
      RegisterSI = 6,
      RegisterDI = 7,
  };
  // 32-bit general-purpose registers. The values are the register numbers
  // that reg32() casts to this type and that the address resolvers pass to
  // const_gpr32().
  enum RegisterIndex32 {
      RegisterEAX = 0,
      RegisterECX = 1,
      RegisterEDX = 2,
      RegisterEBX = 3,
      RegisterESP = 4,
      RegisterEBP = 5,
      RegisterESI = 6,
      RegisterEDI = 7,
  };
  // FPU registers ST0..ST7 by number; MemoryOrRegisterReference::reg_fpu()
  // casts a register index to this type.
  enum FpuRegisterIndex {
      ST0 = 0,
      ST1 = 1,
      ST2 = 2,
      ST3 = 3,
      ST4 = 4,
      ST5 = 5,
      ST6 = 6,
      ST7 = 7,
  };
  // MMX registers MM0..MM7 by number.
  enum MMXRegisterIndex {
      RegisterMM0 = 0,
      RegisterMM1 = 1,
      RegisterMM2 = 2,
      RegisterMM3 = 3,
      RegisterMM4 = 4,
      RegisterMM5 = 5,
      RegisterMM6 = 6,
      RegisterMM7 = 7,
  };
  // XMM registers XMM0..XMM7 by number.
  enum XMMRegisterIndex {
      RegisterXMM0 = 0,
      RegisterXMM1 = 1,
      RegisterXMM2 = 2,
      RegisterXMM3 = 3,
      RegisterXMM4 = 4,
      RegisterXMM5 = 5,
      RegisterXMM6 = 6,
      RegisterXMM7 = 7,
  };
  // A segmented address: a 16-bit segment selector plus an offset.
  class LogicalAddress {
  public:
      // Selector 0, offset 0.
      LogicalAddress() = default;
      LogicalAddress(u16 selector, FlatPtr offset)
          : m_selector(selector)
          , m_offset(offset)
      {
      }

      u16 selector() const { return m_selector; }
      FlatPtr offset() const { return m_offset; }
      void set_selector(u16 selector) { m_selector = selector; }
      void set_offset(FlatPtr offset) { m_offset = offset; }

  private:
      u16 m_selector { 0 };
      FlatPtr m_offset { 0 };
  };
  // Abstract source of instruction bytes for the decoder.
  class InstructionStream {
  public:
      // Whether more data can be read from the stream.
      virtual bool can_read() = 0;
      // Fixed-width reads; each returns a value of the given width.
      virtual u8 read8() = 0;
      virtual u16 read16() = 0;
      virtual u32 read32() = 0;
      virtual u64 read64() = 0;

  protected:
      // Protected: a stream cannot be destroyed through this interface.
      virtual ~InstructionStream() = default;
  };
  325. class SimpleInstructionStream final : public InstructionStream {
  326. public:
  327. SimpleInstructionStream(u8 const* data, size_t size)
  328. : m_data(data)
  329. , m_size(size)
  330. {
  331. }
  332. virtual bool can_read() override { return m_offset < m_size; }
  333. virtual u8 read8() override
  334. {
  335. if (!can_read())
  336. return 0;
  337. return m_data[m_offset++];
  338. }
  339. virtual u16 read16() override
  340. {
  341. u8 lsb = read8();
  342. u8 msb = read8();
  343. return ((u16)msb << 8) | (u16)lsb;
  344. }
  345. virtual u32 read32() override
  346. {
  347. u16 lsw = read16();
  348. u16 msw = read16();
  349. return ((u32)msw << 16) | (u32)lsw;
  350. }
  351. virtual u64 read64() override
  352. {
  353. u32 lsw = read32();
  354. u32 msw = read32();
  355. return ((u64)msw << 32) | (u64)lsw;
  356. }
  357. size_t offset() const { return m_offset; }
  358. private:
  359. u8 const* m_data { nullptr };
  360. size_t m_offset { 0 };
  361. size_t m_size { 0 };
  362. };
  // The operand described by a ModR/M byte: either a register or a memory
  // reference (with optional SIB byte and displacement).
  // Only Instruction can create one (private constructor + friend).
  class MemoryOrRegisterReference {
      friend class Instruction;

  public:
      // Textual forms of the operand for the various operand kinds; all
      // defined out of line.
      String to_string_o8(Instruction const&) const;
      String to_string_o16(Instruction const&) const;
      String to_string_o32(Instruction const&) const;
      String to_string_fpu_reg() const;
      String to_string_fpu_mem(Instruction const&) const;
      String to_string_fpu_ax16() const;
      String to_string_fpu16(Instruction const&) const;
      String to_string_fpu32(Instruction const&) const;
      String to_string_fpu64(Instruction const&) const;
      String to_string_fpu80(Instruction const&) const;
      String to_string_mm(Instruction const&) const;
      String to_string_xmm(Instruction const&) const;

      // m_register_index keeps its initial value 0x7f unless decode16() /
      // decode32() saw MOD == 0b11 and stored the R/M field in it.
      bool is_register() const { return m_register_index != 0x7f; }

      // Register number, and the same number cast to each register enum.
      unsigned register_index() const { return m_register_index; }
      RegisterIndex32 reg32() const { return static_cast<RegisterIndex32>(register_index()); }
      RegisterIndex16 reg16() const { return static_cast<RegisterIndex16>(register_index()); }
      RegisterIndex8 reg8() const { return static_cast<RegisterIndex8>(register_index()); }
      FpuRegisterIndex reg_fpu() const { return static_cast<FpuRegisterIndex>(register_index()); }

      // The three fields of the ModR/M byte, named as in the spec:
      // MOD = bits 7..6, REG = bits 5..3, R/M = bits 2..0.
      u8 mod() const { return m_rm_byte >> 6; }
      u8 reg() const { return m_rm_byte >> 3 & 0b111; }
      u8 rm() const { return m_rm_byte & 0b111; }

      // Writes `value` to the operand. The 8/16/32-bit variants write a
      // general-purpose register when is_register(), otherwise memory at
      // resolve(); the 64/128/256-bit variants VERIFY that the operand is
      // memory.
      template<typename CPU, typename T>
      void write8(CPU&, Instruction const&, T);
      template<typename CPU, typename T>
      void write16(CPU&, Instruction const&, T);
      template<typename CPU, typename T>
      void write32(CPU&, Instruction const&, T);
      template<typename CPU, typename T>
      void write64(CPU&, Instruction const&, T);
      template<typename CPU, typename T>
      void write128(CPU&, Instruction const&, T);
      template<typename CPU, typename T>
      void write256(CPU&, Instruction const&, T);

      // Reads the operand; register/memory handling mirrors the writers.
      template<typename CPU>
      typename CPU::ValueWithShadowType8 read8(CPU&, Instruction const&);
      template<typename CPU>
      typename CPU::ValueWithShadowType16 read16(CPU&, Instruction const&);
      template<typename CPU>
      typename CPU::ValueWithShadowType32 read32(CPU&, Instruction const&);
      template<typename CPU>
      typename CPU::ValueWithShadowType64 read64(CPU&, Instruction const&);
      template<typename CPU>
      typename CPU::ValueWithShadowType128 read128(CPU&, Instruction const&);
      template<typename CPU>
      typename CPU::ValueWithShadowType256 read256(CPU&, Instruction const&);

      // Effective segment:offset of the memory operand, using 16- or 32-bit
      // addressing according to the instruction's a32().
      template<typename CPU>
      LogicalAddress resolve(const CPU&, Instruction const&);

  private:
      MemoryOrRegisterReference() = default;

      String to_string(Instruction const&) const;
      String to_string_a16() const;
      String to_string_a32() const;

      // decode() reads the ModR/M byte, then delegates to decode16() or
      // decode32() (which also reads a SIB byte if present) and finally reads
      // the displacement.
      template<typename InstructionStreamType>
      void decode(InstructionStreamType&, bool a32);
      template<typename InstructionStreamType>
      void decode16(InstructionStreamType&);
      template<typename InstructionStreamType>
      void decode32(InstructionStreamType&);

      template<typename CPU>
      LogicalAddress resolve16(const CPU&, Optional<SegmentRegister>);
      template<typename CPU>
      LogicalAddress resolve32(const CPU&, Optional<SegmentRegister>);

      // Evaluates the SIB byte plus displacement; may change `default_segment`
      // to SS.
      template<typename CPU>
      u32 evaluate_sib(const CPU&, SegmentRegister& default_segment) const;

      // Displacement: decode() stores into m_displacement16 for 16-bit
      // addressing and into m_displacement32 for 32-bit addressing.
      union {
          u32 m_displacement32 { 0 };
          u16 m_displacement16;
      };

      u8 m_rm_byte { 0 };
      u8 m_sib { 0 };
      // Size of the encoded displacement in bytes (0, 1, 2 or 4).
      u8 m_displacement_bytes { 0 };
      // 0x7f means "not a register", see is_register().
      u8 m_register_index : 7 { 0x7f };
      bool m_has_sib : 1 { false };
  };
  // A single decoded x86 instruction: prefixes, opcode, ModR/M operand and
  // immediates, plus the table descriptor it resolved to.
  class Instruction {
  public:
      // Decodes one instruction from the stream. `o32` / `a32` are the default
      // operand / address sizes (true = 32-bit) before any override prefix.
      template<typename InstructionStreamType>
      static Instruction from_stream(InstructionStreamType&, bool o32, bool a32);
      ~Instruction() = default;

      // m_modrm is `mutable`, so a non-const reference can be handed out from
      // a const Instruction.
      ALWAYS_INLINE MemoryOrRegisterReference& modrm() const { return m_modrm; }
      // Dereferences the descriptor; only meaningful when is_valid().
      ALWAYS_INLINE InstructionHandler handler() const { return m_descriptor->handler; }

      // m_segment_prefix is 0xff when no segment-override prefix was seen.
      bool has_segment_prefix() const { return m_segment_prefix != 0xff; }
      ALWAYS_INLINE Optional<SegmentRegister> segment_prefix() const
      {
          if (has_segment_prefix())
              return static_cast<SegmentRegister>(m_segment_prefix);
          return {};
      }

      bool has_address_size_override_prefix() const { return m_has_address_size_override_prefix; }
      bool has_operand_size_override_prefix() const { return m_has_operand_size_override_prefix; }
      bool has_lock_prefix() const { return m_has_lock_prefix; }
      // m_rep_prefix holds the raw prefix byte (0xf2 / 0xf3), or 0 if absent.
      bool has_rep_prefix() const { return m_rep_prefix; }
      u8 rep_prefix() const { return m_rep_prefix; }

      // False when the constructor reset the descriptor to nullptr.
      bool is_valid() const { return m_descriptor; }

      unsigned length() const;

      String mnemonic() const;

      u8 op() const { return m_op; }
      u8 modrm_byte() const { return m_modrm.m_rm_byte; }
      // REG field of the ModR/M byte (the "/digit" of slash-encoded opcodes).
      u8 slash() const { return (modrm_byte() >> 3) & 7; }

      // First immediate, truncated to the requested width.
      u8 imm8() const { return m_imm1; }
      u16 imm16() const { return m_imm1; }
      u32 imm32() const { return m_imm1; }

      // `_1` accessors read the first immediate, `_2` accessors the second.
      u8 imm8_1() const { return imm8(); }
      u8 imm8_2() const { return m_imm2; }
      u16 imm16_1() const { return imm16(); }
      u16 imm16_2() const { return m_imm2; }
      u32 imm32_1() const { return imm32(); }
      u32 imm32_2() const { return m_imm2; }

      // First immediate as an address of the effective address size.
      u32 imm_address() const { return m_a32 ? imm32() : imm16(); }

      // Immediate far pointers: the first immediate is used as the selector,
      // the second as the 16- or 32-bit offset.
      LogicalAddress imm_address16_16() const { return LogicalAddress(imm16_1(), imm16_2()); }
      LogicalAddress imm_address16_32() const { return LogicalAddress(imm16_1(), imm32_2()); }

      // True for two-byte opcodes (first byte 0x0f).
      bool has_sub_op() const
      {
          return m_op == 0x0f;
      }

      // Register number taken from the ModR/M REG field, or from the low
      // three opcode bits when the instruction has no ModR/M byte.
      unsigned register_index() const { return m_register_index; }
      RegisterIndex32 reg32() const { return static_cast<RegisterIndex32>(register_index()); }
      RegisterIndex16 reg16() const { return static_cast<RegisterIndex16>(register_index()); }
      RegisterIndex8 reg8() const { return static_cast<RegisterIndex8>(register_index()); }

      SegmentRegister segment_register() const { return static_cast<SegmentRegister>(register_index()); }

      // Low four bits of the (sub-)opcode byte.
      u8 cc() const { return has_sub_op() ? m_sub_op & 0xf : m_op & 0xf; }

      // Effective address size after any override prefix (true = 32-bit).
      bool a32() const { return m_a32; }

      String to_string(u32 origin, SymbolProvider const* = nullptr, bool x32 = true) const;

  private:
      template<typename InstructionStreamType>
      Instruction(InstructionStreamType&, bool o32, bool a32);

      void to_string_internal(StringBuilder&, u32 origin, SymbolProvider const*, bool x32) const;

      char const* reg8_name() const;
      char const* reg16_name() const;
      char const* reg32_name() const;

      // nullptr marks an instruction that could not be decoded.
      InstructionDescriptor* m_descriptor { nullptr };
      mutable MemoryOrRegisterReference m_modrm;
      u32 m_imm1 { 0 };
      u32 m_imm2 { 0 };
      // 0xff = no segment-override prefix.
      u8 m_segment_prefix { 0xff };
      u8 m_register_index { 0xff };
      u8 m_op { 0 };
      u8 m_sub_op { 0 };
      // Prefix bytes plus immediate bytes; used by length().
      u8 m_extra_bytes { 0 };
      u8 m_rep_prefix { 0 };
      bool m_a32 : 1 { false };
      bool m_o32 : 1 { false };
      bool m_has_lock_prefix : 1 { false };
      bool m_has_operand_size_override_prefix : 1 { false };
      bool m_has_address_size_override_prefix : 1 { false };
  };
  513. template<typename CPU>
  514. ALWAYS_INLINE LogicalAddress MemoryOrRegisterReference::resolve16(const CPU& cpu, Optional<SegmentRegister> segment_prefix)
  515. {
  516. auto default_segment = SegmentRegister::DS;
  517. u16 offset = 0;
  518. switch (rm()) {
  519. case 0:
  520. offset = cpu.bx().value() + cpu.si().value() + m_displacement16;
  521. break;
  522. case 1:
  523. offset = cpu.bx().value() + cpu.di().value() + m_displacement16;
  524. break;
  525. case 2:
  526. default_segment = SegmentRegister::SS;
  527. offset = cpu.bp().value() + cpu.si().value() + m_displacement16;
  528. break;
  529. case 3:
  530. default_segment = SegmentRegister::SS;
  531. offset = cpu.bp().value() + cpu.di().value() + m_displacement16;
  532. break;
  533. case 4:
  534. offset = cpu.si().value() + m_displacement16;
  535. break;
  536. case 5:
  537. offset = cpu.di().value() + m_displacement16;
  538. break;
  539. case 6:
  540. if (mod() == 0)
  541. offset = m_displacement16;
  542. else {
  543. default_segment = SegmentRegister::SS;
  544. offset = cpu.bp().value() + m_displacement16;
  545. }
  546. break;
  547. default:
  548. offset = cpu.bx().value() + m_displacement16;
  549. break;
  550. }
  551. u16 segment = cpu.segment(segment_prefix.value_or(default_segment));
  552. return { segment, offset };
  553. }
  554. template<typename CPU>
  555. ALWAYS_INLINE LogicalAddress MemoryOrRegisterReference::resolve32(const CPU& cpu, Optional<SegmentRegister> segment_prefix)
  556. {
  557. auto default_segment = SegmentRegister::DS;
  558. u32 offset = 0;
  559. switch (rm()) {
  560. case 0 ... 3:
  561. case 6 ... 7:
  562. offset = cpu.const_gpr32((RegisterIndex32)(rm())).value() + m_displacement32;
  563. break;
  564. case 4:
  565. offset = evaluate_sib(cpu, default_segment);
  566. break;
  567. default: // 5
  568. if (mod() == 0) {
  569. offset = m_displacement32;
  570. break;
  571. } else {
  572. default_segment = SegmentRegister::SS;
  573. offset = cpu.ebp().value() + m_displacement32;
  574. break;
  575. }
  576. break;
  577. }
  578. u16 segment = cpu.segment(segment_prefix.value_or(default_segment));
  579. return { segment, offset };
  580. }
  581. template<typename CPU>
  582. ALWAYS_INLINE u32 MemoryOrRegisterReference::evaluate_sib(const CPU& cpu, SegmentRegister& default_segment) const
  583. {
  584. u32 scale_shift = m_sib >> 6;
  585. u32 index = 0;
  586. switch ((m_sib >> 3) & 0x07) {
  587. case 0 ... 3:
  588. case 5 ... 7:
  589. index = cpu.const_gpr32((RegisterIndex32)((m_sib >> 3) & 0x07)).value();
  590. break;
  591. case 4:
  592. index = 0;
  593. break;
  594. }
  595. u32 base = m_displacement32;
  596. switch (m_sib & 0x07) {
  597. case 0 ... 3:
  598. case 6 ... 7:
  599. base += cpu.const_gpr32((RegisterIndex32)(m_sib & 0x07)).value();
  600. break;
  601. case 4:
  602. default_segment = SegmentRegister::SS;
  603. base += cpu.esp().value();
  604. break;
  605. default: // 5
  606. switch (mod()) {
  607. case 0:
  608. break;
  609. case 1:
  610. case 2:
  611. default_segment = SegmentRegister::SS;
  612. base += cpu.ebp().value();
  613. break;
  614. default:
  615. VERIFY_NOT_REACHED();
  616. break;
  617. }
  618. break;
  619. }
  620. return (index << scale_shift) + base;
  621. }
  622. template<typename CPU, typename T>
  623. ALWAYS_INLINE void MemoryOrRegisterReference::write8(CPU& cpu, Instruction const& insn, T value)
  624. {
  625. if (is_register()) {
  626. cpu.gpr8(reg8()) = value;
  627. return;
  628. }
  629. auto address = resolve(cpu, insn);
  630. cpu.write_memory8(address, value);
  631. }
  632. template<typename CPU, typename T>
  633. ALWAYS_INLINE void MemoryOrRegisterReference::write16(CPU& cpu, Instruction const& insn, T value)
  634. {
  635. if (is_register()) {
  636. cpu.gpr16(reg16()) = value;
  637. return;
  638. }
  639. auto address = resolve(cpu, insn);
  640. cpu.write_memory16(address, value);
  641. }
  642. template<typename CPU, typename T>
  643. ALWAYS_INLINE void MemoryOrRegisterReference::write32(CPU& cpu, Instruction const& insn, T value)
  644. {
  645. if (is_register()) {
  646. cpu.gpr32(reg32()) = value;
  647. return;
  648. }
  649. auto address = resolve(cpu, insn);
  650. cpu.write_memory32(address, value);
  651. }
  652. template<typename CPU, typename T>
  653. ALWAYS_INLINE void MemoryOrRegisterReference::write64(CPU& cpu, Instruction const& insn, T value)
  654. {
  655. VERIFY(!is_register());
  656. auto address = resolve(cpu, insn);
  657. cpu.write_memory64(address, value);
  658. }
  659. template<typename CPU, typename T>
  660. ALWAYS_INLINE void MemoryOrRegisterReference::write128(CPU& cpu, Instruction const& insn, T value)
  661. {
  662. VERIFY(!is_register());
  663. auto address = resolve(cpu, insn);
  664. cpu.write_memory128(address, value);
  665. }
  666. template<typename CPU, typename T>
  667. ALWAYS_INLINE void MemoryOrRegisterReference::write256(CPU& cpu, Instruction const& insn, T value)
  668. {
  669. VERIFY(!is_register());
  670. auto address = resolve(cpu, insn);
  671. cpu.write_memory256(address, value);
  672. }
  673. template<typename CPU>
  674. ALWAYS_INLINE typename CPU::ValueWithShadowType8 MemoryOrRegisterReference::read8(CPU& cpu, Instruction const& insn)
  675. {
  676. if (is_register())
  677. return cpu.const_gpr8(reg8());
  678. auto address = resolve(cpu, insn);
  679. return cpu.read_memory8(address);
  680. }
  681. template<typename CPU>
  682. ALWAYS_INLINE typename CPU::ValueWithShadowType16 MemoryOrRegisterReference::read16(CPU& cpu, Instruction const& insn)
  683. {
  684. if (is_register())
  685. return cpu.const_gpr16(reg16());
  686. auto address = resolve(cpu, insn);
  687. return cpu.read_memory16(address);
  688. }
  689. template<typename CPU>
  690. ALWAYS_INLINE typename CPU::ValueWithShadowType32 MemoryOrRegisterReference::read32(CPU& cpu, Instruction const& insn)
  691. {
  692. if (is_register())
  693. return cpu.const_gpr32(reg32());
  694. auto address = resolve(cpu, insn);
  695. return cpu.read_memory32(address);
  696. }
  697. template<typename CPU>
  698. ALWAYS_INLINE typename CPU::ValueWithShadowType64 MemoryOrRegisterReference::read64(CPU& cpu, Instruction const& insn)
  699. {
  700. VERIFY(!is_register());
  701. auto address = resolve(cpu, insn);
  702. return cpu.read_memory64(address);
  703. }
  704. template<typename CPU>
  705. ALWAYS_INLINE typename CPU::ValueWithShadowType128 MemoryOrRegisterReference::read128(CPU& cpu, Instruction const& insn)
  706. {
  707. VERIFY(!is_register());
  708. auto address = resolve(cpu, insn);
  709. return cpu.read_memory128(address);
  710. }
  711. template<typename CPU>
  712. ALWAYS_INLINE typename CPU::ValueWithShadowType256 MemoryOrRegisterReference::read256(CPU& cpu, Instruction const& insn)
  713. {
  714. VERIFY(!is_register());
  715. auto address = resolve(cpu, insn);
  716. return cpu.read_memory256(address);
  717. }
  718. template<typename InstructionStreamType>
  719. ALWAYS_INLINE Instruction Instruction::from_stream(InstructionStreamType& stream, bool o32, bool a32)
  720. {
  721. return Instruction(stream, o32, a32);
  722. }
  723. ALWAYS_INLINE unsigned Instruction::length() const
  724. {
  725. unsigned len = 1;
  726. if (has_sub_op())
  727. ++len;
  728. if (m_descriptor->has_rm) {
  729. ++len;
  730. if (m_modrm.m_has_sib)
  731. ++len;
  732. len += m_modrm.m_displacement_bytes;
  733. }
  734. len += m_extra_bytes;
  735. return len;
  736. }
  737. ALWAYS_INLINE Optional<SegmentRegister> to_segment_prefix(u8 op)
  738. {
  739. switch (op) {
  740. case 0x26:
  741. return SegmentRegister::ES;
  742. case 0x2e:
  743. return SegmentRegister::CS;
  744. case 0x36:
  745. return SegmentRegister::SS;
  746. case 0x3e:
  747. return SegmentRegister::DS;
  748. case 0x64:
  749. return SegmentRegister::FS;
  750. case 0x65:
  751. return SegmentRegister::GS;
  752. default:
  753. return {};
  754. }
  755. }
  // Decodes one instruction from `stream`.
  // `o32` / `a32` are the default operand / address sizes (true = 32-bit);
  // the 0x66 / 0x67 prefixes flip them for this instruction only.
  // If the opcode is not understood, a warning is printed and m_descriptor is
  // reset to nullptr, so is_valid() returns false afterwards.
  template<typename InstructionStreamType>
  ALWAYS_INLINE Instruction::Instruction(InstructionStreamType& stream, bool o32, bool a32)
      : m_a32(a32)
      , m_o32(o32)
  {
      // Consume prefix bytes; the first byte that is not a recognised prefix
      // is the opcode. Every `continue` runs the loop increment, so
      // prefix_bytes ends up as the number of prefix bytes consumed.
      u8 prefix_bytes = 0;
      for (;; ++prefix_bytes) {
          u8 opbyte = stream.read8();
          if (opbyte == Prefix::OperandSizeOverride) {
              m_o32 = !o32;
              m_has_operand_size_override_prefix = true;
              continue;
          }
          if (opbyte == Prefix::AddressSizeOverride) {
              m_a32 = !a32;
              m_has_address_size_override_prefix = true;
              continue;
          }
          if (opbyte == Prefix::REPZ || opbyte == Prefix::REPNZ) {
              m_rep_prefix = opbyte;
              continue;
          }
          if (opbyte == Prefix::LOCK) {
              m_has_lock_prefix = true;
              continue;
          }
          auto segment_prefix = to_segment_prefix(opbyte);
          if (segment_prefix.has_value()) {
              m_segment_prefix = (u8)segment_prefix.value();
              continue;
          }
          m_op = opbyte;
          break;
      }

      // 0x0f introduces a two-byte opcode; the second byte indexes the 0f
      // tables. The 16/32 variant is chosen by the effective operand size.
      if (m_op == 0x0f) {
          m_sub_op = stream.read8();
          m_descriptor = m_o32 ? &s_0f_table32[m_sub_op] : &s_0f_table16[m_sub_op];
      } else {
          m_descriptor = m_o32 ? &s_table32[m_op] : &s_table16[m_op];
      }

      // __SSE is a placeholder: the real descriptor depends on which prefix
      // preceded the opcode.
      if (m_descriptor->format == __SSE) {
          if (m_rep_prefix == 0xF3) {
              m_descriptor = &s_sse_table_f3[m_sub_op];
          } else if (m_has_operand_size_override_prefix) {
              // This was unset while parsing the prefix initially
              m_o32 = true;
              m_descriptor = &s_sse_table_66[m_sub_op];
          } else {
              m_descriptor = &s_sse_table_np[m_sub_op];
          }
      }

      if (m_descriptor->has_rm) {
          // Consume ModR/M (may include SIB and displacement.)
          m_modrm.decode(stream, m_a32);
          m_register_index = m_modrm.reg();
      } else {
          // Without a ModR/M byte the register number is taken from the low
          // three bits of the (sub-)opcode.
          if (has_sub_op())
              m_register_index = m_sub_op & 7;
          else
              m_register_index = m_op & 7;
      }

      // Slash-encoded opcodes: pick the sub-descriptor by the REG field and,
      // if that one has its own sub-table and MOD == 0b11, by the R/M field.
      bool has_slash = m_descriptor->format == MultibyteWithSlash;
      if (has_slash) {
          m_descriptor = &m_descriptor->slashes[slash()];
          if ((modrm_byte() & 0xc0) == 0xc0 && m_descriptor->slashes)
              m_descriptor = &m_descriptor->slashes[modrm_byte() & 7];
      }

      // A descriptor without a mnemonic is treated as an unknown instruction.
      // NOTE(review): this early return leaves m_extra_bytes at 0.
      if (!m_descriptor->mnemonic) {
          if (has_sub_op()) {
              if (has_slash)
                  warnln("Instruction {:02X} {:02X} /{} not understood", m_op, m_sub_op, slash());
              else
                  warnln("Instruction {:02X} {:02X} not understood", m_op, m_sub_op);
          } else {
              if (has_slash)
                  warnln("Instruction {:02X} /{} not understood", m_op, slash());
              else
                  warnln("Instruction {:02X} not understood", m_op);
          }
          m_descriptor = nullptr;
          return;
      }

      auto imm1_bytes = m_descriptor->imm1_bytes_for_address_size(m_a32);
      auto imm2_bytes = m_descriptor->imm2_bytes_for_address_size(m_a32);

      // Consume immediates if present. The second immediate is read from the
      // stream before the first one.
      switch (imm2_bytes) {
      case 1:
          m_imm2 = stream.read8();
          break;
      case 2:
          m_imm2 = stream.read16();
          break;
      case 4:
          m_imm2 = stream.read32();
          break;
      default:
          VERIFY(imm2_bytes == 0);
          break;
      }

      switch (imm1_bytes) {
      case 1:
          m_imm1 = stream.read8();
          break;
      case 2:
          m_imm1 = stream.read16();
          break;
      case 4:
          m_imm1 = stream.read32();
          break;
      default:
          VERIFY(imm1_bytes == 0);
          break;
      }

      // Everything length() does not derive from the opcode / ModR/M itself.
      m_extra_bytes = prefix_bytes + imm1_bytes + imm2_bytes;

  #ifdef DISALLOW_INVALID_LOCK_PREFIX
      // Optionally reject LOCK on instructions whose descriptor forbids it.
      if (m_has_lock_prefix && !m_descriptor->lock_prefix_allowed) {
          warnln("Instruction not allowed with LOCK prefix, this will raise #UD");
          m_descriptor = nullptr;
      }
  #endif
  }
  877. template<typename InstructionStreamType>
  878. ALWAYS_INLINE void MemoryOrRegisterReference::decode(InstructionStreamType& stream, bool a32)
  879. {
  880. m_rm_byte = stream.read8();
  881. if (a32) {
  882. decode32(stream);
  883. switch (m_displacement_bytes) {
  884. case 0:
  885. break;
  886. case 1:
  887. m_displacement32 = sign_extended_to<u32>(stream.read8());
  888. break;
  889. case 4:
  890. m_displacement32 = stream.read32();
  891. break;
  892. default:
  893. VERIFY_NOT_REACHED();
  894. break;
  895. }
  896. } else {
  897. decode16(stream);
  898. switch (m_displacement_bytes) {
  899. case 0:
  900. break;
  901. case 1:
  902. m_displacement16 = sign_extended_to<u16>(stream.read8());
  903. break;
  904. case 2:
  905. m_displacement16 = stream.read16();
  906. break;
  907. default:
  908. VERIFY_NOT_REACHED();
  909. break;
  910. }
  911. }
  912. }
  913. template<typename InstructionStreamType>
  914. ALWAYS_INLINE void MemoryOrRegisterReference::decode16(InstructionStreamType&)
  915. {
  916. switch (mod()) {
  917. case 0b00:
  918. if (rm() == 6)
  919. m_displacement_bytes = 2;
  920. else
  921. VERIFY(m_displacement_bytes == 0);
  922. break;
  923. case 0b01:
  924. m_displacement_bytes = 1;
  925. break;
  926. case 0b10:
  927. m_displacement_bytes = 2;
  928. break;
  929. case 0b11:
  930. m_register_index = rm();
  931. break;
  932. }
  933. }
  934. template<typename InstructionStreamType>
  935. ALWAYS_INLINE void MemoryOrRegisterReference::decode32(InstructionStreamType& stream)
  936. {
  937. switch (mod()) {
  938. case 0b00:
  939. if (rm() == 5)
  940. m_displacement_bytes = 4;
  941. break;
  942. case 0b01:
  943. m_displacement_bytes = 1;
  944. break;
  945. case 0b10:
  946. m_displacement_bytes = 4;
  947. break;
  948. case 0b11:
  949. m_register_index = rm();
  950. return;
  951. }
  952. m_has_sib = rm() == 4;
  953. if (m_has_sib) {
  954. m_sib = stream.read8();
  955. if ((m_sib & 0x07) == 5) {
  956. switch (mod()) {
  957. case 0b00:
  958. m_displacement_bytes = 4;
  959. break;
  960. case 0b01:
  961. m_displacement_bytes = 1;
  962. break;
  963. case 0b10:
  964. m_displacement_bytes = 4;
  965. break;
  966. default:
  967. VERIFY_NOT_REACHED();
  968. break;
  969. }
  970. }
  971. }
  972. }
  973. template<typename CPU>
  974. ALWAYS_INLINE LogicalAddress MemoryOrRegisterReference::resolve(const CPU& cpu, Instruction const& insn)
  975. {
  976. if (insn.a32())
  977. return resolve32(cpu, insn.segment_prefix());
  978. return resolve16(cpu, insn.segment_prefix());
  979. }
  980. }