1 // SPDX-License-Identifier: GPL-2.0-only
2 /******************************************************************************
5 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
7 * Copyright (c) 2005 Keir Fraser
9 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
10 * privileged instructions:
12 * Copyright (C) 2006 Qumranet
13 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
15 * Avi Kivity <avi@qumranet.com>
16 * Yaniv Kamay <yaniv@qumranet.com>
18 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
21 #include <linux/kvm_host.h>
22 #include "kvm_cache_regs.h"
23 #include "kvm_emulate.h"
24 #include <linux/stringify.h>
25 #include <asm/debugreg.h>
26 #include <asm/nospec-branch.h>
37 #define OpImplicit 1ull /* No generic decode */
38 #define OpReg 2ull /* Register */
39 #define OpMem 3ull /* Memory */
40 #define OpAcc 4ull /* Accumulator: AL/AX/EAX/RAX */
41 #define OpDI 5ull /* ES:DI/EDI/RDI */
42 #define OpMem64 6ull /* Memory, 64-bit */
43 #define OpImmUByte 7ull /* Zero-extended 8-bit immediate */
44 #define OpDX 8ull /* DX register */
45 #define OpCL 9ull /* CL register (for shifts) */
46 #define OpImmByte 10ull /* 8-bit sign extended immediate */
47 #define OpOne 11ull /* Implied 1 */
48 #define OpImm 12ull /* Sign extended up to 32-bit immediate */
49 #define OpMem16 13ull /* Memory operand (16-bit). */
50 #define OpMem32 14ull /* Memory operand (32-bit). */
51 #define OpImmU 15ull /* Immediate operand, zero extended */
52 #define OpSI 16ull /* SI/ESI/RSI */
53 #define OpImmFAddr 17ull /* Immediate far address */
54 #define OpMemFAddr 18ull /* Far address in memory */
55 #define OpImmU16 19ull /* Immediate operand, 16 bits, zero extended */
56 #define OpES 20ull /* ES */
57 #define OpCS 21ull /* CS */
58 #define OpSS 22ull /* SS */
59 #define OpDS 23ull /* DS */
60 #define OpFS 24ull /* FS */
61 #define OpGS 25ull /* GS */
62 #define OpMem8 26ull /* 8-bit zero extended memory operand */
63 #define OpImm64 27ull /* Sign extended 16/32/64-bit immediate */
64 #define OpXLat 28ull /* memory at BX/EBX/RBX + zero-extended AL */
65 #define OpAccLo 29ull /* Low part of extended acc (AX/AX/EAX/RAX) */
66 #define OpAccHi 30ull /* High part of extended acc (-/DX/EDX/RDX) */
68 #define OpBits 5 /* Width of operand field */
69 #define OpMask ((1ull << OpBits) - 1)
72 * Opcode effective-address decode tables.
73 * Note that we only emulate instructions that have at least one memory
74 * operand (excluding implicit stack references). We assume that stack
75 * references and instruction fetches will never occur in special memory
76 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need not be handled.
80 /* Operand sizes: 8-bit operands or specified/overridden size. */
81 #define ByteOp (1<<0) /* 8-bit operands. */
82 /* Destination operand type. */
84 #define ImplicitOps (OpImplicit << DstShift)
85 #define DstReg (OpReg << DstShift)
86 #define DstMem (OpMem << DstShift)
87 #define DstAcc (OpAcc << DstShift)
88 #define DstDI (OpDI << DstShift)
89 #define DstMem64 (OpMem64 << DstShift)
90 #define DstMem16 (OpMem16 << DstShift)
91 #define DstImmUByte (OpImmUByte << DstShift)
92 #define DstDX (OpDX << DstShift)
93 #define DstAccLo (OpAccLo << DstShift)
94 #define DstMask (OpMask << DstShift)
95 /* Source operand type. */
97 #define SrcNone (OpNone << SrcShift)
98 #define SrcReg (OpReg << SrcShift)
99 #define SrcMem (OpMem << SrcShift)
100 #define SrcMem16 (OpMem16 << SrcShift)
101 #define SrcMem32 (OpMem32 << SrcShift)
102 #define SrcImm (OpImm << SrcShift)
103 #define SrcImmByte (OpImmByte << SrcShift)
104 #define SrcOne (OpOne << SrcShift)
105 #define SrcImmUByte (OpImmUByte << SrcShift)
106 #define SrcImmU (OpImmU << SrcShift)
107 #define SrcSI (OpSI << SrcShift)
108 #define SrcXLat (OpXLat << SrcShift)
109 #define SrcImmFAddr (OpImmFAddr << SrcShift)
110 #define SrcMemFAddr (OpMemFAddr << SrcShift)
111 #define SrcAcc (OpAcc << SrcShift)
112 #define SrcImmU16 (OpImmU16 << SrcShift)
113 #define SrcImm64 (OpImm64 << SrcShift)
114 #define SrcDX (OpDX << SrcShift)
115 #define SrcMem8 (OpMem8 << SrcShift)
116 #define SrcAccHi (OpAccHi << SrcShift)
117 #define SrcMask (OpMask << SrcShift)
118 #define BitOp (1<<11)
119 #define MemAbs (1<<12) /* Memory operand is absolute displacement */
120 #define String (1<<13) /* String instruction (rep capable) */
121 #define Stack (1<<14) /* Stack instruction (push/pop) */
122 #define GroupMask (7<<15) /* Opcode uses one of the group mechanisms */
123 #define Group (1<<15) /* Bits 3:5 of modrm byte extend opcode */
124 #define GroupDual (2<<15) /* Alternate decoding of mod == 3 */
125 #define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */
126 #define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */
127 #define Escape (5<<15) /* Escape to coprocessor instruction */
128 #define InstrDual (6<<15) /* Alternate instruction decoding of mod == 3 */
129 #define ModeDual (7<<15) /* Different instruction for 32/64 bit */
130 #define Sse (1<<18) /* SSE Vector instruction */
131 /* Generic ModRM decode. */
132 #define ModRM (1<<19)
133 /* Destination is only written; never read. */
136 #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */
137 #define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */
138 #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
139 #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */
140 #define Undefined (1<<25) /* No Such Instruction */
141 #define Lock (1<<26) /* lock prefix is allowed for the instruction */
142 #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */
144 #define PageTable (1 << 29) /* instruction used to write page table */
145 #define NotImpl (1 << 30) /* instruction is not implemented */
146 /* Source 2 operand type */
147 #define Src2Shift (31)
148 #define Src2None (OpNone << Src2Shift)
149 #define Src2Mem (OpMem << Src2Shift)
150 #define Src2CL (OpCL << Src2Shift)
151 #define Src2ImmByte (OpImmByte << Src2Shift)
152 #define Src2One (OpOne << Src2Shift)
153 #define Src2Imm (OpImm << Src2Shift)
154 #define Src2ES (OpES << Src2Shift)
155 #define Src2CS (OpCS << Src2Shift)
156 #define Src2SS (OpSS << Src2Shift)
157 #define Src2DS (OpDS << Src2Shift)
158 #define Src2FS (OpFS << Src2Shift)
159 #define Src2GS (OpGS << Src2Shift)
160 #define Src2Mask (OpMask << Src2Shift)
161 #define Mmx ((u64)1 << 40) /* MMX Vector instruction */
162 #define AlignMask ((u64)7 << 41)
163 #define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */
164 #define Unaligned ((u64)2 << 41) /* Explicitly unaligned (e.g. MOVDQU) */
165 #define Avx ((u64)3 << 41) /* Advanced Vector Extensions */
166 #define Aligned16 ((u64)4 << 41) /* Aligned to 16 byte boundary (e.g. FXSAVE) */
167 #define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */
168 #define NoWrite ((u64)1 << 45) /* No writeback */
169 #define SrcWrite ((u64)1 << 46) /* Write back src operand */
170 #define NoMod ((u64)1 << 47) /* Mod field is ignored */
171 #define Intercept ((u64)1 << 48) /* Has valid intercept field */
172 #define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */
173 #define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */
174 #define NearBranch ((u64)1 << 52) /* Near branches */
175 #define No16 ((u64)1 << 53) /* No 16 bit operand */
176 #define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */
177 #define TwoMemOp ((u64)1 << 55) /* Instruction has two memory operands */
179 #define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
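/*
 * Illustration: a decode-table entry such as DstReg | SrcMem | ModRM
 * packs the destination type (OpReg) and source type (OpMem) into
 * their 5-bit operand fields of the u64 descriptor, plus the ModRM
 * flag; the decoder recovers each operand type by shifting the
 * descriptor and masking with OpMask.
 */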
181 #define X2(x...) x, x
182 #define X3(x...) X2(x), x
183 #define X4(x...) X2(x), X2(x)
184 #define X5(x...) X4(x), x
185 #define X6(x...) X4(x), X2(x)
186 #define X7(x...) X4(x), X3(x)
187 #define X8(x...) X4(x), X4(x)
188 #define X16(x...) X8(x), X8(x)
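/*
 * The X<n> macros replicate a table entry <n> times, e.g. X16(x)
 * expands to sixteen copies of x; they keep repetitive runs in the
 * opcode tables compact.
 */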
190 #define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
191 #define FASTOP_SIZE 8
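/*
 * The FASTOP* macros below emit one FASTOP_SIZE-aligned stub per
 * operand size (byte, word, long and, on 64-bit, quad; __FOPNOP pads
 * sizes an op does not implement), so the stub for a given size is
 * found by pointer arithmetic on em_<op>, as described in the fastop
 * comment further down.
 */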
197 int (*execute)(struct x86_emulate_ctxt *ctxt);
198 const struct opcode *group;
199 const struct group_dual *gdual;
200 const struct gprefix *gprefix;
201 const struct escape *esc;
202 const struct instr_dual *idual;
203 const struct mode_dual *mdual;
204 void (*fastop)(struct fastop *fake);
206 int (*check_perm)(struct x86_emulate_ctxt *ctxt);
210 struct opcode mod012[8];
211 struct opcode mod3[8];
215 struct opcode pfx_no;
216 struct opcode pfx_66;
217 struct opcode pfx_f2;
218 struct opcode pfx_f3;
223 struct opcode high[64];
227 struct opcode mod012;
232 struct opcode mode32;
233 struct opcode mode64;
236 #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
238 enum x86_transfer_type {
240 X86_TRANSFER_CALL_JMP,
242 X86_TRANSFER_TASK_SWITCH,
245 static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
247 if (!(ctxt->regs_valid & (1 << nr))) {
248 ctxt->regs_valid |= 1 << nr;
249 ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
251 return ctxt->_regs[nr];
254 static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
256 ctxt->regs_valid |= 1 << nr;
257 ctxt->regs_dirty |= 1 << nr;
258 return &ctxt->_regs[nr];
261 static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
264 return reg_write(ctxt, nr);
267 static void writeback_registers(struct x86_emulate_ctxt *ctxt)
271 for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16)
272 ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
275 static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
277 ctxt->regs_dirty = 0;
278 ctxt->regs_valid = 0;
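/*
 * Together these helpers form a lazy GPR cache: reg_read() pulls a
 * register into _regs[] on first use and marks it valid, reg_write()
 * and reg_rmw() additionally mark it dirty, and writeback_registers()
 * flushes only the dirty entries back through ->write_gpr().
 */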
282 * These EFLAGS bits are restored from the saved value during emulation, and
283 * any changes are written back to the saved value after emulation.
285 #define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
286 X86_EFLAGS_PF|X86_EFLAGS_CF)
295 * fastop functions have a special calling convention:
 * dst: rax (in/out), src: rdx (in/out), src2: rcx (in)
300 * flags: rflags (in/out)
301 * ex: rsi (in:fastop pointer, out:zero if exception)
303 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
304 * different operand sizes can be reached by calculation, rather than a jump
305 * table (which would be bigger than the code).
307 static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
309 #define __FOP_FUNC(name) \
310 ".align " __stringify(FASTOP_SIZE) " \n\t" \
311 ".type " name ", @function \n\t" \
314 #define FOP_FUNC(name) \
317 #define __FOP_RET(name) \
319 ".size " name ", .-" name "\n\t"
321 #define FOP_RET(name) \
324 #define FOP_START(op) \
325 extern void em_##op(struct fastop *fake); \
326 asm(".pushsection .text, \"ax\" \n\t" \
327 ".global em_" #op " \n\t" \
328 ".align " __stringify(FASTOP_SIZE) " \n\t" \
334 #define __FOPNOP(name) \
339 __FOPNOP(__stringify(__UNIQUE_ID(nop)))
341 #define FOP1E(op, dst) \
342 __FOP_FUNC(#op "_" #dst) \
343 "10: " #op " %" #dst " \n\t" \
344 __FOP_RET(#op "_" #dst)
346 #define FOP1EEX(op, dst) \
347 FOP1E(op, dst) _ASM_EXTABLE(10b, kvm_fastop_exception)
349 #define FASTOP1(op) \
354 ON64(FOP1E(op##q, rax)) \
357 /* 1-operand, using src2 (for MUL/DIV r/m) */
358 #define FASTOP1SRC2(op, name) \
363 ON64(FOP1E(op, rcx)) \
366 /* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */
367 #define FASTOP1SRC2EX(op, name) \
372 ON64(FOP1EEX(op, rcx)) \
375 #define FOP2E(op, dst, src) \
376 __FOP_FUNC(#op "_" #dst "_" #src) \
377 #op " %" #src ", %" #dst " \n\t" \
378 __FOP_RET(#op "_" #dst "_" #src)
380 #define FASTOP2(op) \
382 FOP2E(op##b, al, dl) \
383 FOP2E(op##w, ax, dx) \
384 FOP2E(op##l, eax, edx) \
385 ON64(FOP2E(op##q, rax, rdx)) \
388 /* 2-operand, word only */
389 #define FASTOP2W(op) \
392 FOP2E(op##w, ax, dx) \
393 FOP2E(op##l, eax, edx) \
394 ON64(FOP2E(op##q, rax, rdx)) \
397 /* 2-operand, src is CL */
398 #define FASTOP2CL(op) \
400 FOP2E(op##b, al, cl) \
401 FOP2E(op##w, ax, cl) \
402 FOP2E(op##l, eax, cl) \
403 ON64(FOP2E(op##q, rax, cl)) \
406 /* 2-operand, src and dest are reversed */
407 #define FASTOP2R(op, name) \
409 FOP2E(op##b, dl, al) \
410 FOP2E(op##w, dx, ax) \
411 FOP2E(op##l, edx, eax) \
412 ON64(FOP2E(op##q, rdx, rax)) \
415 #define FOP3E(op, dst, src, src2) \
416 __FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
417 #op " %" #src2 ", %" #src ", %" #dst " \n\t"\
418 __FOP_RET(#op "_" #dst "_" #src "_" #src2)
420 /* 3-operand, word-only, src2=cl */
421 #define FASTOP3WCL(op) \
424 FOP3E(op##w, ax, dx, cl) \
425 FOP3E(op##l, eax, edx, cl) \
426 ON64(FOP3E(op##q, rax, rdx, cl)) \
429 /* Special case for SETcc - 1 instruction per cc */
430 #define FOP_SETCC(op) \
432 ".type " #op ", @function \n\t" \
437 asm(".pushsection .fixup, \"ax\"\n"
438 ".global kvm_fastop_exception \n"
439 "kvm_fastop_exception: xor %esi, %esi; ret\n"
463 "pushf; sbb %al, %al; popf \n\t"
468 * XXX: inoutclob user must know where the argument is being expanded.
469 * Relying on CONFIG_CC_HAS_ASM_GOTO would allow us to remove _fault.
471 #define asm_safe(insn, inoutclob...) \
475 asm volatile("1:" insn "\n" \
477 ".pushsection .fixup, \"ax\"\n" \
478 "3: movl $1, %[_fault]\n" \
481 _ASM_EXTABLE(1b, 3b) \
482 : [_fault] "+qm"(_fault) inoutclob ); \
484 _fault ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; \
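/*
 * Sketch of intended use (the callers live elsewhere in this file):
 *   rc = asm_safe("fwait");
 * A fault raised by the wrapped instruction is absorbed by the
 * exception-table fixup and reported as X86EMUL_UNHANDLEABLE.
 */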
487 static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
488 enum x86_intercept intercept,
489 enum x86_intercept_stage stage)
491 struct x86_instruction_info info = {
492 .intercept = intercept,
493 .rep_prefix = ctxt->rep_prefix,
494 .modrm_mod = ctxt->modrm_mod,
495 .modrm_reg = ctxt->modrm_reg,
496 .modrm_rm = ctxt->modrm_rm,
497 .src_val = ctxt->src.val64,
498 .dst_val = ctxt->dst.val64,
499 .src_bytes = ctxt->src.bytes,
500 .dst_bytes = ctxt->dst.bytes,
501 .ad_bytes = ctxt->ad_bytes,
502 .next_rip = ctxt->eip,
505 return ctxt->ops->intercept(ctxt, &info, stage);
508 static void assign_masked(ulong *dest, ulong src, ulong mask)
510 *dest = (*dest & ~mask) | (src & mask);
513 static void assign_register(unsigned long *reg, u64 val, int bytes)
515 /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
518 *(u8 *)reg = (u8)val;
521 *(u16 *)reg = (u16)val;
525 break; /* 64b: zero-extend */
532 static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
534 return (1UL << (ctxt->ad_bytes << 3)) - 1;
537 static ulong stack_mask(struct x86_emulate_ctxt *ctxt)
540 struct desc_struct ss;
542 if (ctxt->mode == X86EMUL_MODE_PROT64)
544 ctxt->ops->get_segment(ctxt, &sel, &ss, NULL, VCPU_SREG_SS);
545 return ~0U >> ((ss.d ^ 1) * 16); /* d=0: 0xffff; d=1: 0xffffffff */
548 static int stack_size(struct x86_emulate_ctxt *ctxt)
550 return (__fls(stack_mask(ctxt)) + 1) >> 3;
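/*
 * In 64-bit mode the stack mask is all ones; otherwise it is 0xffff or
 * 0xffffffff depending on SS.D (see the comment above), so stack_size()
 * evaluates to 8, 4 or 2 bytes.
 */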
553 /* Access/update address held in a register, based on addressing mode. */
554 static inline unsigned long
555 address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
557 if (ctxt->ad_bytes == sizeof(unsigned long))
560 return reg & ad_mask(ctxt);
563 static inline unsigned long
564 register_address(struct x86_emulate_ctxt *ctxt, int reg)
566 return address_mask(ctxt, reg_read(ctxt, reg));
569 static void masked_increment(ulong *reg, ulong mask, int inc)
571 assign_masked(reg, *reg + inc, mask);
575 register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
577 ulong *preg = reg_rmw(ctxt, reg);
579 assign_register(preg, *preg + inc, ctxt->ad_bytes);
582 static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
584 masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
587 static u32 desc_limit_scaled(struct desc_struct *desc)
589 u32 limit = get_desc_limit(desc);
591 return desc->g ? (limit << 12) | 0xfff : limit;
594 static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
596 if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
599 return ctxt->ops->get_cached_segment_base(ctxt, seg);
602 static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
603 u32 error, bool valid)
606 ctxt->exception.vector = vec;
607 ctxt->exception.error_code = error;
608 ctxt->exception.error_code_valid = valid;
609 return X86EMUL_PROPAGATE_FAULT;
612 static int emulate_db(struct x86_emulate_ctxt *ctxt)
614 return emulate_exception(ctxt, DB_VECTOR, 0, false);
617 static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
619 return emulate_exception(ctxt, GP_VECTOR, err, true);
622 static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
624 return emulate_exception(ctxt, SS_VECTOR, err, true);
627 static int emulate_ud(struct x86_emulate_ctxt *ctxt)
629 return emulate_exception(ctxt, UD_VECTOR, 0, false);
632 static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
634 return emulate_exception(ctxt, TS_VECTOR, err, true);
637 static int emulate_de(struct x86_emulate_ctxt *ctxt)
639 return emulate_exception(ctxt, DE_VECTOR, 0, false);
642 static int emulate_nm(struct x86_emulate_ctxt *ctxt)
644 return emulate_exception(ctxt, NM_VECTOR, 0, false);
647 static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
650 struct desc_struct desc;
652 ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
656 static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
661 struct desc_struct desc;
663 ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
664 ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
667 static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt)
669 return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48;
672 static inline bool emul_is_noncanonical_address(u64 la,
673 struct x86_emulate_ctxt *ctxt)
675 return get_canonical(la, ctxt_virt_addr_bits(ctxt)) != la;
679 * x86 defines three classes of vector instructions: explicitly
680 * aligned, explicitly unaligned, and the rest, which change behaviour
681 * depending on whether they're AVX encoded or not.
683 * Also included is CMPXCHG16B which is not a vector instruction, yet it is
684 * subject to the same check. FXSAVE and FXRSTOR are checked here too as their
685 * 512 bytes of data must be aligned to a 16 byte boundary.
687 static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
689 u64 alignment = ctxt->d & AlignMask;
691 if (likely(size < 16))
706 static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
707 struct segmented_address addr,
708 unsigned *max_size, unsigned size,
709 bool write, bool fetch,
710 enum x86emul_mode mode, ulong *linear)
712 struct desc_struct desc;
719 la = seg_base(ctxt, addr.seg) + addr.ea;
722 case X86EMUL_MODE_PROT64:
724 va_bits = ctxt_virt_addr_bits(ctxt);
725 if (get_canonical(la, va_bits) != la)
728 *max_size = min_t(u64, ~0u, (1ull << va_bits) - la);
729 if (size > *max_size)
733 *linear = la = (u32)la;
734 usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
738 /* code segment in protected mode or read-only data segment */
739 if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
740 || !(desc.type & 2)) && write)
742 /* unreadable code segment */
743 if (!fetch && (desc.type & 8) && !(desc.type & 2))
745 lim = desc_limit_scaled(&desc);
746 if (!(desc.type & 8) && (desc.type & 4)) {
747 /* expand-down segment */
750 lim = desc.d ? 0xffffffff : 0xffff;
754 if (lim == 0xffffffff)
757 *max_size = (u64)lim + 1 - addr.ea;
758 if (size > *max_size)
763 if (la & (insn_alignment(ctxt, size) - 1))
764 return emulate_gp(ctxt, 0);
765 return X86EMUL_CONTINUE;
767 if (addr.seg == VCPU_SREG_SS)
768 return emulate_ss(ctxt, 0);
770 return emulate_gp(ctxt, 0);
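/*
 * In short, __linearize() adds the segment base and then enforces
 * canonicality (64-bit mode) or segment type and limit checks (other
 * modes), plus any alignment the instruction requires.  Violations
 * raise #SS(0) for SS-relative accesses and #GP(0) otherwise, and
 * *max_size reports how many bytes are accessible from addr.ea.
 */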
773 static int linearize(struct x86_emulate_ctxt *ctxt,
774 struct segmented_address addr,
775 unsigned size, bool write,
779 return __linearize(ctxt, addr, &max_size, size, write, false,
783 static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
784 enum x86emul_mode mode)
789 struct segmented_address addr = { .seg = VCPU_SREG_CS,
792 if (ctxt->op_bytes != sizeof(unsigned long))
793 addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
794 rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
795 if (rc == X86EMUL_CONTINUE)
796 ctxt->_eip = addr.ea;
800 static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
802 return assign_eip(ctxt, dst, ctxt->mode);
805 static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
806 const struct desc_struct *cs_desc)
808 enum x86emul_mode mode = ctxt->mode;
812 if (ctxt->mode >= X86EMUL_MODE_PROT16) {
816 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
818 mode = X86EMUL_MODE_PROT64;
820 mode = X86EMUL_MODE_PROT32; /* temporary value */
823 if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
824 mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
825 rc = assign_eip(ctxt, dst, mode);
826 if (rc == X86EMUL_CONTINUE)
831 static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
833 return assign_eip_near(ctxt, ctxt->_eip + rel);
836 static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear,
837 void *data, unsigned size)
839 return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, true);
842 static int linear_write_system(struct x86_emulate_ctxt *ctxt,
843 ulong linear, void *data,
846 return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, true);
849 static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
850 struct segmented_address addr,
857 rc = linearize(ctxt, addr, size, false, &linear);
858 if (rc != X86EMUL_CONTINUE)
860 return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, false);
863 static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
864 struct segmented_address addr,
871 rc = linearize(ctxt, addr, size, true, &linear);
872 if (rc != X86EMUL_CONTINUE)
874 return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, false);
878 * Prefetch the remaining bytes of the instruction without crossing a page
879 * boundary if they are not in fetch_cache yet.
881 static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
884 unsigned size, max_size;
885 unsigned long linear;
886 int cur_size = ctxt->fetch.end - ctxt->fetch.data;
887 struct segmented_address addr = { .seg = VCPU_SREG_CS,
888 .ea = ctxt->eip + cur_size };
891 * We do not know exactly how many bytes will be needed, and
892 * __linearize is expensive, so fetch as much as possible. We
893 * just have to avoid going beyond the 15 byte limit, the end
894 * of the segment, or the end of the page.
896 * __linearize is called with size 0 so that it does not do any
897 * boundary check itself. Instead, we use max_size to check against op_size.
900 rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
902 if (unlikely(rc != X86EMUL_CONTINUE))
905 size = min_t(unsigned, 15UL ^ cur_size, max_size);
906 size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));
909 * One instruction can only straddle two pages,
910 * and one has been loaded at the beginning of
911 * x86_decode_insn. So, if there still are not
912 * enough bytes, we must have hit the 15-byte limit.
914 if (unlikely(size < op_size))
915 return emulate_gp(ctxt, 0);
917 rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
918 size, &ctxt->exception);
919 if (unlikely(rc != X86EMUL_CONTINUE))
921 ctxt->fetch.end += size;
922 return X86EMUL_CONTINUE;
925 static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
928 unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;
930 if (unlikely(done_size < size))
931 return __do_insn_fetch_bytes(ctxt, size - done_size);
933 return X86EMUL_CONTINUE;
936 /* Fetch next part of the instruction being emulated. */
937 #define insn_fetch(_type, _ctxt) \
940 rc = do_insn_fetch_bytes(_ctxt, sizeof(_type)); \
941 if (rc != X86EMUL_CONTINUE) \
943 ctxt->_eip += sizeof(_type); \
944 memcpy(&_x, ctxt->fetch.ptr, sizeof(_type)); \
945 ctxt->fetch.ptr += sizeof(_type); \
949 #define insn_fetch_arr(_arr, _size, _ctxt) \
951 rc = do_insn_fetch_bytes(_ctxt, _size); \
952 if (rc != X86EMUL_CONTINUE) \
954 ctxt->_eip += (_size); \
955 memcpy(_arr, ctxt->fetch.ptr, _size); \
956 ctxt->fetch.ptr += (_size); \
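/*
 * Both macros assume a local 'rc' in the caller and bail out to the
 * caller's error label when do_insn_fetch_bytes() fails; typical use
 * looks like modrm_ea += insn_fetch(s8, ctxt) in decode_modrm() below.
 */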
960 * Given the 'reg' portion of a ModRM byte, and a register block, return a
961 * pointer into the block that addresses the relevant register.
962 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
964 static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
968 int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;
970 if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
971 p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
973 p = reg_rmw(ctxt, modrm_reg);
977 static int read_descriptor(struct x86_emulate_ctxt *ctxt,
978 struct segmented_address addr,
979 u16 *size, unsigned long *address, int op_bytes)
986 rc = segmented_read_std(ctxt, addr, size, 2);
987 if (rc != X86EMUL_CONTINUE)
990 rc = segmented_read_std(ctxt, addr, address, op_bytes);
1004 FASTOP1SRC2(mul, mul_ex);
1005 FASTOP1SRC2(imul, imul_ex);
1006 FASTOP1SRC2EX(div, div_ex);
1007 FASTOP1SRC2EX(idiv, idiv_ex);
1036 FASTOP2R(cmp, cmp_r);
1038 static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
1040 /* If src is zero, do not write back, but update flags */
1041 if (ctxt->src.val == 0)
1042 ctxt->dst.type = OP_NONE;
1043 return fastop(ctxt, em_bsf);
1046 static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
1049 /* If src is zero, do not write back, but update flags */
1049 if (ctxt->src.val == 0)
1050 ctxt->dst.type = OP_NONE;
1051 return fastop(ctxt, em_bsr);
1054 static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
1057 void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
1059 flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
1060 asm("push %[flags]; popf; " CALL_NOSPEC
1061 : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags));
1065 static void fetch_register_operand(struct operand *op)
1067 switch (op->bytes) {
1069 op->val = *(u8 *)op->addr.reg;
1072 op->val = *(u16 *)op->addr.reg;
1075 op->val = *(u32 *)op->addr.reg;
1078 op->val = *(u64 *)op->addr.reg;
1083 static int em_fninit(struct x86_emulate_ctxt *ctxt)
1085 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1086 return emulate_nm(ctxt);
1089 asm volatile("fninit");
1091 return X86EMUL_CONTINUE;
1094 static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
1098 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1099 return emulate_nm(ctxt);
1102 asm volatile("fnstcw %0": "+m"(fcw));
1105 ctxt->dst.val = fcw;
1107 return X86EMUL_CONTINUE;
1110 static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
1114 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1115 return emulate_nm(ctxt);
1118 asm volatile("fnstsw %0": "+m"(fsw));
1121 ctxt->dst.val = fsw;
1123 return X86EMUL_CONTINUE;
1126 static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
1129 unsigned reg = ctxt->modrm_reg;
1131 if (!(ctxt->d & ModRM))
1132 reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);
1134 if (ctxt->d & Sse) {
1138 kvm_read_sse_reg(reg, &op->vec_val);
1141 if (ctxt->d & Mmx) {
1150 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1151 op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);
1153 fetch_register_operand(op);
1154 op->orig_val = op->val;
1157 static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg)
1159 if (base_reg == VCPU_REGS_RSP || base_reg == VCPU_REGS_RBP)
1160 ctxt->modrm_seg = VCPU_SREG_SS;
1163 static int decode_modrm(struct x86_emulate_ctxt *ctxt,
1167 int index_reg, base_reg, scale;
1168 int rc = X86EMUL_CONTINUE;
1171 ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
1172 index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
1173 base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */
1175 ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
1176 ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
1177 ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
1178 ctxt->modrm_seg = VCPU_SREG_DS;
1180 if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
1182 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1183 op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
1185 if (ctxt->d & Sse) {
1188 op->addr.xmm = ctxt->modrm_rm;
1189 kvm_read_sse_reg(ctxt->modrm_rm, &op->vec_val);
1192 if (ctxt->d & Mmx) {
1195 op->addr.mm = ctxt->modrm_rm & 7;
1198 fetch_register_operand(op);
1204 if (ctxt->ad_bytes == 2) {
1205 unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
1206 unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
1207 unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
1208 unsigned di = reg_read(ctxt, VCPU_REGS_RDI);
1210 /* 16-bit ModR/M decode. */
1211 switch (ctxt->modrm_mod) {
1213 if (ctxt->modrm_rm == 6)
1214 modrm_ea += insn_fetch(u16, ctxt);
1217 modrm_ea += insn_fetch(s8, ctxt);
1220 modrm_ea += insn_fetch(u16, ctxt);
1223 switch (ctxt->modrm_rm) {
1225 modrm_ea += bx + si;
1228 modrm_ea += bx + di;
1231 modrm_ea += bp + si;
1234 modrm_ea += bp + di;
1243 if (ctxt->modrm_mod != 0)
1250 if (ctxt->modrm_rm == 2 || ctxt->modrm_rm == 3 ||
1251 (ctxt->modrm_rm == 6 && ctxt->modrm_mod != 0))
1252 ctxt->modrm_seg = VCPU_SREG_SS;
1253 modrm_ea = (u16)modrm_ea;
1255 /* 32/64-bit ModR/M decode. */
1256 if ((ctxt->modrm_rm & 7) == 4) {
1257 sib = insn_fetch(u8, ctxt);
1258 index_reg |= (sib >> 3) & 7;
1259 base_reg |= sib & 7;
1262 if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
1263 modrm_ea += insn_fetch(s32, ctxt);
1265 modrm_ea += reg_read(ctxt, base_reg);
1266 adjust_modrm_seg(ctxt, base_reg);
1267 /* Increment ESP on POP [ESP] */
1268 if ((ctxt->d & IncSP) &&
1269 base_reg == VCPU_REGS_RSP)
1270 modrm_ea += ctxt->op_bytes;
1273 modrm_ea += reg_read(ctxt, index_reg) << scale;
1274 } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
1275 modrm_ea += insn_fetch(s32, ctxt);
1276 if (ctxt->mode == X86EMUL_MODE_PROT64)
1277 ctxt->rip_relative = 1;
1279 base_reg = ctxt->modrm_rm;
1280 modrm_ea += reg_read(ctxt, base_reg);
1281 adjust_modrm_seg(ctxt, base_reg);
1283 switch (ctxt->modrm_mod) {
1285 modrm_ea += insn_fetch(s8, ctxt);
1288 modrm_ea += insn_fetch(s32, ctxt);
1292 op->addr.mem.ea = modrm_ea;
1293 if (ctxt->ad_bytes != 8)
1294 ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;
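/*
 * 32/64-bit decode in brief: a SIB byte supplies base/index/scale,
 * mod == 0 with r/m == 5 selects a bare disp32 (RIP-relative in 64-bit
 * mode, where ctxt->rip_relative defers adding the end-of-instruction
 * address until decode completes), and mod 1/2 add a disp8/disp32 to
 * the base register.
 */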
1300 static int decode_abs(struct x86_emulate_ctxt *ctxt,
1303 int rc = X86EMUL_CONTINUE;
1306 switch (ctxt->ad_bytes) {
1308 op->addr.mem.ea = insn_fetch(u16, ctxt);
1311 op->addr.mem.ea = insn_fetch(u32, ctxt);
1314 op->addr.mem.ea = insn_fetch(u64, ctxt);
1321 static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
1325 if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
1326 mask = ~((long)ctxt->dst.bytes * 8 - 1);
1328 if (ctxt->src.bytes == 2)
1329 sv = (s16)ctxt->src.val & (s16)mask;
1330 else if (ctxt->src.bytes == 4)
1331 sv = (s32)ctxt->src.val & (s32)mask;
1333 sv = (s64)ctxt->src.val & (s64)mask;
1335 ctxt->dst.addr.mem.ea = address_mask(ctxt,
1336 ctxt->dst.addr.mem.ea + (sv >> 3));
1339 /* only subword offset */
1340 ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
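/*
 * fetch_bit_operand() implements the BT/BTS/BTR/BTC addressing rule:
 * with a memory destination and a register bit index, the signed bit
 * offset is folded into the effective address in operand-sized steps,
 * and only the remaining in-operand bit offset is kept in src.val.
 */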
1343 static int read_emulated(struct x86_emulate_ctxt *ctxt,
1344 unsigned long addr, void *dest, unsigned size)
1347 struct read_cache *mc = &ctxt->mem_read;
1349 if (mc->pos < mc->end)
1352 WARN_ON((mc->end + size) >= sizeof(mc->data));
1354 rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
1356 if (rc != X86EMUL_CONTINUE)
1362 memcpy(dest, mc->data + mc->pos, size);
1364 return X86EMUL_CONTINUE;
1367 static int segmented_read(struct x86_emulate_ctxt *ctxt,
1368 struct segmented_address addr,
1375 rc = linearize(ctxt, addr, size, false, &linear);
1376 if (rc != X86EMUL_CONTINUE)
1378 return read_emulated(ctxt, linear, data, size);
1381 static int segmented_write(struct x86_emulate_ctxt *ctxt,
1382 struct segmented_address addr,
1389 rc = linearize(ctxt, addr, size, true, &linear);
1390 if (rc != X86EMUL_CONTINUE)
1392 return ctxt->ops->write_emulated(ctxt, linear, data, size,
1396 static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
1397 struct segmented_address addr,
1398 const void *orig_data, const void *data,
1404 rc = linearize(ctxt, addr, size, true, &linear);
1405 if (rc != X86EMUL_CONTINUE)
1407 return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
1408 size, &ctxt->exception);
1411 static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
1412 unsigned int size, unsigned short port,
1415 struct read_cache *rc = &ctxt->io_read;
1417 if (rc->pos == rc->end) { /* refill pio read ahead */
1418 unsigned int in_page, n;
1419 unsigned int count = ctxt->rep_prefix ?
1420 address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
1421 in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
1422 offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
1423 PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
1424 n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
1427 rc->pos = rc->end = 0;
1428 if (!ctxt->ops->pio_in_emulated(ctxt, size, port, rc->data, n))
1433 if (ctxt->rep_prefix && (ctxt->d & String) &&
1434 !(ctxt->eflags & X86_EFLAGS_DF)) {
1435 ctxt->dst.data = rc->data + rc->pos;
1436 ctxt->dst.type = OP_MEM_STR;
1437 ctxt->dst.count = (rc->end - rc->pos) / size;
1440 memcpy(dest, rc->data + rc->pos, size);
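/*
 * The read-ahead above batches a REP INS: up to a page's worth of data
 * (bounded by the io_read cache and the RCX count) is fetched in one
 * ->pio_in_emulated() call, and subsequent iterations are served from
 * rc->data.
 */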
1446 static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
1447 u16 index, struct desc_struct *desc)
1452 ctxt->ops->get_idt(ctxt, &dt);
1454 if (dt.size < index * 8 + 7)
1455 return emulate_gp(ctxt, index << 3 | 0x2);
1457 addr = dt.address + index * 8;
1458 return linear_read_system(ctxt, addr, desc, sizeof(*desc));
1461 static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
1462 u16 selector, struct desc_ptr *dt)
1464 const struct x86_emulate_ops *ops = ctxt->ops;
1467 if (selector & 1 << 2) {
1468 struct desc_struct desc;
1471 memset(dt, 0, sizeof(*dt));
1472 if (!ops->get_segment(ctxt, &sel, &desc, &base3,
1476 dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
1477 dt->address = get_desc_base(&desc) | ((u64)base3 << 32);
1479 ops->get_gdt(ctxt, dt);
1482 static int get_descriptor_ptr(struct x86_emulate_ctxt *ctxt,
1483 u16 selector, ulong *desc_addr_p)
1486 u16 index = selector >> 3;
1489 get_descriptor_table_ptr(ctxt, selector, &dt);
1491 if (dt.size < index * 8 + 7)
1492 return emulate_gp(ctxt, selector & 0xfffc);
1494 addr = dt.address + index * 8;
1496 #ifdef CONFIG_X86_64
1497 if (addr >> 32 != 0) {
1500 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1501 if (!(efer & EFER_LMA))
1506 *desc_addr_p = addr;
1507 return X86EMUL_CONTINUE;
1510 /* allowed only for 8-byte segment descriptors */
1511 static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1512 u16 selector, struct desc_struct *desc,
1517 rc = get_descriptor_ptr(ctxt, selector, desc_addr_p);
1518 if (rc != X86EMUL_CONTINUE)
1521 return linear_read_system(ctxt, *desc_addr_p, desc, sizeof(*desc));
1524 /* allowed only for 8-byte segment descriptors */
1525 static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1526 u16 selector, struct desc_struct *desc)
1531 rc = get_descriptor_ptr(ctxt, selector, &addr);
1532 if (rc != X86EMUL_CONTINUE)
1535 return linear_write_system(ctxt, addr, desc, sizeof(*desc));
1538 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1539 u16 selector, int seg, u8 cpl,
1540 enum x86_transfer_type transfer,
1541 struct desc_struct *desc)
1543 struct desc_struct seg_desc, old_desc;
1545 unsigned err_vec = GP_VECTOR;
1547 bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
1553 memset(&seg_desc, 0, sizeof(seg_desc));
1555 if (ctxt->mode == X86EMUL_MODE_REAL) {
1556 /* set real mode segment descriptor (keep limit etc. for unreal mode) */
1558 ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
1559 set_desc_base(&seg_desc, selector << 4);
1561 } else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
1562 /* VM86 needs a clean new segment descriptor */
1563 set_desc_base(&seg_desc, selector << 4);
1564 set_desc_limit(&seg_desc, 0xffff);
1574 /* TR should be in GDT only */
1575 if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
1578 /* NULL selector is not valid for TR, CS and (except for long mode) SS */
1579 if (null_selector) {
1580 if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR)
1583 if (seg == VCPU_SREG_SS) {
1584 if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)
1588 * ctxt->ops->set_segment expects the CPL to be in
1589 * SS.DPL, so fake an expand-up 32-bit data segment.
1599 /* Skip all following checks */
1603 ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
1604 if (ret != X86EMUL_CONTINUE)
1607 err_code = selector & 0xfffc;
1608 err_vec = (transfer == X86_TRANSFER_TASK_SWITCH) ? TS_VECTOR :
1611 /* can't load system descriptor into segment selector */
1612 if (seg <= VCPU_SREG_GS && !seg_desc.s) {
1613 if (transfer == X86_TRANSFER_CALL_JMP)
1614 return X86EMUL_UNHANDLEABLE;
1619 err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
1628 * segment is not a writable data segment or segment
1629 * selector's RPL != CPL or segment descriptor's DPL != CPL
1631 if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
1635 if (!(seg_desc.type & 8))
1638 if (seg_desc.type & 4) {
1644 if (rpl > cpl || dpl != cpl)
1647 /* in long-mode d/b must be clear if l is set */
1648 if (seg_desc.d && seg_desc.l) {
1651 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1652 if (efer & EFER_LMA)
1656 /* CS(RPL) <- CPL */
1657 selector = (selector & 0xfffc) | cpl;
1660 if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
1662 old_desc = seg_desc;
1663 seg_desc.type |= 2; /* busy */
1664 ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
1665 sizeof(seg_desc), &ctxt->exception);
1666 if (ret != X86EMUL_CONTINUE)
1669 case VCPU_SREG_LDTR:
1670 if (seg_desc.s || seg_desc.type != 2)
1673 default: /* DS, ES, FS, or GS */
1675 * segment is not a data or readable code segment or
1676 * ((segment is a data or nonconforming code segment)
1677 * and (both RPL and CPL > DPL))
1679 if ((seg_desc.type & 0xa) == 0x8 ||
1680 (((seg_desc.type & 0xc) != 0xc) &&
1681 (rpl > dpl && cpl > dpl)))
1687 /* mark segment as accessed */
1688 if (!(seg_desc.type & 1)) {
1690 ret = write_segment_descriptor(ctxt, selector,
1692 if (ret != X86EMUL_CONTINUE)
1695 } else if (ctxt->mode == X86EMUL_MODE_PROT64) {
1696 ret = linear_read_system(ctxt, desc_addr+8, &base3, sizeof(base3));
1697 if (ret != X86EMUL_CONTINUE)
1699 if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
1700 ((u64)base3 << 32), ctxt))
1701 return emulate_gp(ctxt, 0);
1704 ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
1707 return X86EMUL_CONTINUE;
1709 return emulate_exception(ctxt, err_vec, err_code, true);
1712 static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1713 u16 selector, int seg)
1715 u8 cpl = ctxt->ops->cpl(ctxt);
1718 * None of MOV, POP and LSS can load a NULL selector in CPL=3, but
1719 * they can load it at CPL<3 (Intel's manual says only LSS can, but it's wrong).
1722 * However, the Intel manual says that putting IST=1/DPL=3 in
1723 * an interrupt gate will result in SS=3 (the AMD manual instead
1724 * says it doesn't), so allow SS=3 in __load_segment_descriptor
1725 * and only forbid it here.
1727 if (seg == VCPU_SREG_SS && selector == 3 &&
1728 ctxt->mode == X86EMUL_MODE_PROT64)
1729 return emulate_exception(ctxt, GP_VECTOR, 0, true);
1731 return __load_segment_descriptor(ctxt, selector, seg, cpl,
1732 X86_TRANSFER_NONE, NULL);
1735 static void write_register_operand(struct operand *op)
1737 return assign_register(op->addr.reg, op->val, op->bytes);
1740 static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
1744 write_register_operand(op);
1747 if (ctxt->lock_prefix)
1748 return segmented_cmpxchg(ctxt,
1754 return segmented_write(ctxt,
1760 return segmented_write(ctxt,
1763 op->bytes * op->count);
1766 kvm_write_sse_reg(op->addr.xmm, &op->vec_val);
1769 kvm_write_mmx_reg(op->addr.mm, &op->mm_val);
1777 return X86EMUL_CONTINUE;
1780 static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes)
1782 struct segmented_address addr;
1784 rsp_increment(ctxt, -bytes);
1785 addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1786 addr.seg = VCPU_SREG_SS;
1788 return segmented_write(ctxt, addr, data, bytes);
1791 static int em_push(struct x86_emulate_ctxt *ctxt)
1793 /* Disable writeback. */
1794 ctxt->dst.type = OP_NONE;
1795 return push(ctxt, &ctxt->src.val, ctxt->op_bytes);
1798 static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1799 void *dest, int len)
1802 struct segmented_address addr;
1804 addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1805 addr.seg = VCPU_SREG_SS;
1806 rc = segmented_read(ctxt, addr, dest, len);
1807 if (rc != X86EMUL_CONTINUE)
1810 rsp_increment(ctxt, len);
1814 static int em_pop(struct x86_emulate_ctxt *ctxt)
1816 return emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1819 static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1820 void *dest, int len)
1823 unsigned long val, change_mask;
1824 int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
1825 int cpl = ctxt->ops->cpl(ctxt);
1827 rc = emulate_pop(ctxt, &val, len);
1828 if (rc != X86EMUL_CONTINUE)
1831 change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
1832 X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
1833 X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
1834 X86_EFLAGS_AC | X86_EFLAGS_ID;
1836 switch(ctxt->mode) {
1837 case X86EMUL_MODE_PROT64:
1838 case X86EMUL_MODE_PROT32:
1839 case X86EMUL_MODE_PROT16:
1841 change_mask |= X86_EFLAGS_IOPL;
1843 change_mask |= X86_EFLAGS_IF;
1845 case X86EMUL_MODE_VM86:
1847 return emulate_gp(ctxt, 0);
1848 change_mask |= X86_EFLAGS_IF;
1850 default: /* real mode */
1851 change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
1855 *(unsigned long *)dest =
1856 (ctxt->eflags & ~change_mask) | (val & change_mask);
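/*
 * Architecturally, POPF may change IOPL only at CPL 0 and IF only when
 * CPL <= IOPL; in VM86 mode with IOPL < 3 it raises #GP, which is what
 * the emulate_gp(ctxt, 0) above implements.
 */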
1861 static int em_popf(struct x86_emulate_ctxt *ctxt)
1863 ctxt->dst.type = OP_REG;
1864 ctxt->dst.addr.reg = &ctxt->eflags;
1865 ctxt->dst.bytes = ctxt->op_bytes;
1866 return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1869 static int em_enter(struct x86_emulate_ctxt *ctxt)
1872 unsigned frame_size = ctxt->src.val;
1873 unsigned nesting_level = ctxt->src2.val & 31;
1877 return X86EMUL_UNHANDLEABLE;
1879 rbp = reg_read(ctxt, VCPU_REGS_RBP);
1880 rc = push(ctxt, &rbp, stack_size(ctxt));
1881 if (rc != X86EMUL_CONTINUE)
1883 assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
1885 assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
1886 reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
1888 return X86EMUL_CONTINUE;
1891 static int em_leave(struct x86_emulate_ctxt *ctxt)
1893 assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
1895 return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
1898 static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
1900 int seg = ctxt->src2.val;
1902 ctxt->src.val = get_segment_selector(ctxt, seg);
1903 if (ctxt->op_bytes == 4) {
1904 rsp_increment(ctxt, -2);
1908 return em_push(ctxt);
1911 static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
1913 int seg = ctxt->src2.val;
1914 unsigned long selector;
1917 rc = emulate_pop(ctxt, &selector, 2);
1918 if (rc != X86EMUL_CONTINUE)
1921 if (ctxt->modrm_reg == VCPU_SREG_SS)
1922 ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
1923 if (ctxt->op_bytes > 2)
1924 rsp_increment(ctxt, ctxt->op_bytes - 2);
1926 rc = load_segment_descriptor(ctxt, (u16)selector, seg);
1930 static int em_pusha(struct x86_emulate_ctxt *ctxt)
1932 unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
1933 int rc = X86EMUL_CONTINUE;
1934 int reg = VCPU_REGS_RAX;
1936 while (reg <= VCPU_REGS_RDI) {
1937 (reg == VCPU_REGS_RSP) ?
1938 (ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg));
1941 if (rc != X86EMUL_CONTINUE)
1950 static int em_pushf(struct x86_emulate_ctxt *ctxt)
1952 ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
1953 return em_push(ctxt);
1956 static int em_popa(struct x86_emulate_ctxt *ctxt)
1958 int rc = X86EMUL_CONTINUE;
1959 int reg = VCPU_REGS_RDI;
1962 while (reg >= VCPU_REGS_RAX) {
1963 if (reg == VCPU_REGS_RSP) {
1964 rsp_increment(ctxt, ctxt->op_bytes);
1968 rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
1969 if (rc != X86EMUL_CONTINUE)
1971 assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
1977 static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
1979 const struct x86_emulate_ops *ops = ctxt->ops;
1986 /* TODO: Add limit checks */
1987 ctxt->src.val = ctxt->eflags;
1989 if (rc != X86EMUL_CONTINUE)
1992 ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);
1994 ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
1996 if (rc != X86EMUL_CONTINUE)
1999 ctxt->src.val = ctxt->_eip;
2001 if (rc != X86EMUL_CONTINUE)
2004 ops->get_idt(ctxt, &dt);
2006 eip_addr = dt.address + (irq << 2);
2007 cs_addr = dt.address + (irq << 2) + 2;
2009 rc = linear_read_system(ctxt, cs_addr, &cs, 2);
2010 if (rc != X86EMUL_CONTINUE)
2013 rc = linear_read_system(ctxt, eip_addr, &eip, 2);
2014 if (rc != X86EMUL_CONTINUE)
2017 rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
2018 if (rc != X86EMUL_CONTINUE)
2026 int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
2030 invalidate_registers(ctxt);
2031 rc = __emulate_int_real(ctxt, irq);
2032 if (rc == X86EMUL_CONTINUE)
2033 writeback_registers(ctxt);
2037 static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
2039 switch(ctxt->mode) {
2040 case X86EMUL_MODE_REAL:
2041 return __emulate_int_real(ctxt, irq);
2042 case X86EMUL_MODE_VM86:
2043 case X86EMUL_MODE_PROT16:
2044 case X86EMUL_MODE_PROT32:
2045 case X86EMUL_MODE_PROT64:
2047 /* Protected mode interrupts are not implemented yet */
2048 return X86EMUL_UNHANDLEABLE;
2052 static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
2054 int rc = X86EMUL_CONTINUE;
2055 unsigned long temp_eip = 0;
2056 unsigned long temp_eflags = 0;
2057 unsigned long cs = 0;
2058 unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
2059 X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
2060 X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
2061 X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
2062 X86_EFLAGS_AC | X86_EFLAGS_ID |
2064 unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
2067 /* TODO: Add stack limit check */
2069 rc = emulate_pop(ctxt, &temp_eip, ctxt->op_bytes);
2071 if (rc != X86EMUL_CONTINUE)
2074 if (temp_eip & ~0xffff)
2075 return emulate_gp(ctxt, 0);
2077 rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2079 if (rc != X86EMUL_CONTINUE)
2082 rc = emulate_pop(ctxt, &temp_eflags, ctxt->op_bytes);
2084 if (rc != X86EMUL_CONTINUE)
2087 rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
2089 if (rc != X86EMUL_CONTINUE)
2092 ctxt->_eip = temp_eip;
2094 if (ctxt->op_bytes == 4)
2095 ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
2096 else if (ctxt->op_bytes == 2) {
2097 ctxt->eflags &= ~0xffff;
2098 ctxt->eflags |= temp_eflags;
2101 ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
2102 ctxt->eflags |= X86_EFLAGS_FIXED;
2103 ctxt->ops->set_nmi_mask(ctxt, false);
2108 static int em_iret(struct x86_emulate_ctxt *ctxt)
2110 switch(ctxt->mode) {
2111 case X86EMUL_MODE_REAL:
2112 return emulate_iret_real(ctxt);
2113 case X86EMUL_MODE_VM86:
2114 case X86EMUL_MODE_PROT16:
2115 case X86EMUL_MODE_PROT32:
2116 case X86EMUL_MODE_PROT64:
2118 /* iret from protected mode is not implemented yet */
2119 return X86EMUL_UNHANDLEABLE;
2123 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
2127 struct desc_struct new_desc;
2128 u8 cpl = ctxt->ops->cpl(ctxt);
2130 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2132 rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
2133 X86_TRANSFER_CALL_JMP,
2135 if (rc != X86EMUL_CONTINUE)
2138 rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
2139 /* Error handling is not implemented. */
2140 if (rc != X86EMUL_CONTINUE)
2141 return X86EMUL_UNHANDLEABLE;
2146 static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
2148 return assign_eip_near(ctxt, ctxt->src.val);
2151 static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
2156 old_eip = ctxt->_eip;
2157 rc = assign_eip_near(ctxt, ctxt->src.val);
2158 if (rc != X86EMUL_CONTINUE)
2160 ctxt->src.val = old_eip;
2165 static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
2167 u64 old = ctxt->dst.orig_val64;
2169 if (ctxt->dst.bytes == 16)
2170 return X86EMUL_UNHANDLEABLE;
2172 if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
2173 ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
2174 *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
2175 *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
2176 ctxt->eflags &= ~X86_EFLAGS_ZF;
2178 ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
2179 (u32) reg_read(ctxt, VCPU_REGS_RBX);
2181 ctxt->eflags |= X86_EFLAGS_ZF;
2183 return X86EMUL_CONTINUE;
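/*
 * em_cmpxchg8b() follows the CMPXCHG8B definition: if EDX:EAX equals
 * the 64-bit destination, ZF is set and ECX:EBX is written back;
 * otherwise ZF is cleared and the destination value is loaded into
 * EDX:EAX.  The 16-byte CMPXCHG16B form is reported as unhandleable
 * here.
 */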
2186 static int em_ret(struct x86_emulate_ctxt *ctxt)
2191 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2192 if (rc != X86EMUL_CONTINUE)
2195 return assign_eip_near(ctxt, eip);
2198 static int em_ret_far(struct x86_emulate_ctxt *ctxt)
2201 unsigned long eip, cs;
2202 int cpl = ctxt->ops->cpl(ctxt);
2203 struct desc_struct new_desc;
2205 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2206 if (rc != X86EMUL_CONTINUE)
2208 rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2209 if (rc != X86EMUL_CONTINUE)
2211 /* Outer-privilege level return is not implemented */
2212 if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
2213 return X86EMUL_UNHANDLEABLE;
2214 rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl,
2217 if (rc != X86EMUL_CONTINUE)
2219 rc = assign_eip_far(ctxt, eip, &new_desc);
2220 /* Error handling is not implemented. */
2221 if (rc != X86EMUL_CONTINUE)
2222 return X86EMUL_UNHANDLEABLE;
2227 static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
2231 rc = em_ret_far(ctxt);
2232 if (rc != X86EMUL_CONTINUE)
2234 rsp_increment(ctxt, ctxt->src.val);
2235 return X86EMUL_CONTINUE;
2238 static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
2240 /* Save real source value, then compare EAX against destination. */
2241 ctxt->dst.orig_val = ctxt->dst.val;
2242 ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
2243 ctxt->src.orig_val = ctxt->src.val;
2244 ctxt->src.val = ctxt->dst.orig_val;
2245 fastop(ctxt, em_cmp);
2247 if (ctxt->eflags & X86_EFLAGS_ZF) {
2248 /* Success: write back to memory; no update of EAX */
2249 ctxt->src.type = OP_NONE;
2250 ctxt->dst.val = ctxt->src.orig_val;
2252 /* Failure: write the value we saw to EAX. */
2253 ctxt->src.type = OP_REG;
2254 ctxt->src.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
2255 ctxt->src.val = ctxt->dst.orig_val;
2256 /* Create write-cycle to dest by writing the same value */
2257 ctxt->dst.val = ctxt->dst.orig_val;
2259 return X86EMUL_CONTINUE;
2262 static int em_lseg(struct x86_emulate_ctxt *ctxt)
2264 int seg = ctxt->src2.val;
2268 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2270 rc = load_segment_descriptor(ctxt, sel, seg);
2271 if (rc != X86EMUL_CONTINUE)
2274 ctxt->dst.val = ctxt->src.val;
2278 static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
2280 #ifdef CONFIG_X86_64
2281 return ctxt->ops->guest_has_long_mode(ctxt);
2287 static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
2289 desc->g = (flags >> 23) & 1;
2290 desc->d = (flags >> 22) & 1;
2291 desc->l = (flags >> 21) & 1;
2292 desc->avl = (flags >> 20) & 1;
2293 desc->p = (flags >> 15) & 1;
2294 desc->dpl = (flags >> 13) & 3;
2295 desc->s = (flags >> 12) & 1;
2296 desc->type = (flags >> 8) & 15;
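/*
 * The layout decoded above matches a segment descriptor's attribute
 * bits: type at 8-11, S at 12, DPL at 13-14, P at 15, and AVL/L/D-B/G
 * at 20-23 of the descriptor's high dword.
 */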
2299 static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
2302 struct desc_struct desc;
2306 selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
2309 offset = 0x7f84 + n * 12;
2311 offset = 0x7f2c + (n - 3) * 12;
2313 set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
2314 set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
2315 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
2316 ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
2317 return X86EMUL_CONTINUE;
2320 #ifdef CONFIG_X86_64
2321 static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
2324 struct desc_struct desc;
2329 offset = 0x7e00 + n * 16;
2331 selector = GET_SMSTATE(u16, smstate, offset);
2332 rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
2333 set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
2334 set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
2335 base3 = GET_SMSTATE(u32, smstate, offset + 12);
2337 ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
2338 return X86EMUL_CONTINUE;
2342 static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
2343 u64 cr0, u64 cr3, u64 cr4)
2348 /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
2350 if (cr4 & X86_CR4_PCIDE) {
2355 bad = ctxt->ops->set_cr(ctxt, 3, cr3);
2357 return X86EMUL_UNHANDLEABLE;
2360 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
2361 * Then enable protected mode. However, PCID cannot be enabled
2362 * if EFER.LMA=0, so set it separately.
2364 bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
2366 return X86EMUL_UNHANDLEABLE;
2368 bad = ctxt->ops->set_cr(ctxt, 0, cr0);
2370 return X86EMUL_UNHANDLEABLE;
2372 if (cr4 & X86_CR4_PCIDE) {
2373 bad = ctxt->ops->set_cr(ctxt, 4, cr4);
2375 return X86EMUL_UNHANDLEABLE;
2377 bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
2379 return X86EMUL_UNHANDLEABLE;
2384 return X86EMUL_CONTINUE;
2387 static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
2388 const char *smstate)
2390 struct desc_struct desc;
2393 u32 val, cr0, cr3, cr4;
2396 cr0 = GET_SMSTATE(u32, smstate, 0x7ffc);
2397 cr3 = GET_SMSTATE(u32, smstate, 0x7ff8);
2398 ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
2399 ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0);
2401 for (i = 0; i < 8; i++)
2402 *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
2404 val = GET_SMSTATE(u32, smstate, 0x7fcc);
2406 if (ctxt->ops->set_dr(ctxt, 6, val))
2407 return X86EMUL_UNHANDLEABLE;
2409 val = GET_SMSTATE(u32, smstate, 0x7fc8);
2411 if (ctxt->ops->set_dr(ctxt, 7, val))
2412 return X86EMUL_UNHANDLEABLE;
2414 selector = GET_SMSTATE(u32, smstate, 0x7fc4);
2415 set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64));
2416 set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60));
2417 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c));
2418 ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
2420 selector = GET_SMSTATE(u32, smstate, 0x7fc0);
2421 set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f80));
2422 set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c));
2423 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78));
2424 ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
2426 dt.address = GET_SMSTATE(u32, smstate, 0x7f74);
2427 dt.size = GET_SMSTATE(u32, smstate, 0x7f70);
2428 ctxt->ops->set_gdt(ctxt, &dt);
2430 dt.address = GET_SMSTATE(u32, smstate, 0x7f58);
2431 dt.size = GET_SMSTATE(u32, smstate, 0x7f54);
2432 ctxt->ops->set_idt(ctxt, &dt);
2434 for (i = 0; i < 6; i++) {
2435 int r = rsm_load_seg_32(ctxt, smstate, i);
2436 if (r != X86EMUL_CONTINUE)
2440 cr4 = GET_SMSTATE(u32, smstate, 0x7f14);
2442 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
2444 return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
2447 #ifdef CONFIG_X86_64
2448 static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
2449 const char *smstate)
2451 struct desc_struct desc;
2453 u64 val, cr0, cr3, cr4;
2458 for (i = 0; i < 16; i++)
2459 *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
2461 ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78);
2462 ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
2464 val = GET_SMSTATE(u64, smstate, 0x7f68);
2466 if (ctxt->ops->set_dr(ctxt, 6, val))
2467 return X86EMUL_UNHANDLEABLE;
2469 val = GET_SMSTATE(u64, smstate, 0x7f60);
2471 if (ctxt->ops->set_dr(ctxt, 7, val))
2472 return X86EMUL_UNHANDLEABLE;
2474 cr0 = GET_SMSTATE(u64, smstate, 0x7f58);
2475 cr3 = GET_SMSTATE(u64, smstate, 0x7f50);
2476 cr4 = GET_SMSTATE(u64, smstate, 0x7f48);
2477 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
2478 val = GET_SMSTATE(u64, smstate, 0x7ed0);
2480 if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
2481 return X86EMUL_UNHANDLEABLE;
2483 selector = GET_SMSTATE(u32, smstate, 0x7e90);
2484 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8);
2485 set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94));
2486 set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e98));
2487 base3 = GET_SMSTATE(u32, smstate, 0x7e9c);
2488 ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
2490 dt.size = GET_SMSTATE(u32, smstate, 0x7e84);
2491 dt.address = GET_SMSTATE(u64, smstate, 0x7e88);
2492 ctxt->ops->set_idt(ctxt, &dt);
2494 selector = GET_SMSTATE(u32, smstate, 0x7e70);
2495 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8);
2496 set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74));
2497 set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78));
2498 base3 = GET_SMSTATE(u32, smstate, 0x7e7c);
2499 ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
2501 dt.size = GET_SMSTATE(u32, smstate, 0x7e64);
2502 dt.address = GET_SMSTATE(u64, smstate, 0x7e68);
2503 ctxt->ops->set_gdt(ctxt, &dt);
2505 r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
2506 if (r != X86EMUL_CONTINUE)
2509 for (i = 0; i < 6; i++) {
2510 r = rsm_load_seg_64(ctxt, smstate, i);
2511 if (r != X86EMUL_CONTINUE)
2515 return X86EMUL_CONTINUE;
2519 static int em_rsm(struct x86_emulate_ctxt *ctxt)
2521 unsigned long cr0, cr4, efer;
2526 if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0)
2527 return emulate_ud(ctxt);
2529 smbase = ctxt->ops->get_smbase(ctxt);
2531 ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
2532 if (ret != X86EMUL_CONTINUE)
2533 return X86EMUL_UNHANDLEABLE;
2535 if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
2536 ctxt->ops->set_nmi_mask(ctxt, false);
2538 ctxt->ops->exiting_smm(ctxt);
2541 * Get back to real mode, to prepare a safe state in which to load
2542 * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
2543 * supports long mode.
2545 if (emulator_has_longmode(ctxt)) {
2546 struct desc_struct cs_desc;
2548 /* Zero CR4.PCIDE before CR0.PG. */
2549 cr4 = ctxt->ops->get_cr(ctxt, 4);
2550 if (cr4 & X86_CR4_PCIDE)
2551 ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
2553 /* A 32-bit code segment is required to clear EFER.LMA. */
2554 memset(&cs_desc, 0, sizeof(cs_desc));
2556 cs_desc.s = cs_desc.g = cs_desc.p = 1;
2557 ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
2560 /* For the 64-bit case, this will clear EFER.LMA. */
2561 cr0 = ctxt->ops->get_cr(ctxt, 0);
2562 if (cr0 & X86_CR0_PE)
2563 ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
2565 if (emulator_has_longmode(ctxt)) {
2566 /* Clear CR4.PAE before clearing EFER.LME. */
2567 cr4 = ctxt->ops->get_cr(ctxt, 4);
2568 if (cr4 & X86_CR4_PAE)
2569 ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
2571 /* And finally go back to 32-bit mode. */
2573 ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
2577 * Give leave_smm() a chance to make ISA-specific changes to the vCPU
2578 * state (e.g. enter guest mode) before loading state from the SMM
2579 * state-save area.
2580 */
2581 if (ctxt->ops->leave_smm(ctxt, buf))
2582 goto emulate_shutdown;
2584 #ifdef CONFIG_X86_64
2585 if (emulator_has_longmode(ctxt))
2586 ret = rsm_load_state_64(ctxt, buf);
2589 ret = rsm_load_state_32(ctxt, buf);
2591 if (ret != X86EMUL_CONTINUE)
2592 goto emulate_shutdown;
2595 * Note, the ctxt->ops callbacks are responsible for handling side
2596 * effects when writing MSRs and CRs, e.g. MMU context resets, CPUID
2597 * runtime updates, etc... If that changes, e.g. this flow is moved
2598 * out of the emulator to make it look more like enter_smm(), then
2599 * those side effects need to be explicitly handled for both success
2600 * and failure.
2601 */
2602 return X86EMUL_CONTINUE;
2604 emulate_shutdown:
2605 ctxt->ops->triple_fault(ctxt);
2606 return X86EMUL_CONTINUE;
2610 setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
2611 struct desc_struct *cs, struct desc_struct *ss)
2613 cs->l = 0; /* will be adjusted later */
2614 set_desc_base(cs, 0); /* flat segment */
2615 cs->g = 1; /* 4kb granularity */
2616 set_desc_limit(cs, 0xfffff); /* 4GB limit */
2617 cs->type = 0x0b; /* Read, Execute, Accessed */
2619 cs->dpl = 0; /* will be adjusted later */
2624 set_desc_base(ss, 0); /* flat segment */
2625 set_desc_limit(ss, 0xfffff); /* 4GB limit */
2626 ss->g = 1; /* 4kb granularity */
2628 ss->type = 0x03; /* Read/Write, Accessed */
2629 ss->d = 1; /* 32bit stack segment */
2636 static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
2638 u32 eax, ebx, ecx, edx;
2641 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
2642 return is_guest_vendor_intel(ebx, ecx, edx);
2645 static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
2647 const struct x86_emulate_ops *ops = ctxt->ops;
2648 u32 eax, ebx, ecx, edx;
2651 * syscall should always be enabled in long mode - so it only becomes
2652 * vendor specific (via cpuid) when other modes are active...
2654 if (ctxt->mode == X86EMUL_MODE_PROT64)
2659 ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
2661 * remark: Intel CPUs only support "syscall" in 64bit long mode, so a
2662 * 64bit guest running a 32bit compat-app will #UD. While this behaviour
2663 * could be fixed up (by emulating the AMD response), AMD CPUs can't be
2664 * made to behave like Intel ones.
2666 if (is_guest_vendor_intel(ebx, ecx, edx))
2669 if (is_guest_vendor_amd(ebx, ecx, edx) ||
2670 is_guest_vendor_hygon(ebx, ecx, edx))
2674 * default: (not Intel, not AMD, not Hygon), apply Intel's
2675 * stricter rules...
2676 */
2677 return false;
2680 static int em_syscall(struct x86_emulate_ctxt *ctxt)
2682 const struct x86_emulate_ops *ops = ctxt->ops;
2683 struct desc_struct cs, ss;
2688 /* syscall is not available in real mode */
2689 if (ctxt->mode == X86EMUL_MODE_REAL ||
2690 ctxt->mode == X86EMUL_MODE_VM86)
2691 return emulate_ud(ctxt);
2693 if (!(em_syscall_is_enabled(ctxt)))
2694 return emulate_ud(ctxt);
2696 ops->get_msr(ctxt, MSR_EFER, &efer);
2697 if (!(efer & EFER_SCE))
2698 return emulate_ud(ctxt);
2700 setup_syscalls_segments(ctxt, &cs, &ss);
2701 ops->get_msr(ctxt, MSR_STAR, &msr_data);
2702 msr_data >>= 32;
2703 cs_sel = (u16)(msr_data & 0xfffc);
2704 ss_sel = (u16)(msr_data + 8);
2706 if (efer & EFER_LMA) {
2710 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2711 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2713 *reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
2714 if (efer & EFER_LMA) {
2715 #ifdef CONFIG_X86_64
2716 *reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags;
2718 ops->get_msr(ctxt,
2719 ctxt->mode == X86EMUL_MODE_PROT64 ?
2720 MSR_LSTAR : MSR_CSTAR, &msr_data);
2721 ctxt->_eip = msr_data;
2723 ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
2724 ctxt->eflags &= ~msr_data;
2725 ctxt->eflags |= X86_EFLAGS_FIXED;
2729 ops->get_msr(ctxt, MSR_STAR, &msr_data);
2730 ctxt->_eip = (u32)msr_data;
2732 ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2735 ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
2736 return X86EMUL_CONTINUE;
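/*
 * Worked example for the MSR_STAR selector math above (a sketch of the
 * architectural layout, not anything specific to this function): STAR
 * bits 47:32 hold the SYSCALL CS selector base, so with STAR[47:32] =
 * 0x0010 we get cs_sel = 0x0010 and ss_sel = 0x0018, i.e. SS is always
 * the descriptor immediately after CS. In long mode, RCX/R11 receive the
 * return RIP and RFLAGS, the new RIP comes from LSTAR (64-bit callers) or
 * CSTAR (compat callers), and RFLAGS is masked by MSR_SYSCALL_MASK.
 */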
2739 static int em_sysenter(struct x86_emulate_ctxt *ctxt)
2741 const struct x86_emulate_ops *ops = ctxt->ops;
2742 struct desc_struct cs, ss;
2747 ops->get_msr(ctxt, MSR_EFER, &efer);
2748 /* inject #GP if in real mode */
2749 if (ctxt->mode == X86EMUL_MODE_REAL)
2750 return emulate_gp(ctxt, 0);
2753 * Not recognized on AMD in compat mode (but is recognized in legacy
2754 * mode, of course).
2755 */
2756 if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA)
2757 && !vendor_intel(ctxt))
2758 return emulate_ud(ctxt);
2760 /* sysenter/sysexit have not been tested in 64bit mode. */
2761 if (ctxt->mode == X86EMUL_MODE_PROT64)
2762 return X86EMUL_UNHANDLEABLE;
2764 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2765 if ((msr_data & 0xfffc) == 0x0)
2766 return emulate_gp(ctxt, 0);
2768 setup_syscalls_segments(ctxt, &cs, &ss);
2769 ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2770 cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
2771 ss_sel = cs_sel + 8;
2772 if (efer & EFER_LMA) {
2777 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2778 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2780 ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
2781 ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data;
2783 ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
2784 *reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
2785 (u32)msr_data;
2786 if (efer & EFER_LMA)
2787 ctxt->mode = X86EMUL_MODE_PROT64;
2789 return X86EMUL_CONTINUE;
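/*
 * For reference, the SYSENTER convention implemented above: CS comes from
 * IA32_SYSENTER_CS (with RPL cleared), SS is CS + 8, and the new [ER]IP
 * and [ER]SP come from IA32_SYSENTER_EIP and IA32_SYSENTER_ESP. A null
 * SYSENTER_CS selector is rejected with #GP(0), matching the check above.
 */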
2792 static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2794 const struct x86_emulate_ops *ops = ctxt->ops;
2795 struct desc_struct cs, ss;
2796 u64 msr_data, rcx, rdx;
2798 u16 cs_sel = 0, ss_sel = 0;
2800 /* inject #GP if in real mode or Virtual 8086 mode */
2801 if (ctxt->mode == X86EMUL_MODE_REAL ||
2802 ctxt->mode == X86EMUL_MODE_VM86)
2803 return emulate_gp(ctxt, 0);
2805 setup_syscalls_segments(ctxt, &cs, &ss);
2807 if ((ctxt->rex_prefix & 0x8) != 0x0)
2808 usermode = X86EMUL_MODE_PROT64;
2810 usermode = X86EMUL_MODE_PROT32;
2812 rcx = reg_read(ctxt, VCPU_REGS_RCX);
2813 rdx = reg_read(ctxt, VCPU_REGS_RDX);
2817 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2819 case X86EMUL_MODE_PROT32:
2820 cs_sel = (u16)(msr_data + 16);
2821 if ((msr_data & 0xfffc) == 0x0)
2822 return emulate_gp(ctxt, 0);
2823 ss_sel = (u16)(msr_data + 24);
2827 case X86EMUL_MODE_PROT64:
2828 cs_sel = (u16)(msr_data + 32);
2829 if (msr_data == 0x0)
2830 return emulate_gp(ctxt, 0);
2831 ss_sel = cs_sel + 8;
2834 if (emul_is_noncanonical_address(rcx, ctxt) ||
2835 emul_is_noncanonical_address(rdx, ctxt))
2836 return emulate_gp(ctxt, 0);
2839 cs_sel |= SEGMENT_RPL_MASK;
2840 ss_sel |= SEGMENT_RPL_MASK;
2842 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2843 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2846 *reg_write(ctxt, VCPU_REGS_RSP) = rcx;
2848 return X86EMUL_CONTINUE;
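/*
 * Selector arithmetic used by em_sysexit() above, per the SYSEXIT
 * definition: a return to 32-bit user mode uses CS = SYSENTER_CS + 16 and
 * SS = SYSENTER_CS + 24; a return to 64-bit user mode uses
 * CS = SYSENTER_CS + 32 and SS = CS + 8 (i.e. + 40). Both selectors get
 * RPL 3, the new stack pointer comes from RCX and the return address from
 * RDX, which is why both are checked for canonicality in 64-bit mode.
 */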
2851 static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
2854 if (ctxt->mode == X86EMUL_MODE_REAL)
2856 if (ctxt->mode == X86EMUL_MODE_VM86)
2858 iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
2859 return ctxt->ops->cpl(ctxt) > iopl;
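/*
 * emulator_bad_iopl() returns true when CPL > IOPL in protected mode,
 * e.g. CPL 3 with IOPL 0. Real mode never counts as "bad" and VM86 always
 * does, so I/O from VM86 or from an insufficiently privileged CPL is
 * vetted against the TSS I/O bitmap (emulator_io_permited() below), while
 * CLI/STI in those cases simply #GP.
 */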
2862 #define VMWARE_PORT_VMPORT (0x5658)
2863 #define VMWARE_PORT_VMRPC (0x5659)
2865 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
2868 const struct x86_emulate_ops *ops = ctxt->ops;
2869 struct desc_struct tr_seg;
2872 u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
2873 unsigned mask = (1 << len) - 1;
2877 * VMware allows access to these ports even if denied
2878 * by TSS I/O permission bitmap. Mimic behavior.
2880 if (enable_vmware_backdoor &&
2881 ((port == VMWARE_PORT_VMPORT) || (port == VMWARE_PORT_VMRPC)))
2884 ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
2887 if (desc_limit_scaled(&tr_seg) < 103)
2889 base = get_desc_base(&tr_seg);
2890 #ifdef CONFIG_X86_64
2891 base |= ((u64)base3) << 32;
2893 r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL, true);
2894 if (r != X86EMUL_CONTINUE)
2896 if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
2898 r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL, true);
2899 if (r != X86EMUL_CONTINUE)
2901 if ((perm >> bit_idx) & mask)
2902 return false;
2903 return true;
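/*
 * Illustration of the TSS I/O bitmap walk above (values are only an
 * example): for a 2-byte access to port 0x3f9, bit_idx = 1 and mask = 0b11,
 * the 16-bit 'perm' word is read from base + io_bitmap_ptr + 0x3f9/8, and
 * the access is denied if either bit 1 or bit 2 of that word is set.
 * Reading two bytes keeps the check correct when the tested bit range
 * crosses a byte boundary.
 */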
2906 static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
2912 if (emulator_bad_iopl(ctxt))
2913 if (!emulator_io_port_access_allowed(ctxt, port, len))
2914 return false;
2916 ctxt->perm_ok = true;
2921 static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
2924 * Intel CPUs mask the counter and pointers in a rather strange
2925 * manner when ECX is zero, due to REP-string optimizations.
2927 #ifdef CONFIG_X86_64
2928 if (ctxt->ad_bytes != 4 || !vendor_intel(ctxt))
2931 *reg_write(ctxt, VCPU_REGS_RCX) = 0;
2934 case 0xa4: /* movsb */
2935 case 0xa5: /* movsd/w */
2936 *reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1;
2938 case 0xaa: /* stosb */
2939 case 0xab: /* stosd/w */
2940 *reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1;
2945 static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
2946 struct tss_segment_16 *tss)
2948 tss->ip = ctxt->_eip;
2949 tss->flag = ctxt->eflags;
2950 tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
2951 tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
2952 tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
2953 tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
2954 tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
2955 tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
2956 tss->si = reg_read(ctxt, VCPU_REGS_RSI);
2957 tss->di = reg_read(ctxt, VCPU_REGS_RDI);
2959 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
2960 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2961 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
2962 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
2963 tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
2966 static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
2967 struct tss_segment_16 *tss)
2972 ctxt->_eip = tss->ip;
2973 ctxt->eflags = tss->flag | 2;
2974 *reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
2975 *reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
2976 *reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
2977 *reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
2978 *reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
2979 *reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
2980 *reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
2981 *reg_write(ctxt, VCPU_REGS_RDI) = tss->di;
2984 * SDM says that segment selectors are loaded before segment
2985 * descriptors.
2986 */
2987 set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
2988 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
2989 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
2990 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2991 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2996 * Now load the segment descriptors. If a fault happens at this stage
2997 * it is handled in the context of the new task.
2999 ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
3000 X86_TRANSFER_TASK_SWITCH, NULL);
3001 if (ret != X86EMUL_CONTINUE)
3003 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
3004 X86_TRANSFER_TASK_SWITCH, NULL);
3005 if (ret != X86EMUL_CONTINUE)
3007 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
3008 X86_TRANSFER_TASK_SWITCH, NULL);
3009 if (ret != X86EMUL_CONTINUE)
3011 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
3012 X86_TRANSFER_TASK_SWITCH, NULL);
3013 if (ret != X86EMUL_CONTINUE)
3015 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
3016 X86_TRANSFER_TASK_SWITCH, NULL);
3017 if (ret != X86EMUL_CONTINUE)
3020 return X86EMUL_CONTINUE;
3023 static int task_switch_16(struct x86_emulate_ctxt *ctxt,
3024 u16 tss_selector, u16 old_tss_sel,
3025 ulong old_tss_base, struct desc_struct *new_desc)
3027 struct tss_segment_16 tss_seg;
3029 u32 new_tss_base = get_desc_base(new_desc);
3031 ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
3032 if (ret != X86EMUL_CONTINUE)
3035 save_state_to_tss16(ctxt, &tss_seg);
3037 ret = linear_write_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
3038 if (ret != X86EMUL_CONTINUE)
3041 ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
3042 if (ret != X86EMUL_CONTINUE)
3045 if (old_tss_sel != 0xffff) {
3046 tss_seg.prev_task_link = old_tss_sel;
3048 ret = linear_write_system(ctxt, new_tss_base,
3049 &tss_seg.prev_task_link,
3050 sizeof(tss_seg.prev_task_link));
3051 if (ret != X86EMUL_CONTINUE)
3055 return load_state_from_tss16(ctxt, &tss_seg);
3058 static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
3059 struct tss_segment_32 *tss)
3061 /* CR3 and the LDT selector are intentionally not saved */
3062 tss->eip = ctxt->_eip;
3063 tss->eflags = ctxt->eflags;
3064 tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
3065 tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
3066 tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
3067 tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
3068 tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
3069 tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
3070 tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
3071 tss->edi = reg_read(ctxt, VCPU_REGS_RDI);
3073 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
3074 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
3075 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
3076 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
3077 tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
3078 tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
3081 static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
3082 struct tss_segment_32 *tss)
3087 if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
3088 return emulate_gp(ctxt, 0);
3089 ctxt->_eip = tss->eip;
3090 ctxt->eflags = tss->eflags | 2;
3092 /* General purpose registers */
3093 *reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
3094 *reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
3095 *reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
3096 *reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
3097 *reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
3098 *reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
3099 *reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
3100 *reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;
3103 * SDM says that segment selectors are loaded before segment
3104 * descriptors. This is important because CPL checks will
3105 * use CS.RPL.
3106 */
3107 set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
3108 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
3109 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
3110 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
3111 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
3112 set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
3113 set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
3116 * If we're switching between Protected Mode and VM86, we need to make
3117 * sure to update the mode before loading the segment descriptors so
3118 * that the selectors are interpreted correctly.
3120 if (ctxt->eflags & X86_EFLAGS_VM) {
3121 ctxt->mode = X86EMUL_MODE_VM86;
3124 ctxt->mode = X86EMUL_MODE_PROT32;
3129 * Now load the segment descriptors. If a fault happens at this stage
3130 * it is handled in the context of the new task.
3132 ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
3133 cpl, X86_TRANSFER_TASK_SWITCH, NULL);
3134 if (ret != X86EMUL_CONTINUE)
3136 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
3137 X86_TRANSFER_TASK_SWITCH, NULL);
3138 if (ret != X86EMUL_CONTINUE)
3140 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
3141 X86_TRANSFER_TASK_SWITCH, NULL);
3142 if (ret != X86EMUL_CONTINUE)
3144 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
3145 X86_TRANSFER_TASK_SWITCH, NULL);
3146 if (ret != X86EMUL_CONTINUE)
3148 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
3149 X86_TRANSFER_TASK_SWITCH, NULL);
3150 if (ret != X86EMUL_CONTINUE)
3152 ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
3153 X86_TRANSFER_TASK_SWITCH, NULL);
3154 if (ret != X86EMUL_CONTINUE)
3156 ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
3157 X86_TRANSFER_TASK_SWITCH, NULL);
3162 static int task_switch_32(struct x86_emulate_ctxt *ctxt,
3163 u16 tss_selector, u16 old_tss_sel,
3164 ulong old_tss_base, struct desc_struct *new_desc)
3166 struct tss_segment_32 tss_seg;
3168 u32 new_tss_base = get_desc_base(new_desc);
3169 u32 eip_offset = offsetof(struct tss_segment_32, eip);
3170 u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
3172 ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
3173 if (ret != X86EMUL_CONTINUE)
3176 save_state_to_tss32(ctxt, &tss_seg);
3178 /* Only GP registers and segment selectors are saved */
3179 ret = linear_write_system(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
3180 ldt_sel_offset - eip_offset);
3181 if (ret != X86EMUL_CONTINUE)
3184 ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
3185 if (ret != X86EMUL_CONTINUE)
3188 if (old_tss_sel != 0xffff) {
3189 tss_seg.prev_task_link = old_tss_sel;
3191 ret = linear_write_system(ctxt, new_tss_base,
3192 &tss_seg.prev_task_link,
3193 sizeof(tss_seg.prev_task_link));
3194 if (ret != X86EMUL_CONTINUE)
3198 return load_state_from_tss32(ctxt, &tss_seg);
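/*
 * With the architectural 32-bit TSS layout, the partial write above covers
 * offsetof(eip) == 0x20 through offsetof(ldt_selector) == 0x60, i.e. EIP,
 * EFLAGS, the eight GPRs and the six segment selectors. CR3 and the LDT
 * selector of the outgoing task are deliberately left untouched, matching
 * save_state_to_tss32() above.
 */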
3201 static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
3202 u16 tss_selector, int idt_index, int reason,
3203 bool has_error_code, u32 error_code)
3205 const struct x86_emulate_ops *ops = ctxt->ops;
3206 struct desc_struct curr_tss_desc, next_tss_desc;
3208 u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
3209 ulong old_tss_base =
3210 ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
3212 ulong desc_addr, dr7;
3214 /* FIXME: old_tss_base == ~0 ? */
3216 ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc, &desc_addr);
3217 if (ret != X86EMUL_CONTINUE)
3219 ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc, &desc_addr);
3220 if (ret != X86EMUL_CONTINUE)
3223 /* FIXME: check that next_tss_desc is tss */
3226 * Check privileges. The three cases are task switch caused by...
3228 * 1. jmp/call/int to task gate: Check against DPL of the task gate
3229 * 2. Exception/IRQ/iret: No check is performed
3230 * 3. jmp/call to TSS/task-gate: No check is performed since the
3231 * hardware checks it before exiting.
3233 if (reason == TASK_SWITCH_GATE) {
3234 if (idt_index != -1) {
3235 /* Software interrupts */
3236 struct desc_struct task_gate_desc;
3239 ret = read_interrupt_descriptor(ctxt, idt_index,
3241 if (ret != X86EMUL_CONTINUE)
3244 dpl = task_gate_desc.dpl;
3245 if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
3246 return emulate_gp(ctxt, (idt_index << 3) | 0x2);
3250 desc_limit = desc_limit_scaled(&next_tss_desc);
3251 if (!next_tss_desc.p ||
3252 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
3253 desc_limit < 0x2b)) {
3254 return emulate_ts(ctxt, tss_selector & 0xfffc);
3257 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
3258 curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
3259 write_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc);
3262 if (reason == TASK_SWITCH_IRET)
3263 ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
3265 /* Set the back link to the previous task only if the NT bit is set in
3266 EFLAGS; note that old_tss_sel is not used after this point. */
3267 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
3268 old_tss_sel = 0xffff;
3270 if (next_tss_desc.type & 8)
3271 ret = task_switch_32(ctxt, tss_selector, old_tss_sel,
3272 old_tss_base, &next_tss_desc);
3274 ret = task_switch_16(ctxt, tss_selector, old_tss_sel,
3275 old_tss_base, &next_tss_desc);
3276 if (ret != X86EMUL_CONTINUE)
3279 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
3280 ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;
3282 if (reason != TASK_SWITCH_IRET) {
3283 next_tss_desc.type |= (1 << 1); /* set busy flag */
3284 write_segment_descriptor(ctxt, tss_selector, &next_tss_desc);
3287 ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS);
3288 ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);
3290 if (has_error_code) {
3291 ctxt->op_bytes = ctxt->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
3292 ctxt->lock_prefix = 0;
3293 ctxt->src.val = (unsigned long) error_code;
3294 ret = em_push(ctxt);
3297 ops->get_dr(ctxt, 7, &dr7);
3298 ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN));
3303 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
3304 u16 tss_selector, int idt_index, int reason,
3305 bool has_error_code, u32 error_code)
3309 invalidate_registers(ctxt);
3310 ctxt->_eip = ctxt->eip;
3311 ctxt->dst.type = OP_NONE;
3313 rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
3314 has_error_code, error_code);
3316 if (rc == X86EMUL_CONTINUE) {
3317 ctxt->eip = ctxt->_eip;
3318 writeback_registers(ctxt);
3321 return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
3324 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
3327 int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;
3329 register_address_increment(ctxt, reg, df * op->bytes);
3330 op->addr.mem.ea = register_address(ctxt, reg);
3333 static int em_das(struct x86_emulate_ctxt *ctxt)
3336 bool af, cf, old_cf;
3338 cf = ctxt->eflags & X86_EFLAGS_CF;
3344 af = ctxt->eflags & X86_EFLAGS_AF;
3345 if ((al & 0x0f) > 9 || af) {
3347 cf = old_cf | (al >= 250);
3352 if (old_al > 0x99 || old_cf) {
3358 /* Set PF, ZF, SF */
3359 ctxt->src.type = OP_IMM;
3360 ctxt->src.val = 0;
3361 ctxt->src.bytes = 1;
3362 fastop(ctxt, em_or);
3363 ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
3365 ctxt->eflags |= X86_EFLAGS_CF;
3367 ctxt->eflags |= X86_EFLAGS_AF;
3368 return X86EMUL_CONTINUE;
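/*
 * DAS example for the adjustment above: AL = 0x2e, AF = CF = 0. The low
 * nibble 0xe > 9, so AL is decremented by 6 -> 0x28 and AF is set; the
 * original AL (0x2e) is not above 0x99 and CF was clear, so the 0x60
 * subtraction is skipped and CF stays clear. The OR-with-zero fastop then
 * recomputes PF/ZF/SF for the final AL.
 */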
3371 static int em_aam(struct x86_emulate_ctxt *ctxt)
3375 if (ctxt->src.val == 0)
3376 return emulate_de(ctxt);
3378 al = ctxt->dst.val & 0xff;
3379 ah = al / ctxt->src.val;
3380 al %= ctxt->src.val;
3382 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al | (ah << 8);
3384 /* Set PF, ZF, SF */
3385 ctxt->src.type = OP_IMM;
3386 ctxt->src.val = 0;
3387 ctxt->src.bytes = 1;
3388 fastop(ctxt, em_or);
3390 return X86EMUL_CONTINUE;
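/*
 * AAM example: with the default immediate 10 and AL = 0x3f (63), the
 * division above yields AH = 6 and AL = 3, i.e. AX = 0x0603. A zero
 * immediate is rejected earlier with #DE.
 */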
3393 static int em_aad(struct x86_emulate_ctxt *ctxt)
3395 u8 al = ctxt->dst.val & 0xff;
3396 u8 ah = (ctxt->dst.val >> 8) & 0xff;
3398 al = (al + (ah * ctxt->src.val)) & 0xff;
3400 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;
3402 /* Set PF, ZF, SF */
3403 ctxt->src.type = OP_IMM;
3404 ctxt->src.val = 0;
3405 ctxt->src.bytes = 1;
3406 fastop(ctxt, em_or);
3408 return X86EMUL_CONTINUE;
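/*
 * AAD example: with the default immediate 10, AH = 6 and AL = 3, the
 * computation above gives AL = (3 + 6 * 10) & 0xff = 0x3f (63) and AH = 0
 * (the high byte is cleared by the 0xffff0000 mask plus the plain 'al').
 */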
3411 static int em_call(struct x86_emulate_ctxt *ctxt)
3414 long rel = ctxt->src.val;
3416 ctxt->src.val = (unsigned long)ctxt->_eip;
3417 rc = jmp_rel(ctxt, rel);
3418 if (rc != X86EMUL_CONTINUE)
3420 return em_push(ctxt);
3423 static int em_call_far(struct x86_emulate_ctxt *ctxt)
3428 struct desc_struct old_desc, new_desc;
3429 const struct x86_emulate_ops *ops = ctxt->ops;
3430 int cpl = ctxt->ops->cpl(ctxt);
3431 enum x86emul_mode prev_mode = ctxt->mode;
3433 old_eip = ctxt->_eip;
3434 ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
3436 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
3437 rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
3438 X86_TRANSFER_CALL_JMP, &new_desc);
3439 if (rc != X86EMUL_CONTINUE)
3442 rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
3443 if (rc != X86EMUL_CONTINUE)
3446 ctxt->src.val = old_cs;
3448 if (rc != X86EMUL_CONTINUE)
3451 ctxt->src.val = old_eip;
3453 /* If we failed, we tainted the memory, but at the very least we
3454 should restore CS. */
3455 if (rc != X86EMUL_CONTINUE) {
3456 pr_warn_once("faulting far call emulation tainted memory\n");
3461 ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
3462 ctxt->mode = prev_mode;
3467 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
3472 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
3473 if (rc != X86EMUL_CONTINUE)
3475 rc = assign_eip_near(ctxt, eip);
3476 if (rc != X86EMUL_CONTINUE)
3478 rsp_increment(ctxt, ctxt->src.val);
3479 return X86EMUL_CONTINUE;
3482 static int em_xchg(struct x86_emulate_ctxt *ctxt)
3484 /* Write back the register source. */
3485 ctxt->src.val = ctxt->dst.val;
3486 write_register_operand(&ctxt->src);
3488 /* Write back the memory destination with implicit LOCK prefix. */
3489 ctxt->dst.val = ctxt->src.orig_val;
3490 ctxt->lock_prefix = 1;
3491 return X86EMUL_CONTINUE;
3494 static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
3496 ctxt->dst.val = ctxt->src2.val;
3497 return fastop(ctxt, em_imul);
3500 static int em_cwd(struct x86_emulate_ctxt *ctxt)
3502 ctxt->dst.type = OP_REG;
3503 ctxt->dst.bytes = ctxt->src.bytes;
3504 ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
3505 ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);
3507 return X86EMUL_CONTINUE;
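/*
 * em_cwd() implements CWD/CDQ/CQO: it replicates the sign bit of the
 * accumulator into the full-width *DX register. For example, with 16-bit
 * operands, AX = 0x8000 yields DX = 0xffff while AX = 0x1234 yields DX = 0.
 */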
3510 static int em_rdpid(struct x86_emulate_ctxt *ctxt)
3514 if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux))
3515 return emulate_ud(ctxt);
3516 ctxt->dst.val = tsc_aux;
3517 return X86EMUL_CONTINUE;
3520 static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
3524 ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
3525 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
3526 *reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
3527 return X86EMUL_CONTINUE;
3530 static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
3534 if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
3535 return emulate_gp(ctxt, 0);
3536 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
3537 *reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
3538 return X86EMUL_CONTINUE;
3541 static int em_mov(struct x86_emulate_ctxt *ctxt)
3543 memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
3544 return X86EMUL_CONTINUE;
3547 static int em_movbe(struct x86_emulate_ctxt *ctxt)
3551 if (!ctxt->ops->guest_has_movbe(ctxt))
3552 return emulate_ud(ctxt);
3554 switch (ctxt->op_bytes) {
3557 * From MOVBE definition: "...When the operand size is 16 bits,
3558 * the upper word of the destination register remains unchanged
3559 * ..."
3560 *
3561 * Both casting ->valptr and ->val to u16 breaks strict aliasing
3562 * rules, so we have to do the operation almost by hand.
3564 tmp = (u16)ctxt->src.val;
3565 ctxt->dst.val &= ~0xffffUL;
3566 ctxt->dst.val |= (unsigned long)swab16(tmp);
3569 ctxt->dst.val = swab32((u32)ctxt->src.val);
3572 ctxt->dst.val = swab64(ctxt->src.val);
3577 return X86EMUL_CONTINUE;
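/*
 * MOVBE example: a 32-bit source of 0x12345678 is stored/loaded as
 * 0x78563412 by the swab32() above; for 16-bit operands only the low word
 * is byte-swapped and the upper word of a register destination is
 * preserved, which is what the manual-masking path handles.
 */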
3580 static int em_cr_write(struct x86_emulate_ctxt *ctxt)
3582 if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
3583 return emulate_gp(ctxt, 0);
3585 /* Disable writeback. */
3586 ctxt->dst.type = OP_NONE;
3587 return X86EMUL_CONTINUE;
3590 static int em_dr_write(struct x86_emulate_ctxt *ctxt)
3594 if (ctxt->mode == X86EMUL_MODE_PROT64)
3595 val = ctxt->src.val & ~0ULL;
3597 val = ctxt->src.val & ~0U;
3599 /* #UD condition is already handled. */
3600 if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0)
3601 return emulate_gp(ctxt, 0);
3603 /* Disable writeback. */
3604 ctxt->dst.type = OP_NONE;
3605 return X86EMUL_CONTINUE;
3608 static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
3610 u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
3614 msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
3615 | ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
3616 r = ctxt->ops->set_msr(ctxt, msr_index, msr_data);
3618 if (r == X86EMUL_IO_NEEDED)
3622 return emulate_gp(ctxt, 0);
3624 return r < 0 ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE;
3627 static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
3629 u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
3633 r = ctxt->ops->get_msr(ctxt, msr_index, &msr_data);
3635 if (r == X86EMUL_IO_NEEDED)
3639 return emulate_gp(ctxt, 0);
3641 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
3642 *reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
3643 return X86EMUL_CONTINUE;
3646 static int em_store_sreg(struct x86_emulate_ctxt *ctxt, int segment)
3648 if (segment > VCPU_SREG_GS &&
3649 (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3650 ctxt->ops->cpl(ctxt) > 0)
3651 return emulate_gp(ctxt, 0);
3653 ctxt->dst.val = get_segment_selector(ctxt, segment);
3654 if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
3655 ctxt->dst.bytes = 2;
3656 return X86EMUL_CONTINUE;
3659 static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
3661 if (ctxt->modrm_reg > VCPU_SREG_GS)
3662 return emulate_ud(ctxt);
3664 return em_store_sreg(ctxt, ctxt->modrm_reg);
3667 static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
3669 u16 sel = ctxt->src.val;
3671 if (ctxt->modrm_reg == VCPU_SREG_CS || ctxt->modrm_reg > VCPU_SREG_GS)
3672 return emulate_ud(ctxt);
3674 if (ctxt->modrm_reg == VCPU_SREG_SS)
3675 ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
3677 /* Disable writeback. */
3678 ctxt->dst.type = OP_NONE;
3679 return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
3682 static int em_sldt(struct x86_emulate_ctxt *ctxt)
3684 return em_store_sreg(ctxt, VCPU_SREG_LDTR);
3687 static int em_lldt(struct x86_emulate_ctxt *ctxt)
3689 u16 sel = ctxt->src.val;
3691 /* Disable writeback. */
3692 ctxt->dst.type = OP_NONE;
3693 return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
3696 static int em_str(struct x86_emulate_ctxt *ctxt)
3698 return em_store_sreg(ctxt, VCPU_SREG_TR);
3701 static int em_ltr(struct x86_emulate_ctxt *ctxt)
3703 u16 sel = ctxt->src.val;
3705 /* Disable writeback. */
3706 ctxt->dst.type = OP_NONE;
3707 return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR);
3710 static int em_invlpg(struct x86_emulate_ctxt *ctxt)
3715 rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear);
3716 if (rc == X86EMUL_CONTINUE)
3717 ctxt->ops->invlpg(ctxt, linear);
3718 /* Disable writeback. */
3719 ctxt->dst.type = OP_NONE;
3720 return X86EMUL_CONTINUE;
3723 static int em_clts(struct x86_emulate_ctxt *ctxt)
3727 cr0 = ctxt->ops->get_cr(ctxt, 0);
3729 ctxt->ops->set_cr(ctxt, 0, cr0);
3730 return X86EMUL_CONTINUE;
3733 static int em_hypercall(struct x86_emulate_ctxt *ctxt)
3735 int rc = ctxt->ops->fix_hypercall(ctxt);
3737 if (rc != X86EMUL_CONTINUE)
3740 /* Let the processor re-execute the fixed hypercall */
3741 ctxt->_eip = ctxt->eip;
3742 /* Disable writeback. */
3743 ctxt->dst.type = OP_NONE;
3744 return X86EMUL_CONTINUE;
3747 static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
3748 void (*get)(struct x86_emulate_ctxt *ctxt,
3749 struct desc_ptr *ptr))
3751 struct desc_ptr desc_ptr;
3753 if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3754 ctxt->ops->cpl(ctxt) > 0)
3755 return emulate_gp(ctxt, 0);
3757 if (ctxt->mode == X86EMUL_MODE_PROT64)
3759 get(ctxt, &desc_ptr);
3760 if (ctxt->op_bytes == 2) {
3762 desc_ptr.address &= 0x00ffffff;
3764 /* Disable writeback. */
3765 ctxt->dst.type = OP_NONE;
3766 return segmented_write_std(ctxt, ctxt->dst.addr.mem,
3767 &desc_ptr, 2 + ctxt->op_bytes);
3770 static int em_sgdt(struct x86_emulate_ctxt *ctxt)
3772 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_gdt);
3775 static int em_sidt(struct x86_emulate_ctxt *ctxt)
3777 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
3780 static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
3782 struct desc_ptr desc_ptr;
3785 if (ctxt->mode == X86EMUL_MODE_PROT64)
3787 rc = read_descriptor(ctxt, ctxt->src.addr.mem,
3788 &desc_ptr.size, &desc_ptr.address,
3790 if (rc != X86EMUL_CONTINUE)
3792 if (ctxt->mode == X86EMUL_MODE_PROT64 &&
3793 emul_is_noncanonical_address(desc_ptr.address, ctxt))
3794 return emulate_gp(ctxt, 0);
3796 ctxt->ops->set_gdt(ctxt, &desc_ptr);
3798 ctxt->ops->set_idt(ctxt, &desc_ptr);
3799 /* Disable writeback. */
3800 ctxt->dst.type = OP_NONE;
3801 return X86EMUL_CONTINUE;
3804 static int em_lgdt(struct x86_emulate_ctxt *ctxt)
3806 return em_lgdt_lidt(ctxt, true);
3809 static int em_lidt(struct x86_emulate_ctxt *ctxt)
3811 return em_lgdt_lidt(ctxt, false);
3814 static int em_smsw(struct x86_emulate_ctxt *ctxt)
3816 if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3817 ctxt->ops->cpl(ctxt) > 0)
3818 return emulate_gp(ctxt, 0);
3820 if (ctxt->dst.type == OP_MEM)
3821 ctxt->dst.bytes = 2;
3822 ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
3823 return X86EMUL_CONTINUE;
3826 static int em_lmsw(struct x86_emulate_ctxt *ctxt)
3828 ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
3829 | (ctxt->src.val & 0x0f));
3830 ctxt->dst.type = OP_NONE;
3831 return X86EMUL_CONTINUE;
3834 static int em_loop(struct x86_emulate_ctxt *ctxt)
3836 int rc = X86EMUL_CONTINUE;
3838 register_address_increment(ctxt, VCPU_REGS_RCX, -1);
3839 if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
3840 (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
3841 rc = jmp_rel(ctxt, ctxt->src.val);
3846 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
3848 int rc = X86EMUL_CONTINUE;
3850 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
3851 rc = jmp_rel(ctxt, ctxt->src.val);
3856 static int em_in(struct x86_emulate_ctxt *ctxt)
3858 if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
3860 return X86EMUL_IO_NEEDED;
3862 return X86EMUL_CONTINUE;
3865 static int em_out(struct x86_emulate_ctxt *ctxt)
3867 ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
3869 /* Disable writeback. */
3870 ctxt->dst.type = OP_NONE;
3871 return X86EMUL_CONTINUE;
3874 static int em_cli(struct x86_emulate_ctxt *ctxt)
3876 if (emulator_bad_iopl(ctxt))
3877 return emulate_gp(ctxt, 0);
3879 ctxt->eflags &= ~X86_EFLAGS_IF;
3880 return X86EMUL_CONTINUE;
3883 static int em_sti(struct x86_emulate_ctxt *ctxt)
3885 if (emulator_bad_iopl(ctxt))
3886 return emulate_gp(ctxt, 0);
3888 ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
3889 ctxt->eflags |= X86_EFLAGS_IF;
3890 return X86EMUL_CONTINUE;
3893 static int em_cpuid(struct x86_emulate_ctxt *ctxt)
3895 u32 eax, ebx, ecx, edx;
3898 ctxt->ops->get_msr(ctxt, MSR_MISC_FEATURES_ENABLES, &msr);
3899 if (msr & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
3900 ctxt->ops->cpl(ctxt)) {
3901 return emulate_gp(ctxt, 0);
3904 eax = reg_read(ctxt, VCPU_REGS_RAX);
3905 ecx = reg_read(ctxt, VCPU_REGS_RCX);
3906 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
3907 *reg_write(ctxt, VCPU_REGS_RAX) = eax;
3908 *reg_write(ctxt, VCPU_REGS_RBX) = ebx;
3909 *reg_write(ctxt, VCPU_REGS_RCX) = ecx;
3910 *reg_write(ctxt, VCPU_REGS_RDX) = edx;
3911 return X86EMUL_CONTINUE;
3914 static int em_sahf(struct x86_emulate_ctxt *ctxt)
3918 flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
3919 X86_EFLAGS_SF;
3920 flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
3922 ctxt->eflags &= ~0xffUL;
3923 ctxt->eflags |= flags | X86_EFLAGS_FIXED;
3924 return X86EMUL_CONTINUE;
3927 static int em_lahf(struct x86_emulate_ctxt *ctxt)
3929 *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
3930 *reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
3931 return X86EMUL_CONTINUE;
3934 static int em_bswap(struct x86_emulate_ctxt *ctxt)
3936 switch (ctxt->op_bytes) {
3937 #ifdef CONFIG_X86_64
3939 asm("bswap %0" : "+r"(ctxt->dst.val));
3943 asm("bswap %0" : "+r"(*(u32 *)&ctxt->dst.val));
3946 return X86EMUL_CONTINUE;
3949 static int em_clflush(struct x86_emulate_ctxt *ctxt)
3951 /* emulating clflush regardless of cpuid */
3952 return X86EMUL_CONTINUE;
3955 static int em_clflushopt(struct x86_emulate_ctxt *ctxt)
3957 /* emulating clflushopt regardless of cpuid */
3958 return X86EMUL_CONTINUE;
3961 static int em_movsxd(struct x86_emulate_ctxt *ctxt)
3963 ctxt->dst.val = (s32) ctxt->src.val;
3964 return X86EMUL_CONTINUE;
3967 static int check_fxsr(struct x86_emulate_ctxt *ctxt)
3969 if (!ctxt->ops->guest_has_fxsr(ctxt))
3970 return emulate_ud(ctxt);
3972 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
3973 return emulate_nm(ctxt);
3976 * Don't emulate a case that should never be hit, instead of working
3977 * around a lack of fxsave64/fxrstor64 on old compilers.
3979 if (ctxt->mode >= X86EMUL_MODE_PROT64)
3980 return X86EMUL_UNHANDLEABLE;
3982 return X86EMUL_CONTINUE;
3986 * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but does save
3987 * and restore MXCSR.
3989 static size_t __fxstate_size(int nregs)
3991 return offsetof(struct fxregs_state, xmm_space[0]) + nregs * 16;
3994 static inline size_t fxstate_size(struct x86_emulate_ctxt *ctxt)
3997 if (ctxt->mode == X86EMUL_MODE_PROT64)
3998 return __fxstate_size(16);
4000 cr4_osfxsr = ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR;
4001 return __fxstate_size(cr4_osfxsr ? 8 : 0);
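/*
 * With the standard 512-byte FXSAVE image (32-byte header, 128 bytes of
 * x87/MMX state, then the XMM area), __fxstate_size() evaluates to 160
 * bytes for nregs == 0, 288 for 8 XMM registers and 416 for all 16; the
 * trailing reserved bytes of the image are never copied.
 */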
4005 * FXSAVE and FXRSTOR have 4 different formats depending on execution mode,
4006 * 1) 16 bit mode
4007 * 2) 32 bit mode
4008 * - like (1), but FIP and FDP (foo) are only 16 bit. At least Intel CPUs
4009 * preserve whole 32 bit values, though, so (1) and (2) are the same wrt.
4010 * this.
4011 * 3) 64-bit mode with REX.W prefix
4012 * - like (2), but XMM 8-15 are being saved and restored
4013 * 4) 64-bit mode without REX.W prefix
4014 * - like (3), but FIP and FDP are 64 bit
4016 * Emulation uses (3) for (1) and (2) and preserves XMM 8-15 to reach the
4017 * desired result. (4) is not emulated.
4019 * Note: Guest and host CPUID.(EAX=07H,ECX=0H):EBX[bit 13] (deprecate FPU CS
4020 * and FPU DS) should match.
4022 static int em_fxsave(struct x86_emulate_ctxt *ctxt)
4024 struct fxregs_state fx_state;
4027 rc = check_fxsr(ctxt);
4028 if (rc != X86EMUL_CONTINUE)
4033 rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
4037 if (rc != X86EMUL_CONTINUE)
4040 return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state,
4041 fxstate_size(ctxt));
4045 * FXRSTOR might restore XMM registers not provided by the guest. Fill
4046 * in the host registers (via FXSAVE) instead, so they won't be modified.
4047 * (preemption has to stay disabled until FXRSTOR).
4049 * Use noinline to keep the stack for other functions called by callers small.
4051 static noinline int fxregs_fixup(struct fxregs_state *fx_state,
4052 const size_t used_size)
4054 struct fxregs_state fx_tmp;
4057 rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp));
4058 memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size,
4059 __fxstate_size(16) - used_size);
4064 static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
4066 struct fxregs_state fx_state;
4070 rc = check_fxsr(ctxt);
4071 if (rc != X86EMUL_CONTINUE)
4074 size = fxstate_size(ctxt);
4075 rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
4076 if (rc != X86EMUL_CONTINUE)
4081 if (size < __fxstate_size(16)) {
4082 rc = fxregs_fixup(&fx_state, size);
4083 if (rc != X86EMUL_CONTINUE)
4087 if (fx_state.mxcsr >> 16) {
4088 rc = emulate_gp(ctxt, 0);
4092 if (rc == X86EMUL_CONTINUE)
4093 rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
4101 static int em_xsetbv(struct x86_emulate_ctxt *ctxt)
4105 eax = reg_read(ctxt, VCPU_REGS_RAX);
4106 edx = reg_read(ctxt, VCPU_REGS_RDX);
4107 ecx = reg_read(ctxt, VCPU_REGS_RCX);
4109 if (ctxt->ops->set_xcr(ctxt, ecx, ((u64)edx << 32) | eax))
4110 return emulate_gp(ctxt, 0);
4112 return X86EMUL_CONTINUE;
4115 static bool valid_cr(int nr)
4127 static int check_cr_access(struct x86_emulate_ctxt *ctxt)
4129 if (!valid_cr(ctxt->modrm_reg))
4130 return emulate_ud(ctxt);
4132 return X86EMUL_CONTINUE;
4135 static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
4139 ctxt->ops->get_dr(ctxt, 7, &dr7);
4141 /* Check if DR7.GD (general detect enable, bit 13) is set */
4142 return dr7 & (1 << 13);
4145 static int check_dr_read(struct x86_emulate_ctxt *ctxt)
4147 int dr = ctxt->modrm_reg;
4151 return emulate_ud(ctxt);
4153 cr4 = ctxt->ops->get_cr(ctxt, 4);
4154 if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
4155 return emulate_ud(ctxt);
4157 if (check_dr7_gd(ctxt)) {
4160 ctxt->ops->get_dr(ctxt, 6, &dr6);
4161 dr6 &= ~DR_TRAP_BITS;
4162 dr6 |= DR6_BD | DR6_ACTIVE_LOW;
4163 ctxt->ops->set_dr(ctxt, 6, dr6);
4164 return emulate_db(ctxt);
4167 return X86EMUL_CONTINUE;
4170 static int check_dr_write(struct x86_emulate_ctxt *ctxt)
4172 u64 new_val = ctxt->src.val64;
4173 int dr = ctxt->modrm_reg;
4175 if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
4176 return emulate_gp(ctxt, 0);
4178 return check_dr_read(ctxt);
4181 static int check_svme(struct x86_emulate_ctxt *ctxt)
4185 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
4187 if (!(efer & EFER_SVME))
4188 return emulate_ud(ctxt);
4190 return X86EMUL_CONTINUE;
4193 static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
4195 u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
4197 /* Valid physical address? */
4198 if (rax & 0xffff000000000000ULL)
4199 return emulate_gp(ctxt, 0);
4201 return check_svme(ctxt);
4204 static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
4206 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
4208 if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
4209 return emulate_ud(ctxt);
4211 return X86EMUL_CONTINUE;
4214 static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
4216 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
4217 u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
4220 * VMware allows access to these pseudo-PMCs even when read via RDPMC
4221 * in Ring3 when CR4.PCE=0.
4223 if (enable_vmware_backdoor && is_vmware_backdoor_pmc(rcx))
4224 return X86EMUL_CONTINUE;
4226 if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
4227 ctxt->ops->check_pmc(ctxt, rcx))
4228 return emulate_gp(ctxt, 0);
4230 return X86EMUL_CONTINUE;
4233 static int check_perm_in(struct x86_emulate_ctxt *ctxt)
4235 ctxt->dst.bytes = min(ctxt->dst.bytes, 4u);
4236 if (!emulator_io_permited(ctxt, ctxt->src.val, ctxt->dst.bytes))
4237 return emulate_gp(ctxt, 0);
4239 return X86EMUL_CONTINUE;
4242 static int check_perm_out(struct x86_emulate_ctxt *ctxt)
4244 ctxt->src.bytes = min(ctxt->src.bytes, 4u);
4245 if (!emulator_io_permited(ctxt, ctxt->dst.val, ctxt->src.bytes))
4246 return emulate_gp(ctxt, 0);
4248 return X86EMUL_CONTINUE;
4251 #define D(_y) { .flags = (_y) }
4252 #define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
4253 #define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
4254 .intercept = x86_intercept_##_i, .check_perm = (_p) }
4255 #define N D(NotImpl)
4256 #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
4257 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
4258 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
4259 #define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
4260 #define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) }
4261 #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
4262 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
4263 #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
4264 #define II(_f, _e, _i) \
4265 { .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
4266 #define IIP(_f, _e, _i, _p) \
4267 { .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
4268 .intercept = x86_intercept_##_i, .check_perm = (_p) }
4269 #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
4271 #define D2bv(_f) D((_f) | ByteOp), D(_f)
4272 #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
4273 #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e)
4274 #define F2bv(_f, _e) F((_f) | ByteOp, _e), F(_f, _e)
4275 #define I2bvIP(_f, _e, _i, _p) \
4276 IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)
4278 #define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \
4279 F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
4280 F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
4282 static const struct opcode group7_rm0[] = {
4284 I(SrcNone | Priv | EmulateOnUD, em_hypercall),
4288 static const struct opcode group7_rm1[] = {
4289 DI(SrcNone | Priv, monitor),
4290 DI(SrcNone | Priv, mwait),
4294 static const struct opcode group7_rm2[] = {
4296 II(ImplicitOps | Priv, em_xsetbv, xsetbv),
4300 static const struct opcode group7_rm3[] = {
4301 DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
4302 II(SrcNone | Prot | EmulateOnUD, em_hypercall, vmmcall),
4303 DIP(SrcNone | Prot | Priv, vmload, check_svme_pa),
4304 DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa),
4305 DIP(SrcNone | Prot | Priv, stgi, check_svme),
4306 DIP(SrcNone | Prot | Priv, clgi, check_svme),
4307 DIP(SrcNone | Prot | Priv, skinit, check_svme),
4308 DIP(SrcNone | Prot | Priv, invlpga, check_svme),
4311 static const struct opcode group7_rm7[] = {
4313 DIP(SrcNone, rdtscp, check_rdtsc),
4317 static const struct opcode group1[] = {
4319 F(Lock | PageTable, em_or),
4322 F(Lock | PageTable, em_and),
4328 static const struct opcode group1A[] = {
4329 I(DstMem | SrcNone | Mov | Stack | IncSP | TwoMemOp, em_pop), N, N, N, N, N, N, N,
4332 static const struct opcode group2[] = {
4333 F(DstMem | ModRM, em_rol),
4334 F(DstMem | ModRM, em_ror),
4335 F(DstMem | ModRM, em_rcl),
4336 F(DstMem | ModRM, em_rcr),
4337 F(DstMem | ModRM, em_shl),
4338 F(DstMem | ModRM, em_shr),
4339 F(DstMem | ModRM, em_shl),
4340 F(DstMem | ModRM, em_sar),
4343 static const struct opcode group3[] = {
4344 F(DstMem | SrcImm | NoWrite, em_test),
4345 F(DstMem | SrcImm | NoWrite, em_test),
4346 F(DstMem | SrcNone | Lock, em_not),
4347 F(DstMem | SrcNone | Lock, em_neg),
4348 F(DstXacc | Src2Mem, em_mul_ex),
4349 F(DstXacc | Src2Mem, em_imul_ex),
4350 F(DstXacc | Src2Mem, em_div_ex),
4351 F(DstXacc | Src2Mem, em_idiv_ex),
4354 static const struct opcode group4[] = {
4355 F(ByteOp | DstMem | SrcNone | Lock, em_inc),
4356 F(ByteOp | DstMem | SrcNone | Lock, em_dec),
4360 static const struct opcode group5[] = {
4361 F(DstMem | SrcNone | Lock, em_inc),
4362 F(DstMem | SrcNone | Lock, em_dec),
4363 I(SrcMem | NearBranch, em_call_near_abs),
4364 I(SrcMemFAddr | ImplicitOps, em_call_far),
4365 I(SrcMem | NearBranch, em_jmp_abs),
4366 I(SrcMemFAddr | ImplicitOps, em_jmp_far),
4367 I(SrcMem | Stack | TwoMemOp, em_push), D(Undefined),
4370 static const struct opcode group6[] = {
4371 II(Prot | DstMem, em_sldt, sldt),
4372 II(Prot | DstMem, em_str, str),
4373 II(Prot | Priv | SrcMem16, em_lldt, lldt),
4374 II(Prot | Priv | SrcMem16, em_ltr, ltr),
4378 static const struct group_dual group7 = { {
4379 II(Mov | DstMem, em_sgdt, sgdt),
4380 II(Mov | DstMem, em_sidt, sidt),
4381 II(SrcMem | Priv, em_lgdt, lgdt),
4382 II(SrcMem | Priv, em_lidt, lidt),
4383 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4384 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4385 II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
4391 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4392 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4396 static const struct opcode group8[] = {
4398 F(DstMem | SrcImmByte | NoWrite, em_bt),
4399 F(DstMem | SrcImmByte | Lock | PageTable, em_bts),
4400 F(DstMem | SrcImmByte | Lock, em_btr),
4401 F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
4405 * The "memory" destination is actually always a register, since we come
4406 * from the register case of group9.
4408 static const struct gprefix pfx_0f_c7_7 = {
4409 N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdpid),
4413 static const struct group_dual group9 = { {
4414 N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
4416 N, N, N, N, N, N, N,
4417 GP(0, &pfx_0f_c7_7),
4420 static const struct opcode group11[] = {
4421 I(DstMem | SrcImm | Mov | PageTable, em_mov),
4425 static const struct gprefix pfx_0f_ae_7 = {
4426 I(SrcMem | ByteOp, em_clflush), I(SrcMem | ByteOp, em_clflushopt), N, N,
4429 static const struct group_dual group15 = { {
4430 I(ModRM | Aligned16, em_fxsave),
4431 I(ModRM | Aligned16, em_fxrstor),
4432 N, N, N, N, N, GP(0, &pfx_0f_ae_7),
4434 N, N, N, N, N, N, N, N,
4437 static const struct gprefix pfx_0f_6f_0f_7f = {
4438 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
4441 static const struct instr_dual instr_dual_0f_2b = {
4445 static const struct gprefix pfx_0f_2b = {
4446 ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
4449 static const struct gprefix pfx_0f_10_0f_11 = {
4450 I(Unaligned, em_mov), I(Unaligned, em_mov), N, N,
4453 static const struct gprefix pfx_0f_28_0f_29 = {
4454 I(Aligned, em_mov), I(Aligned, em_mov), N, N,
4457 static const struct gprefix pfx_0f_e7 = {
4458 N, I(Sse, em_mov), N, N,
4461 static const struct escape escape_d9 = { {
4462 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstcw),
4465 N, N, N, N, N, N, N, N,
4467 N, N, N, N, N, N, N, N,
4469 N, N, N, N, N, N, N, N,
4471 N, N, N, N, N, N, N, N,
4473 N, N, N, N, N, N, N, N,
4475 N, N, N, N, N, N, N, N,
4477 N, N, N, N, N, N, N, N,
4479 N, N, N, N, N, N, N, N,
4482 static const struct escape escape_db = { {
4483 N, N, N, N, N, N, N, N,
4486 N, N, N, N, N, N, N, N,
4488 N, N, N, N, N, N, N, N,
4490 N, N, N, N, N, N, N, N,
4492 N, N, N, N, N, N, N, N,
4494 N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
4496 N, N, N, N, N, N, N, N,
4498 N, N, N, N, N, N, N, N,
4500 N, N, N, N, N, N, N, N,
4503 static const struct escape escape_dd = { {
4504 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstsw),
4507 N, N, N, N, N, N, N, N,
4509 N, N, N, N, N, N, N, N,
4511 N, N, N, N, N, N, N, N,
4513 N, N, N, N, N, N, N, N,
4515 N, N, N, N, N, N, N, N,
4517 N, N, N, N, N, N, N, N,
4519 N, N, N, N, N, N, N, N,
4521 N, N, N, N, N, N, N, N,
4524 static const struct instr_dual instr_dual_0f_c3 = {
4525 I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
4528 static const struct mode_dual mode_dual_63 = {
4529 N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd)
4532 static const struct opcode opcode_table[256] = {
4534 F6ALU(Lock, em_add),
4535 I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
4536 I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
4538 F6ALU(Lock | PageTable, em_or),
4539 I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
4542 F6ALU(Lock, em_adc),
4543 I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
4544 I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
4546 F6ALU(Lock, em_sbb),
4547 I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
4548 I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
4550 F6ALU(Lock | PageTable, em_and), N, N,
4552 F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
4554 F6ALU(Lock, em_xor), N, N,
4556 F6ALU(NoWrite, em_cmp), N, N,
4558 X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
4560 X8(I(SrcReg | Stack, em_push)),
4562 X8(I(DstReg | Stack, em_pop)),
4564 I(ImplicitOps | Stack | No64, em_pusha),
4565 I(ImplicitOps | Stack | No64, em_popa),
4566 N, MD(ModRM, &mode_dual_63),
4569 I(SrcImm | Mov | Stack, em_push),
4570 I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
4571 I(SrcImmByte | Mov | Stack, em_push),
4572 I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
4573 I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
4574 I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
4576 X16(D(SrcImmByte | NearBranch)),
4578 G(ByteOp | DstMem | SrcImm, group1),
4579 G(DstMem | SrcImm, group1),
4580 G(ByteOp | DstMem | SrcImm | No64, group1),
4581 G(DstMem | SrcImmByte, group1),
4582 F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
4583 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
4585 I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
4586 I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
4587 I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg),
4588 D(ModRM | SrcMem | NoAccess | DstReg),
4589 I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
4592 DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
4594 D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
4595 I(SrcImmFAddr | No64, em_call_far), N,
4596 II(ImplicitOps | Stack, em_pushf, pushf),
4597 II(ImplicitOps | Stack, em_popf, popf),
4598 I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
4600 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
4601 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
4602 I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov),
4603 F2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r),
4605 F2bv(DstAcc | SrcImm | NoWrite, em_test),
4606 I2bv(SrcAcc | DstDI | Mov | String, em_mov),
4607 I2bv(SrcSI | DstAcc | Mov | String, em_mov),
4608 F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
4610 X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
4612 X8(I(DstReg | SrcImm64 | Mov, em_mov)),
4614 G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
4615 I(ImplicitOps | NearBranch | SrcImmU16, em_ret_near_imm),
4616 I(ImplicitOps | NearBranch, em_ret),
4617 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
4618 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
4619 G(ByteOp, group11), G(0, group11),
4621 I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave),
4622 I(ImplicitOps | SrcImmU16, em_ret_far_imm),
4623 I(ImplicitOps, em_ret_far),
4624 D(ImplicitOps), DI(SrcImmByte, intn),
4625 D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
4627 G(Src2One | ByteOp, group2), G(Src2One, group2),
4628 G(Src2CL | ByteOp, group2), G(Src2CL, group2),
4629 I(DstAcc | SrcImmUByte | No64, em_aam),
4630 I(DstAcc | SrcImmUByte | No64, em_aad),
4631 F(DstAcc | ByteOp | No64, em_salc),
4632 I(DstAcc | SrcXLat | ByteOp, em_mov),
4634 N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
4636 X3(I(SrcImmByte | NearBranch, em_loop)),
4637 I(SrcImmByte | NearBranch, em_jcxz),
4638 I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in),
4639 I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
4641 I(SrcImm | NearBranch, em_call), D(SrcImm | ImplicitOps | NearBranch),
4642 I(SrcImmFAddr | No64, em_jmp_far),
4643 D(SrcImmByte | ImplicitOps | NearBranch),
4644 I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in),
4645 I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
4647 N, DI(ImplicitOps, icebp), N, N,
4648 DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
4649 G(ByteOp, group3), G(0, group3),
4651 D(ImplicitOps), D(ImplicitOps),
4652 I(ImplicitOps, em_cli), I(ImplicitOps, em_sti),
4653 D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
4656 static const struct opcode twobyte_table[256] = {
4658 G(0, group6), GD(0, &group7), N, N,
4659 N, I(ImplicitOps | EmulateOnUD, em_syscall),
4660 II(ImplicitOps | Priv, em_clts, clts), N,
4661 DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
4662 N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4664 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_10_0f_11),
4665 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_10_0f_11),
4667 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 4 * prefetch + 4 * reserved NOP */
4668 D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4669 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4670 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4671 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4672 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* NOP + 7 * reserved NOP */
4674 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_access),
4675 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
4676 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write,
4678 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write,
4681 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
4682 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
4683 N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b),
4686 II(ImplicitOps | Priv, em_wrmsr, wrmsr),
4687 IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
4688 II(ImplicitOps | Priv, em_rdmsr, rdmsr),
4689 IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
4690 I(ImplicitOps | EmulateOnUD, em_sysenter),
4691 I(ImplicitOps | Priv | EmulateOnUD, em_sysexit),
4693 N, N, N, N, N, N, N, N,
4695 X16(D(DstReg | SrcMem | ModRM)),
4697 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4702 N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
4707 N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
4709 X16(D(SrcImm | NearBranch)),
4711 X16(D(ByteOp | DstMem | SrcNone | ModRM | Mov)),
4713 I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
4714 II(ImplicitOps, em_cpuid, cpuid),
4715 F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
4716 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
4717 F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
4719 I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
4720 II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
4721 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
4722 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
4723 F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
4724 GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
4726 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg),
4727 I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
4728 F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
4729 I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
4730 I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
4731 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4735 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
4736 I(DstReg | SrcMem | ModRM, em_bsf_c),
4737 I(DstReg | SrcMem | ModRM, em_bsr_c),
4738 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4740 F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
4741 N, ID(0, &instr_dual_0f_c3),
4742 N, N, N, GD(0, &group9),
4744 X8(I(DstReg, em_bswap)),
4746 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4748 N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7),
4749 N, N, N, N, N, N, N, N,
4751 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
4754 static const struct instr_dual instr_dual_0f_38_f0 = {
4755 I(DstReg | SrcMem | Mov, em_movbe), N
4758 static const struct instr_dual instr_dual_0f_38_f1 = {
4759 I(DstMem | SrcReg | Mov, em_movbe), N
4762 static const struct gprefix three_byte_0f_38_f0 = {
4763 ID(0, &instr_dual_0f_38_f0), N, N, N
4766 static const struct gprefix three_byte_0f_38_f1 = {
4767 ID(0, &instr_dual_0f_38_f1), N, N, N
4771 * The instruction tables below are indexed by the third opcode byte; the mandatory prefix then selects the variant.
4774 static const struct opcode opcode_map_0f_38[256] = {
4776 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4778 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4780 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
4781 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
4802 static unsigned imm_size(struct x86_emulate_ctxt *ctxt)
4806 size = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4812 static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
4813 unsigned size, bool sign_extension)
4815 int rc = X86EMUL_CONTINUE;
4819 op->addr.mem.ea = ctxt->_eip;
4820 /* NB. Immediates are sign-extended as necessary. */
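/*
 * Example: an 8-bit immediate of 0x80 fetched as s8 sign-extends so the
 * upper bits of op->val become all ones; when sign_extension is false,
 * the masking further down trims op->val back to op->bytes.
 */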
4821 switch (op->bytes) {
4823 op->val = insn_fetch(s8, ctxt);
4826 op->val = insn_fetch(s16, ctxt);
4829 op->val = insn_fetch(s32, ctxt);
4832 op->val = insn_fetch(s64, ctxt);
4835 if (!sign_extension) {
4836 switch (op->bytes) {
4844 op->val &= 0xffffffff;
4852 static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4855 int rc = X86EMUL_CONTINUE;
4859 decode_register_operand(ctxt, op);
4862 rc = decode_imm(ctxt, op, 1, false);
4865 ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4869 if (ctxt->d & BitOp)
4870 fetch_bit_operand(ctxt);
4871 op->orig_val = op->val;
4874 ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8;
4878 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4879 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4880 fetch_register_operand(op);
4881 op->orig_val = op->val;
4885 op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes;
4886 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4887 fetch_register_operand(op);
4888 op->orig_val = op->val;
4891 if (ctxt->d & ByteOp) {
4896 op->bytes = ctxt->op_bytes;
4897 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4898 fetch_register_operand(op);
4899 op->orig_val = op->val;
4903 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4905 register_address(ctxt, VCPU_REGS_RDI);
4906 op->addr.mem.seg = VCPU_SREG_ES;
4913 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4914 fetch_register_operand(op);
4919 op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
4922 rc = decode_imm(ctxt, op, 1, true);
4930 rc = decode_imm(ctxt, op, imm_size(ctxt), true);
4933 rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
4936 ctxt->memop.bytes = 1;
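/*
 * With ModRM.mod == 3 the "memory" operand is really a register, so
 * re-decode it as a byte register (decode_register() picks the
 * AH..BH vs. REX SPL..DIL form as appropriate).
 */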
4937 if (ctxt->memop.type == OP_REG) {
4938 ctxt->memop.addr.reg = decode_register(ctxt,
4939 ctxt->modrm_rm, true);
4940 fetch_register_operand(&ctxt->memop);
4944 ctxt->memop.bytes = 2;
4947 ctxt->memop.bytes = 4;
4950 rc = decode_imm(ctxt, op, 2, false);
4953 rc = decode_imm(ctxt, op, imm_size(ctxt), false);
4957 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4959 register_address(ctxt, VCPU_REGS_RSI);
4960 op->addr.mem.seg = ctxt->seg_override;
4966 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4969 reg_read(ctxt, VCPU_REGS_RBX) +
4970 (reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
4971 op->addr.mem.seg = ctxt->seg_override;
4976 op->addr.mem.ea = ctxt->_eip;
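/*
 * A far-pointer immediate is an op_bytes-sized offset followed by a
 * 16-bit segment selector, hence the "+ 2" below.
 */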
4977 op->bytes = ctxt->op_bytes + 2;
4978 insn_fetch_arr(op->valptr, op->bytes, ctxt);
4981 ctxt->memop.bytes = ctxt->op_bytes + 2;
4985 op->val = VCPU_SREG_ES;
4989 op->val = VCPU_SREG_CS;
4993 op->val = VCPU_SREG_SS;
4997 op->val = VCPU_SREG_DS;
5001 op->val = VCPU_SREG_FS;
5005 op->val = VCPU_SREG_GS;
5008 /* Special instructions do their own operand decoding. */
5010 op->type = OP_NONE; /* Disable writeback. */
5018 int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type)
5020 int rc = X86EMUL_CONTINUE;
5021 int mode = ctxt->mode;
5022 int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
5023 bool op_prefix = false;
5024 bool has_seg_override = false;
5025 struct opcode opcode;
5027 struct desc_struct desc;
5029 ctxt->memop.type = OP_NONE;
5030 ctxt->memopp = NULL;
5031 ctxt->_eip = ctxt->eip;
5032 ctxt->fetch.ptr = ctxt->fetch.data;
5033 ctxt->fetch.end = ctxt->fetch.data + insn_len;
5034 ctxt->opcode_len = 1;
5035 ctxt->intercept = x86_intercept_none;
5037 memcpy(ctxt->fetch.data, insn, insn_len);
5039 rc = __do_insn_fetch_bytes(ctxt, 1);
5040 if (rc != X86EMUL_CONTINUE)
5045 case X86EMUL_MODE_REAL:
5046 case X86EMUL_MODE_VM86:
5047 def_op_bytes = def_ad_bytes = 2;
5048 ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS);
5050 def_op_bytes = def_ad_bytes = 4;
5052 case X86EMUL_MODE_PROT16:
5053 def_op_bytes = def_ad_bytes = 2;
5055 case X86EMUL_MODE_PROT32:
5056 def_op_bytes = def_ad_bytes = 4;
5058 #ifdef CONFIG_X86_64
5059 case X86EMUL_MODE_PROT64:
5065 return EMULATION_FAILED;
5068 ctxt->op_bytes = def_op_bytes;
5069 ctxt->ad_bytes = def_ad_bytes;
5071 /* Legacy prefixes. */
5073 switch (ctxt->b = insn_fetch(u8, ctxt)) {
5074 case 0x66: /* operand-size override */
5076 /* switch between 2/4 bytes */
5077 ctxt->op_bytes = def_op_bytes ^ 6;
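/* def_op_bytes is 2 or 4 here, and XOR with 6 toggles it: 2 ^ 6 == 4, 4 ^ 6 == 2. */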
5079 case 0x67: /* address-size override */
5080 if (mode == X86EMUL_MODE_PROT64)
5081 /* switch between 4/8 bytes */
5082 ctxt->ad_bytes = def_ad_bytes ^ 12;
5084 /* switch between 2/4 bytes */
5085 ctxt->ad_bytes = def_ad_bytes ^ 6;
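/* Same XOR toggle: 4 ^ 12 == 8 and 8 ^ 12 == 4 in long mode, 2 ^ 6 == 4 and 4 ^ 6 == 2 otherwise. */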
5087 case 0x26: /* ES override */
5088 has_seg_override = true;
5089 ctxt->seg_override = VCPU_SREG_ES;
5091 case 0x2e: /* CS override */
5092 has_seg_override = true;
5093 ctxt->seg_override = VCPU_SREG_CS;
5095 case 0x36: /* SS override */
5096 has_seg_override = true;
5097 ctxt->seg_override = VCPU_SREG_SS;
5099 case 0x3e: /* DS override */
5100 has_seg_override = true;
5101 ctxt->seg_override = VCPU_SREG_DS;
5103 case 0x64: /* FS override */
5104 has_seg_override = true;
5105 ctxt->seg_override = VCPU_SREG_FS;
5107 case 0x65: /* GS override */
5108 has_seg_override = true;
5109 ctxt->seg_override = VCPU_SREG_GS;
5111 case 0x40 ... 0x4f: /* REX */
5112 if (mode != X86EMUL_MODE_PROT64)
5114 ctxt->rex_prefix = ctxt->b;
5116 case 0xf0: /* LOCK */
5117 ctxt->lock_prefix = 1;
5119 case 0xf2: /* REPNE/REPNZ */
5120 case 0xf3: /* REP/REPE/REPZ */
5121 ctxt->rep_prefix = ctxt->b;
5127 /* Any legacy prefix after a REX prefix nullifies its effect. */
5129 ctxt->rex_prefix = 0;
5135 if (ctxt->rex_prefix & 8)
5136 ctxt->op_bytes = 8; /* REX.W */
5138 /* Opcode byte(s). */
5139 opcode = opcode_table[ctxt->b];
5140 /* Two-byte opcode? */
5141 if (ctxt->b == 0x0f) {
5142 ctxt->opcode_len = 2;
5143 ctxt->b = insn_fetch(u8, ctxt);
5144 opcode = twobyte_table[ctxt->b];
5146 /* 0F_38 opcode map */
5147 if (ctxt->b == 0x38) {
5148 ctxt->opcode_len = 3;
5149 ctxt->b = insn_fetch(u8, ctxt);
5150 opcode = opcode_map_0f_38[ctxt->b];
5153 ctxt->d = opcode.flags;
5155 if (ctxt->d & ModRM)
5156 ctxt->modrm = insn_fetch(u8, ctxt);
5158 /* VEX-prefixed instructions are not implemented */
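/*
 * Outside 64-bit mode, C4/C5 still encode LES/LDS; they only act as a
 * VEX prefix when ModRM.mod == 3 (an encoding LES/LDS cannot use),
 * which is what the mod check below distinguishes.
 */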
5159 if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
5160 (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) {
5164 while (ctxt->d & GroupMask) {
5165 switch (ctxt->d & GroupMask) {
5167 goffset = (ctxt->modrm >> 3) & 7;
5168 opcode = opcode.u.group[goffset];
5171 goffset = (ctxt->modrm >> 3) & 7;
5172 if ((ctxt->modrm >> 6) == 3)
5173 opcode = opcode.u.gdual->mod3[goffset];
5175 opcode = opcode.u.gdual->mod012[goffset];
5178 goffset = ctxt->modrm & 7;
5179 opcode = opcode.u.group[goffset];
5182 if (ctxt->rep_prefix && op_prefix)
5183 return EMULATION_FAILED;
5184 simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix;
5185 switch (simd_prefix) {
5186 case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
5187 case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
5188 case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
5189 case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
5193 if (ctxt->modrm > 0xbf) {
5194 size_t size = ARRAY_SIZE(opcode.u.esc->high);
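/*
 * modrm is guest-controlled; clamp the index so a mispredicted
 * bounds check cannot be used to read beyond high[] speculatively.
 */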
5195 u32 index = array_index_nospec(
5196 ctxt->modrm - 0xc0, size);
5198 opcode = opcode.u.esc->high[index];
5200 opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
5204 if ((ctxt->modrm >> 6) == 3)
5205 opcode = opcode.u.idual->mod3;
5207 opcode = opcode.u.idual->mod012;
5210 if (ctxt->mode == X86EMUL_MODE_PROT64)
5211 opcode = opcode.u.mdual->mode64;
5213 opcode = opcode.u.mdual->mode32;
5216 return EMULATION_FAILED;
5219 ctxt->d &= ~(u64)GroupMask;
5220 ctxt->d |= opcode.flags;
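/*
 * Keep the outer entry's flags, drop its group selector, and merge in
 * the resolved entry's flags; the loop repeats if those flags select
 * yet another group.
 */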
5225 return EMULATION_FAILED;
5227 ctxt->execute = opcode.u.execute;
5229 if (unlikely(emulation_type & EMULTYPE_TRAP_UD) &&
5230 likely(!(ctxt->d & EmulateOnUD)))
5231 return EMULATION_FAILED;
5233 if (unlikely(ctxt->d &
5234 (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
5237 * These are copied unconditionally here, and checked unconditionally
5238 * in x86_emulate_insn.
5240 ctxt->check_perm = opcode.check_perm;
5241 ctxt->intercept = opcode.intercept;
5243 if (ctxt->d & NotImpl)
5244 return EMULATION_FAILED;
5246 if (mode == X86EMUL_MODE_PROT64) {
5247 if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
5249 else if (ctxt->d & NearBranch)
5253 if (ctxt->d & Op3264) {
5254 if (mode == X86EMUL_MODE_PROT64)
5260 if ((ctxt->d & No16) && ctxt->op_bytes == 2)
5264 ctxt->op_bytes = 16;
5265 else if (ctxt->d & Mmx)
5269 /* ModRM and SIB bytes. */
5270 if (ctxt->d & ModRM) {
5271 rc = decode_modrm(ctxt, &ctxt->memop);
5272 if (!has_seg_override) {
5273 has_seg_override = true;
5274 ctxt->seg_override = ctxt->modrm_seg;
5276 } else if (ctxt->d & MemAbs)
5277 rc = decode_abs(ctxt, &ctxt->memop);
5278 if (rc != X86EMUL_CONTINUE)
5281 if (!has_seg_override)
5282 ctxt->seg_override = VCPU_SREG_DS;
5284 ctxt->memop.addr.mem.seg = ctxt->seg_override;
5287 * Decode and fetch the source operand: register, memory, or immediate.
5290 rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask);
5291 if (rc != X86EMUL_CONTINUE)
5295 * Decode and fetch the second source operand: register, memory, or immediate.
5298 rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask);
5299 if (rc != X86EMUL_CONTINUE)
5302 /* Decode and fetch the destination operand: register or memory. */
5303 rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
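/*
 * RIP-relative displacements were decoded before the total instruction
 * length was known; rebase the effective address on the final _eip now.
 */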
5305 if (ctxt->rip_relative && likely(ctxt->memopp))
5306 ctxt->memopp->addr.mem.ea = address_mask(ctxt,
5307 ctxt->memopp->addr.mem.ea + ctxt->_eip);
5310 if (rc == X86EMUL_PROPAGATE_FAULT)
5311 ctxt->have_exception = true;
5312 return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
5315 bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
5317 return ctxt->d & PageTable;
5320 static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
5322 /* The second termination condition applies only to REPE
5323 * and REPNE. If the repeat-string prefix is REPE/REPZ or
5324 * REPNE/REPNZ, also test the corresponding termination
5325 * condition:
5326 * - if REPE/REPZ and ZF = 0 then done
5327 * - if REPNE/REPNZ and ZF = 1 then done
5329 if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
5330 (ctxt->b == 0xae) || (ctxt->b == 0xaf))
5331 && (((ctxt->rep_prefix == REPE_PREFIX) &&
5332 ((ctxt->eflags & X86_EFLAGS_ZF) == 0))
5333 || ((ctxt->rep_prefix == REPNE_PREFIX) &&
5334 ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
5340 static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
5345 rc = asm_safe("fwait");
5348 if (unlikely(rc != X86EMUL_CONTINUE))
5349 return emulate_exception(ctxt, MF_VECTOR, 0, false);
5351 return X86EMUL_CONTINUE;
5354 static void fetch_possible_mmx_operand(struct operand *op)
5356 if (op->type == OP_MM)
5357 kvm_read_mmx_reg(op->addr.mm, &op->mm_val);
5360 static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop)
5362 ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
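/*
 * fastop calling convention, as the asm constraints below show: dst in
 * RAX, src in RDX, src2 in RCX, EFLAGS loaded from and saved back into
 * "flags" around the call.  Each fastop symbol provides byte/word/dword/
 * qword entry points FASTOP_SIZE apart, so non-byte ops skip ahead by
 * __ffs(dst.bytes) slots (1, 2 or 3).
 */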
5364 if (!(ctxt->d & ByteOp))
5365 fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
5367 asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
5368 : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
5369 [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
5370 : "c"(ctxt->src2.val));
5372 ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
5373 if (!fop) /* exception is returned in fop variable */
5374 return emulate_de(ctxt);
5375 return X86EMUL_CONTINUE;
5378 void init_decode_cache(struct x86_emulate_ctxt *ctxt)
5380 memset(&ctxt->rip_relative, 0,
5381 (void *)&ctxt->modrm - (void *)&ctxt->rip_relative);
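/*
 * Wipes the per-instruction decode state from rip_relative up to (but
 * not including) modrm in struct x86_emulate_ctxt; the explicit resets
 * below cover fields outside that range.
 */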
5383 ctxt->io_read.pos = 0;
5384 ctxt->io_read.end = 0;
5385 ctxt->mem_read.end = 0;
5388 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
5390 const struct x86_emulate_ops *ops = ctxt->ops;
5391 int rc = X86EMUL_CONTINUE;
5392 int saved_dst_type = ctxt->dst.type;
5393 unsigned emul_flags;
5395 ctxt->mem_read.pos = 0;
5397 /* LOCK prefix is allowed only with some instructions */
5398 if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
5399 rc = emulate_ud(ctxt);
5403 if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) {
5404 rc = emulate_ud(ctxt);
5408 emul_flags = ctxt->ops->get_hflags(ctxt);
5409 if (unlikely(ctxt->d &
5410 (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
5411 if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
5412 (ctxt->d & Undefined)) {
5413 rc = emulate_ud(ctxt);
5417 if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
5418 || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
5419 rc = emulate_ud(ctxt);
5423 if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
5424 rc = emulate_nm(ctxt);
5428 if (ctxt->d & Mmx) {
5429 rc = flush_pending_x87_faults(ctxt);
5430 if (rc != X86EMUL_CONTINUE)
5433 * Now that we know the FPU is exception-safe, we can fetch the operands from it.
5436 fetch_possible_mmx_operand(&ctxt->src);
5437 fetch_possible_mmx_operand(&ctxt->src2);
5438 if (!(ctxt->d & Mov))
5439 fetch_possible_mmx_operand(&ctxt->dst);
5442 if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) {
5443 rc = emulator_check_intercept(ctxt, ctxt->intercept,
5444 X86_ICPT_PRE_EXCEPT);
5445 if (rc != X86EMUL_CONTINUE)
5449 /* Instruction can only be executed in protected mode */
5450 if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
5451 rc = emulate_ud(ctxt);
5455 /* Privileged instructions can be executed only at CPL 0 */
5456 if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
5457 if (ctxt->d & PrivUD)
5458 rc = emulate_ud(ctxt);
5460 rc = emulate_gp(ctxt, 0);
5464 /* Do instruction-specific permission checks */
5465 if (ctxt->d & CheckPerm) {
5466 rc = ctxt->check_perm(ctxt);
5467 if (rc != X86EMUL_CONTINUE)
5471 if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
5472 rc = emulator_check_intercept(ctxt, ctxt->intercept,
5473 X86_ICPT_POST_EXCEPT);
5474 if (rc != X86EMUL_CONTINUE)
5478 if (ctxt->rep_prefix && (ctxt->d & String)) {
5479 /* All REP prefixes have the same first termination condition */
5480 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
5481 string_registers_quirk(ctxt);
5482 ctxt->eip = ctxt->_eip;
5483 ctxt->eflags &= ~X86_EFLAGS_RF;
5489 if ((ctxt->src.type == OP_MEM) && !(ctxt->d & NoAccess)) {
5490 rc = segmented_read(ctxt, ctxt->src.addr.mem,
5491 ctxt->src.valptr, ctxt->src.bytes);
5492 if (rc != X86EMUL_CONTINUE)
5494 ctxt->src.orig_val64 = ctxt->src.val64;
5497 if (ctxt->src2.type == OP_MEM) {
5498 rc = segmented_read(ctxt, ctxt->src2.addr.mem,
5499 &ctxt->src2.val, ctxt->src2.bytes);
5500 if (rc != X86EMUL_CONTINUE)
5504 if ((ctxt->d & DstMask) == ImplicitOps)
5508 if ((ctxt->dst.type == OP_MEM) && !(ctxt->d & Mov)) {
5509 /* optimisation - avoid slow emulated read if Mov */
5510 rc = segmented_read(ctxt, ctxt->dst.addr.mem,
5511 &ctxt->dst.val, ctxt->dst.bytes);
5512 if (rc != X86EMUL_CONTINUE) {
5513 if (!(ctxt->d & NoWrite) &&
5514 rc == X86EMUL_PROPAGATE_FAULT &&
5515 ctxt->exception.vector == PF_VECTOR)
5516 ctxt->exception.error_code |= PFERR_WRITE_MASK;
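/*
 * The read above stands in for the instruction's write to the
 * destination, so a #PF taken on it must be reported as a write fault.
 */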
5520 /* Copy full 64-bit value for CMPXCHG8B. */
5521 ctxt->dst.orig_val64 = ctxt->dst.val64;
5525 if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
5526 rc = emulator_check_intercept(ctxt, ctxt->intercept,
5527 X86_ICPT_POST_MEMACCESS);
5528 if (rc != X86EMUL_CONTINUE)
5532 if (ctxt->rep_prefix && (ctxt->d & String))
5533 ctxt->eflags |= X86_EFLAGS_RF;
5535 ctxt->eflags &= ~X86_EFLAGS_RF;
5537 if (ctxt->execute) {
5538 if (ctxt->d & Fastop)
5539 rc = fastop(ctxt, ctxt->fop);
5541 rc = ctxt->execute(ctxt);
5542 if (rc != X86EMUL_CONTINUE)
5547 if (ctxt->opcode_len == 2)
5549 else if (ctxt->opcode_len == 3)
5550 goto threebyte_insn;
5553 case 0x70 ... 0x7f: /* jcc (short) */
5554 if (test_cc(ctxt->b, ctxt->eflags))
5555 rc = jmp_rel(ctxt, ctxt->src.val);
5557 case 0x8d: /* lea r16/r32, m */
5558 ctxt->dst.val = ctxt->src.addr.mem.ea;
5560 case 0x90 ... 0x97: /* nop / xchg reg, rax */
5561 if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
5562 ctxt->dst.type = OP_NONE;
5566 case 0x98: /* cbw/cwde/cdqe */
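/*
 * Sign-extend the lower half of the accumulator in place:
 * AL -> AX (op_bytes 2), AX -> EAX (4), EAX -> RAX (8).
 */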
5567 switch (ctxt->op_bytes) {
5568 case 2: ctxt->dst.val = (s8)ctxt->dst.val; break;
5569 case 4: ctxt->dst.val = (s16)ctxt->dst.val; break;
5570 case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
5573 case 0xcc: /* int3 */
5574 rc = emulate_int(ctxt, 3);
5576 case 0xcd: /* int n */
5577 rc = emulate_int(ctxt, ctxt->src.val);
5579 case 0xce: /* into */
5580 if (ctxt->eflags & X86_EFLAGS_OF)
5581 rc = emulate_int(ctxt, 4);
5583 case 0xe9: /* jmp rel */
5584 case 0xeb: /* jmp rel short */
5585 rc = jmp_rel(ctxt, ctxt->src.val);
5586 ctxt->dst.type = OP_NONE; /* Disable writeback. */
5588 case 0xf4: /* hlt */
5589 ctxt->ops->halt(ctxt);
5591 case 0xf5: /* cmc */
5592 /* complement the carry flag in EFLAGS */
5593 ctxt->eflags ^= X86_EFLAGS_CF;
5595 case 0xf8: /* clc */
5596 ctxt->eflags &= ~X86_EFLAGS_CF;
5598 case 0xf9: /* stc */
5599 ctxt->eflags |= X86_EFLAGS_CF;
5601 case 0xfc: /* cld */
5602 ctxt->eflags &= ~X86_EFLAGS_DF;
5604 case 0xfd: /* std */
5605 ctxt->eflags |= X86_EFLAGS_DF;
5608 goto cannot_emulate;
5611 if (rc != X86EMUL_CONTINUE)
5615 if (ctxt->d & SrcWrite) {
5616 BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
5617 rc = writeback(ctxt, &ctxt->src);
5618 if (rc != X86EMUL_CONTINUE)
5621 if (!(ctxt->d & NoWrite)) {
5622 rc = writeback(ctxt, &ctxt->dst);
5623 if (rc != X86EMUL_CONTINUE)
5628 * Restore dst type in case the decoding is reused
5629 * (happens for string instructions).
5631 ctxt->dst.type = saved_dst_type;
5633 if ((ctxt->d & SrcMask) == SrcSI)
5634 string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);
5636 if ((ctxt->d & DstMask) == DstDI)
5637 string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);
5639 if (ctxt->rep_prefix && (ctxt->d & String)) {
5641 struct read_cache *r = &ctxt->io_read;
5642 if ((ctxt->d & SrcMask) == SrcSI)
5643 count = ctxt->src.count;
5645 count = ctxt->dst.count;
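/*
 * Wind RCX down by the iterations this pass handled (normally one,
 * more when the I/O read-ahead buffer batches string I/O).
 */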
5646 register_address_increment(ctxt, VCPU_REGS_RCX, -count);
5648 if (!string_insn_completed(ctxt)) {
5650 * Re-enter the guest when the PIO read-ahead buffer is empty
5651 * or, if it is not used, after every 1024 iterations.
5653 if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
5654 (r->end == 0 || r->end != r->pos)) {
5656 * Reset the read cache. This usually happens before
5657 * decode, but since the instruction is restarted
5658 * we have to do it here.
5660 ctxt->mem_read.end = 0;
5661 writeback_registers(ctxt);
5662 return EMULATION_RESTART;
5664 goto done; /* skip rip writeback */
5666 ctxt->eflags &= ~X86_EFLAGS_RF;
5669 ctxt->eip = ctxt->_eip;
5670 if (ctxt->mode != X86EMUL_MODE_PROT64)
5671 ctxt->eip = (u32)ctxt->_eip;
5674 if (rc == X86EMUL_PROPAGATE_FAULT) {
5675 WARN_ON(ctxt->exception.vector > 0x1f);
5676 ctxt->have_exception = true;
5678 if (rc == X86EMUL_INTERCEPTED)
5679 return EMULATION_INTERCEPTED;
5681 if (rc == X86EMUL_CONTINUE)
5682 writeback_registers(ctxt);
5684 return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
5688 case 0x09: /* wbinvd */
5689 (ctxt->ops->wbinvd)(ctxt);
5691 case 0x08: /* invd */
5692 case 0x0d: /* GrpP (prefetch) */
5693 case 0x18: /* Grp16 (prefetch/nop) */
5694 case 0x1f: /* nop */
5696 case 0x20: /* mov cr, reg */
5697 ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
5699 case 0x21: /* mov from dr to reg */
5700 ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
5702 case 0x40 ... 0x4f: /* cmov */
5703 if (test_cc(ctxt->b, ctxt->eflags))
5704 ctxt->dst.val = ctxt->src.val;
5705 else if (ctxt->op_bytes != 4)
5706 ctxt->dst.type = OP_NONE; /* no writeback */
5708 case 0x80 ... 0x8f: /* jcc (near) */
5709 if (test_cc(ctxt->b, ctxt->eflags))
5710 rc = jmp_rel(ctxt, ctxt->src.val);
5712 case 0x90 ... 0x9f: /* setcc r/m8 */
5713 ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
5715 case 0xb6 ... 0xb7: /* movzx */
5716 ctxt->dst.bytes = ctxt->op_bytes;
5717 ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
5718 : (u16) ctxt->src.val;
5720 case 0xbe ... 0xbf: /* movsx */
5721 ctxt->dst.bytes = ctxt->op_bytes;
5722 ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
5723 (s16) ctxt->src.val;
5726 goto cannot_emulate;
5731 if (rc != X86EMUL_CONTINUE)
5737 return EMULATION_FAILED;
5740 void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt)
5742 invalidate_registers(ctxt);
5745 void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
5747 writeback_registers(ctxt);
5750 bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt)
5752 if (ctxt->rep_prefix && (ctxt->d & String))
5755 if (ctxt->d & TwoMemOp)