1 // SPDX-License-Identifier: GPL-2.0-only
2 /******************************************************************************
5 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
7 * Copyright (c) 2005 Keir Fraser
9 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
10 * privileged instructions:
12 * Copyright (C) 2006 Qumranet
13 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
15 * Avi Kivity <avi@qumranet.com>
16 * Yaniv Kamay <yaniv@qumranet.com>
18 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
20 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22 #include <linux/kvm_host.h>
23 #include "kvm_cache_regs.h"
24 #include "kvm_emulate.h"
25 #include <linux/stringify.h>
26 #include <asm/debugreg.h>
27 #include <asm/nospec-branch.h>
39 #define OpImplicit 1ull /* No generic decode */
40 #define OpReg 2ull /* Register */
41 #define OpMem 3ull /* Memory */
42 #define OpAcc 4ull /* Accumulator: AL/AX/EAX/RAX */
43 #define OpDI 5ull /* ES:DI/EDI/RDI */
44 #define OpMem64 6ull /* Memory, 64-bit */
45 #define OpImmUByte 7ull /* Zero-extended 8-bit immediate */
46 #define OpDX 8ull /* DX register */
47 #define OpCL 9ull /* CL register (for shifts) */
48 #define OpImmByte 10ull /* 8-bit sign extended immediate */
49 #define OpOne 11ull /* Implied 1 */
50 #define OpImm 12ull /* Sign extended up to 32-bit immediate */
51 #define OpMem16 13ull /* Memory operand (16-bit). */
52 #define OpMem32 14ull /* Memory operand (32-bit). */
53 #define OpImmU 15ull /* Immediate operand, zero extended */
54 #define OpSI 16ull /* SI/ESI/RSI */
55 #define OpImmFAddr 17ull /* Immediate far address */
56 #define OpMemFAddr 18ull /* Far address in memory */
57 #define OpImmU16 19ull /* Immediate operand, 16 bits, zero extended */
58 #define OpES 20ull /* ES */
59 #define OpCS 21ull /* CS */
60 #define OpSS 22ull /* SS */
61 #define OpDS 23ull /* DS */
62 #define OpFS 24ull /* FS */
63 #define OpGS 25ull /* GS */
64 #define OpMem8 26ull /* 8-bit zero extended memory operand */
65 #define OpImm64 27ull /* Sign extended 16/32/64-bit immediate */
66 #define OpXLat 28ull /* memory at BX/EBX/RBX + zero-extended AL */
67 #define OpAccLo 29ull /* Low part of extended acc (AL/AX/EAX/RAX) */
68 #define OpAccHi 30ull /* High part of extended acc (-/DX/EDX/RDX) */
70 #define OpBits 5 /* Width of operand field */
71 #define OpMask ((1ull << OpBits) - 1)
74 * Opcode effective-address decode tables.
75 * Note that we only emulate instructions that have at least one memory
76 * operand (excluding implicit stack references). We assume that stack
77 * references and instruction fetches will never occur in special memory
78 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need not be handled.
82 /* Operand sizes: 8-bit operands or specified/overridden size. */
83 #define ByteOp (1<<0) /* 8-bit operands. */
84 /* Destination operand type. */
86 #define ImplicitOps (OpImplicit << DstShift)
87 #define DstReg (OpReg << DstShift)
88 #define DstMem (OpMem << DstShift)
89 #define DstAcc (OpAcc << DstShift)
90 #define DstDI (OpDI << DstShift)
91 #define DstMem64 (OpMem64 << DstShift)
92 #define DstMem16 (OpMem16 << DstShift)
93 #define DstImmUByte (OpImmUByte << DstShift)
94 #define DstDX (OpDX << DstShift)
95 #define DstAccLo (OpAccLo << DstShift)
96 #define DstMask (OpMask << DstShift)
97 /* Source operand type. */
99 #define SrcNone (OpNone << SrcShift)
100 #define SrcReg (OpReg << SrcShift)
101 #define SrcMem (OpMem << SrcShift)
102 #define SrcMem16 (OpMem16 << SrcShift)
103 #define SrcMem32 (OpMem32 << SrcShift)
104 #define SrcImm (OpImm << SrcShift)
105 #define SrcImmByte (OpImmByte << SrcShift)
106 #define SrcOne (OpOne << SrcShift)
107 #define SrcImmUByte (OpImmUByte << SrcShift)
108 #define SrcImmU (OpImmU << SrcShift)
109 #define SrcSI (OpSI << SrcShift)
110 #define SrcXLat (OpXLat << SrcShift)
111 #define SrcImmFAddr (OpImmFAddr << SrcShift)
112 #define SrcMemFAddr (OpMemFAddr << SrcShift)
113 #define SrcAcc (OpAcc << SrcShift)
114 #define SrcImmU16 (OpImmU16 << SrcShift)
115 #define SrcImm64 (OpImm64 << SrcShift)
116 #define SrcDX (OpDX << SrcShift)
117 #define SrcMem8 (OpMem8 << SrcShift)
118 #define SrcAccHi (OpAccHi << SrcShift)
119 #define SrcMask (OpMask << SrcShift)
120 #define BitOp (1<<11)
121 #define MemAbs (1<<12) /* Memory operand is absolute displacement */
122 #define String (1<<13) /* String instruction (rep capable) */
123 #define Stack (1<<14) /* Stack instruction (push/pop) */
124 #define GroupMask (7<<15) /* Opcode uses one of the group mechanisms */
125 #define Group (1<<15) /* Bits 3:5 of modrm byte extend opcode */
126 #define GroupDual (2<<15) /* Alternate decoding of mod == 3 */
127 #define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */
128 #define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */
129 #define Escape (5<<15) /* Escape to coprocessor instruction */
130 #define InstrDual (6<<15) /* Alternate instruction decoding of mod == 3 */
131 #define ModeDual (7<<15) /* Different instruction for 32/64 bit */
132 #define Sse (1<<18) /* SSE Vector instruction */
133 /* Generic ModRM decode. */
134 #define ModRM (1<<19)
135 /* Destination is only written; never read. */
138 #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */
139 #define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */
140 #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
141 #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */
142 #define Undefined (1<<25) /* No Such Instruction */
143 #define Lock (1<<26) /* lock prefix is allowed for the instruction */
144 #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */
146 #define PageTable (1 << 29) /* instruction used to write page table */
147 #define NotImpl (1 << 30) /* instruction is not implemented */
148 /* Source 2 operand type */
149 #define Src2Shift (31)
150 #define Src2None (OpNone << Src2Shift)
151 #define Src2Mem (OpMem << Src2Shift)
152 #define Src2CL (OpCL << Src2Shift)
153 #define Src2ImmByte (OpImmByte << Src2Shift)
154 #define Src2One (OpOne << Src2Shift)
155 #define Src2Imm (OpImm << Src2Shift)
156 #define Src2ES (OpES << Src2Shift)
157 #define Src2CS (OpCS << Src2Shift)
158 #define Src2SS (OpSS << Src2Shift)
159 #define Src2DS (OpDS << Src2Shift)
160 #define Src2FS (OpFS << Src2Shift)
161 #define Src2GS (OpGS << Src2Shift)
162 #define Src2Mask (OpMask << Src2Shift)
163 #define Mmx ((u64)1 << 40) /* MMX Vector instruction */
164 #define AlignMask ((u64)7 << 41)
165 #define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */
166 #define Unaligned ((u64)2 << 41) /* Explicitly unaligned (e.g. MOVDQU) */
167 #define Avx ((u64)3 << 41) /* Advanced Vector Extensions */
168 #define Aligned16 ((u64)4 << 41) /* Aligned to 16 byte boundary (e.g. FXSAVE) */
169 #define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */
170 #define NoWrite ((u64)1 << 45) /* No writeback */
171 #define SrcWrite ((u64)1 << 46) /* Write back src operand */
172 #define NoMod ((u64)1 << 47) /* Mod field is ignored */
173 #define Intercept ((u64)1 << 48) /* Has valid intercept field */
174 #define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */
175 #define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */
176 #define NearBranch ((u64)1 << 52) /* Near branches */
177 #define No16 ((u64)1 << 53) /* No 16 bit operand */
178 #define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */
179 #define TwoMemOp ((u64)1 << 55) /* Instruction has two memory operands */
180 #define IsBranch ((u64)1 << 56) /* Instruction is considered a branch. */
182 #define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
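/*
 * Illustrative sketch, not part of the emulator: the Op* descriptors above
 * are packed into the single u64 opcode flags word in 5-bit (OpBits) wide
 * fields at their respective shift offsets (Src2Shift for the second source
 * operand).  Recovering an operand type is therefore just a shift and a
 * mask, e.g.:
 */
#if 0	/* example only, never compiled */
static inline u64 example_src2_optype(u64 flags)
{
	return (flags >> Src2Shift) & OpMask;	/* yields one of the Op* values */
}
#endif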
184 #define X2(x...) x, x
185 #define X3(x...) X2(x), x
186 #define X4(x...) X2(x), X2(x)
187 #define X5(x...) X4(x), x
188 #define X6(x...) X4(x), X2(x)
189 #define X7(x...) X4(x), X3(x)
190 #define X8(x...) X4(x), X4(x)
191 #define X16(x...) X8(x), X8(x)
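/*
 * Illustration only: the X-macros simply repeat their argument, which keeps
 * the opcode tables later in this file compact.  For instance, X4(0) expands
 * to the token sequence "0, 0, 0, 0", as this (never compiled) sanity check
 * shows:
 */
#if 0	/* example only */
static const int x4_demo[] = { X4(0) };
static_assert(ARRAY_SIZE(x4_demo) == 4);
#endif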
198 int (*execute)(struct x86_emulate_ctxt *ctxt);
199 const struct opcode *group;
200 const struct group_dual *gdual;
201 const struct gprefix *gprefix;
202 const struct escape *esc;
203 const struct instr_dual *idual;
204 const struct mode_dual *mdual;
205 void (*fastop)(struct fastop *fake);
207 int (*check_perm)(struct x86_emulate_ctxt *ctxt);
211 struct opcode mod012[8];
212 struct opcode mod3[8];
216 struct opcode pfx_no;
217 struct opcode pfx_66;
218 struct opcode pfx_f2;
219 struct opcode pfx_f3;
224 struct opcode high[64];
228 struct opcode mod012;
233 struct opcode mode32;
234 struct opcode mode64;
237 #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
239 enum x86_transfer_type {
241 X86_TRANSFER_CALL_JMP,
243 X86_TRANSFER_TASK_SWITCH,
246 static void writeback_registers(struct x86_emulate_ctxt *ctxt)
248 unsigned long dirty = ctxt->regs_dirty;
251 for_each_set_bit(reg, &dirty, NR_EMULATOR_GPRS)
252 ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
255 static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
257 ctxt->regs_dirty = 0;
258 ctxt->regs_valid = 0;
262 * These EFLAGS bits are restored from the saved value during emulation, and
263 * any changes are written back to the saved value after emulation.
265 #define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
266 X86_EFLAGS_PF|X86_EFLAGS_CF)
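/*
 * Minimal sketch (assumed helper, not taken from this file): merging the
 * arithmetic flags produced by an emulated operation back into the guest's
 * EFLAGS image follows the usual mask pattern described above:
 */
#if 0	/* illustration only */
static inline unsigned long example_merge_flags(unsigned long guest_eflags,
						unsigned long new_flags)
{
	return (guest_eflags & ~EFLAGS_MASK) | (new_flags & EFLAGS_MASK);
}
#endif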
275 * fastop functions have a special calling convention:
280 * flags: rflags (in/out)
281 * ex: rsi (in:fastop pointer, out:zero if exception)
283 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
284 * different operand sizes can be reached by calculation, rather than a jump
285 * table (which would be bigger than the code).
287 * The 16 byte alignment, considering 5 bytes for the RET thunk, 3 for ENDBR
288 * and 1 for the straight line speculation INT3, leaves 7 bytes for the
289 * body of the function. Currently none is larger than 4.
291 static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
293 #define FASTOP_SIZE 16
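/*
 * Illustrative sketch, not the real dispatch code: because every fastop body
 * is aligned to and fits within FASTOP_SIZE bytes, the handler for a given
 * operand size (or, for SETcc, a given condition code - see test_cc() below)
 * can be located with plain pointer arithmetic instead of a jump table:
 */
#if 0	/* hypothetical helper, for illustration only */
static void *example_fastop_entry(void *base, unsigned int idx)
{
	/* idx selects the byte/word/dword/qword variant of the operation */
	return base + FASTOP_SIZE * idx;
}
#endif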
295 #define __FOP_FUNC(name) \
296 ".align " __stringify(FASTOP_SIZE) " \n\t" \
297 ".type " name ", @function \n\t" \
302 #define FOP_FUNC(name) \
305 #define __FOP_RET(name) \
307 ".size " name ", .-" name "\n\t"
309 #define FOP_RET(name) \
312 #define __FOP_START(op, align) \
313 extern void em_##op(struct fastop *fake); \
314 asm(".pushsection .text, \"ax\" \n\t" \
315 ".global em_" #op " \n\t" \
316 ".align " __stringify(align) " \n\t" \
319 #define FOP_START(op) __FOP_START(op, FASTOP_SIZE)
324 #define __FOPNOP(name) \
329 __FOPNOP(__stringify(__UNIQUE_ID(nop)))
331 #define FOP1E(op, dst) \
332 __FOP_FUNC(#op "_" #dst) \
333 "10: " #op " %" #dst " \n\t" \
334 __FOP_RET(#op "_" #dst)
336 #define FOP1EEX(op, dst) \
337 FOP1E(op, dst) _ASM_EXTABLE_TYPE_REG(10b, 11b, EX_TYPE_ZERO_REG, %%esi)
339 #define FASTOP1(op) \
344 ON64(FOP1E(op##q, rax)) \
347 /* 1-operand, using src2 (for MUL/DIV r/m) */
348 #define FASTOP1SRC2(op, name) \
353 ON64(FOP1E(op, rcx)) \
356 /* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */
357 #define FASTOP1SRC2EX(op, name) \
362 ON64(FOP1EEX(op, rcx)) \
365 #define FOP2E(op, dst, src) \
366 __FOP_FUNC(#op "_" #dst "_" #src) \
367 #op " %" #src ", %" #dst " \n\t" \
368 __FOP_RET(#op "_" #dst "_" #src)
370 #define FASTOP2(op) \
372 FOP2E(op##b, al, dl) \
373 FOP2E(op##w, ax, dx) \
374 FOP2E(op##l, eax, edx) \
375 ON64(FOP2E(op##q, rax, rdx)) \
378 /* 2 operand, word only */
379 #define FASTOP2W(op) \
382 FOP2E(op##w, ax, dx) \
383 FOP2E(op##l, eax, edx) \
384 ON64(FOP2E(op##q, rax, rdx)) \
387 /* 2 operand, src is CL */
388 #define FASTOP2CL(op) \
390 FOP2E(op##b, al, cl) \
391 FOP2E(op##w, ax, cl) \
392 FOP2E(op##l, eax, cl) \
393 ON64(FOP2E(op##q, rax, cl)) \
396 /* 2 operand, src and dest are reversed */
397 #define FASTOP2R(op, name) \
399 FOP2E(op##b, dl, al) \
400 FOP2E(op##w, dx, ax) \
401 FOP2E(op##l, edx, eax) \
402 ON64(FOP2E(op##q, rdx, rax)) \
405 #define FOP3E(op, dst, src, src2) \
406 __FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
407 #op " %" #src2 ", %" #src ", %" #dst " \n\t"\
408 __FOP_RET(#op "_" #dst "_" #src "_" #src2)
410 /* 3-operand, word-only, src2=cl */
411 #define FASTOP3WCL(op) \
414 FOP3E(op##w, ax, dx, cl) \
415 FOP3E(op##l, eax, edx, cl) \
416 ON64(FOP3E(op##q, rax, rdx, cl)) \
419 /* Special case for SETcc - 1 instruction per cc */
420 #define FOP_SETCC(op) \
446 "pushf; sbb %al, %al; popf \n\t"
451 * XXX: inoutclob user must know where the argument is being expanded.
452 * Using asm goto would allow us to remove _fault.
454 #define asm_safe(insn, inoutclob...) \
458 asm volatile("1:" insn "\n" \
460 _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_ONE_REG, %[_fault]) \
461 : [_fault] "+r"(_fault) inoutclob ); \
463 _fault ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; \
466 static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
467 enum x86_intercept intercept,
468 enum x86_intercept_stage stage)
470 struct x86_instruction_info info = {
471 .intercept = intercept,
472 .rep_prefix = ctxt->rep_prefix,
473 .modrm_mod = ctxt->modrm_mod,
474 .modrm_reg = ctxt->modrm_reg,
475 .modrm_rm = ctxt->modrm_rm,
476 .src_val = ctxt->src.val64,
477 .dst_val = ctxt->dst.val64,
478 .src_bytes = ctxt->src.bytes,
479 .dst_bytes = ctxt->dst.bytes,
480 .ad_bytes = ctxt->ad_bytes,
481 .next_rip = ctxt->eip,
484 return ctxt->ops->intercept(ctxt, &info, stage);
487 static void assign_masked(ulong *dest, ulong src, ulong mask)
489 *dest = (*dest & ~mask) | (src & mask);
492 static void assign_register(unsigned long *reg, u64 val, int bytes)
494 /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
497 *(u8 *)reg = (u8)val;
500 *(u16 *)reg = (u16)val;
504 break; /* 64b: zero-extend */
511 static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
513 return (1UL << (ctxt->ad_bytes << 3)) - 1;
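/*
 * Worked example (illustrative): ad_bytes == 2 yields 0xffff and
 * ad_bytes == 4 yields 0xffffffff.  The native 8-byte case would shift by
 * the full word width, so callers such as address_mask() below take care
 * not to use ad_mask() for it.
 */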
516 static ulong stack_mask(struct x86_emulate_ctxt *ctxt)
519 struct desc_struct ss;
521 if (ctxt->mode == X86EMUL_MODE_PROT64)
523 ctxt->ops->get_segment(ctxt, &sel, &ss, NULL, VCPU_SREG_SS);
524 return ~0U >> ((ss.d ^ 1) * 16); /* d=0: 0xffff; d=1: 0xffffffff */
527 static int stack_size(struct x86_emulate_ctxt *ctxt)
529 return (__fls(stack_mask(ctxt)) + 1) >> 3;
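/*
 * Worked example (illustrative): a 16-bit stack (mask 0xffff) gives
 * (15 + 1) >> 3 = 2 bytes, a 32-bit stack gives 4, and a 64-bit stack
 * gives 8.
 */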
532 /* Access/update address held in a register, based on addressing mode. */
533 static inline unsigned long
534 address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
536 if (ctxt->ad_bytes == sizeof(unsigned long))
539 return reg & ad_mask(ctxt);
542 static inline unsigned long
543 register_address(struct x86_emulate_ctxt *ctxt, int reg)
545 return address_mask(ctxt, reg_read(ctxt, reg));
548 static void masked_increment(ulong *reg, ulong mask, int inc)
550 assign_masked(reg, *reg + inc, mask);
554 register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
556 ulong *preg = reg_rmw(ctxt, reg);
558 assign_register(preg, *preg + inc, ctxt->ad_bytes);
561 static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
563 masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
566 static u32 desc_limit_scaled(struct desc_struct *desc)
568 u32 limit = get_desc_limit(desc);
570 return desc->g ? (limit << 12) | 0xfff : limit;
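/*
 * Worked example (illustrative): with desc->g set the limit is counted in
 * 4KiB pages, so a raw limit of 0xABCDE scales to
 * (0xABCDE << 12) | 0xfff = 0xABCDEFFF; with desc->g clear it is used
 * unchanged as a byte count.
 */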
573 static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
575 if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
578 return ctxt->ops->get_cached_segment_base(ctxt, seg);
581 static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
582 u32 error, bool valid)
584 if (KVM_EMULATOR_BUG_ON(vec > 0x1f, ctxt))
585 return X86EMUL_UNHANDLEABLE;
587 ctxt->exception.vector = vec;
588 ctxt->exception.error_code = error;
589 ctxt->exception.error_code_valid = valid;
590 return X86EMUL_PROPAGATE_FAULT;
593 static int emulate_db(struct x86_emulate_ctxt *ctxt)
595 return emulate_exception(ctxt, DB_VECTOR, 0, false);
598 static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
600 return emulate_exception(ctxt, GP_VECTOR, err, true);
603 static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
605 return emulate_exception(ctxt, SS_VECTOR, err, true);
608 static int emulate_ud(struct x86_emulate_ctxt *ctxt)
610 return emulate_exception(ctxt, UD_VECTOR, 0, false);
613 static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
615 return emulate_exception(ctxt, TS_VECTOR, err, true);
618 static int emulate_de(struct x86_emulate_ctxt *ctxt)
620 return emulate_exception(ctxt, DE_VECTOR, 0, false);
623 static int emulate_nm(struct x86_emulate_ctxt *ctxt)
625 return emulate_exception(ctxt, NM_VECTOR, 0, false);
628 static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
631 struct desc_struct desc;
633 ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
637 static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
642 struct desc_struct desc;
644 ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
645 ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
648 static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt)
650 return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48;
653 static inline bool emul_is_noncanonical_address(u64 la,
654 struct x86_emulate_ctxt *ctxt)
656 return !__is_canonical_address(la, ctxt_virt_addr_bits(ctxt));
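/*
 * Worked example (illustrative): with 48 virtual-address bits an address is
 * canonical iff bits 63:48 are copies of bit 47, so 0x00007fffffffffff
 * passes while 0x0000800000000000 does not and is rejected by the checks
 * below.
 */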
660 * x86 defines three classes of vector instructions: explicitly
661 * aligned, explicitly unaligned, and the rest, which change behaviour
662 * depending on whether they're AVX encoded or not.
664 * Also included is CMPXCHG16B which is not a vector instruction, yet it is
665 * subject to the same check. FXSAVE and FXRSTOR are checked here too as their
666 * 512 bytes of data must be aligned to a 16 byte boundary.
668 static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
670 u64 alignment = ctxt->d & AlignMask;
672 if (likely(size < 16))
687 static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
688 struct segmented_address addr,
689 unsigned *max_size, unsigned size,
690 bool write, bool fetch,
691 enum x86emul_mode mode, ulong *linear)
693 struct desc_struct desc;
700 la = seg_base(ctxt, addr.seg) + addr.ea;
703 case X86EMUL_MODE_PROT64:
705 va_bits = ctxt_virt_addr_bits(ctxt);
706 if (!__is_canonical_address(la, va_bits))
709 *max_size = min_t(u64, ~0u, (1ull << va_bits) - la);
710 if (size > *max_size)
714 *linear = la = (u32)la;
715 usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
719 /* code segment in protected mode or read-only data segment */
720 if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
721 || !(desc.type & 2)) && write)
723 /* unreadable code segment */
724 if (!fetch && (desc.type & 8) && !(desc.type & 2))
726 lim = desc_limit_scaled(&desc);
727 if (!(desc.type & 8) && (desc.type & 4)) {
728 /* expand-down segment */
731 lim = desc.d ? 0xffffffff : 0xffff;
735 if (lim == 0xffffffff)
738 *max_size = (u64)lim + 1 - addr.ea;
739 if (size > *max_size)
744 if (la & (insn_alignment(ctxt, size) - 1))
745 return emulate_gp(ctxt, 0);
746 return X86EMUL_CONTINUE;
748 if (addr.seg == VCPU_SREG_SS)
749 return emulate_ss(ctxt, 0);
751 return emulate_gp(ctxt, 0);
754 static int linearize(struct x86_emulate_ctxt *ctxt,
755 struct segmented_address addr,
756 unsigned size, bool write,
760 return __linearize(ctxt, addr, &max_size, size, write, false,
764 static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
769 struct segmented_address addr = { .seg = VCPU_SREG_CS,
772 if (ctxt->op_bytes != sizeof(unsigned long))
773 addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
774 rc = __linearize(ctxt, addr, &max_size, 1, false, true, ctxt->mode, &linear);
775 if (rc == X86EMUL_CONTINUE)
776 ctxt->_eip = addr.ea;
780 static inline int emulator_recalc_and_set_mode(struct x86_emulate_ctxt *ctxt)
783 struct desc_struct cs;
787 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
789 if (!(ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PE)) {
790 /* Real mode. cpu must not have long mode active */
792 return X86EMUL_UNHANDLEABLE;
793 ctxt->mode = X86EMUL_MODE_REAL;
794 return X86EMUL_CONTINUE;
797 if (ctxt->eflags & X86_EFLAGS_VM) {
798 /* Protected/VM86 mode. cpu must not have long mode active */
800 return X86EMUL_UNHANDLEABLE;
801 ctxt->mode = X86EMUL_MODE_VM86;
802 return X86EMUL_CONTINUE;
805 if (!ctxt->ops->get_segment(ctxt, &selector, &cs, &base3, VCPU_SREG_CS))
806 return X86EMUL_UNHANDLEABLE;
808 if (efer & EFER_LMA) {
810 /* Proper long mode */
811 ctxt->mode = X86EMUL_MODE_PROT64;
813 /* 32-bit compatibility mode */
814 ctxt->mode = X86EMUL_MODE_PROT32;
816 ctxt->mode = X86EMUL_MODE_PROT16;
819 /* Legacy 32 bit / 16 bit mode */
820 ctxt->mode = cs.d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
823 return X86EMUL_CONTINUE;
826 static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
828 return assign_eip(ctxt, dst);
831 static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst)
833 int rc = emulator_recalc_and_set_mode(ctxt);
835 if (rc != X86EMUL_CONTINUE)
838 return assign_eip(ctxt, dst);
841 static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
843 return assign_eip_near(ctxt, ctxt->_eip + rel);
846 static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear,
847 void *data, unsigned size)
849 return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, true);
852 static int linear_write_system(struct x86_emulate_ctxt *ctxt,
853 ulong linear, void *data,
856 return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, true);
859 static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
860 struct segmented_address addr,
867 rc = linearize(ctxt, addr, size, false, &linear);
868 if (rc != X86EMUL_CONTINUE)
870 return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, false);
873 static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
874 struct segmented_address addr,
881 rc = linearize(ctxt, addr, size, true, &linear);
882 if (rc != X86EMUL_CONTINUE)
884 return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, false);
888 * Prefetch the remaining bytes of the instruction without crossing page
889 * boundary if they are not in fetch_cache yet.
891 static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
894 unsigned size, max_size;
895 unsigned long linear;
896 int cur_size = ctxt->fetch.end - ctxt->fetch.data;
897 struct segmented_address addr = { .seg = VCPU_SREG_CS,
898 .ea = ctxt->eip + cur_size };
901 * We do not know exactly how many bytes will be needed, and
902 * __linearize is expensive, so fetch as much as possible. We
903 * just have to avoid going beyond the 15 byte limit, the end
904 * of the segment, or the end of the page.
906 * __linearize is called with size 0 so that it does not do any
907 * boundary check itself. Instead, we use max_size to check against op_size.
910 rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
912 if (unlikely(rc != X86EMUL_CONTINUE))
915 size = min_t(unsigned, 15UL ^ cur_size, max_size);
916 size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));
919 * One instruction can only straddle two pages,
920 * and one has been loaded at the beginning of
921 * x86_decode_insn. So, if there still are not enough bytes, we must
922 * have hit the 15-byte limit.
924 if (unlikely(size < op_size))
925 return emulate_gp(ctxt, 0);
927 rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
928 size, &ctxt->exception);
929 if (unlikely(rc != X86EMUL_CONTINUE))
931 ctxt->fetch.end += size;
932 return X86EMUL_CONTINUE;
935 static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
938 unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;
940 if (unlikely(done_size < size))
941 return __do_insn_fetch_bytes(ctxt, size - done_size);
943 return X86EMUL_CONTINUE;
946 /* Fetch next part of the instruction being emulated. */
947 #define insn_fetch(_type, _ctxt) \
950 rc = do_insn_fetch_bytes(_ctxt, sizeof(_type)); \
951 if (rc != X86EMUL_CONTINUE) \
953 ctxt->_eip += sizeof(_type); \
954 memcpy(&_x, ctxt->fetch.ptr, sizeof(_type)); \
955 ctxt->fetch.ptr += sizeof(_type); \
959 #define insn_fetch_arr(_arr, _size, _ctxt) \
961 rc = do_insn_fetch_bytes(_ctxt, _size); \
962 if (rc != X86EMUL_CONTINUE) \
964 ctxt->_eip += (_size); \
965 memcpy(_arr, ctxt->fetch.ptr, _size); \
966 ctxt->fetch.ptr += (_size); \
970 * Given the 'reg' portion of a ModRM byte, and a register block, return a
971 * pointer into the block that addresses the relevant register.
972 * AH, CH, DH and BH are decoded only for byte operands without a REX prefix.
974 static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
978 int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;
980 if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
981 p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
983 p = reg_rmw(ctxt, modrm_reg);
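/*
 * Worked example (illustrative): with no REX prefix and a byte-sized
 * operand, modrm_reg values 4-7 select the legacy high-byte registers:
 * 4 -> AH, 5 -> CH, 6 -> DH, 7 -> BH, i.e. byte 1 of RAX, RCX, RDX and RBX
 * (modrm_reg & 3, plus one, as above).
 */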
987 static int read_descriptor(struct x86_emulate_ctxt *ctxt,
988 struct segmented_address addr,
989 u16 *size, unsigned long *address, int op_bytes)
996 rc = segmented_read_std(ctxt, addr, size, 2);
997 if (rc != X86EMUL_CONTINUE)
1000 rc = segmented_read_std(ctxt, addr, address, op_bytes);
1014 FASTOP1SRC2(mul, mul_ex);
1015 FASTOP1SRC2(imul, imul_ex);
1016 FASTOP1SRC2EX(div, div_ex);
1017 FASTOP1SRC2EX(idiv, idiv_ex);
1046 FASTOP2R(cmp, cmp_r);
1048 static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
1050 /* If src is zero, do not write back, but update flags */
1051 if (ctxt->src.val == 0)
1052 ctxt->dst.type = OP_NONE;
1053 return fastop(ctxt, em_bsf);
1056 static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
1058 /* If src is zero, do not write back, but update flags */
1059 if (ctxt->src.val == 0)
1060 ctxt->dst.type = OP_NONE;
1061 return fastop(ctxt, em_bsr);
1064 static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
1067 void (*fop)(void) = (void *)em_setcc + FASTOP_SIZE * (condition & 0xf);
1069 flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
1070 asm("push %[flags]; popf; " CALL_NOSPEC
1071 : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags));
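/*
 * Typical use (illustrative, not a specific call site): a conditional-branch
 * handler can evaluate its condition code against the guest flags with
 * something like:
 *
 *	if (test_cc(ctxt->b, ctxt->eflags))
 *		rc = jmp_rel(ctxt, ctxt->src.val);
 */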
1075 static void fetch_register_operand(struct operand *op)
1077 switch (op->bytes) {
1079 op->val = *(u8 *)op->addr.reg;
1082 op->val = *(u16 *)op->addr.reg;
1085 op->val = *(u32 *)op->addr.reg;
1088 op->val = *(u64 *)op->addr.reg;
1093 static int em_fninit(struct x86_emulate_ctxt *ctxt)
1095 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1096 return emulate_nm(ctxt);
1099 asm volatile("fninit");
1101 return X86EMUL_CONTINUE;
1104 static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
1108 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1109 return emulate_nm(ctxt);
1112 asm volatile("fnstcw %0": "+m"(fcw));
1115 ctxt->dst.val = fcw;
1117 return X86EMUL_CONTINUE;
1120 static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
1124 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1125 return emulate_nm(ctxt);
1128 asm volatile("fnstsw %0": "+m"(fsw));
1131 ctxt->dst.val = fsw;
1133 return X86EMUL_CONTINUE;
1136 static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
1141 if (ctxt->d & ModRM)
1142 reg = ctxt->modrm_reg;
1144 reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);
1146 if (ctxt->d & Sse) {
1150 kvm_read_sse_reg(reg, &op->vec_val);
1153 if (ctxt->d & Mmx) {
1162 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1163 op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);
1165 fetch_register_operand(op);
1166 op->orig_val = op->val;
1169 static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg)
1171 if (base_reg == VCPU_REGS_RSP || base_reg == VCPU_REGS_RBP)
1172 ctxt->modrm_seg = VCPU_SREG_SS;
1175 static int decode_modrm(struct x86_emulate_ctxt *ctxt,
1179 int index_reg, base_reg, scale;
1180 int rc = X86EMUL_CONTINUE;
1183 ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
1184 index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
1185 base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */
1187 ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
1188 ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
1189 ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
1190 ctxt->modrm_seg = VCPU_SREG_DS;
1192 if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
1194 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1195 op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
1197 if (ctxt->d & Sse) {
1200 op->addr.xmm = ctxt->modrm_rm;
1201 kvm_read_sse_reg(ctxt->modrm_rm, &op->vec_val);
1204 if (ctxt->d & Mmx) {
1207 op->addr.mm = ctxt->modrm_rm & 7;
1210 fetch_register_operand(op);
1216 if (ctxt->ad_bytes == 2) {
1217 unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
1218 unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
1219 unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
1220 unsigned di = reg_read(ctxt, VCPU_REGS_RDI);
1222 /* 16-bit ModR/M decode. */
1223 switch (ctxt->modrm_mod) {
1225 if (ctxt->modrm_rm == 6)
1226 modrm_ea += insn_fetch(u16, ctxt);
1229 modrm_ea += insn_fetch(s8, ctxt);
1232 modrm_ea += insn_fetch(u16, ctxt);
1235 switch (ctxt->modrm_rm) {
1237 modrm_ea += bx + si;
1240 modrm_ea += bx + di;
1243 modrm_ea += bp + si;
1246 modrm_ea += bp + di;
1255 if (ctxt->modrm_mod != 0)
1262 if (ctxt->modrm_rm == 2 || ctxt->modrm_rm == 3 ||
1263 (ctxt->modrm_rm == 6 && ctxt->modrm_mod != 0))
1264 ctxt->modrm_seg = VCPU_SREG_SS;
1265 modrm_ea = (u16)modrm_ea;
1267 /* 32/64-bit ModR/M decode. */
1268 if ((ctxt->modrm_rm & 7) == 4) {
1269 sib = insn_fetch(u8, ctxt);
1270 index_reg |= (sib >> 3) & 7;
1271 base_reg |= sib & 7;
1274 if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
1275 modrm_ea += insn_fetch(s32, ctxt);
1277 modrm_ea += reg_read(ctxt, base_reg);
1278 adjust_modrm_seg(ctxt, base_reg);
1279 /* Increment ESP on POP [ESP] */
1280 if ((ctxt->d & IncSP) &&
1281 base_reg == VCPU_REGS_RSP)
1282 modrm_ea += ctxt->op_bytes;
1285 modrm_ea += reg_read(ctxt, index_reg) << scale;
1286 } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
1287 modrm_ea += insn_fetch(s32, ctxt);
1288 if (ctxt->mode == X86EMUL_MODE_PROT64)
1289 ctxt->rip_relative = 1;
1291 base_reg = ctxt->modrm_rm;
1292 modrm_ea += reg_read(ctxt, base_reg);
1293 adjust_modrm_seg(ctxt, base_reg);
1295 switch (ctxt->modrm_mod) {
1297 modrm_ea += insn_fetch(s8, ctxt);
1300 modrm_ea += insn_fetch(s32, ctxt);
1304 op->addr.mem.ea = modrm_ea;
1305 if (ctxt->ad_bytes != 8)
1306 ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;
1312 static int decode_abs(struct x86_emulate_ctxt *ctxt,
1315 int rc = X86EMUL_CONTINUE;
1318 switch (ctxt->ad_bytes) {
1320 op->addr.mem.ea = insn_fetch(u16, ctxt);
1323 op->addr.mem.ea = insn_fetch(u32, ctxt);
1326 op->addr.mem.ea = insn_fetch(u64, ctxt);
1333 static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
1337 if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
1338 mask = ~((long)ctxt->dst.bytes * 8 - 1);
1340 if (ctxt->src.bytes == 2)
1341 sv = (s16)ctxt->src.val & (s16)mask;
1342 else if (ctxt->src.bytes == 4)
1343 sv = (s32)ctxt->src.val & (s32)mask;
1345 sv = (s64)ctxt->src.val & (s64)mask;
1347 ctxt->dst.addr.mem.ea = address_mask(ctxt,
1348 ctxt->dst.addr.mem.ea + (sv >> 3));
1351 /* only subword offset */
1352 ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
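/*
 * Worked example (illustrative): for a dword-sized "bt mem, reg" with a
 * source bit index of 100, mask is ~31, so sv = 96; the memory operand's
 * address is advanced by 96 >> 3 = 12 bytes and the remaining in-word bit
 * index becomes 100 & 31 = 4.
 */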
1355 static int read_emulated(struct x86_emulate_ctxt *ctxt,
1356 unsigned long addr, void *dest, unsigned size)
1359 struct read_cache *mc = &ctxt->mem_read;
1361 if (mc->pos < mc->end)
1364 if (KVM_EMULATOR_BUG_ON((mc->end + size) >= sizeof(mc->data), ctxt))
1365 return X86EMUL_UNHANDLEABLE;
1367 rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
1369 if (rc != X86EMUL_CONTINUE)
1375 memcpy(dest, mc->data + mc->pos, size);
1377 return X86EMUL_CONTINUE;
1380 static int segmented_read(struct x86_emulate_ctxt *ctxt,
1381 struct segmented_address addr,
1388 rc = linearize(ctxt, addr, size, false, &linear);
1389 if (rc != X86EMUL_CONTINUE)
1391 return read_emulated(ctxt, linear, data, size);
1394 static int segmented_write(struct x86_emulate_ctxt *ctxt,
1395 struct segmented_address addr,
1402 rc = linearize(ctxt, addr, size, true, &linear);
1403 if (rc != X86EMUL_CONTINUE)
1405 return ctxt->ops->write_emulated(ctxt, linear, data, size,
1409 static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
1410 struct segmented_address addr,
1411 const void *orig_data, const void *data,
1417 rc = linearize(ctxt, addr, size, true, &linear);
1418 if (rc != X86EMUL_CONTINUE)
1420 return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
1421 size, &ctxt->exception);
1424 static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
1425 unsigned int size, unsigned short port,
1428 struct read_cache *rc = &ctxt->io_read;
1430 if (rc->pos == rc->end) { /* refill pio read ahead */
1431 unsigned int in_page, n;
1432 unsigned int count = ctxt->rep_prefix ?
1433 address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
1434 in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
1435 offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
1436 PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
1437 n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
1440 rc->pos = rc->end = 0;
1441 if (!ctxt->ops->pio_in_emulated(ctxt, size, port, rc->data, n))
1446 if (ctxt->rep_prefix && (ctxt->d & String) &&
1447 !(ctxt->eflags & X86_EFLAGS_DF)) {
1448 ctxt->dst.data = rc->data + rc->pos;
1449 ctxt->dst.type = OP_MEM_STR;
1450 ctxt->dst.count = (rc->end - rc->pos) / size;
1453 memcpy(dest, rc->data + rc->pos, size);
1459 static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
1460 u16 index, struct desc_struct *desc)
1465 ctxt->ops->get_idt(ctxt, &dt);
1467 if (dt.size < index * 8 + 7)
1468 return emulate_gp(ctxt, index << 3 | 0x2);
1470 addr = dt.address + index * 8;
1471 return linear_read_system(ctxt, addr, desc, sizeof(*desc));
1474 static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
1475 u16 selector, struct desc_ptr *dt)
1477 const struct x86_emulate_ops *ops = ctxt->ops;
1480 if (selector & 1 << 2) {
1481 struct desc_struct desc;
1484 memset(dt, 0, sizeof(*dt));
1485 if (!ops->get_segment(ctxt, &sel, &desc, &base3,
1489 dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
1490 dt->address = get_desc_base(&desc) | ((u64)base3 << 32);
1492 ops->get_gdt(ctxt, dt);
1495 static int get_descriptor_ptr(struct x86_emulate_ctxt *ctxt,
1496 u16 selector, ulong *desc_addr_p)
1499 u16 index = selector >> 3;
1502 get_descriptor_table_ptr(ctxt, selector, &dt);
1504 if (dt.size < index * 8 + 7)
1505 return emulate_gp(ctxt, selector & 0xfffc);
1507 addr = dt.address + index * 8;
1509 #ifdef CONFIG_X86_64
1510 if (addr >> 32 != 0) {
1513 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1514 if (!(efer & EFER_LMA))
1519 *desc_addr_p = addr;
1520 return X86EMUL_CONTINUE;
1523 /* allowed just for 8-byte segment descriptors */
1524 static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1525 u16 selector, struct desc_struct *desc,
1530 rc = get_descriptor_ptr(ctxt, selector, desc_addr_p);
1531 if (rc != X86EMUL_CONTINUE)
1534 return linear_read_system(ctxt, *desc_addr_p, desc, sizeof(*desc));
1537 /* allowed just for 8-byte segment descriptors */
1538 static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1539 u16 selector, struct desc_struct *desc)
1544 rc = get_descriptor_ptr(ctxt, selector, &addr);
1545 if (rc != X86EMUL_CONTINUE)
1548 return linear_write_system(ctxt, addr, desc, sizeof(*desc));
1551 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1552 u16 selector, int seg, u8 cpl,
1553 enum x86_transfer_type transfer,
1554 struct desc_struct *desc)
1556 struct desc_struct seg_desc, old_desc;
1558 unsigned err_vec = GP_VECTOR;
1560 bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
1566 memset(&seg_desc, 0, sizeof(seg_desc));
1568 if (ctxt->mode == X86EMUL_MODE_REAL) {
1569 /* set real mode segment descriptor (keep limit etc. for unreal mode) */
1571 ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
1572 set_desc_base(&seg_desc, selector << 4);
1574 } else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
1575 /* VM86 needs a clean new segment descriptor */
1576 set_desc_base(&seg_desc, selector << 4);
1577 set_desc_limit(&seg_desc, 0xffff);
1587 /* TR should be in GDT only */
1588 if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
1591 /* NULL selector is not valid for TR, CS and (except for long mode) SS */
1592 if (null_selector) {
1593 if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR)
1596 if (seg == VCPU_SREG_SS) {
1597 if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)
1601 * ctxt->ops->set_segment expects the CPL to be in
1602 * SS.DPL, so fake an expand-up 32-bit data segment.
1612 /* Skip all following checks */
1616 ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
1617 if (ret != X86EMUL_CONTINUE)
1620 err_code = selector & 0xfffc;
1621 err_vec = (transfer == X86_TRANSFER_TASK_SWITCH) ? TS_VECTOR :
1624 /* can't load system descriptor into segment selector */
1625 if (seg <= VCPU_SREG_GS && !seg_desc.s) {
1626 if (transfer == X86_TRANSFER_CALL_JMP)
1627 return X86EMUL_UNHANDLEABLE;
1636 * segment is not a writable data segment or segment
1637 * selector's RPL != CPL or DPL != CPL
1639 if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
1644 * KVM uses "none" when loading CS as part of emulating Real
1645 * Mode exceptions and IRET (handled above). In all other
1646 * cases, loading CS without a control transfer is a KVM bug.
1648 if (WARN_ON_ONCE(transfer == X86_TRANSFER_NONE))
1651 if (!(seg_desc.type & 8))
1654 if (transfer == X86_TRANSFER_RET) {
1655 /* RET can never return to an inner privilege level. */
1658 /* Outer-privilege level return is not implemented */
1660 return X86EMUL_UNHANDLEABLE;
1662 if (transfer == X86_TRANSFER_RET || transfer == X86_TRANSFER_TASK_SWITCH) {
1663 if (seg_desc.type & 4) {
1672 } else { /* X86_TRANSFER_CALL_JMP */
1673 if (seg_desc.type & 4) {
1679 if (rpl > cpl || dpl != cpl)
1683 /* in long-mode d/b must be clear if l is set */
1684 if (seg_desc.d && seg_desc.l) {
1687 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1688 if (efer & EFER_LMA)
1692 /* CS(RPL) <- CPL */
1693 selector = (selector & 0xfffc) | cpl;
1696 if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
1699 case VCPU_SREG_LDTR:
1700 if (seg_desc.s || seg_desc.type != 2)
1703 default: /* DS, ES, FS, or GS */
1705 * segment is not a data or readable code segment or
1706 * ((segment is a data or nonconforming code segment)
1707 * and ((RPL > DPL) or (CPL > DPL)))
1709 if ((seg_desc.type & 0xa) == 0x8 ||
1710 (((seg_desc.type & 0xc) != 0xc) &&
1711 (rpl > dpl || cpl > dpl)))
1717 err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
1722 /* mark segment as accessed */
1723 if (!(seg_desc.type & 1)) {
1725 ret = write_segment_descriptor(ctxt, selector,
1727 if (ret != X86EMUL_CONTINUE)
1730 } else if (ctxt->mode == X86EMUL_MODE_PROT64) {
1731 ret = linear_read_system(ctxt, desc_addr+8, &base3, sizeof(base3));
1732 if (ret != X86EMUL_CONTINUE)
1734 if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
1735 ((u64)base3 << 32), ctxt))
1736 return emulate_gp(ctxt, err_code);
1739 if (seg == VCPU_SREG_TR) {
1740 old_desc = seg_desc;
1741 seg_desc.type |= 2; /* busy */
1742 ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
1743 sizeof(seg_desc), &ctxt->exception);
1744 if (ret != X86EMUL_CONTINUE)
1748 ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
1751 return X86EMUL_CONTINUE;
1753 return emulate_exception(ctxt, err_vec, err_code, true);
1756 static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1757 u16 selector, int seg)
1759 u8 cpl = ctxt->ops->cpl(ctxt);
1762 * None of MOV, POP and LSS can load a NULL selector in CPL=3, but
1763 * they can load it at CPL<3 (Intel's manual says only LSS can,
1766 * However, the Intel manual says that putting IST=1/DPL=3 in
1767 * an interrupt gate will result in SS=3 (the AMD manual instead
1768 * says it doesn't), so allow SS=3 in __load_segment_descriptor
1769 * and only forbid it here.
1771 if (seg == VCPU_SREG_SS && selector == 3 &&
1772 ctxt->mode == X86EMUL_MODE_PROT64)
1773 return emulate_exception(ctxt, GP_VECTOR, 0, true);
1775 return __load_segment_descriptor(ctxt, selector, seg, cpl,
1776 X86_TRANSFER_NONE, NULL);
1779 static void write_register_operand(struct operand *op)
1781 return assign_register(op->addr.reg, op->val, op->bytes);
1784 static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
1788 write_register_operand(op);
1791 if (ctxt->lock_prefix)
1792 return segmented_cmpxchg(ctxt,
1798 return segmented_write(ctxt,
1803 return segmented_write(ctxt,
1806 op->bytes * op->count);
1808 kvm_write_sse_reg(op->addr.xmm, &op->vec_val);
1811 kvm_write_mmx_reg(op->addr.mm, &op->mm_val);
1819 return X86EMUL_CONTINUE;
1822 static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes)
1824 struct segmented_address addr;
1826 rsp_increment(ctxt, -bytes);
1827 addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1828 addr.seg = VCPU_SREG_SS;
1830 return segmented_write(ctxt, addr, data, bytes);
1833 static int em_push(struct x86_emulate_ctxt *ctxt)
1835 /* Disable writeback. */
1836 ctxt->dst.type = OP_NONE;
1837 return push(ctxt, &ctxt->src.val, ctxt->op_bytes);
1840 static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1841 void *dest, int len)
1844 struct segmented_address addr;
1846 addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1847 addr.seg = VCPU_SREG_SS;
1848 rc = segmented_read(ctxt, addr, dest, len);
1849 if (rc != X86EMUL_CONTINUE)
1852 rsp_increment(ctxt, len);
1856 static int em_pop(struct x86_emulate_ctxt *ctxt)
1858 return emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1861 static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1862 void *dest, int len)
1865 unsigned long val, change_mask;
1866 int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
1867 int cpl = ctxt->ops->cpl(ctxt);
1869 rc = emulate_pop(ctxt, &val, len);
1870 if (rc != X86EMUL_CONTINUE)
1873 change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
1874 X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
1875 X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
1876 X86_EFLAGS_AC | X86_EFLAGS_ID;
1878 switch (ctxt->mode) {
1879 case X86EMUL_MODE_PROT64:
1880 case X86EMUL_MODE_PROT32:
1881 case X86EMUL_MODE_PROT16:
1883 change_mask |= X86_EFLAGS_IOPL;
1885 change_mask |= X86_EFLAGS_IF;
1887 case X86EMUL_MODE_VM86:
1889 return emulate_gp(ctxt, 0);
1890 change_mask |= X86_EFLAGS_IF;
1892 default: /* real mode */
1893 change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
1897 *(unsigned long *)dest =
1898 (ctxt->eflags & ~change_mask) | (val & change_mask);
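/*
 * Illustrative note on the masking above: at CPL 3 with IOPL 0, neither
 * X86_EFLAGS_IOPL nor X86_EFLAGS_IF ends up in change_mask, so those bits of
 * the popped value are silently ignored rather than causing a fault.
 */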
1903 static int em_popf(struct x86_emulate_ctxt *ctxt)
1905 ctxt->dst.type = OP_REG;
1906 ctxt->dst.addr.reg = &ctxt->eflags;
1907 ctxt->dst.bytes = ctxt->op_bytes;
1908 return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1911 static int em_enter(struct x86_emulate_ctxt *ctxt)
1914 unsigned frame_size = ctxt->src.val;
1915 unsigned nesting_level = ctxt->src2.val & 31;
1919 return X86EMUL_UNHANDLEABLE;
1921 rbp = reg_read(ctxt, VCPU_REGS_RBP);
1922 rc = push(ctxt, &rbp, stack_size(ctxt));
1923 if (rc != X86EMUL_CONTINUE)
1925 assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
1927 assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
1928 reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
1930 return X86EMUL_CONTINUE;
1933 static int em_leave(struct x86_emulate_ctxt *ctxt)
1935 assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
1937 return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
1940 static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
1942 int seg = ctxt->src2.val;
1944 ctxt->src.val = get_segment_selector(ctxt, seg);
1945 if (ctxt->op_bytes == 4) {
1946 rsp_increment(ctxt, -2);
1950 return em_push(ctxt);
1953 static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
1955 int seg = ctxt->src2.val;
1956 unsigned long selector;
1959 rc = emulate_pop(ctxt, &selector, 2);
1960 if (rc != X86EMUL_CONTINUE)
1963 if (seg == VCPU_SREG_SS)
1964 ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
1965 if (ctxt->op_bytes > 2)
1966 rsp_increment(ctxt, ctxt->op_bytes - 2);
1968 rc = load_segment_descriptor(ctxt, (u16)selector, seg);
1972 static int em_pusha(struct x86_emulate_ctxt *ctxt)
1974 unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
1975 int rc = X86EMUL_CONTINUE;
1976 int reg = VCPU_REGS_RAX;
1978 while (reg <= VCPU_REGS_RDI) {
1979 (reg == VCPU_REGS_RSP) ?
1980 (ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg));
1983 if (rc != X86EMUL_CONTINUE)
1992 static int em_pushf(struct x86_emulate_ctxt *ctxt)
1994 ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
1995 return em_push(ctxt);
1998 static int em_popa(struct x86_emulate_ctxt *ctxt)
2000 int rc = X86EMUL_CONTINUE;
2001 int reg = VCPU_REGS_RDI;
2004 while (reg >= VCPU_REGS_RAX) {
2005 if (reg == VCPU_REGS_RSP) {
2006 rsp_increment(ctxt, ctxt->op_bytes);
2010 rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
2011 if (rc != X86EMUL_CONTINUE)
2013 assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
2019 static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
2021 const struct x86_emulate_ops *ops = ctxt->ops;
2028 /* TODO: Add limit checks */
2029 ctxt->src.val = ctxt->eflags;
2031 if (rc != X86EMUL_CONTINUE)
2034 ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);
2036 ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
2038 if (rc != X86EMUL_CONTINUE)
2041 ctxt->src.val = ctxt->_eip;
2043 if (rc != X86EMUL_CONTINUE)
2046 ops->get_idt(ctxt, &dt);
2048 eip_addr = dt.address + (irq << 2);
2049 cs_addr = dt.address + (irq << 2) + 2;
2051 rc = linear_read_system(ctxt, cs_addr, &cs, 2);
2052 if (rc != X86EMUL_CONTINUE)
2055 rc = linear_read_system(ctxt, eip_addr, &eip, 2);
2056 if (rc != X86EMUL_CONTINUE)
2059 rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
2060 if (rc != X86EMUL_CONTINUE)
2068 int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
2072 invalidate_registers(ctxt);
2073 rc = __emulate_int_real(ctxt, irq);
2074 if (rc == X86EMUL_CONTINUE)
2075 writeback_registers(ctxt);
2079 static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
2081 switch (ctxt->mode) {
2082 case X86EMUL_MODE_REAL:
2083 return __emulate_int_real(ctxt, irq);
2084 case X86EMUL_MODE_VM86:
2085 case X86EMUL_MODE_PROT16:
2086 case X86EMUL_MODE_PROT32:
2087 case X86EMUL_MODE_PROT64:
2089 /* Protected mode interrupts are not implemented yet */
2090 return X86EMUL_UNHANDLEABLE;
2094 static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
2096 int rc = X86EMUL_CONTINUE;
2097 unsigned long temp_eip = 0;
2098 unsigned long temp_eflags = 0;
2099 unsigned long cs = 0;
2100 unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
2101 X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
2102 X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
2103 X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
2104 X86_EFLAGS_AC | X86_EFLAGS_ID |
2106 unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
2109 /* TODO: Add stack limit check */
2111 rc = emulate_pop(ctxt, &temp_eip, ctxt->op_bytes);
2113 if (rc != X86EMUL_CONTINUE)
2116 if (temp_eip & ~0xffff)
2117 return emulate_gp(ctxt, 0);
2119 rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2121 if (rc != X86EMUL_CONTINUE)
2124 rc = emulate_pop(ctxt, &temp_eflags, ctxt->op_bytes);
2126 if (rc != X86EMUL_CONTINUE)
2129 rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
2131 if (rc != X86EMUL_CONTINUE)
2134 ctxt->_eip = temp_eip;
2136 if (ctxt->op_bytes == 4)
2137 ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
2138 else if (ctxt->op_bytes == 2) {
2139 ctxt->eflags &= ~0xffff;
2140 ctxt->eflags |= temp_eflags;
2143 ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
2144 ctxt->eflags |= X86_EFLAGS_FIXED;
2145 ctxt->ops->set_nmi_mask(ctxt, false);
2150 static int em_iret(struct x86_emulate_ctxt *ctxt)
2152 switch (ctxt->mode) {
2153 case X86EMUL_MODE_REAL:
2154 return emulate_iret_real(ctxt);
2155 case X86EMUL_MODE_VM86:
2156 case X86EMUL_MODE_PROT16:
2157 case X86EMUL_MODE_PROT32:
2158 case X86EMUL_MODE_PROT64:
2160 /* iret from protected mode is not implemented yet */
2161 return X86EMUL_UNHANDLEABLE;
2165 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
2169 struct desc_struct new_desc;
2170 u8 cpl = ctxt->ops->cpl(ctxt);
2172 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2174 rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
2175 X86_TRANSFER_CALL_JMP,
2177 if (rc != X86EMUL_CONTINUE)
2180 rc = assign_eip_far(ctxt, ctxt->src.val);
2181 /* Error handling is not implemented. */
2182 if (rc != X86EMUL_CONTINUE)
2183 return X86EMUL_UNHANDLEABLE;
2188 static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
2190 return assign_eip_near(ctxt, ctxt->src.val);
2193 static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
2198 old_eip = ctxt->_eip;
2199 rc = assign_eip_near(ctxt, ctxt->src.val);
2200 if (rc != X86EMUL_CONTINUE)
2202 ctxt->src.val = old_eip;
2207 static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
2209 u64 old = ctxt->dst.orig_val64;
2211 if (ctxt->dst.bytes == 16)
2212 return X86EMUL_UNHANDLEABLE;
2214 if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
2215 ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
2216 *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
2217 *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
2218 ctxt->eflags &= ~X86_EFLAGS_ZF;
2220 ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
2221 (u32) reg_read(ctxt, VCPU_REGS_RBX);
2223 ctxt->eflags |= X86_EFLAGS_ZF;
2225 return X86EMUL_CONTINUE;
2228 static int em_ret(struct x86_emulate_ctxt *ctxt)
2233 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2234 if (rc != X86EMUL_CONTINUE)
2237 return assign_eip_near(ctxt, eip);
2240 static int em_ret_far(struct x86_emulate_ctxt *ctxt)
2243 unsigned long eip, cs;
2244 int cpl = ctxt->ops->cpl(ctxt);
2245 struct desc_struct new_desc;
2247 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2248 if (rc != X86EMUL_CONTINUE)
2250 rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2251 if (rc != X86EMUL_CONTINUE)
2253 rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl,
2256 if (rc != X86EMUL_CONTINUE)
2258 rc = assign_eip_far(ctxt, eip);
2259 /* Error handling is not implemented. */
2260 if (rc != X86EMUL_CONTINUE)
2261 return X86EMUL_UNHANDLEABLE;
2266 static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
2270 rc = em_ret_far(ctxt);
2271 if (rc != X86EMUL_CONTINUE)
2273 rsp_increment(ctxt, ctxt->src.val);
2274 return X86EMUL_CONTINUE;
2277 static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
2279 /* Save real source value, then compare EAX against destination. */
2280 ctxt->dst.orig_val = ctxt->dst.val;
2281 ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
2282 ctxt->src.orig_val = ctxt->src.val;
2283 ctxt->src.val = ctxt->dst.orig_val;
2284 fastop(ctxt, em_cmp);
2286 if (ctxt->eflags & X86_EFLAGS_ZF) {
2287 /* Success: write back to memory; no update of EAX */
2288 ctxt->src.type = OP_NONE;
2289 ctxt->dst.val = ctxt->src.orig_val;
2291 /* Failure: write the value we saw to EAX. */
2292 ctxt->src.type = OP_REG;
2293 ctxt->src.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
2294 ctxt->src.val = ctxt->dst.orig_val;
2295 /* Create write-cycle to dest by writing the same value */
2296 ctxt->dst.val = ctxt->dst.orig_val;
2298 return X86EMUL_CONTINUE;
2301 static int em_lseg(struct x86_emulate_ctxt *ctxt)
2303 int seg = ctxt->src2.val;
2307 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2309 rc = load_segment_descriptor(ctxt, sel, seg);
2310 if (rc != X86EMUL_CONTINUE)
2313 ctxt->dst.val = ctxt->src.val;
2317 static int em_rsm(struct x86_emulate_ctxt *ctxt)
2319 if (!ctxt->ops->is_smm(ctxt))
2320 return emulate_ud(ctxt);
2322 if (ctxt->ops->leave_smm(ctxt))
2323 ctxt->ops->triple_fault(ctxt);
2325 return emulator_recalc_and_set_mode(ctxt);
2329 setup_syscalls_segments(struct desc_struct *cs, struct desc_struct *ss)
2331 cs->l = 0; /* will be adjusted later */
2332 set_desc_base(cs, 0); /* flat segment */
2333 cs->g = 1; /* 4kb granularity */
2334 set_desc_limit(cs, 0xfffff); /* 4GB limit */
2335 cs->type = 0x0b; /* Read, Execute, Accessed */
2337 cs->dpl = 0; /* will be adjusted later */
2342 set_desc_base(ss, 0); /* flat segment */
2343 set_desc_limit(ss, 0xfffff); /* 4GB limit */
2344 ss->g = 1; /* 4kb granularity */
2346 ss->type = 0x03; /* Read/Write, Accessed */
2347 ss->d = 1; /* 32bit stack segment */
2354 static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
2356 u32 eax, ebx, ecx, edx;
2359 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
2360 return is_guest_vendor_intel(ebx, ecx, edx);
2363 static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
2365 const struct x86_emulate_ops *ops = ctxt->ops;
2366 u32 eax, ebx, ecx, edx;
2369 * syscall should always be enabled in long mode, so we only need to
2370 * become vendor specific (via CPUID) if other modes are active...
2372 if (ctxt->mode == X86EMUL_MODE_PROT64)
2377 ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
2379 * Remark: Intel CPUs only support "syscall" in 64-bit long mode, so a
2380 * 64-bit guest running a 32-bit compat application will get #UD. While
2381 * this behaviour could be fixed up (by emulating) to match the AMD
2382 * response, AMD CPUs cannot be made to behave like Intel ones.
2384 if (is_guest_vendor_intel(ebx, ecx, edx))
2387 if (is_guest_vendor_amd(ebx, ecx, edx) ||
2388 is_guest_vendor_hygon(ebx, ecx, edx))
2392 * default: (not Intel, not AMD, not Hygon), apply Intel's stricter rules...
2398 static int em_syscall(struct x86_emulate_ctxt *ctxt)
2400 const struct x86_emulate_ops *ops = ctxt->ops;
2401 struct desc_struct cs, ss;
2406 /* syscall is not available in real mode */
2407 if (ctxt->mode == X86EMUL_MODE_REAL ||
2408 ctxt->mode == X86EMUL_MODE_VM86)
2409 return emulate_ud(ctxt);
2411 if (!(em_syscall_is_enabled(ctxt)))
2412 return emulate_ud(ctxt);
2414 ops->get_msr(ctxt, MSR_EFER, &efer);
2415 if (!(efer & EFER_SCE))
2416 return emulate_ud(ctxt);
2418 setup_syscalls_segments(&cs, &ss);
2419 ops->get_msr(ctxt, MSR_STAR, &msr_data);
2421 cs_sel = (u16)(msr_data & 0xfffc);
2422 ss_sel = (u16)(msr_data + 8);
2424 if (efer & EFER_LMA) {
2428 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2429 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2431 *reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
2432 if (efer & EFER_LMA) {
2433 #ifdef CONFIG_X86_64
2434 *reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags;
2437 ctxt->mode == X86EMUL_MODE_PROT64 ?
2438 MSR_LSTAR : MSR_CSTAR, &msr_data);
2439 ctxt->_eip = msr_data;
2441 ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
2442 ctxt->eflags &= ~msr_data;
2443 ctxt->eflags |= X86_EFLAGS_FIXED;
2447 ops->get_msr(ctxt, MSR_STAR, &msr_data);
2448 ctxt->_eip = (u32)msr_data;
2450 ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2453 ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
2454 return X86EMUL_CONTINUE;
2457 static int em_sysenter(struct x86_emulate_ctxt *ctxt)
2459 const struct x86_emulate_ops *ops = ctxt->ops;
2460 struct desc_struct cs, ss;
2465 ops->get_msr(ctxt, MSR_EFER, &efer);
2466 /* inject #GP if in real mode */
2467 if (ctxt->mode == X86EMUL_MODE_REAL)
2468 return emulate_gp(ctxt, 0);
2471 * Not recognized on AMD in compat mode (but is recognized in legacy mode).
2474 if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA)
2475 && !vendor_intel(ctxt))
2476 return emulate_ud(ctxt);
2478 /* sysenter/sysexit have not been tested in 64bit mode. */
2479 if (ctxt->mode == X86EMUL_MODE_PROT64)
2480 return X86EMUL_UNHANDLEABLE;
2482 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2483 if ((msr_data & 0xfffc) == 0x0)
2484 return emulate_gp(ctxt, 0);
2486 setup_syscalls_segments(&cs, &ss);
2487 ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2488 cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
2489 ss_sel = cs_sel + 8;
2490 if (efer & EFER_LMA) {
2495 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2496 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2498 ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
2499 ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data;
2501 ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
2502 *reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
2504 if (efer & EFER_LMA)
2505 ctxt->mode = X86EMUL_MODE_PROT64;
2507 return X86EMUL_CONTINUE;
2510 static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2512 const struct x86_emulate_ops *ops = ctxt->ops;
2513 struct desc_struct cs, ss;
2514 u64 msr_data, rcx, rdx;
2516 u16 cs_sel = 0, ss_sel = 0;
2518 /* inject #GP if in real mode or Virtual 8086 mode */
2519 if (ctxt->mode == X86EMUL_MODE_REAL ||
2520 ctxt->mode == X86EMUL_MODE_VM86)
2521 return emulate_gp(ctxt, 0);
2523 setup_syscalls_segments(&cs, &ss);
2525 if ((ctxt->rex_prefix & 0x8) != 0x0)
2526 usermode = X86EMUL_MODE_PROT64;
2528 usermode = X86EMUL_MODE_PROT32;
2530 rcx = reg_read(ctxt, VCPU_REGS_RCX);
2531 rdx = reg_read(ctxt, VCPU_REGS_RDX);
2535 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2537 case X86EMUL_MODE_PROT32:
2538 cs_sel = (u16)(msr_data + 16);
2539 if ((msr_data & 0xfffc) == 0x0)
2540 return emulate_gp(ctxt, 0);
2541 ss_sel = (u16)(msr_data + 24);
2545 case X86EMUL_MODE_PROT64:
2546 cs_sel = (u16)(msr_data + 32);
2547 if (msr_data == 0x0)
2548 return emulate_gp(ctxt, 0);
2549 ss_sel = cs_sel + 8;
2552 if (emul_is_noncanonical_address(rcx, ctxt) ||
2553 emul_is_noncanonical_address(rdx, ctxt))
2554 return emulate_gp(ctxt, 0);
2557 cs_sel |= SEGMENT_RPL_MASK;
2558 ss_sel |= SEGMENT_RPL_MASK;
2560 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2561 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2564 ctxt->mode = usermode;
2565 *reg_write(ctxt, VCPU_REGS_RSP) = rcx;
2567 return X86EMUL_CONTINUE;
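/*
 * The selector arithmetic above follows the architectural SYSEXIT
 * definition: relative to IA32_SYSENTER_CS, a 32-bit return uses
 * CS + 16 / SS + 24 while a 64-bit return (REX.W) uses CS + 32 with
 * SS = CS + 8; both selectors get RPL forced to 3, RCX supplies the
 * new stack pointer and RDX the return instruction pointer.
 */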
2570 static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
2573 if (ctxt->mode == X86EMUL_MODE_REAL)
2575 if (ctxt->mode == X86EMUL_MODE_VM86)
2577 iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
2578 return ctxt->ops->cpl(ctxt) > iopl;
2581 #define VMWARE_PORT_VMPORT (0x5658)
2582 #define VMWARE_PORT_VMRPC (0x5659)
2584 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
2587 const struct x86_emulate_ops *ops = ctxt->ops;
2588 struct desc_struct tr_seg;
2591 u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
2592 unsigned mask = (1 << len) - 1;
2596 * VMware allows access to these ports even if denied
2597 * by the TSS I/O permission bitmap. Mimic behavior.
2599 if (enable_vmware_backdoor &&
2600 ((port == VMWARE_PORT_VMPORT) || (port == VMWARE_PORT_VMRPC)))
2603 ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
2606 if (desc_limit_scaled(&tr_seg) < 103)
2608 base = get_desc_base(&tr_seg);
2609 #ifdef CONFIG_X86_64
2610 base |= ((u64)base3) << 32;
2612 r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL, true);
2613 if (r != X86EMUL_CONTINUE)
2615 if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
2617 r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL, true);
2618 if (r != X86EMUL_CONTINUE)
2620 if ((perm >> bit_idx) & mask)
2625 static bool emulator_io_permitted(struct x86_emulate_ctxt *ctxt,
2631 if (emulator_bad_iopl(ctxt))
2632 if (!emulator_io_port_access_allowed(ctxt, port, len))
2635 ctxt->perm_ok = true;
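/*
 * Worked example for the TSS bitmap lookup in
 * emulator_io_port_access_allowed() above (illustrative only): an
 * access to port 0x3f8 with len == 1 reads the 16-bit word at byte
 * offset port/8 == 0x7f of the I/O bitmap, with bit_idx == 0 and
 * mask == 1; reading two bytes keeps the test correct even when the
 * bits for a multi-byte access straddle a byte boundary.
 */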
2640 static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
2643 * Intel CPUs mask the counter and pointers in a rather strange
2644 * manner when ECX is zero, due to REP-string optimizations.
2646 #ifdef CONFIG_X86_64
2647 if (ctxt->ad_bytes != 4 || !vendor_intel(ctxt))
2650 *reg_write(ctxt, VCPU_REGS_RCX) = 0;
2653 case 0xa4: /* movsb */
2654 case 0xa5: /* movsd/w */
2655 *reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1;
2657 case 0xaa: /* stosb */
2658 case 0xab: /* stosd/w */
2659 *reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1;
2664 static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
2665 struct tss_segment_16 *tss)
2667 tss->ip = ctxt->_eip;
2668 tss->flag = ctxt->eflags;
2669 tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
2670 tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
2671 tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
2672 tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
2673 tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
2674 tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
2675 tss->si = reg_read(ctxt, VCPU_REGS_RSI);
2676 tss->di = reg_read(ctxt, VCPU_REGS_RDI);
2678 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
2679 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2680 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
2681 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
2682 tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
2685 static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
2686 struct tss_segment_16 *tss)
2691 ctxt->_eip = tss->ip;
2692 ctxt->eflags = tss->flag | 2;
2693 *reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
2694 *reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
2695 *reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
2696 *reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
2697 *reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
2698 *reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
2699 *reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
2700 *reg_write(ctxt, VCPU_REGS_RDI) = tss->di;
2703 * SDM says that segment selectors are loaded before segment descriptors.
2706 set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
2707 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
2708 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
2709 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2710 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2715 * Now load the segment descriptors. If a fault happens at this stage,
2716 * it is handled in the context of the new task.
2718 ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
2719 X86_TRANSFER_TASK_SWITCH, NULL);
2720 if (ret != X86EMUL_CONTINUE)
2722 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
2723 X86_TRANSFER_TASK_SWITCH, NULL);
2724 if (ret != X86EMUL_CONTINUE)
2726 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
2727 X86_TRANSFER_TASK_SWITCH, NULL);
2728 if (ret != X86EMUL_CONTINUE)
2730 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
2731 X86_TRANSFER_TASK_SWITCH, NULL);
2732 if (ret != X86EMUL_CONTINUE)
2734 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
2735 X86_TRANSFER_TASK_SWITCH, NULL);
2736 if (ret != X86EMUL_CONTINUE)
2739 return X86EMUL_CONTINUE;
2742 static int task_switch_16(struct x86_emulate_ctxt *ctxt, u16 old_tss_sel,
2743 ulong old_tss_base, struct desc_struct *new_desc)
2745 struct tss_segment_16 tss_seg;
2747 u32 new_tss_base = get_desc_base(new_desc);
2749 ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
2750 if (ret != X86EMUL_CONTINUE)
2753 save_state_to_tss16(ctxt, &tss_seg);
2755 ret = linear_write_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
2756 if (ret != X86EMUL_CONTINUE)
2759 ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
2760 if (ret != X86EMUL_CONTINUE)
2763 if (old_tss_sel != 0xffff) {
2764 tss_seg.prev_task_link = old_tss_sel;
2766 ret = linear_write_system(ctxt, new_tss_base,
2767 &tss_seg.prev_task_link,
2768 sizeof(tss_seg.prev_task_link));
2769 if (ret != X86EMUL_CONTINUE)
2773 return load_state_from_tss16(ctxt, &tss_seg);
2776 static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
2777 struct tss_segment_32 *tss)
2779 /* CR3 and ldt selector are not saved intentionally */
2780 tss->eip = ctxt->_eip;
2781 tss->eflags = ctxt->eflags;
2782 tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
2783 tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
2784 tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
2785 tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
2786 tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
2787 tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
2788 tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
2789 tss->edi = reg_read(ctxt, VCPU_REGS_RDI);
2791 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
2792 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2793 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
2794 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
2795 tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
2796 tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
2799 static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
2800 struct tss_segment_32 *tss)
2805 if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
2806 return emulate_gp(ctxt, 0);
2807 ctxt->_eip = tss->eip;
2808 ctxt->eflags = tss->eflags | 2;
2810 /* General purpose registers */
2811 *reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
2812 *reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
2813 *reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
2814 *reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
2815 *reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
2816 *reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
2817 *reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
2818 *reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;
2821 * SDM says that segment selectors are loaded before segment
2822 * descriptors. This is important because CPL checks will use the new CS.RPL.
2825 set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
2826 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
2827 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
2828 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2829 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2830 set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
2831 set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
2834 * If we're switching between Protected Mode and VM86, we need to make
2835 * sure to update the mode before loading the segment descriptors so
2836 * that the selectors are interpreted correctly.
2838 if (ctxt->eflags & X86_EFLAGS_VM) {
2839 ctxt->mode = X86EMUL_MODE_VM86;
2842 ctxt->mode = X86EMUL_MODE_PROT32;
2847 * Now load the segment descriptors. If a fault happens at this stage,
2848 * it is handled in the context of the new task.
2850 ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
2851 cpl, X86_TRANSFER_TASK_SWITCH, NULL);
2852 if (ret != X86EMUL_CONTINUE)
2854 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
2855 X86_TRANSFER_TASK_SWITCH, NULL);
2856 if (ret != X86EMUL_CONTINUE)
2858 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
2859 X86_TRANSFER_TASK_SWITCH, NULL);
2860 if (ret != X86EMUL_CONTINUE)
2862 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
2863 X86_TRANSFER_TASK_SWITCH, NULL);
2864 if (ret != X86EMUL_CONTINUE)
2866 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
2867 X86_TRANSFER_TASK_SWITCH, NULL);
2868 if (ret != X86EMUL_CONTINUE)
2870 ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
2871 X86_TRANSFER_TASK_SWITCH, NULL);
2872 if (ret != X86EMUL_CONTINUE)
2874 ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
2875 X86_TRANSFER_TASK_SWITCH, NULL);
2880 static int task_switch_32(struct x86_emulate_ctxt *ctxt, u16 old_tss_sel,
2881 ulong old_tss_base, struct desc_struct *new_desc)
2883 struct tss_segment_32 tss_seg;
2885 u32 new_tss_base = get_desc_base(new_desc);
2886 u32 eip_offset = offsetof(struct tss_segment_32, eip);
2887 u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
2889 ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
2890 if (ret != X86EMUL_CONTINUE)
2893 save_state_to_tss32(ctxt, &tss_seg);
2895 /* Only GP registers and segment selectors are saved */
2896 ret = linear_write_system(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
2897 ldt_sel_offset - eip_offset);
2898 if (ret != X86EMUL_CONTINUE)
2901 ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
2902 if (ret != X86EMUL_CONTINUE)
2905 if (old_tss_sel != 0xffff) {
2906 tss_seg.prev_task_link = old_tss_sel;
2908 ret = linear_write_system(ctxt, new_tss_base,
2909 &tss_seg.prev_task_link,
2910 sizeof(tss_seg.prev_task_link));
2911 if (ret != X86EMUL_CONTINUE)
2915 return load_state_from_tss32(ctxt, &tss_seg);
2918 static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2919 u16 tss_selector, int idt_index, int reason,
2920 bool has_error_code, u32 error_code)
2922 const struct x86_emulate_ops *ops = ctxt->ops;
2923 struct desc_struct curr_tss_desc, next_tss_desc;
2925 u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
2926 ulong old_tss_base =
2927 ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
2929 ulong desc_addr, dr7;
2931 /* FIXME: old_tss_base == ~0 ? */
2933 ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc, &desc_addr);
2934 if (ret != X86EMUL_CONTINUE)
2936 ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc, &desc_addr);
2937 if (ret != X86EMUL_CONTINUE)
2940 /* FIXME: check that next_tss_desc is tss */
2943 * Check privileges. The three cases are task switches caused by:
2945 * 1. jmp/call/int to task gate: Check against DPL of the task gate
2946 * 2. Exception/IRQ/iret: No check is performed
2947 * 3. jmp/call to TSS/task-gate: No check is performed since the
2948 * hardware checks it before exiting.
2950 if (reason == TASK_SWITCH_GATE) {
2951 if (idt_index != -1) {
2952 /* Software interrupts */
2953 struct desc_struct task_gate_desc;
2956 ret = read_interrupt_descriptor(ctxt, idt_index,
2958 if (ret != X86EMUL_CONTINUE)
2961 dpl = task_gate_desc.dpl;
2962 if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
2963 return emulate_gp(ctxt, (idt_index << 3) | 0x2);
2967 desc_limit = desc_limit_scaled(&next_tss_desc);
2968 if (!next_tss_desc.p ||
2969 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
2970 desc_limit < 0x2b)) {
2971 return emulate_ts(ctxt, tss_selector & 0xfffc);
2974 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
2975 curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
2976 write_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc);
2979 if (reason == TASK_SWITCH_IRET)
2980 ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
2982 /* set back link to prev task only if NT bit is set in eflags
2983 note that old_tss_sel is not used after this point */
2984 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
2985 old_tss_sel = 0xffff;
2987 if (next_tss_desc.type & 8)
2988 ret = task_switch_32(ctxt, old_tss_sel, old_tss_base, &next_tss_desc);
2990 ret = task_switch_16(ctxt, old_tss_sel,
2991 old_tss_base, &next_tss_desc);
2992 if (ret != X86EMUL_CONTINUE)
2995 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
2996 ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;
2998 if (reason != TASK_SWITCH_IRET) {
2999 next_tss_desc.type |= (1 << 1); /* set busy flag */
3000 write_segment_descriptor(ctxt, tss_selector, &next_tss_desc);
3003 ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS);
3004 ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);
3006 if (has_error_code) {
3007 ctxt->op_bytes = ctxt->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
3008 ctxt->lock_prefix = 0;
3009 ctxt->src.val = (unsigned long) error_code;
3010 ret = em_push(ctxt);
3013 ops->get_dr(ctxt, 7, &dr7);
3014 ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN));
3019 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
3020 u16 tss_selector, int idt_index, int reason,
3021 bool has_error_code, u32 error_code)
3025 invalidate_registers(ctxt);
3026 ctxt->_eip = ctxt->eip;
3027 ctxt->dst.type = OP_NONE;
3029 rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
3030 has_error_code, error_code);
3032 if (rc == X86EMUL_CONTINUE) {
3033 ctxt->eip = ctxt->_eip;
3034 writeback_registers(ctxt);
3037 return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
3040 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
3043 int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;
3045 register_address_increment(ctxt, reg, df * op->bytes);
3046 op->addr.mem.ea = register_address(ctxt, reg);
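/*
 * EFLAGS.DF selects the direction here: with DF clear the index
 * register advances by op->count * op->bytes, with DF set it moves
 * backwards by the same amount, and the operand's effective address is
 * refreshed from the updated register.
 */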
3049 static int em_das(struct x86_emulate_ctxt *ctxt)
3052 bool af, cf, old_cf;
3054 cf = ctxt->eflags & X86_EFLAGS_CF;
3060 af = ctxt->eflags & X86_EFLAGS_AF;
3061 if ((al & 0x0f) > 9 || af) {
3063 cf = old_cf | (al >= 250);
3068 if (old_al > 0x99 || old_cf) {
3074 /* Set PF, ZF, SF */
3075 ctxt->src.type = OP_IMM;
3077 ctxt->src.bytes = 1;
3078 fastop(ctxt, em_or);
3079 ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
3081 ctxt->eflags |= X86_EFLAGS_CF;
3083 ctxt->eflags |= X86_EFLAGS_AF;
3084 return X86EMUL_CONTINUE;
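/*
 * Worked example (illustrative): after SUB AL, 0x05 with AL == 0x23 the
 * raw result is AL == 0x1e; the low nibble 0xe is greater than 9, so
 * DAS subtracts 6 and sets AF, leaving AL == 0x18, the packed-BCD
 * result of 23 - 05.
 */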
3087 static int em_aam(struct x86_emulate_ctxt *ctxt)
3091 if (ctxt->src.val == 0)
3092 return emulate_de(ctxt);
3094 al = ctxt->dst.val & 0xff;
3095 ah = al / ctxt->src.val;
3096 al %= ctxt->src.val;
3098 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al | (ah << 8);
3100 /* Set PF, ZF, SF */
3101 ctxt->src.type = OP_IMM;
3103 ctxt->src.bytes = 1;
3104 fastop(ctxt, em_or);
3106 return X86EMUL_CONTINUE;
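/*
 * Worked example (illustrative): with AL == 43 (0x2b) and the default
 * immediate of 10, AAM yields AH = 43 / 10 = 4 and AL = 43 % 10 = 3,
 * i.e. AX == 0x0403, and PF/ZF/SF are then set from the new AL.
 */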
3109 static int em_aad(struct x86_emulate_ctxt *ctxt)
3111 u8 al = ctxt->dst.val & 0xff;
3112 u8 ah = (ctxt->dst.val >> 8) & 0xff;
3114 al = (al + (ah * ctxt->src.val)) & 0xff;
3116 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;
3118 /* Set PF, ZF, SF */
3119 ctxt->src.type = OP_IMM;
3121 ctxt->src.bytes = 1;
3122 fastop(ctxt, em_or);
3124 return X86EMUL_CONTINUE;
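/*
 * Worked example (illustrative): with AX == 0x0403 and the default
 * immediate of 10, AAD computes AL = 3 + 4 * 10 = 43 (0x2b) and clears
 * AH, so AX becomes 0x002b before the flags are updated from AL.
 */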
3127 static int em_call(struct x86_emulate_ctxt *ctxt)
3130 long rel = ctxt->src.val;
3132 ctxt->src.val = (unsigned long)ctxt->_eip;
3133 rc = jmp_rel(ctxt, rel);
3134 if (rc != X86EMUL_CONTINUE)
3136 return em_push(ctxt);
3139 static int em_call_far(struct x86_emulate_ctxt *ctxt)
3144 struct desc_struct old_desc, new_desc;
3145 const struct x86_emulate_ops *ops = ctxt->ops;
3146 int cpl = ctxt->ops->cpl(ctxt);
3147 enum x86emul_mode prev_mode = ctxt->mode;
3149 old_eip = ctxt->_eip;
3150 ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
3152 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
3153 rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
3154 X86_TRANSFER_CALL_JMP, &new_desc);
3155 if (rc != X86EMUL_CONTINUE)
3158 rc = assign_eip_far(ctxt, ctxt->src.val);
3159 if (rc != X86EMUL_CONTINUE)
3162 ctxt->src.val = old_cs;
3164 if (rc != X86EMUL_CONTINUE)
3167 ctxt->src.val = old_eip;
3169 /* If we failed, we tainted the memory, but at the very least we should restore CS. */
3171 if (rc != X86EMUL_CONTINUE) {
3172 pr_warn_once("faulting far call emulation tainted memory\n");
3177 ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
3178 ctxt->mode = prev_mode;
3183 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
3188 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
3189 if (rc != X86EMUL_CONTINUE)
3191 rc = assign_eip_near(ctxt, eip);
3192 if (rc != X86EMUL_CONTINUE)
3194 rsp_increment(ctxt, ctxt->src.val);
3195 return X86EMUL_CONTINUE;
3198 static int em_xchg(struct x86_emulate_ctxt *ctxt)
3200 /* Write back the register source. */
3201 ctxt->src.val = ctxt->dst.val;
3202 write_register_operand(&ctxt->src);
3204 /* Write back the memory destination with implicit LOCK prefix. */
3205 ctxt->dst.val = ctxt->src.orig_val;
3206 ctxt->lock_prefix = 1;
3207 return X86EMUL_CONTINUE;
3210 static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
3212 ctxt->dst.val = ctxt->src2.val;
3213 return fastop(ctxt, em_imul);
3216 static int em_cwd(struct x86_emulate_ctxt *ctxt)
3218 ctxt->dst.type = OP_REG;
3219 ctxt->dst.bytes = ctxt->src.bytes;
3220 ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
3221 ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);
3223 return X86EMUL_CONTINUE;
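/*
 * The expression above replicates the sign bit of the source into the
 * destination: for CWD with AX == 0x8000 the shift yields 1, so
 * ~(1 - 1) is all ones and DX becomes 0xffff; with AX == 0x1234 the
 * shift yields 0 and ~(0 - 1) is 0, so DX becomes 0.
 */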
3226 static int em_rdpid(struct x86_emulate_ctxt *ctxt)
3230 if (!ctxt->ops->guest_has_rdpid(ctxt))
3231 return emulate_ud(ctxt);
3233 ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux);
3234 ctxt->dst.val = tsc_aux;
3235 return X86EMUL_CONTINUE;
3238 static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
3242 ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
3243 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
3244 *reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
3245 return X86EMUL_CONTINUE;
3248 static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
3252 if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
3253 return emulate_gp(ctxt, 0);
3254 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
3255 *reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
3256 return X86EMUL_CONTINUE;
3259 static int em_mov(struct x86_emulate_ctxt *ctxt)
3261 memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
3262 return X86EMUL_CONTINUE;
3265 static int em_movbe(struct x86_emulate_ctxt *ctxt)
3269 if (!ctxt->ops->guest_has_movbe(ctxt))
3270 return emulate_ud(ctxt);
3272 switch (ctxt->op_bytes) {
3275 * From MOVBE definition: "...When the operand size is 16 bits,
3276 * the upper word of the destination register remains unchanged
3279 * Casting either ->valptr or ->val to u16 would break strict-aliasing
3280 * rules, so we have to do the operation almost by hand.
3282 tmp = (u16)ctxt->src.val;
3283 ctxt->dst.val &= ~0xffffUL;
3284 ctxt->dst.val |= (unsigned long)swab16(tmp);
3287 ctxt->dst.val = swab32((u32)ctxt->src.val);
3290 ctxt->dst.val = swab64(ctxt->src.val);
3295 return X86EMUL_CONTINUE;
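/*
 * Example (illustrative): a 32-bit MOVBE of 0x11223344 transfers
 * 0x44332211; in the 16-bit case above only the low word is swapped
 * and the upper bits of the destination register are left untouched.
 */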
3298 static int em_cr_write(struct x86_emulate_ctxt *ctxt)
3300 int cr_num = ctxt->modrm_reg;
3303 if (ctxt->ops->set_cr(ctxt, cr_num, ctxt->src.val))
3304 return emulate_gp(ctxt, 0);
3306 /* Disable writeback. */
3307 ctxt->dst.type = OP_NONE;
3311 * CR0 write might have updated CR0.PE and/or CR0.PG
3312 * which can affect the cpu's execution mode.
3314 r = emulator_recalc_and_set_mode(ctxt);
3315 if (r != X86EMUL_CONTINUE)
3319 return X86EMUL_CONTINUE;
3322 static int em_dr_write(struct x86_emulate_ctxt *ctxt)
3326 if (ctxt->mode == X86EMUL_MODE_PROT64)
3327 val = ctxt->src.val & ~0ULL;
3329 val = ctxt->src.val & ~0U;
3331 /* #UD condition is already handled. */
3332 if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0)
3333 return emulate_gp(ctxt, 0);
3335 /* Disable writeback. */
3336 ctxt->dst.type = OP_NONE;
3337 return X86EMUL_CONTINUE;
3340 static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
3342 u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
3346 msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
3347 | ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
3348 r = ctxt->ops->set_msr_with_filter(ctxt, msr_index, msr_data);
3350 if (r == X86EMUL_PROPAGATE_FAULT)
3351 return emulate_gp(ctxt, 0);
3356 static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
3358 u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
3362 r = ctxt->ops->get_msr_with_filter(ctxt, msr_index, &msr_data);
3364 if (r == X86EMUL_PROPAGATE_FAULT)
3365 return emulate_gp(ctxt, 0);
3367 if (r == X86EMUL_CONTINUE) {
3368 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
3369 *reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
3374 static int em_store_sreg(struct x86_emulate_ctxt *ctxt, int segment)
3376 if (segment > VCPU_SREG_GS &&
3377 (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3378 ctxt->ops->cpl(ctxt) > 0)
3379 return emulate_gp(ctxt, 0);
3381 ctxt->dst.val = get_segment_selector(ctxt, segment);
3382 if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
3383 ctxt->dst.bytes = 2;
3384 return X86EMUL_CONTINUE;
3387 static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
3389 if (ctxt->modrm_reg > VCPU_SREG_GS)
3390 return emulate_ud(ctxt);
3392 return em_store_sreg(ctxt, ctxt->modrm_reg);
3395 static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
3397 u16 sel = ctxt->src.val;
3399 if (ctxt->modrm_reg == VCPU_SREG_CS || ctxt->modrm_reg > VCPU_SREG_GS)
3400 return emulate_ud(ctxt);
3402 if (ctxt->modrm_reg == VCPU_SREG_SS)
3403 ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
3405 /* Disable writeback. */
3406 ctxt->dst.type = OP_NONE;
3407 return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
3410 static int em_sldt(struct x86_emulate_ctxt *ctxt)
3412 return em_store_sreg(ctxt, VCPU_SREG_LDTR);
3415 static int em_lldt(struct x86_emulate_ctxt *ctxt)
3417 u16 sel = ctxt->src.val;
3419 /* Disable writeback. */
3420 ctxt->dst.type = OP_NONE;
3421 return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
3424 static int em_str(struct x86_emulate_ctxt *ctxt)
3426 return em_store_sreg(ctxt, VCPU_SREG_TR);
3429 static int em_ltr(struct x86_emulate_ctxt *ctxt)
3431 u16 sel = ctxt->src.val;
3433 /* Disable writeback. */
3434 ctxt->dst.type = OP_NONE;
3435 return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR);
3438 static int em_invlpg(struct x86_emulate_ctxt *ctxt)
3443 rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear);
3444 if (rc == X86EMUL_CONTINUE)
3445 ctxt->ops->invlpg(ctxt, linear);
3446 /* Disable writeback. */
3447 ctxt->dst.type = OP_NONE;
3448 return X86EMUL_CONTINUE;
3451 static int em_clts(struct x86_emulate_ctxt *ctxt)
3455 cr0 = ctxt->ops->get_cr(ctxt, 0);
3457 ctxt->ops->set_cr(ctxt, 0, cr0);
3458 return X86EMUL_CONTINUE;
3461 static int em_hypercall(struct x86_emulate_ctxt *ctxt)
3463 int rc = ctxt->ops->fix_hypercall(ctxt);
3465 if (rc != X86EMUL_CONTINUE)
3468 /* Let the processor re-execute the fixed hypercall */
3469 ctxt->_eip = ctxt->eip;
3470 /* Disable writeback. */
3471 ctxt->dst.type = OP_NONE;
3472 return X86EMUL_CONTINUE;
3475 static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
3476 void (*get)(struct x86_emulate_ctxt *ctxt,
3477 struct desc_ptr *ptr))
3479 struct desc_ptr desc_ptr;
3481 if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3482 ctxt->ops->cpl(ctxt) > 0)
3483 return emulate_gp(ctxt, 0);
3485 if (ctxt->mode == X86EMUL_MODE_PROT64)
3487 get(ctxt, &desc_ptr);
3488 if (ctxt->op_bytes == 2) {
3490 desc_ptr.address &= 0x00ffffff;
3492 /* Disable writeback. */
3493 ctxt->dst.type = OP_NONE;
3494 return segmented_write_std(ctxt, ctxt->dst.addr.mem,
3495 &desc_ptr, 2 + ctxt->op_bytes);
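/*
 * SGDT/SIDT as emulated above always store the 2-byte limit followed by
 * the base, i.e. 2 + op_bytes bytes in total; with a 16-bit operand
 * size the base is truncated to its low 24 bits, matching the masking
 * of desc_ptr.address above.
 */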
3498 static int em_sgdt(struct x86_emulate_ctxt *ctxt)
3500 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_gdt);
3503 static int em_sidt(struct x86_emulate_ctxt *ctxt)
3505 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
3508 static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
3510 struct desc_ptr desc_ptr;
3513 if (ctxt->mode == X86EMUL_MODE_PROT64)
3515 rc = read_descriptor(ctxt, ctxt->src.addr.mem,
3516 &desc_ptr.size, &desc_ptr.address,
3518 if (rc != X86EMUL_CONTINUE)
3520 if (ctxt->mode == X86EMUL_MODE_PROT64 &&
3521 emul_is_noncanonical_address(desc_ptr.address, ctxt))
3522 return emulate_gp(ctxt, 0);
3524 ctxt->ops->set_gdt(ctxt, &desc_ptr);
3526 ctxt->ops->set_idt(ctxt, &desc_ptr);
3527 /* Disable writeback. */
3528 ctxt->dst.type = OP_NONE;
3529 return X86EMUL_CONTINUE;
3532 static int em_lgdt(struct x86_emulate_ctxt *ctxt)
3534 return em_lgdt_lidt(ctxt, true);
3537 static int em_lidt(struct x86_emulate_ctxt *ctxt)
3539 return em_lgdt_lidt(ctxt, false);
3542 static int em_smsw(struct x86_emulate_ctxt *ctxt)
3544 if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3545 ctxt->ops->cpl(ctxt) > 0)
3546 return emulate_gp(ctxt, 0);
3548 if (ctxt->dst.type == OP_MEM)
3549 ctxt->dst.bytes = 2;
3550 ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
3551 return X86EMUL_CONTINUE;
3554 static int em_lmsw(struct x86_emulate_ctxt *ctxt)
3556 ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
3557 | (ctxt->src.val & 0x0f));
3558 ctxt->dst.type = OP_NONE;
3559 return X86EMUL_CONTINUE;
3562 static int em_loop(struct x86_emulate_ctxt *ctxt)
3564 int rc = X86EMUL_CONTINUE;
3566 register_address_increment(ctxt, VCPU_REGS_RCX, -1);
3567 if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
3568 (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
3569 rc = jmp_rel(ctxt, ctxt->src.val);
3574 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
3576 int rc = X86EMUL_CONTINUE;
3578 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
3579 rc = jmp_rel(ctxt, ctxt->src.val);
3584 static int em_in(struct x86_emulate_ctxt *ctxt)
3586 if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
3588 return X86EMUL_IO_NEEDED;
3590 return X86EMUL_CONTINUE;
3593 static int em_out(struct x86_emulate_ctxt *ctxt)
3595 ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
3597 /* Disable writeback. */
3598 ctxt->dst.type = OP_NONE;
3599 return X86EMUL_CONTINUE;
3602 static int em_cli(struct x86_emulate_ctxt *ctxt)
3604 if (emulator_bad_iopl(ctxt))
3605 return emulate_gp(ctxt, 0);
3607 ctxt->eflags &= ~X86_EFLAGS_IF;
3608 return X86EMUL_CONTINUE;
3611 static int em_sti(struct x86_emulate_ctxt *ctxt)
3613 if (emulator_bad_iopl(ctxt))
3614 return emulate_gp(ctxt, 0);
3616 ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
3617 ctxt->eflags |= X86_EFLAGS_IF;
3618 return X86EMUL_CONTINUE;
3621 static int em_cpuid(struct x86_emulate_ctxt *ctxt)
3623 u32 eax, ebx, ecx, edx;
3626 ctxt->ops->get_msr(ctxt, MSR_MISC_FEATURES_ENABLES, &msr);
3627 if (msr & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
3628 ctxt->ops->cpl(ctxt)) {
3629 return emulate_gp(ctxt, 0);
3632 eax = reg_read(ctxt, VCPU_REGS_RAX);
3633 ecx = reg_read(ctxt, VCPU_REGS_RCX);
3634 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
3635 *reg_write(ctxt, VCPU_REGS_RAX) = eax;
3636 *reg_write(ctxt, VCPU_REGS_RBX) = ebx;
3637 *reg_write(ctxt, VCPU_REGS_RCX) = ecx;
3638 *reg_write(ctxt, VCPU_REGS_RDX) = edx;
3639 return X86EMUL_CONTINUE;
3642 static int em_sahf(struct x86_emulate_ctxt *ctxt)
3646 flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
3648 flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
3650 ctxt->eflags &= ~0xffUL;
3651 ctxt->eflags |= flags | X86_EFLAGS_FIXED;
3652 return X86EMUL_CONTINUE;
3655 static int em_lahf(struct x86_emulate_ctxt *ctxt)
3657 *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
3658 *reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
3659 return X86EMUL_CONTINUE;
3662 static int em_bswap(struct x86_emulate_ctxt *ctxt)
3664 switch (ctxt->op_bytes) {
3665 #ifdef CONFIG_X86_64
3667 asm("bswap %0" : "+r"(ctxt->dst.val));
3671 asm("bswap %0" : "+r"(*(u32 *)&ctxt->dst.val));
3674 return X86EMUL_CONTINUE;
3677 static int em_clflush(struct x86_emulate_ctxt *ctxt)
3679 /* emulating clflush regardless of cpuid */
3680 return X86EMUL_CONTINUE;
3683 static int em_clflushopt(struct x86_emulate_ctxt *ctxt)
3685 /* emulating clflushopt regardless of cpuid */
3686 return X86EMUL_CONTINUE;
3689 static int em_movsxd(struct x86_emulate_ctxt *ctxt)
3691 ctxt->dst.val = (s32) ctxt->src.val;
3692 return X86EMUL_CONTINUE;
3695 static int check_fxsr(struct x86_emulate_ctxt *ctxt)
3697 if (!ctxt->ops->guest_has_fxsr(ctxt))
3698 return emulate_ud(ctxt);
3700 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
3701 return emulate_nm(ctxt);
3704 * Don't emulate a case that should never be hit, instead of working
3705 * around a lack of fxsave64/fxrstor64 on old compilers.
3707 if (ctxt->mode >= X86EMUL_MODE_PROT64)
3708 return X86EMUL_UNHANDLEABLE;
3710 return X86EMUL_CONTINUE;
3714 * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but does save
3715 * and restore MXCSR.
3717 static size_t __fxstate_size(int nregs)
3719 return offsetof(struct fxregs_state, xmm_space[0]) + nregs * 16;
3722 static inline size_t fxstate_size(struct x86_emulate_ctxt *ctxt)
3725 if (ctxt->mode == X86EMUL_MODE_PROT64)
3726 return __fxstate_size(16);
3728 cr4_osfxsr = ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR;
3729 return __fxstate_size(cr4_osfxsr ? 8 : 0);
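/*
 * In other words, the amount of FXSAVE state copied to or from guest
 * memory is the legacy header up to the XMM save area plus 16 bytes
 * per XMM register that participates: 16 registers in 64-bit mode,
 * 8 in 32-bit mode with CR4.OSFXSR set, and none otherwise.
 */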
3733 * FXSAVE and FXRSTOR have 4 different formats depending on execution mode:
3734 * 1) 16-bit mode
3735 * 2) 32-bit mode
3736 * - like (1), but FIP and FDP (foo) are only 16 bit. At least Intel CPUs
3737 * preserve whole 32 bit values, though, so (1) and (2) are the same wrt.
3738 * FIP and FDP.
3739 * 3) 64-bit mode with REX.W prefix
3740 * - like (2), but XMM 8-15 are being saved and restored
3741 * 4) 64-bit mode without REX.W prefix
3742 * - like (3), but FIP and FDP are 64 bit
3744 * Emulation uses (3) for (1) and (2) and preserves XMM 8-15 to reach the
3745 * desired result. (4) is not emulated.
3747 * Note: Guest and host CPUID.(EAX=07H,ECX=0H):EBX[bit 13] (deprecate FPU CS
3748 * and FPU DS) should match.
3750 static int em_fxsave(struct x86_emulate_ctxt *ctxt)
3752 struct fxregs_state fx_state;
3755 rc = check_fxsr(ctxt);
3756 if (rc != X86EMUL_CONTINUE)
3761 rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
3765 if (rc != X86EMUL_CONTINUE)
3768 return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state,
3769 fxstate_size(ctxt));
3773 * FXRSTOR might restore XMM registers not provided by the guest. Fill
3774 * in the host registers (via FXSAVE) instead, so they won't be modified.
3775 * (preemption has to stay disabled until FXRSTOR).
3777 * Use noinline to keep the stack for other functions called by callers small.
3779 static noinline int fxregs_fixup(struct fxregs_state *fx_state,
3780 const size_t used_size)
3782 struct fxregs_state fx_tmp;
3785 rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp));
3786 memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size,
3787 __fxstate_size(16) - used_size);
3792 static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
3794 struct fxregs_state fx_state;
3798 rc = check_fxsr(ctxt);
3799 if (rc != X86EMUL_CONTINUE)
3802 size = fxstate_size(ctxt);
3803 rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
3804 if (rc != X86EMUL_CONTINUE)
3809 if (size < __fxstate_size(16)) {
3810 rc = fxregs_fixup(&fx_state, size);
3811 if (rc != X86EMUL_CONTINUE)
3815 if (fx_state.mxcsr >> 16) {
3816 rc = emulate_gp(ctxt, 0);
3820 if (rc == X86EMUL_CONTINUE)
3821 rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
3829 static int em_xsetbv(struct x86_emulate_ctxt *ctxt)
3833 if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSXSAVE))
3834 return emulate_ud(ctxt);
3836 eax = reg_read(ctxt, VCPU_REGS_RAX);
3837 edx = reg_read(ctxt, VCPU_REGS_RDX);
3838 ecx = reg_read(ctxt, VCPU_REGS_RCX);
3840 if (ctxt->ops->set_xcr(ctxt, ecx, ((u64)edx << 32) | eax))
3841 return emulate_gp(ctxt, 0);
3843 return X86EMUL_CONTINUE;
3846 static bool valid_cr(int nr)
3858 static int check_cr_access(struct x86_emulate_ctxt *ctxt)
3860 if (!valid_cr(ctxt->modrm_reg))
3861 return emulate_ud(ctxt);
3863 return X86EMUL_CONTINUE;
3866 static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
3870 ctxt->ops->get_dr(ctxt, 7, &dr7);
3872 return dr7 & DR7_GD;
3875 static int check_dr_read(struct x86_emulate_ctxt *ctxt)
3877 int dr = ctxt->modrm_reg;
3881 return emulate_ud(ctxt);
3883 cr4 = ctxt->ops->get_cr(ctxt, 4);
3884 if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
3885 return emulate_ud(ctxt);
3887 if (check_dr7_gd(ctxt)) {
3890 ctxt->ops->get_dr(ctxt, 6, &dr6);
3891 dr6 &= ~DR_TRAP_BITS;
3892 dr6 |= DR6_BD | DR6_ACTIVE_LOW;
3893 ctxt->ops->set_dr(ctxt, 6, dr6);
3894 return emulate_db(ctxt);
3897 return X86EMUL_CONTINUE;
3900 static int check_dr_write(struct x86_emulate_ctxt *ctxt)
3902 u64 new_val = ctxt->src.val64;
3903 int dr = ctxt->modrm_reg;
3905 if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
3906 return emulate_gp(ctxt, 0);
3908 return check_dr_read(ctxt);
3911 static int check_svme(struct x86_emulate_ctxt *ctxt)
3915 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
3917 if (!(efer & EFER_SVME))
3918 return emulate_ud(ctxt);
3920 return X86EMUL_CONTINUE;
3923 static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
3925 u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
3927 /* Valid physical address? */
3928 if (rax & 0xffff000000000000ULL)
3929 return emulate_gp(ctxt, 0);
3931 return check_svme(ctxt);
3934 static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
3936 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
3938 if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
3939 return emulate_gp(ctxt, 0);
3941 return X86EMUL_CONTINUE;
3944 static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
3946 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
3947 u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
3950 * VMware allows access to these Pseudo-PMCs even when read via RDPMC
3951 * in Ring3 when CR4.PCE=0.
3953 if (enable_vmware_backdoor && is_vmware_backdoor_pmc(rcx))
3954 return X86EMUL_CONTINUE;
3957 * If CR4.PCE is set, the SDM requires CPL=0 or CR0.PE=0. The CR0.PE
3958 * check, however, is unnecessary because CPL is always 0 outside protected mode.
3961 if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
3962 ctxt->ops->check_pmc(ctxt, rcx))
3963 return emulate_gp(ctxt, 0);
3965 return X86EMUL_CONTINUE;
3968 static int check_perm_in(struct x86_emulate_ctxt *ctxt)
3970 ctxt->dst.bytes = min(ctxt->dst.bytes, 4u);
3971 if (!emulator_io_permitted(ctxt, ctxt->src.val, ctxt->dst.bytes))
3972 return emulate_gp(ctxt, 0);
3974 return X86EMUL_CONTINUE;
3977 static int check_perm_out(struct x86_emulate_ctxt *ctxt)
3979 ctxt->src.bytes = min(ctxt->src.bytes, 4u);
3980 if (!emulator_io_permitted(ctxt, ctxt->dst.val, ctxt->src.bytes))
3981 return emulate_gp(ctxt, 0);
3983 return X86EMUL_CONTINUE;
3986 #define D(_y) { .flags = (_y) }
3987 #define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
3988 #define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
3989 .intercept = x86_intercept_##_i, .check_perm = (_p) }
3990 #define N D(NotImpl)
3991 #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
3992 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
3993 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
3994 #define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
3995 #define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) }
3996 #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
3997 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
3998 #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
3999 #define II(_f, _e, _i) \
4000 { .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
4001 #define IIP(_f, _e, _i, _p) \
4002 { .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
4003 .intercept = x86_intercept_##_i, .check_perm = (_p) }
4004 #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
4006 #define D2bv(_f) D((_f) | ByteOp), D(_f)
4007 #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
4008 #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e)
4009 #define F2bv(_f, _e) F((_f) | ByteOp, _e), F(_f, _e)
4010 #define I2bvIP(_f, _e, _i, _p) \
4011 IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)
4013 #define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \
4014 F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
4015 F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
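/*
 * Illustrative expansion (not an additional table entry): in the opcode
 * tables below, F6ALU(Lock, em_add) produces the six classic ALU forms
 * used for opcodes 0x00-0x05, i.e.
 *
 *	F(ByteOp | DstMem | SrcReg | ModRM | Lock, em_add),
 *	F(DstMem | SrcReg | ModRM | Lock, em_add),
 *	F(ByteOp | DstReg | SrcMem | ModRM, em_add),
 *	F(DstReg | SrcMem | ModRM, em_add),
 *	F(ByteOp | DstAcc | SrcImm, em_add),
 *	F(DstAcc | SrcImm, em_add),
 */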
4017 static const struct opcode group7_rm0[] = {
4019 I(SrcNone | Priv | EmulateOnUD, em_hypercall),
4023 static const struct opcode group7_rm1[] = {
4024 DI(SrcNone | Priv, monitor),
4025 DI(SrcNone | Priv, mwait),
4029 static const struct opcode group7_rm2[] = {
4031 II(ImplicitOps | Priv, em_xsetbv, xsetbv),
4035 static const struct opcode group7_rm3[] = {
4036 DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
4037 II(SrcNone | Prot | EmulateOnUD, em_hypercall, vmmcall),
4038 DIP(SrcNone | Prot | Priv, vmload, check_svme_pa),
4039 DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa),
4040 DIP(SrcNone | Prot | Priv, stgi, check_svme),
4041 DIP(SrcNone | Prot | Priv, clgi, check_svme),
4042 DIP(SrcNone | Prot | Priv, skinit, check_svme),
4043 DIP(SrcNone | Prot | Priv, invlpga, check_svme),
4046 static const struct opcode group7_rm7[] = {
4048 DIP(SrcNone, rdtscp, check_rdtsc),
4052 static const struct opcode group1[] = {
4054 F(Lock | PageTable, em_or),
4057 F(Lock | PageTable, em_and),
4063 static const struct opcode group1A[] = {
4064 I(DstMem | SrcNone | Mov | Stack | IncSP | TwoMemOp, em_pop), N, N, N, N, N, N, N,
4067 static const struct opcode group2[] = {
4068 F(DstMem | ModRM, em_rol),
4069 F(DstMem | ModRM, em_ror),
4070 F(DstMem | ModRM, em_rcl),
4071 F(DstMem | ModRM, em_rcr),
4072 F(DstMem | ModRM, em_shl),
4073 F(DstMem | ModRM, em_shr),
4074 F(DstMem | ModRM, em_shl),
4075 F(DstMem | ModRM, em_sar),
4078 static const struct opcode group3[] = {
4079 F(DstMem | SrcImm | NoWrite, em_test),
4080 F(DstMem | SrcImm | NoWrite, em_test),
4081 F(DstMem | SrcNone | Lock, em_not),
4082 F(DstMem | SrcNone | Lock, em_neg),
4083 F(DstXacc | Src2Mem, em_mul_ex),
4084 F(DstXacc | Src2Mem, em_imul_ex),
4085 F(DstXacc | Src2Mem, em_div_ex),
4086 F(DstXacc | Src2Mem, em_idiv_ex),
4089 static const struct opcode group4[] = {
4090 F(ByteOp | DstMem | SrcNone | Lock, em_inc),
4091 F(ByteOp | DstMem | SrcNone | Lock, em_dec),
4095 static const struct opcode group5[] = {
4096 F(DstMem | SrcNone | Lock, em_inc),
4097 F(DstMem | SrcNone | Lock, em_dec),
4098 I(SrcMem | NearBranch | IsBranch, em_call_near_abs),
4099 I(SrcMemFAddr | ImplicitOps | IsBranch, em_call_far),
4100 I(SrcMem | NearBranch | IsBranch, em_jmp_abs),
4101 I(SrcMemFAddr | ImplicitOps | IsBranch, em_jmp_far),
4102 I(SrcMem | Stack | TwoMemOp, em_push), D(Undefined),
4105 static const struct opcode group6[] = {
4106 II(Prot | DstMem, em_sldt, sldt),
4107 II(Prot | DstMem, em_str, str),
4108 II(Prot | Priv | SrcMem16, em_lldt, lldt),
4109 II(Prot | Priv | SrcMem16, em_ltr, ltr),
4113 static const struct group_dual group7 = { {
4114 II(Mov | DstMem, em_sgdt, sgdt),
4115 II(Mov | DstMem, em_sidt, sidt),
4116 II(SrcMem | Priv, em_lgdt, lgdt),
4117 II(SrcMem | Priv, em_lidt, lidt),
4118 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4119 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4120 II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
4126 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4127 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4131 static const struct opcode group8[] = {
4133 F(DstMem | SrcImmByte | NoWrite, em_bt),
4134 F(DstMem | SrcImmByte | Lock | PageTable, em_bts),
4135 F(DstMem | SrcImmByte | Lock, em_btr),
4136 F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
4140 * The "memory" destination is actually always a register, since we come
4141 * from the register case of group9.
4143 static const struct gprefix pfx_0f_c7_7 = {
4144 N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdpid),
4148 static const struct group_dual group9 = { {
4149 N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
4151 N, N, N, N, N, N, N,
4152 GP(0, &pfx_0f_c7_7),
4155 static const struct opcode group11[] = {
4156 I(DstMem | SrcImm | Mov | PageTable, em_mov),
4160 static const struct gprefix pfx_0f_ae_7 = {
4161 I(SrcMem | ByteOp, em_clflush), I(SrcMem | ByteOp, em_clflushopt), N, N,
4164 static const struct group_dual group15 = { {
4165 I(ModRM | Aligned16, em_fxsave),
4166 I(ModRM | Aligned16, em_fxrstor),
4167 N, N, N, N, N, GP(0, &pfx_0f_ae_7),
4169 N, N, N, N, N, N, N, N,
4172 static const struct gprefix pfx_0f_6f_0f_7f = {
4173 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
4176 static const struct instr_dual instr_dual_0f_2b = {
4180 static const struct gprefix pfx_0f_2b = {
4181 ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
4184 static const struct gprefix pfx_0f_10_0f_11 = {
4185 I(Unaligned, em_mov), I(Unaligned, em_mov), N, N,
4188 static const struct gprefix pfx_0f_28_0f_29 = {
4189 I(Aligned, em_mov), I(Aligned, em_mov), N, N,
4192 static const struct gprefix pfx_0f_e7 = {
4193 N, I(Sse, em_mov), N, N,
4196 static const struct escape escape_d9 = { {
4197 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstcw),
4200 N, N, N, N, N, N, N, N,
4202 N, N, N, N, N, N, N, N,
4204 N, N, N, N, N, N, N, N,
4206 N, N, N, N, N, N, N, N,
4208 N, N, N, N, N, N, N, N,
4210 N, N, N, N, N, N, N, N,
4212 N, N, N, N, N, N, N, N,
4214 N, N, N, N, N, N, N, N,
4217 static const struct escape escape_db = { {
4218 N, N, N, N, N, N, N, N,
4221 N, N, N, N, N, N, N, N,
4223 N, N, N, N, N, N, N, N,
4225 N, N, N, N, N, N, N, N,
4227 N, N, N, N, N, N, N, N,
4229 N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
4231 N, N, N, N, N, N, N, N,
4233 N, N, N, N, N, N, N, N,
4235 N, N, N, N, N, N, N, N,
4238 static const struct escape escape_dd = { {
4239 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstsw),
4242 N, N, N, N, N, N, N, N,
4244 N, N, N, N, N, N, N, N,
4246 N, N, N, N, N, N, N, N,
4248 N, N, N, N, N, N, N, N,
4250 N, N, N, N, N, N, N, N,
4252 N, N, N, N, N, N, N, N,
4254 N, N, N, N, N, N, N, N,
4256 N, N, N, N, N, N, N, N,
4259 static const struct instr_dual instr_dual_0f_c3 = {
4260 I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
4263 static const struct mode_dual mode_dual_63 = {
4264 N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd)
4267 static const struct instr_dual instr_dual_8d = {
4268 D(DstReg | SrcMem | ModRM | NoAccess), N
4271 static const struct opcode opcode_table[256] = {
4273 F6ALU(Lock, em_add),
4274 I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
4275 I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
4277 F6ALU(Lock | PageTable, em_or),
4278 I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
4281 F6ALU(Lock, em_adc),
4282 I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
4283 I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
4285 F6ALU(Lock, em_sbb),
4286 I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
4287 I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
4289 F6ALU(Lock | PageTable, em_and), N, N,
4291 F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
4293 F6ALU(Lock, em_xor), N, N,
4295 F6ALU(NoWrite, em_cmp), N, N,
4297 X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
4299 X8(I(SrcReg | Stack, em_push)),
4301 X8(I(DstReg | Stack, em_pop)),
4303 I(ImplicitOps | Stack | No64, em_pusha),
4304 I(ImplicitOps | Stack | No64, em_popa),
4305 N, MD(ModRM, &mode_dual_63),
4308 I(SrcImm | Mov | Stack, em_push),
4309 I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
4310 I(SrcImmByte | Mov | Stack, em_push),
4311 I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
4312 I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
4313 I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
4315 X16(D(SrcImmByte | NearBranch | IsBranch)),
4317 G(ByteOp | DstMem | SrcImm, group1),
4318 G(DstMem | SrcImm, group1),
4319 G(ByteOp | DstMem | SrcImm | No64, group1),
4320 G(DstMem | SrcImmByte, group1),
4321 F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
4322 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
4324 I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
4325 I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
4326 I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg),
4327 ID(0, &instr_dual_8d),
4328 I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
4331 DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
4333 D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
4334 I(SrcImmFAddr | No64 | IsBranch, em_call_far), N,
4335 II(ImplicitOps | Stack, em_pushf, pushf),
4336 II(ImplicitOps | Stack, em_popf, popf),
4337 I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
4339 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
4340 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
4341 I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov),
4342 F2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r),
4344 F2bv(DstAcc | SrcImm | NoWrite, em_test),
4345 I2bv(SrcAcc | DstDI | Mov | String, em_mov),
4346 I2bv(SrcSI | DstAcc | Mov | String, em_mov),
4347 F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
4349 X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
4351 X8(I(DstReg | SrcImm64 | Mov, em_mov)),
4353 G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
4354 I(ImplicitOps | NearBranch | SrcImmU16 | IsBranch, em_ret_near_imm),
4355 I(ImplicitOps | NearBranch | IsBranch, em_ret),
4356 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
4357 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
4358 G(ByteOp, group11), G(0, group11),
4360 I(Stack | SrcImmU16 | Src2ImmByte | IsBranch, em_enter),
4361 I(Stack | IsBranch, em_leave),
4362 I(ImplicitOps | SrcImmU16 | IsBranch, em_ret_far_imm),
4363 I(ImplicitOps | IsBranch, em_ret_far),
4364 D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch, intn),
4365 D(ImplicitOps | No64 | IsBranch),
4366 II(ImplicitOps | IsBranch, em_iret, iret),
4368 G(Src2One | ByteOp, group2), G(Src2One, group2),
4369 G(Src2CL | ByteOp, group2), G(Src2CL, group2),
4370 I(DstAcc | SrcImmUByte | No64, em_aam),
4371 I(DstAcc | SrcImmUByte | No64, em_aad),
4372 F(DstAcc | ByteOp | No64, em_salc),
4373 I(DstAcc | SrcXLat | ByteOp, em_mov),
4375 N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
4377 X3(I(SrcImmByte | NearBranch | IsBranch, em_loop)),
4378 I(SrcImmByte | NearBranch | IsBranch, em_jcxz),
4379 I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in),
4380 I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
4382 I(SrcImm | NearBranch | IsBranch, em_call),
4383 D(SrcImm | ImplicitOps | NearBranch | IsBranch),
4384 I(SrcImmFAddr | No64 | IsBranch, em_jmp_far),
4385 D(SrcImmByte | ImplicitOps | NearBranch | IsBranch),
4386 I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in),
4387 I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
4389 N, DI(ImplicitOps, icebp), N, N,
4390 DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
4391 G(ByteOp, group3), G(0, group3),
4393 D(ImplicitOps), D(ImplicitOps),
4394 I(ImplicitOps, em_cli), I(ImplicitOps, em_sti),
4395 D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
4398 static const struct opcode twobyte_table[256] = {
4400 G(0, group6), GD(0, &group7), N, N,
4401 N, I(ImplicitOps | EmulateOnUD | IsBranch, em_syscall),
4402 II(ImplicitOps | Priv, em_clts, clts), N,
4403 DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
4404 N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4406 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_10_0f_11),
4407 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_10_0f_11),
4409 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 4 * prefetch + 4 * reserved NOP */
4410 D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4411 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4412 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4413 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4414 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* NOP + 7 * reserved NOP */
4416 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_access),
4417 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
4418 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write,
4420 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write,
4423 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
4424 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
4425 N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b),
4428 II(ImplicitOps | Priv, em_wrmsr, wrmsr),
4429 IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
4430 II(ImplicitOps | Priv, em_rdmsr, rdmsr),
4431 IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
4432 I(ImplicitOps | EmulateOnUD | IsBranch, em_sysenter),
4433 I(ImplicitOps | Priv | EmulateOnUD | IsBranch, em_sysexit),
4435 N, N, N, N, N, N, N, N,
4437 X16(D(DstReg | SrcMem | ModRM)),
4439 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4444 N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
4449 N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
4451 X16(D(SrcImm | NearBranch | IsBranch)),
4453 X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
4455 I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
4456 II(ImplicitOps, em_cpuid, cpuid),
4457 F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
4458 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
4459 F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
4461 I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
4462 II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
4463 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
4464 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
4465 F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
4466 GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
4468 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg),
4469 I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
4470 F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
4471 I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
4472 I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
4473 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4477 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
4478 I(DstReg | SrcMem | ModRM, em_bsf_c),
4479 I(DstReg | SrcMem | ModRM, em_bsr_c),
4480 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4482 F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
4483 N, ID(0, &instr_dual_0f_c3),
4484 N, N, N, GD(0, &group9),
4486 X8(I(DstReg, em_bswap)),
4488 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4490 N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7),
4491 N, N, N, N, N, N, N, N,
4493 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
4496 static const struct instr_dual instr_dual_0f_38_f0 = {
4497 I(DstReg | SrcMem | Mov, em_movbe), N
4500 static const struct instr_dual instr_dual_0f_38_f1 = {
4501 I(DstMem | SrcReg | Mov, em_movbe), N
4504 static const struct gprefix three_byte_0f_38_f0 = {
4505 ID(0, &instr_dual_0f_38_f0), N, N, N
4508 static const struct gprefix three_byte_0f_38_f1 = {
4509 ID(0, &instr_dual_0f_38_f1), N, N, N
4513 * Insns below are selected by the prefix, which is indexed by the third opcode byte.
4516 static const struct opcode opcode_map_0f_38[256] = {
4518 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4520 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4522 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
4523 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
4544 static unsigned imm_size(struct x86_emulate_ctxt *ctxt)
4548 size = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4554 static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
4555 unsigned size, bool sign_extension)
4557 int rc = X86EMUL_CONTINUE;
4561 op->addr.mem.ea = ctxt->_eip;
4562 /* NB. Immediates are sign-extended as necessary. */
4563 switch (op->bytes) {
4565 op->val = insn_fetch(s8, ctxt);
4568 op->val = insn_fetch(s16, ctxt);
4571 op->val = insn_fetch(s32, ctxt);
4574 op->val = insn_fetch(s64, ctxt);
4577 if (!sign_extension) {
4578 switch (op->bytes) {
4586 op->val &= 0xffffffff;
4594 static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4597 int rc = X86EMUL_CONTINUE;
4601 decode_register_operand(ctxt, op);
4604 rc = decode_imm(ctxt, op, 1, false);
4607 ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4611 if (ctxt->d & BitOp)
4612 fetch_bit_operand(ctxt);
4613 op->orig_val = op->val;
4616 ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8;
4620 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4621 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4622 fetch_register_operand(op);
4623 op->orig_val = op->val;
4627 op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes;
4628 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4629 fetch_register_operand(op);
4630 op->orig_val = op->val;
4633 if (ctxt->d & ByteOp) {
4638 op->bytes = ctxt->op_bytes;
4639 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4640 fetch_register_operand(op);
4641 op->orig_val = op->val;
4645 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4647 register_address(ctxt, VCPU_REGS_RDI);
4648 op->addr.mem.seg = VCPU_SREG_ES;
4655 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4656 fetch_register_operand(op);
4661 op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
4664 rc = decode_imm(ctxt, op, 1, true);
4672 rc = decode_imm(ctxt, op, imm_size(ctxt), true);
4675 rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
4678 ctxt->memop.bytes = 1;
4679 if (ctxt->memop.type == OP_REG) {
4680 ctxt->memop.addr.reg = decode_register(ctxt,
4681 ctxt->modrm_rm, true);
4682 fetch_register_operand(&ctxt->memop);
4686 ctxt->memop.bytes = 2;
4689 ctxt->memop.bytes = 4;
4692 rc = decode_imm(ctxt, op, 2, false);
4695 rc = decode_imm(ctxt, op, imm_size(ctxt), false);
4699 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4701 register_address(ctxt, VCPU_REGS_RSI);
4702 op->addr.mem.seg = ctxt->seg_override;
4708 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4711 reg_read(ctxt, VCPU_REGS_RBX) +
4712 (reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
4713 op->addr.mem.seg = ctxt->seg_override;
4718 op->addr.mem.ea = ctxt->_eip;
4719 op->bytes = ctxt->op_bytes + 2;
4720 insn_fetch_arr(op->valptr, op->bytes, ctxt);
4723 ctxt->memop.bytes = ctxt->op_bytes + 2;
4727 op->val = VCPU_SREG_ES;
4731 op->val = VCPU_SREG_CS;
4735 op->val = VCPU_SREG_SS;
4739 op->val = VCPU_SREG_DS;
4743 op->val = VCPU_SREG_FS;
4747 op->val = VCPU_SREG_GS;
4750 /* Special instructions do their own operand decoding. */
4752 op->type = OP_NONE; /* Disable writeback. */
4760 int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type)
4762 int rc = X86EMUL_CONTINUE;
4763 int mode = ctxt->mode;
4764 int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
4765 bool op_prefix = false;
4766 bool has_seg_override = false;
4767 struct opcode opcode;
4769 struct desc_struct desc;
4771 ctxt->memop.type = OP_NONE;
4772 ctxt->memopp = NULL;
4773 ctxt->_eip = ctxt->eip;
4774 ctxt->fetch.ptr = ctxt->fetch.data;
4775 ctxt->fetch.end = ctxt->fetch.data + insn_len;
4776 ctxt->opcode_len = 1;
4777 ctxt->intercept = x86_intercept_none;
4779 memcpy(ctxt->fetch.data, insn, insn_len);
4781 rc = __do_insn_fetch_bytes(ctxt, 1);
4782 if (rc != X86EMUL_CONTINUE)
4787 case X86EMUL_MODE_REAL:
4788 case X86EMUL_MODE_VM86:
4789 def_op_bytes = def_ad_bytes = 2;
4790 ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS);
4792 def_op_bytes = def_ad_bytes = 4;
4794 case X86EMUL_MODE_PROT16:
4795 def_op_bytes = def_ad_bytes = 2;
4797 case X86EMUL_MODE_PROT32:
4798 def_op_bytes = def_ad_bytes = 4;
4800 #ifdef CONFIG_X86_64
4801 case X86EMUL_MODE_PROT64:
4807 return EMULATION_FAILED;
4810 ctxt->op_bytes = def_op_bytes;
4811 ctxt->ad_bytes = def_ad_bytes;
4813 /* Legacy prefixes. */
4815 switch (ctxt->b = insn_fetch(u8, ctxt)) {
4816 case 0x66: /* operand-size override */
4818 /* switch between 2/4 bytes */
4819 ctxt->op_bytes = def_op_bytes ^ 6;
4821 case 0x67: /* address-size override */
4822 if (mode == X86EMUL_MODE_PROT64)
4823 /* switch between 4/8 bytes */
4824 ctxt->ad_bytes = def_ad_bytes ^ 12;
4826 /* switch between 2/4 bytes */
4827 ctxt->ad_bytes = def_ad_bytes ^ 6;
4829 case 0x26: /* ES override */
4830 has_seg_override = true;
4831 ctxt->seg_override = VCPU_SREG_ES;
4833 case 0x2e: /* CS override */
4834 has_seg_override = true;
4835 ctxt->seg_override = VCPU_SREG_CS;
4837 case 0x36: /* SS override */
4838 has_seg_override = true;
4839 ctxt->seg_override = VCPU_SREG_SS;
4841 case 0x3e: /* DS override */
4842 has_seg_override = true;
4843 ctxt->seg_override = VCPU_SREG_DS;
4845 case 0x64: /* FS override */
4846 has_seg_override = true;
4847 ctxt->seg_override = VCPU_SREG_FS;
4849 case 0x65: /* GS override */
4850 has_seg_override = true;
4851 ctxt->seg_override = VCPU_SREG_GS;
4853 case 0x40 ... 0x4f: /* REX */
4854 if (mode != X86EMUL_MODE_PROT64)
4856 ctxt->rex_prefix = ctxt->b;
4858 case 0xf0: /* LOCK */
4859 ctxt->lock_prefix = 1;
4861 case 0xf2: /* REPNE/REPNZ */
4862 case 0xf3: /* REP/REPE/REPZ */
4863 ctxt->rep_prefix = ctxt->b;
4869 /* Any legacy prefix after a REX prefix nullifies its effect. */
4871 ctxt->rex_prefix = 0;
4877 if (ctxt->rex_prefix & 8)
4878 ctxt->op_bytes = 8; /* REX.W */
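/*
 * The size overrides work by XOR-ing the default: 0x66 flips op_bytes
 * between 4 and 2 (4 ^ 6 = 2, 2 ^ 6 = 4), and in long mode 0x67 flips
 * ad_bytes between 8 and 4 (8 ^ 12 = 4). REX.W (bit 3 of the REX
 * byte) is applied only after the prefix loop, since a legacy prefix
 * that follows REX cancels it.
 */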
4880 /* Opcode byte(s). */
4881 opcode = opcode_table[ctxt->b];
4882 /* Two-byte opcode? */
4883 if (ctxt->b == 0x0f) {
4884 ctxt->opcode_len = 2;
4885 ctxt->b = insn_fetch(u8, ctxt);
4886 opcode = twobyte_table[ctxt->b];
4888 /* 0F_38 opcode map */
4889 if (ctxt->b == 0x38) {
4890 ctxt->opcode_len = 3;
4891 ctxt->b = insn_fetch(u8, ctxt);
4892 opcode = opcode_map_0f_38[ctxt->b];
4895 ctxt->d = opcode.flags;
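/*
 * Escape bytes select the lookup table: a leading 0x0f switches to
 * the two-byte table and 0x0f 0x38 to the three-byte map, with
 * ctxt->b left holding the final opcode byte and ctxt->opcode_len
 * recording how many opcode bytes were consumed. ctxt->d now carries
 * the decode flags for the (possibly still group-encoded) entry.
 */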
4897 if (ctxt->d & ModRM)
4898 ctxt->modrm = insn_fetch(u8, ctxt);
4900 /* VEX-prefixed instructions are not implemented */
4901 if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
4902 (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) {
4906 while (ctxt->d & GroupMask) {
4907 switch (ctxt->d & GroupMask) {
4909 goffset = (ctxt->modrm >> 3) & 7;
4910 opcode = opcode.u.group[goffset];
4913 goffset = (ctxt->modrm >> 3) & 7;
4914 if ((ctxt->modrm >> 6) == 3)
4915 opcode = opcode.u.gdual->mod3[goffset];
4917 opcode = opcode.u.gdual->mod012[goffset];
4920 goffset = ctxt->modrm & 7;
4921 opcode = opcode.u.group[goffset];
4924 if (ctxt->rep_prefix && op_prefix)
4925 return EMULATION_FAILED;
4926 simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix;
4927 switch (simd_prefix) {
4928 case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
4929 case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
4930 case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
4931 case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
4935 if (ctxt->modrm > 0xbf) {
4936 size_t size = ARRAY_SIZE(opcode.u.esc->high);
4937 u32 index = array_index_nospec(
4938 ctxt->modrm - 0xc0, size);
4940 opcode = opcode.u.esc->high[index];
4942 opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
4946 if ((ctxt->modrm >> 6) == 3)
4947 opcode = opcode.u.idual->mod3;
4949 opcode = opcode.u.idual->mod012;
4952 if (ctxt->mode == X86EMUL_MODE_PROT64)
4953 opcode = opcode.u.mdual->mode64;
4955 opcode = opcode.u.mdual->mode32;
4958 return EMULATION_FAILED;
4961 ctxt->d &= ~(u64)GroupMask;
4962 ctxt->d |= opcode.flags;
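/*
 * Group decoding example: for a Group entry the ModRM reg field
 * selects the sub-opcode, e.g. modrm = 0xd8 gives (0xd8 >> 3) & 7 = 3,
 * so u.group[3] is used; GroupDual additionally splits on mod == 3
 * (register form) versus memory forms. The loop repeats because the
 * selected entry may itself be another group, prefix or escape table.
 */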
4965 ctxt->is_branch = opcode.flags & IsBranch;
4969 return EMULATION_FAILED;
4971 ctxt->execute = opcode.u.execute;
4973 if (unlikely(emulation_type & EMULTYPE_TRAP_UD) &&
4974 likely(!(ctxt->d & EmulateOnUD)))
4975 return EMULATION_FAILED;
4977 if (unlikely(ctxt->d &
4978 (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
4981 * These are copied unconditionally here, and checked unconditionally
4982 * in x86_emulate_insn.
4984 ctxt->check_perm = opcode.check_perm;
4985 ctxt->intercept = opcode.intercept;
4987 if (ctxt->d & NotImpl)
4988 return EMULATION_FAILED;
4990 if (mode == X86EMUL_MODE_PROT64) {
4991 if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
4993 else if (ctxt->d & NearBranch)
4997 if (ctxt->d & Op3264) {
4998 if (mode == X86EMUL_MODE_PROT64)
5004 if ((ctxt->d & No16) && ctxt->op_bytes == 2)
5008 ctxt->op_bytes = 16;
5009 else if (ctxt->d & Mmx)
5013 /* ModRM and SIB bytes. */
5014 if (ctxt->d & ModRM) {
5015 rc = decode_modrm(ctxt, &ctxt->memop);
5016 if (!has_seg_override) {
5017 has_seg_override = true;
5018 ctxt->seg_override = ctxt->modrm_seg;
5020 } else if (ctxt->d & MemAbs)
5021 rc = decode_abs(ctxt, &ctxt->memop);
5022 if (rc != X86EMUL_CONTINUE)
5025 if (!has_seg_override)
5026 ctxt->seg_override = VCPU_SREG_DS;
5028 ctxt->memop.addr.mem.seg = ctxt->seg_override;
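/*
 * The effective segment is DS unless a segment-override prefix or the
 * ModRM decode supplied one (decode_modrm() can select SS for
 * stack-relative, BP/SP-based forms). Stamping it on memop here lets
 * the later segmented reads and writes apply the right base and limit.
 */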
5031 * Decode and fetch the source operand: register, memory
5034 rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask);
5035 if (rc != X86EMUL_CONTINUE)
5039 * Decode and fetch the second source operand: register, memory
5042 rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask);
5043 if (rc != X86EMUL_CONTINUE)
5046 /* Decode and fetch the destination operand: register or memory. */
5047 rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
5049 if (ctxt->rip_relative && likely(ctxt->memopp))
5050 ctxt->memopp->addr.mem.ea = address_mask(ctxt,
5051 ctxt->memopp->addr.mem.ea + ctxt->_eip);
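/*
 * RIP-relative fixup: in 64-bit mode ModRM mod = 00, rm = 101 encodes
 * a disp32 relative to the next instruction. The displacement recorded
 * during ModRM decode can only be finalized here, once _eip has been
 * advanced past the immediates and therefore points at the following
 * instruction.
 */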
5054 if (rc == X86EMUL_PROPAGATE_FAULT)
5055 ctxt->have_exception = true;
5056 return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
5059 bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
5061 return ctxt->d & PageTable;
5064 static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
5066 /* The second termination condition applies only to REPE
5067 * and REPNE. If the repeat string operation prefix is
5068 * REPE/REPZ or REPNE/REPNZ, test the corresponding
5069 * termination condition:
5070 * - if REPE/REPZ and ZF = 0 then done
5071 * - if REPNE/REPNZ and ZF = 1 then done
5073 if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
5074 (ctxt->b == 0xae) || (ctxt->b == 0xaf))
5075 && (((ctxt->rep_prefix == REPE_PREFIX) &&
5076 ((ctxt->eflags & X86_EFLAGS_ZF) == 0))
5077 || ((ctxt->rep_prefix == REPNE_PREFIX) &&
5078 ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
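/*
 * For example REPE CMPSB keeps iterating while the compared bytes are
 * equal (ZF = 1) and stops at the first mismatch, while REPNE SCASB
 * stops at the first match (ZF = 1). MOVS/STOS/LODS/INS/OUTS ignore
 * ZF and are bounded only by the count in RCX.
 */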
5084 static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
5089 rc = asm_safe("fwait");
5092 if (unlikely(rc != X86EMUL_CONTINUE))
5093 return emulate_exception(ctxt, MF_VECTOR, 0, false);
5095 return X86EMUL_CONTINUE;
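/*
 * Executing fwait forces any pending x87 exception to be delivered
 * now; asm_safe() catches the resulting fault, and it is reflected to
 * the guest as #MF before the MMX instruction is emulated rather than
 * being lost.
 */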
5098 static void fetch_possible_mmx_operand(struct operand *op)
5100 if (op->type == OP_MM)
5101 kvm_read_mmx_reg(op->addr.mm, &op->mm_val);
5104 static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop)
5106 ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
5108 if (!(ctxt->d & ByteOp))
5109 fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
5111 asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
5112 : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
5113 [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
5114 : "c"(ctxt->src2.val));
5116 ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
5117 if (!fop) /* exception is returned in fop variable */
5118 return emulate_de(ctxt);
5119 return X86EMUL_CONTINUE;
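/*
 * The fastop table packs one stub per operand size, FASTOP_SIZE bytes
 * apart; __ffs() maps sizes 1/2/4/8 to indices 0/1/2/3, so e.g. a
 * 4-byte operation dispatches to fop + 2 * FASTOP_SIZE. Guest flags
 * are swapped in around the indirect call, and a stub reports an
 * exception by returning NULL in fop, which is surfaced as #DE.
 */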
5122 void init_decode_cache(struct x86_emulate_ctxt *ctxt)
5124 /* Clear fields that are set conditionally but read without a guard. */
5125 ctxt->rip_relative = false;
5126 ctxt->rex_prefix = 0;
5127 ctxt->lock_prefix = 0;
5128 ctxt->rep_prefix = 0;
5129 ctxt->regs_valid = 0;
5130 ctxt->regs_dirty = 0;
5132 ctxt->io_read.pos = 0;
5133 ctxt->io_read.end = 0;
5134 ctxt->mem_read.end = 0;
5137 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
5139 const struct x86_emulate_ops *ops = ctxt->ops;
5140 int rc = X86EMUL_CONTINUE;
5141 int saved_dst_type = ctxt->dst.type;
5142 bool is_guest_mode = ctxt->ops->is_guest_mode(ctxt);
5144 ctxt->mem_read.pos = 0;
5146 /* LOCK prefix is allowed only with some instructions */
5147 if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
5148 rc = emulate_ud(ctxt);
5152 if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) {
5153 rc = emulate_ud(ctxt);
5157 if (unlikely(ctxt->d &
5158 (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
5159 if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
5160 (ctxt->d & Undefined)) {
5161 rc = emulate_ud(ctxt);
5165 if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
5166 || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
5167 rc = emulate_ud(ctxt);
5171 if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
5172 rc = emulate_nm(ctxt);
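/*
 * SSE/MMX gating mirrors the architectural checks: CR0.EM, or a clear
 * CR4.OSFXSR for SSE, yields #UD, while CR0.TS yields #NM so that a
 * guest doing lazy FPU switching gets the fault it expects before any
 * FPU state is touched.
 */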
5176 if (ctxt->d & Mmx) {
5177 rc = flush_pending_x87_faults(ctxt);
5178 if (rc != X86EMUL_CONTINUE)
5181 * Now that we know the FPU is exception-safe, we can fetch operands from it.
5184 fetch_possible_mmx_operand(&ctxt->src);
5185 fetch_possible_mmx_operand(&ctxt->src2);
5186 if (!(ctxt->d & Mov))
5187 fetch_possible_mmx_operand(&ctxt->dst);
5190 if (unlikely(is_guest_mode) && ctxt->intercept) {
5191 rc = emulator_check_intercept(ctxt, ctxt->intercept,
5192 X86_ICPT_PRE_EXCEPT);
5193 if (rc != X86EMUL_CONTINUE)
5197 /* Instruction can only be executed in protected mode */
5198 if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
5199 rc = emulate_ud(ctxt);
5203 /* Privileged instruction can be executed only in CPL=0 */
5204 if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
5205 if (ctxt->d & PrivUD)
5206 rc = emulate_ud(ctxt);
5208 rc = emulate_gp(ctxt, 0);
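/*
 * For example a privileged instruction executed by the guest at CPL 3
 * normally takes #GP(0) here; opcodes flagged PrivUD inject #UD
 * instead.
 */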
5212 /* Do instruction specific permission checks */
5213 if (ctxt->d & CheckPerm) {
5214 rc = ctxt->check_perm(ctxt);
5215 if (rc != X86EMUL_CONTINUE)
5219 if (unlikely(is_guest_mode) && (ctxt->d & Intercept)) {
5220 rc = emulator_check_intercept(ctxt, ctxt->intercept,
5221 X86_ICPT_POST_EXCEPT);
5222 if (rc != X86EMUL_CONTINUE)
5226 if (ctxt->rep_prefix && (ctxt->d & String)) {
5227 /* All REP prefixes have the same first termination condition */
5228 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
5229 string_registers_quirk(ctxt);
5230 ctxt->eip = ctxt->_eip;
5231 ctxt->eflags &= ~X86_EFLAGS_RF;
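/*
 * e.g. REP MOVSB with RCX = 0 performs no iterations at all: the
 * quirk helper fixes up the string registers where needed, RIP is
 * advanced past the instruction and RF is cleared as if it had
 * retired normally.
 */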
5237 if ((ctxt->src.type == OP_MEM) && !(ctxt->d & NoAccess)) {
5238 rc = segmented_read(ctxt, ctxt->src.addr.mem,
5239 ctxt->src.valptr, ctxt->src.bytes);
5240 if (rc != X86EMUL_CONTINUE)
5242 ctxt->src.orig_val64 = ctxt->src.val64;
5245 if (ctxt->src2.type == OP_MEM) {
5246 rc = segmented_read(ctxt, ctxt->src2.addr.mem,
5247 &ctxt->src2.val, ctxt->src2.bytes);
5248 if (rc != X86EMUL_CONTINUE)
5252 if ((ctxt->d & DstMask) == ImplicitOps)
5256 if ((ctxt->dst.type == OP_MEM) && !(ctxt->d & Mov)) {
5257 /* optimisation - avoid slow emulated read if Mov */
5258 rc = segmented_read(ctxt, ctxt->dst.addr.mem,
5259 &ctxt->dst.val, ctxt->dst.bytes);
5260 if (rc != X86EMUL_CONTINUE) {
5261 if (!(ctxt->d & NoWrite) &&
5262 rc == X86EMUL_PROPAGATE_FAULT &&
5263 ctxt->exception.vector == PF_VECTOR)
5264 ctxt->exception.error_code |= PFERR_WRITE_MASK;
5268 /* Copy full 64-bit value for CMPXCHG8B. */
5269 ctxt->dst.orig_val64 = ctxt->dst.val64;
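/*
 * Destinations of read-modify-write instructions are fetched up
 * front. A page fault on this read is reported as a write fault when
 * the instruction would also write, and the 64-bit copy kept in
 * orig_val64 is what the CMPXCHG8B handler later compares against.
 */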
5273 if (unlikely(is_guest_mode) && (ctxt->d & Intercept)) {
5274 rc = emulator_check_intercept(ctxt, ctxt->intercept,
5275 X86_ICPT_POST_MEMACCESS);
5276 if (rc != X86EMUL_CONTINUE)
5280 if (ctxt->rep_prefix && (ctxt->d & String))
5281 ctxt->eflags |= X86_EFLAGS_RF;
5283 ctxt->eflags &= ~X86_EFLAGS_RF;
5285 if (ctxt->execute) {
5286 if (ctxt->d & Fastop)
5287 rc = fastop(ctxt, ctxt->fop);
5289 rc = ctxt->execute(ctxt);
5290 if (rc != X86EMUL_CONTINUE)
5295 if (ctxt->opcode_len == 2)
5297 else if (ctxt->opcode_len == 3)
5298 goto threebyte_insn;
5301 case 0x70 ... 0x7f: /* jcc (short) */
5302 if (test_cc(ctxt->b, ctxt->eflags))
5303 rc = jmp_rel(ctxt, ctxt->src.val);
5305 case 0x8d: /* lea r16/r32, m */
5306 ctxt->dst.val = ctxt->src.addr.mem.ea;
5308 case 0x90 ... 0x97: /* nop / xchg reg, rax */
5309 if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
5310 ctxt->dst.type = OP_NONE;
5314 case 0x98: /* cbw/cwde/cdqe */
5315 switch (ctxt->op_bytes) {
5316 case 2: ctxt->dst.val = (s8)ctxt->dst.val; break;
5317 case 4: ctxt->dst.val = (s16)ctxt->dst.val; break;
5318 case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
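/*
 * Worked example: with 2-byte operands CBW sign-extends AL into AX
 * (AL = 0x80 becomes AX = 0xff80); with 4-byte operands CWDE extends
 * AX into EAX, and with 8-byte operands CDQE extends EAX into RAX.
 */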
5321 case 0xcc: /* int3 */
5322 rc = emulate_int(ctxt, 3);
5324 case 0xcd: /* int n */
5325 rc = emulate_int(ctxt, ctxt->src.val);
5327 case 0xce: /* into */
5328 if (ctxt->eflags & X86_EFLAGS_OF)
5329 rc = emulate_int(ctxt, 4);
5331 case 0xe9: /* jmp rel */
5332 case 0xeb: /* jmp rel short */
5333 rc = jmp_rel(ctxt, ctxt->src.val);
5334 ctxt->dst.type = OP_NONE; /* Disable writeback. */
5336 case 0xf4: /* hlt */
5337 ctxt->ops->halt(ctxt);
5339 case 0xf5: /* cmc */
5340 /* complement the carry flag in EFLAGS */
5341 ctxt->eflags ^= X86_EFLAGS_CF;
5343 case 0xf8: /* clc */
5344 ctxt->eflags &= ~X86_EFLAGS_CF;
5346 case 0xf9: /* stc */
5347 ctxt->eflags |= X86_EFLAGS_CF;
5349 case 0xfc: /* cld */
5350 ctxt->eflags &= ~X86_EFLAGS_DF;
5352 case 0xfd: /* std */
5353 ctxt->eflags |= X86_EFLAGS_DF;
5356 goto cannot_emulate;
5359 if (rc != X86EMUL_CONTINUE)
5363 if (ctxt->d & SrcWrite) {
5364 BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
5365 rc = writeback(ctxt, &ctxt->src);
5366 if (rc != X86EMUL_CONTINUE)
5369 if (!(ctxt->d & NoWrite)) {
5370 rc = writeback(ctxt, &ctxt->dst);
5371 if (rc != X86EMUL_CONTINUE)
5376 * Restore dst type in case the decoding is reused
5377 * (happens for string instructions).
5379 ctxt->dst.type = saved_dst_type;
5381 if ((ctxt->d & SrcMask) == SrcSI)
5382 string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);
5384 if ((ctxt->d & DstMask) == DstDI)
5385 string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);
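/*
 * string_addr_inc() advances RSI/RDI by the element size, in the
 * direction given by EFLAGS.DF (see the CLD/STD cases above), per the
 * architectural string semantics.
 */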
5387 if (ctxt->rep_prefix && (ctxt->d & String)) {
5389 struct read_cache *r = &ctxt->io_read;
5390 if ((ctxt->d & SrcMask) == SrcSI)
5391 count = ctxt->src.count;
5393 count = ctxt->dst.count;
5394 register_address_increment(ctxt, VCPU_REGS_RCX, -count);
5396 if (!string_insn_completed(ctxt)) {
5398 * Re-enter the guest when the PIO read-ahead buffer is empty
5399 * or, if it is not used, after every 1024 iterations.
5401 if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
5402 (r->end == 0 || r->end != r->pos)) {
5404 * Reset the read cache. This usually happens before
5405 * decode, but since the instruction is restarted
5406 * we have to do it here.
5408 ctxt->mem_read.end = 0;
5409 writeback_registers(ctxt);
5410 return EMULATION_RESTART;
5412 goto done; /* skip rip writeback */
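/*
 * A string instruction that has not reached its termination condition
 * either restarts the emulation loop internally (EMULATION_RESTART)
 * or, when the guest should run again (empty PIO read-ahead buffer, or
 * the periodic 1024-iteration break), skips the RIP writeback so the
 * same instruction traps again and resumes with the updated registers.
 */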
5414 ctxt->eflags &= ~X86_EFLAGS_RF;
5417 ctxt->eip = ctxt->_eip;
5418 if (ctxt->mode != X86EMUL_MODE_PROT64)
5419 ctxt->eip = (u32)ctxt->_eip;
5422 if (rc == X86EMUL_PROPAGATE_FAULT) {
5423 if (KVM_EMULATOR_BUG_ON(ctxt->exception.vector > 0x1f, ctxt))
5424 return EMULATION_FAILED;
5425 ctxt->have_exception = true;
5427 if (rc == X86EMUL_INTERCEPTED)
5428 return EMULATION_INTERCEPTED;
5430 if (rc == X86EMUL_CONTINUE)
5431 writeback_registers(ctxt);
5433 return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
5437 case 0x09: /* wbinvd */
5438 (ctxt->ops->wbinvd)(ctxt);
5440 case 0x08: /* invd */
5441 case 0x0d: /* GrpP (prefetch) */
5442 case 0x18: /* Grp16 (prefetch/nop) */
5443 case 0x1f: /* nop */
5445 case 0x20: /* mov cr, reg */
5446 ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
5448 case 0x21: /* mov from dr to reg */
5449 ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
5451 case 0x40 ... 0x4f: /* cmov */
5452 if (test_cc(ctxt->b, ctxt->eflags))
5453 ctxt->dst.val = ctxt->src.val;
5454 else if (ctxt->op_bytes != 4)
5455 ctxt->dst.type = OP_NONE; /* no writeback */
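/*
 * The op_bytes != 4 test keeps the writeback for 32-bit CMOVcc even
 * when the condition is false, because in 64-bit mode a 32-bit
 * destination register is architecturally zero-extended regardless of
 * whether the move happens.
 */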
5457 case 0x80 ... 0x8f: /* jcc (near) */
5458 if (test_cc(ctxt->b, ctxt->eflags))
5459 rc = jmp_rel(ctxt, ctxt->src.val);
5461 case 0x90 ... 0x9f: /* setcc r/m8 */
5462 ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
5464 case 0xb6 ... 0xb7: /* movzx */
5465 ctxt->dst.bytes = ctxt->op_bytes;
5466 ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
5467 : (u16) ctxt->src.val;
5469 case 0xbe ... 0xbf: /* movsx */
5470 ctxt->dst.bytes = ctxt->op_bytes;
5471 ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
5472 (s16) ctxt->src.val;
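/*
 * Example: for a one-byte source of 0x80, movzx produces 0x00000080
 * while movsx produces 0xffffff80; the u16/s16 casts handle the
 * 16-bit source forms the same way.
 */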
5475 goto cannot_emulate;
5480 if (rc != X86EMUL_CONTINUE)
5486 return EMULATION_FAILED;
5489 void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt)
5491 invalidate_registers(ctxt);
5494 void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
5496 writeback_registers(ctxt);
5499 bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt)
5501 if (ctxt->rep_prefix && (ctxt->d & String))
5504 if (ctxt->d & TwoMemOp)
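/*
 * REP string instructions and two-memory-operand instructions touch
 * more than one (or a varying) guest address, so the single cached
 * GPA translation supplied by the caller cannot safely be reused for
 * them.
 */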