1 // SPDX-License-Identifier: GPL-2.0-only
2 /******************************************************************************
5 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
7 * Copyright (c) 2005 Keir Fraser
9 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
10 * privileged instructions:
12 * Copyright (C) 2006 Qumranet
13 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
15 * Avi Kivity <avi@qumranet.com>
16 * Yaniv Kamay <yaniv@qumranet.com>
18 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
21 #include <linux/kvm_host.h>
22 #include "kvm_cache_regs.h"
23 #include "kvm_emulate.h"
24 #include <linux/stringify.h>
25 #include <asm/debugreg.h>
26 #include <asm/nospec-branch.h>
37 #define OpImplicit 1ull /* No generic decode */
38 #define OpReg 2ull /* Register */
39 #define OpMem 3ull /* Memory */
40 #define OpAcc 4ull /* Accumulator: AL/AX/EAX/RAX */
41 #define OpDI 5ull /* ES:DI/EDI/RDI */
42 #define OpMem64 6ull /* Memory, 64-bit */
43 #define OpImmUByte 7ull /* Zero-extended 8-bit immediate */
44 #define OpDX 8ull /* DX register */
45 #define OpCL 9ull /* CL register (for shifts) */
46 #define OpImmByte 10ull /* 8-bit sign extended immediate */
47 #define OpOne 11ull /* Implied 1 */
48 #define OpImm 12ull /* Sign extended up to 32-bit immediate */
49 #define OpMem16 13ull /* Memory operand (16-bit). */
50 #define OpMem32 14ull /* Memory operand (32-bit). */
51 #define OpImmU 15ull /* Immediate operand, zero extended */
52 #define OpSI 16ull /* SI/ESI/RSI */
53 #define OpImmFAddr 17ull /* Immediate far address */
54 #define OpMemFAddr 18ull /* Far address in memory */
55 #define OpImmU16 19ull /* Immediate operand, 16 bits, zero extended */
56 #define OpES 20ull /* ES */
57 #define OpCS 21ull /* CS */
58 #define OpSS 22ull /* SS */
59 #define OpDS 23ull /* DS */
60 #define OpFS 24ull /* FS */
61 #define OpGS 25ull /* GS */
62 #define OpMem8 26ull /* 8-bit zero extended memory operand */
63 #define OpImm64 27ull /* Sign extended 16/32/64-bit immediate */
64 #define OpXLat 28ull /* memory at BX/EBX/RBX + zero-extended AL */
65 #define OpAccLo 29ull /* Low part of extended acc (AX/AX/EAX/RAX) */
66 #define OpAccHi 30ull /* High part of extended acc (-/DX/EDX/RDX) */
68 #define OpBits 5 /* Width of operand field */
69 #define OpMask ((1ull << OpBits) - 1)
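/*
 * Each operand slot in the opcode flags below is an OpBits-wide (5-bit,
 * OpMask == 0x1f) field holding one of the Op* codes above.  For example,
 * an entry built as DstMem | SrcReg carries OpMem in the destination field
 * and OpReg in the source field, so the decoder can recover each operand
 * type as ((ctxt->d >> DstShift) & OpMask) and ((ctxt->d >> SrcShift) &
 * OpMask) respectively.
 */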
72 * Opcode effective-address decode tables.
73 * Note that we only emulate instructions that have at least one memory
74 * operand (excluding implicit stack references). We assume that stack
75 * references and instruction fetches will never occur in special memory
76 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need not be handled.
80 /* Operand sizes: 8-bit operands or specified/overridden size. */
81 #define ByteOp (1<<0) /* 8-bit operands. */
82 /* Destination operand type. */
84 #define ImplicitOps (OpImplicit << DstShift)
85 #define DstReg (OpReg << DstShift)
86 #define DstMem (OpMem << DstShift)
87 #define DstAcc (OpAcc << DstShift)
88 #define DstDI (OpDI << DstShift)
89 #define DstMem64 (OpMem64 << DstShift)
90 #define DstMem16 (OpMem16 << DstShift)
91 #define DstImmUByte (OpImmUByte << DstShift)
92 #define DstDX (OpDX << DstShift)
93 #define DstAccLo (OpAccLo << DstShift)
94 #define DstMask (OpMask << DstShift)
95 /* Source operand type. */
97 #define SrcNone (OpNone << SrcShift)
98 #define SrcReg (OpReg << SrcShift)
99 #define SrcMem (OpMem << SrcShift)
100 #define SrcMem16 (OpMem16 << SrcShift)
101 #define SrcMem32 (OpMem32 << SrcShift)
102 #define SrcImm (OpImm << SrcShift)
103 #define SrcImmByte (OpImmByte << SrcShift)
104 #define SrcOne (OpOne << SrcShift)
105 #define SrcImmUByte (OpImmUByte << SrcShift)
106 #define SrcImmU (OpImmU << SrcShift)
107 #define SrcSI (OpSI << SrcShift)
108 #define SrcXLat (OpXLat << SrcShift)
109 #define SrcImmFAddr (OpImmFAddr << SrcShift)
110 #define SrcMemFAddr (OpMemFAddr << SrcShift)
111 #define SrcAcc (OpAcc << SrcShift)
112 #define SrcImmU16 (OpImmU16 << SrcShift)
113 #define SrcImm64 (OpImm64 << SrcShift)
114 #define SrcDX (OpDX << SrcShift)
115 #define SrcMem8 (OpMem8 << SrcShift)
116 #define SrcAccHi (OpAccHi << SrcShift)
117 #define SrcMask (OpMask << SrcShift)
118 #define BitOp (1<<11)
119 #define MemAbs (1<<12) /* Memory operand is absolute displacement */
120 #define String (1<<13) /* String instruction (rep capable) */
121 #define Stack (1<<14) /* Stack instruction (push/pop) */
122 #define GroupMask (7<<15) /* Opcode uses one of the group mechanisms */
123 #define Group (1<<15) /* Bits 3:5 of modrm byte extend opcode */
124 #define GroupDual (2<<15) /* Alternate decoding of mod == 3 */
125 #define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */
126 #define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */
127 #define Escape (5<<15) /* Escape to coprocessor instruction */
128 #define InstrDual (6<<15) /* Alternate instruction decoding of mod == 3 */
129 #define ModeDual (7<<15) /* Different instruction for 32/64 bit */
130 #define Sse (1<<18) /* SSE Vector instruction */
131 /* Generic ModRM decode. */
132 #define ModRM (1<<19)
133 /* Destination is only written; never read. */
136 #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */
137 #define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */
138 #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
139 #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */
140 #define Undefined (1<<25) /* No Such Instruction */
141 #define Lock (1<<26) /* lock prefix is allowed for the instruction */
142 #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */
144 #define PageTable (1 << 29) /* instruction used to write page table */
145 #define NotImpl (1 << 30) /* instruction is not implemented */
146 /* Source 2 operand type */
147 #define Src2Shift (31)
148 #define Src2None (OpNone << Src2Shift)
149 #define Src2Mem (OpMem << Src2Shift)
150 #define Src2CL (OpCL << Src2Shift)
151 #define Src2ImmByte (OpImmByte << Src2Shift)
152 #define Src2One (OpOne << Src2Shift)
153 #define Src2Imm (OpImm << Src2Shift)
154 #define Src2ES (OpES << Src2Shift)
155 #define Src2CS (OpCS << Src2Shift)
156 #define Src2SS (OpSS << Src2Shift)
157 #define Src2DS (OpDS << Src2Shift)
158 #define Src2FS (OpFS << Src2Shift)
159 #define Src2GS (OpGS << Src2Shift)
160 #define Src2Mask (OpMask << Src2Shift)
161 #define Mmx ((u64)1 << 40) /* MMX Vector instruction */
162 #define AlignMask ((u64)7 << 41)
163 #define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */
164 #define Unaligned ((u64)2 << 41) /* Explicitly unaligned (e.g. MOVDQU) */
165 #define Avx ((u64)3 << 41) /* Advanced Vector Extensions */
166 #define Aligned16 ((u64)4 << 41) /* Aligned to 16 byte boundary (e.g. FXSAVE) */
167 #define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */
168 #define NoWrite ((u64)1 << 45) /* No writeback */
169 #define SrcWrite ((u64)1 << 46) /* Write back src operand */
170 #define NoMod ((u64)1 << 47) /* Mod field is ignored */
171 #define Intercept ((u64)1 << 48) /* Has valid intercept field */
172 #define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */
173 #define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */
174 #define NearBranch ((u64)1 << 52) /* Near branches */
175 #define No16 ((u64)1 << 53) /* No 16 bit operand */
176 #define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */
177 #define TwoMemOp ((u64)1 << 55) /* Instruction has two memory operands */
179 #define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
181 #define X2(x...) x, x
182 #define X3(x...) X2(x), x
183 #define X4(x...) X2(x), X2(x)
184 #define X5(x...) X4(x), x
185 #define X6(x...) X4(x), X2(x)
186 #define X7(x...) X4(x), X3(x)
187 #define X8(x...) X4(x), X4(x)
188 #define X16(x...) X8(x), X8(x)
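/*
 * The X<n>() helpers simply replicate their argument: X4(op) expands to
 * "op, op, op, op", X16(op) to sixteen copies.  They exist to fill runs of
 * identical entries in the opcode table initializers without spelling each
 * entry out by hand.
 */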
190 #define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
191 #define FASTOP_SIZE 8
197 int (*execute)(struct x86_emulate_ctxt *ctxt);
198 const struct opcode *group;
199 const struct group_dual *gdual;
200 const struct gprefix *gprefix;
201 const struct escape *esc;
202 const struct instr_dual *idual;
203 const struct mode_dual *mdual;
204 void (*fastop)(struct fastop *fake);
206 int (*check_perm)(struct x86_emulate_ctxt *ctxt);
210 struct opcode mod012[8];
211 struct opcode mod3[8];
215 struct opcode pfx_no;
216 struct opcode pfx_66;
217 struct opcode pfx_f2;
218 struct opcode pfx_f3;
223 struct opcode high[64];
227 struct opcode mod012;
232 struct opcode mode32;
233 struct opcode mode64;
236 #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
238 enum x86_transfer_type {
240 X86_TRANSFER_CALL_JMP,
242 X86_TRANSFER_TASK_SWITCH,
245 static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
247 if (!(ctxt->regs_valid & (1 << nr))) {
248 ctxt->regs_valid |= 1 << nr;
249 ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
251 return ctxt->_regs[nr];
254 static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
256 ctxt->regs_valid |= 1 << nr;
257 ctxt->regs_dirty |= 1 << nr;
258 return &ctxt->_regs[nr];
261 static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
264 return reg_write(ctxt, nr);
267 static void writeback_registers(struct x86_emulate_ctxt *ctxt)
271 for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16)
272 ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
275 static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
277 ctxt->regs_dirty = 0;
278 ctxt->regs_valid = 0;
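/*
 * GPR caching protocol: reg_read() pulls a register from the vCPU on first
 * use and caches it in _regs[] (tracked by regs_valid); reg_write() and
 * reg_rmw() additionally set regs_dirty, so writeback_registers() pushes
 * back only the registers that were actually modified.
 * invalidate_registers() clears both bitmaps, forcing a fresh read on the
 * next access.
 */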
282 * These EFLAGS bits are restored from saved value during emulation, and
283 * any changes are written back to the saved value after emulation.
285 #define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
286 X86_EFLAGS_PF|X86_EFLAGS_CF)
295 * fastop functions have a special calling convention:
300 * flags: rflags (in/out)
301 * ex: rsi (in:fastop pointer, out:zero if exception)
303 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
304 * different operand sizes can be reached by calculation, rather than a jump
305 * table (which would be bigger than the code).
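/*
 * A sketch of the size dispatch described above, assuming the b/w/l/q
 * variants are emitted in that order (as the FASTOP* macros below do) and
 * each occupies exactly FASTOP_SIZE bytes: the handler for an operand of
 * 'bytes' bytes sits at
 *
 *	(void *)em_xxx + ilog2(bytes) * FASTOP_SIZE
 *
 * so e.g. bytes == 4 selects the third (32-bit) variant.  The register
 * convention can be read off the macros themselves: dst travels in the
 * rax family, src in the rdx family and src2 in rcx.
 */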
307 static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
309 #define __FOP_FUNC(name) \
310 ".align " __stringify(FASTOP_SIZE) " \n\t" \
311 ".type " name ", @function \n\t" \
314 #define FOP_FUNC(name) \
317 #define __FOP_RET(name) \
319 ".size " name ", .-" name "\n\t"
321 #define FOP_RET(name) \
324 #define FOP_START(op) \
325 extern void em_##op(struct fastop *fake); \
326 asm(".pushsection .text, \"ax\" \n\t" \
327 ".global em_" #op " \n\t" \
328 ".align " __stringify(FASTOP_SIZE) " \n\t" \
334 #define __FOPNOP(name) \
339 __FOPNOP(__stringify(__UNIQUE_ID(nop)))
341 #define FOP1E(op, dst) \
342 __FOP_FUNC(#op "_" #dst) \
343 "10: " #op " %" #dst " \n\t" \
344 __FOP_RET(#op "_" #dst)
346 #define FOP1EEX(op, dst) \
347 FOP1E(op, dst) _ASM_EXTABLE(10b, kvm_fastop_exception)
349 #define FASTOP1(op) \
354 ON64(FOP1E(op##q, rax)) \
357 /* 1-operand, using src2 (for MUL/DIV r/m) */
358 #define FASTOP1SRC2(op, name) \
363 ON64(FOP1E(op, rcx)) \
366 /* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */
367 #define FASTOP1SRC2EX(op, name) \
372 ON64(FOP1EEX(op, rcx)) \
375 #define FOP2E(op, dst, src) \
376 __FOP_FUNC(#op "_" #dst "_" #src) \
377 #op " %" #src ", %" #dst " \n\t" \
378 __FOP_RET(#op "_" #dst "_" #src)
380 #define FASTOP2(op) \
382 FOP2E(op##b, al, dl) \
383 FOP2E(op##w, ax, dx) \
384 FOP2E(op##l, eax, edx) \
385 ON64(FOP2E(op##q, rax, rdx)) \
388 /* 2 operand, word only */
389 #define FASTOP2W(op) \
392 FOP2E(op##w, ax, dx) \
393 FOP2E(op##l, eax, edx) \
394 ON64(FOP2E(op##q, rax, rdx)) \
397 /* 2 operand, src is CL */
398 #define FASTOP2CL(op) \
400 FOP2E(op##b, al, cl) \
401 FOP2E(op##w, ax, cl) \
402 FOP2E(op##l, eax, cl) \
403 ON64(FOP2E(op##q, rax, cl)) \
406 /* 2 operand, src and dest are reversed */
407 #define FASTOP2R(op, name) \
409 FOP2E(op##b, dl, al) \
410 FOP2E(op##w, dx, ax) \
411 FOP2E(op##l, edx, eax) \
412 ON64(FOP2E(op##q, rdx, rax)) \
415 #define FOP3E(op, dst, src, src2) \
416 __FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
417 #op " %" #src2 ", %" #src ", %" #dst " \n\t"\
418 __FOP_RET(#op "_" #dst "_" #src "_" #src2)
420 /* 3-operand, word-only, src2=cl */
421 #define FASTOP3WCL(op) \
424 FOP3E(op##w, ax, dx, cl) \
425 FOP3E(op##l, eax, edx, cl) \
426 ON64(FOP3E(op##q, rax, rdx, cl)) \
429 /* Special case for SETcc - 1 instruction per cc */
430 #define FOP_SETCC(op) \
432 ".type " #op ", @function \n\t" \
437 asm(".pushsection .fixup, \"ax\"\n"
438 "kvm_fastop_exception: xor %esi, %esi; ret\n"
462 "pushf; sbb %al, %al; popf \n\t"
467 * XXX: inoutclob user must know where the argument is being expanded.
468 * Relying on CONFIG_CC_HAS_ASM_GOTO would allow us to remove _fault.
470 #define asm_safe(insn, inoutclob...) \
474 asm volatile("1:" insn "\n" \
476 ".pushsection .fixup, \"ax\"\n" \
477 "3: movl $1, %[_fault]\n" \
480 _ASM_EXTABLE(1b, 3b) \
481 : [_fault] "+qm"(_fault) inoutclob ); \
483 _fault ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; \
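/*
 * Illustrative (hypothetical) use: wrapping a faultable instruction such as
 *
 *	rc = asm_safe("fwait");
 *
 * returns X86EMUL_CONTINUE normally and X86EMUL_UNHANDLEABLE if the
 * instruction faults, via the exception-table fixup at label 3.
 */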
486 static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
487 enum x86_intercept intercept,
488 enum x86_intercept_stage stage)
490 struct x86_instruction_info info = {
491 .intercept = intercept,
492 .rep_prefix = ctxt->rep_prefix,
493 .modrm_mod = ctxt->modrm_mod,
494 .modrm_reg = ctxt->modrm_reg,
495 .modrm_rm = ctxt->modrm_rm,
496 .src_val = ctxt->src.val64,
497 .dst_val = ctxt->dst.val64,
498 .src_bytes = ctxt->src.bytes,
499 .dst_bytes = ctxt->dst.bytes,
500 .ad_bytes = ctxt->ad_bytes,
501 .next_rip = ctxt->eip,
504 return ctxt->ops->intercept(ctxt, &info, stage);
507 static void assign_masked(ulong *dest, ulong src, ulong mask)
509 *dest = (*dest & ~mask) | (src & mask);
512 static void assign_register(unsigned long *reg, u64 val, int bytes)
514 /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
517 *(u8 *)reg = (u8)val;
520 *(u16 *)reg = (u16)val;
524 break; /* 64b: zero-extend */
531 static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
533 return (1UL << (ctxt->ad_bytes << 3)) - 1;
536 static ulong stack_mask(struct x86_emulate_ctxt *ctxt)
539 struct desc_struct ss;
541 if (ctxt->mode == X86EMUL_MODE_PROT64)
543 ctxt->ops->get_segment(ctxt, &sel, &ss, NULL, VCPU_SREG_SS);
544 return ~0U >> ((ss.d ^ 1) * 16); /* d=0: 0xffff; d=1: 0xffffffff */
547 static int stack_size(struct x86_emulate_ctxt *ctxt)
549 return (__fls(stack_mask(ctxt)) + 1) >> 3;
552 /* Access/update address held in a register, based on addressing mode. */
553 static inline unsigned long
554 address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
556 if (ctxt->ad_bytes == sizeof(unsigned long))
559 return reg & ad_mask(ctxt);
562 static inline unsigned long
563 register_address(struct x86_emulate_ctxt *ctxt, int reg)
565 return address_mask(ctxt, reg_read(ctxt, reg));
568 static void masked_increment(ulong *reg, ulong mask, int inc)
570 assign_masked(reg, *reg + inc, mask);
574 register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
576 ulong *preg = reg_rmw(ctxt, reg);
578 assign_register(preg, *preg + inc, ctxt->ad_bytes);
581 static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
583 masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
586 static u32 desc_limit_scaled(struct desc_struct *desc)
588 u32 limit = get_desc_limit(desc);
590 return desc->g ? (limit << 12) | 0xfff : limit;
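/*
 * Example: with the granularity bit set, a raw limit of 0xfffff scales to
 * (0xfffff << 12) | 0xfff == 0xffffffff, i.e. a 4 GiB segment; with g clear
 * the limit is a byte count and is used as-is.
 */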
593 static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
595 if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
598 return ctxt->ops->get_cached_segment_base(ctxt, seg);
601 static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
602 u32 error, bool valid)
605 ctxt->exception.vector = vec;
606 ctxt->exception.error_code = error;
607 ctxt->exception.error_code_valid = valid;
608 return X86EMUL_PROPAGATE_FAULT;
611 static int emulate_db(struct x86_emulate_ctxt *ctxt)
613 return emulate_exception(ctxt, DB_VECTOR, 0, false);
616 static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
618 return emulate_exception(ctxt, GP_VECTOR, err, true);
621 static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
623 return emulate_exception(ctxt, SS_VECTOR, err, true);
626 static int emulate_ud(struct x86_emulate_ctxt *ctxt)
628 return emulate_exception(ctxt, UD_VECTOR, 0, false);
631 static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
633 return emulate_exception(ctxt, TS_VECTOR, err, true);
636 static int emulate_de(struct x86_emulate_ctxt *ctxt)
638 return emulate_exception(ctxt, DE_VECTOR, 0, false);
641 static int emulate_nm(struct x86_emulate_ctxt *ctxt)
643 return emulate_exception(ctxt, NM_VECTOR, 0, false);
646 static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
649 struct desc_struct desc;
651 ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
655 static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
660 struct desc_struct desc;
662 ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
663 ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
666 static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt)
668 return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48;
671 static inline bool emul_is_noncanonical_address(u64 la,
672 struct x86_emulate_ctxt *ctxt)
674 return get_canonical(la, ctxt_virt_addr_bits(ctxt)) != la;
678 * x86 defines three classes of vector instructions: explicitly
679 * aligned, explicitly unaligned, and the rest, which change behaviour
680 * depending on whether they're AVX encoded or not.
682 * Also included is CMPXCHG16B which is not a vector instruction, yet it is
683 * subject to the same check. FXSAVE and FXRSTOR are checked here too as their
684 * 512 bytes of data must be aligned to a 16 byte boundary.
686 static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
688 u64 alignment = ctxt->d & AlignMask;
690 if (likely(size < 16))
705 static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
706 struct segmented_address addr,
707 unsigned *max_size, unsigned size,
708 bool write, bool fetch,
709 enum x86emul_mode mode, ulong *linear)
711 struct desc_struct desc;
718 la = seg_base(ctxt, addr.seg) + addr.ea;
721 case X86EMUL_MODE_PROT64:
723 va_bits = ctxt_virt_addr_bits(ctxt);
724 if (get_canonical(la, va_bits) != la)
727 *max_size = min_t(u64, ~0u, (1ull << va_bits) - la);
728 if (size > *max_size)
732 *linear = la = (u32)la;
733 usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
737 /* code segment in protected mode or read-only data segment */
738 if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
739 || !(desc.type & 2)) && write)
741 /* unreadable code segment */
742 if (!fetch && (desc.type & 8) && !(desc.type & 2))
744 lim = desc_limit_scaled(&desc);
745 if (!(desc.type & 8) && (desc.type & 4)) {
746 /* expand-down segment */
749 lim = desc.d ? 0xffffffff : 0xffff;
753 if (lim == 0xffffffff)
756 *max_size = (u64)lim + 1 - addr.ea;
757 if (size > *max_size)
762 if (la & (insn_alignment(ctxt, size) - 1))
763 return emulate_gp(ctxt, 0);
764 return X86EMUL_CONTINUE;
766 if (addr.seg == VCPU_SREG_SS)
767 return emulate_ss(ctxt, 0);
769 return emulate_gp(ctxt, 0);
772 static int linearize(struct x86_emulate_ctxt *ctxt,
773 struct segmented_address addr,
774 unsigned size, bool write,
778 return __linearize(ctxt, addr, &max_size, size, write, false,
782 static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
783 enum x86emul_mode mode)
788 struct segmented_address addr = { .seg = VCPU_SREG_CS,
791 if (ctxt->op_bytes != sizeof(unsigned long))
792 addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
793 rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
794 if (rc == X86EMUL_CONTINUE)
795 ctxt->_eip = addr.ea;
799 static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
801 return assign_eip(ctxt, dst, ctxt->mode);
804 static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
805 const struct desc_struct *cs_desc)
807 enum x86emul_mode mode = ctxt->mode;
811 if (ctxt->mode >= X86EMUL_MODE_PROT16) {
815 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
817 mode = X86EMUL_MODE_PROT64;
819 mode = X86EMUL_MODE_PROT32; /* temporary value */
822 if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
823 mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
824 rc = assign_eip(ctxt, dst, mode);
825 if (rc == X86EMUL_CONTINUE)
830 static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
832 return assign_eip_near(ctxt, ctxt->_eip + rel);
835 static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear,
836 void *data, unsigned size)
838 return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, true);
841 static int linear_write_system(struct x86_emulate_ctxt *ctxt,
842 ulong linear, void *data,
845 return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, true);
848 static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
849 struct segmented_address addr,
856 rc = linearize(ctxt, addr, size, false, &linear);
857 if (rc != X86EMUL_CONTINUE)
859 return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, false);
862 static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
863 struct segmented_address addr,
870 rc = linearize(ctxt, addr, size, true, &linear);
871 if (rc != X86EMUL_CONTINUE)
873 return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, false);
877 * Prefetch the remaining bytes of the instruction without crossing a page
878 * boundary if they are not in fetch_cache yet.
880 static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
883 unsigned size, max_size;
884 unsigned long linear;
885 int cur_size = ctxt->fetch.end - ctxt->fetch.data;
886 struct segmented_address addr = { .seg = VCPU_SREG_CS,
887 .ea = ctxt->eip + cur_size };
890 * We do not know exactly how many bytes will be needed, and
891 * __linearize is expensive, so fetch as much as possible. We
892 * just have to avoid going beyond the 15 byte limit, the end
893 * of the segment, or the end of the page.
895 * __linearize is called with size 0 so that it does not do any
896 * boundary check itself. Instead, we use max_size to check against op_size.
899 rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
901 if (unlikely(rc != X86EMUL_CONTINUE))
904 size = min_t(unsigned, 15UL ^ cur_size, max_size);
905 size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));
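/*
 * Note on "15UL ^ cur_size" above: cur_size (bytes already in the fetch
 * cache) never exceeds 15, so the XOR is equivalent to 15 - cur_size,
 * i.e. the room left before the 15-byte maximum instruction length.
 */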
908 * One instruction can only straddle two pages,
909 * and one has been loaded at the beginning of
910 * x86_decode_insn. So, if there still are not enough
911 * bytes, we must have hit the 15-byte instruction length limit.
913 if (unlikely(size < op_size))
914 return emulate_gp(ctxt, 0);
916 rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
917 size, &ctxt->exception);
918 if (unlikely(rc != X86EMUL_CONTINUE))
920 ctxt->fetch.end += size;
921 return X86EMUL_CONTINUE;
924 static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
927 unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;
929 if (unlikely(done_size < size))
930 return __do_insn_fetch_bytes(ctxt, size - done_size);
932 return X86EMUL_CONTINUE;
935 /* Fetch next part of the instruction being emulated. */
936 #define insn_fetch(_type, _ctxt) \
939 rc = do_insn_fetch_bytes(_ctxt, sizeof(_type)); \
940 if (rc != X86EMUL_CONTINUE) \
942 ctxt->_eip += sizeof(_type); \
943 memcpy(&_x, ctxt->fetch.ptr, sizeof(_type)); \
944 ctxt->fetch.ptr += sizeof(_type); \
948 #define insn_fetch_arr(_arr, _size, _ctxt) \
950 rc = do_insn_fetch_bytes(_ctxt, _size); \
951 if (rc != X86EMUL_CONTINUE) \
953 ctxt->_eip += (_size); \
954 memcpy(_arr, ctxt->fetch.ptr, _size); \
955 ctxt->fetch.ptr += (_size); \
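/*
 * Both fetch helpers consume bytes from the prefetched cache and advance
 * _eip; the ModRM decoder below uses them as "sib = insn_fetch(u8, ctxt)"
 * or "modrm_ea += insn_fetch(s32, ctxt)".  A failed fetch leaves
 * rc != X86EMUL_CONTINUE and aborts the decode.
 */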
959 * Given the 'reg' portion of a ModRM byte, and a register block, return a
960 * pointer into the block that addresses the relevant register.
961 * The high byte regs AH, CH, DH and BH are decoded (for modrm_reg 4..7)
 * only when there is no REX prefix and the operand is a byte operand.
963 static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
967 int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;
969 if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
970 p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
972 p = reg_rmw(ctxt, modrm_reg);
976 static int read_descriptor(struct x86_emulate_ctxt *ctxt,
977 struct segmented_address addr,
978 u16 *size, unsigned long *address, int op_bytes)
985 rc = segmented_read_std(ctxt, addr, size, 2);
986 if (rc != X86EMUL_CONTINUE)
989 rc = segmented_read_std(ctxt, addr, address, op_bytes);
1003 FASTOP1SRC2(mul, mul_ex);
1004 FASTOP1SRC2(imul, imul_ex);
1005 FASTOP1SRC2EX(div, div_ex);
1006 FASTOP1SRC2EX(idiv, idiv_ex);
1035 FASTOP2R(cmp, cmp_r);
1037 static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
1039 /* If src is zero, do not write back, but update flags */
1040 if (ctxt->src.val == 0)
1041 ctxt->dst.type = OP_NONE;
1042 return fastop(ctxt, em_bsf);
1045 static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
1047 /* If src is zero, do not write back, but update flags */
1048 if (ctxt->src.val == 0)
1049 ctxt->dst.type = OP_NONE;
1050 return fastop(ctxt, em_bsr);
1053 static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
1056 void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
1058 flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
1059 asm("push %[flags]; popf; " CALL_NOSPEC
1060 : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags));
1064 static void fetch_register_operand(struct operand *op)
1066 switch (op->bytes) {
1068 op->val = *(u8 *)op->addr.reg;
1071 op->val = *(u16 *)op->addr.reg;
1074 op->val = *(u32 *)op->addr.reg;
1077 op->val = *(u64 *)op->addr.reg;
1082 static int em_fninit(struct x86_emulate_ctxt *ctxt)
1084 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1085 return emulate_nm(ctxt);
1088 asm volatile("fninit");
1090 return X86EMUL_CONTINUE;
1093 static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
1097 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1098 return emulate_nm(ctxt);
1101 asm volatile("fnstcw %0": "+m"(fcw));
1104 ctxt->dst.val = fcw;
1106 return X86EMUL_CONTINUE;
1109 static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
1113 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1114 return emulate_nm(ctxt);
1117 asm volatile("fnstsw %0": "+m"(fsw));
1120 ctxt->dst.val = fsw;
1122 return X86EMUL_CONTINUE;
1125 static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
1128 unsigned reg = ctxt->modrm_reg;
1130 if (!(ctxt->d & ModRM))
1131 reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);
1133 if (ctxt->d & Sse) {
1137 kvm_read_sse_reg(reg, &op->vec_val);
1140 if (ctxt->d & Mmx) {
1149 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1150 op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);
1152 fetch_register_operand(op);
1153 op->orig_val = op->val;
1156 static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg)
1158 if (base_reg == VCPU_REGS_RSP || base_reg == VCPU_REGS_RBP)
1159 ctxt->modrm_seg = VCPU_SREG_SS;
1162 static int decode_modrm(struct x86_emulate_ctxt *ctxt,
1166 int index_reg, base_reg, scale;
1167 int rc = X86EMUL_CONTINUE;
1170 ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
1171 index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
1172 base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */
1174 ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
1175 ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
1176 ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
1177 ctxt->modrm_seg = VCPU_SREG_DS;
1179 if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
1181 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1182 op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
1184 if (ctxt->d & Sse) {
1187 op->addr.xmm = ctxt->modrm_rm;
1188 kvm_read_sse_reg(ctxt->modrm_rm, &op->vec_val);
1191 if (ctxt->d & Mmx) {
1194 op->addr.mm = ctxt->modrm_rm & 7;
1197 fetch_register_operand(op);
1203 if (ctxt->ad_bytes == 2) {
1204 unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
1205 unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
1206 unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
1207 unsigned di = reg_read(ctxt, VCPU_REGS_RDI);
1209 /* 16-bit ModR/M decode. */
1210 switch (ctxt->modrm_mod) {
1212 if (ctxt->modrm_rm == 6)
1213 modrm_ea += insn_fetch(u16, ctxt);
1216 modrm_ea += insn_fetch(s8, ctxt);
1219 modrm_ea += insn_fetch(u16, ctxt);
1222 switch (ctxt->modrm_rm) {
1224 modrm_ea += bx + si;
1227 modrm_ea += bx + di;
1230 modrm_ea += bp + si;
1233 modrm_ea += bp + di;
1242 if (ctxt->modrm_mod != 0)
1249 if (ctxt->modrm_rm == 2 || ctxt->modrm_rm == 3 ||
1250 (ctxt->modrm_rm == 6 && ctxt->modrm_mod != 0))
1251 ctxt->modrm_seg = VCPU_SREG_SS;
1252 modrm_ea = (u16)modrm_ea;
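/*
 * Worked 16-bit example: modrm 0x47 decodes as mod=1, rm=7, i.e. BX plus a
 * sign-extended 8-bit displacement, truncated to 16 bits by the assignment
 * above.  rm values 2 and 3, and rm 6 when mod != 0, default to SS because
 * they address via BP.
 */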
1254 /* 32/64-bit ModR/M decode. */
1255 if ((ctxt->modrm_rm & 7) == 4) {
1256 sib = insn_fetch(u8, ctxt);
1257 index_reg |= (sib >> 3) & 7;
1258 base_reg |= sib & 7;
1261 if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
1262 modrm_ea += insn_fetch(s32, ctxt);
1264 modrm_ea += reg_read(ctxt, base_reg);
1265 adjust_modrm_seg(ctxt, base_reg);
1266 /* Increment ESP on POP [ESP] */
1267 if ((ctxt->d & IncSP) &&
1268 base_reg == VCPU_REGS_RSP)
1269 modrm_ea += ctxt->op_bytes;
1272 modrm_ea += reg_read(ctxt, index_reg) << scale;
1273 } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
1274 modrm_ea += insn_fetch(s32, ctxt);
1275 if (ctxt->mode == X86EMUL_MODE_PROT64)
1276 ctxt->rip_relative = 1;
1278 base_reg = ctxt->modrm_rm;
1279 modrm_ea += reg_read(ctxt, base_reg);
1280 adjust_modrm_seg(ctxt, base_reg);
1282 switch (ctxt->modrm_mod) {
1284 modrm_ea += insn_fetch(s8, ctxt);
1287 modrm_ea += insn_fetch(s32, ctxt);
1291 op->addr.mem.ea = modrm_ea;
1292 if (ctxt->ad_bytes != 8)
1293 ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;
1299 static int decode_abs(struct x86_emulate_ctxt *ctxt,
1302 int rc = X86EMUL_CONTINUE;
1305 switch (ctxt->ad_bytes) {
1307 op->addr.mem.ea = insn_fetch(u16, ctxt);
1310 op->addr.mem.ea = insn_fetch(u32, ctxt);
1313 op->addr.mem.ea = insn_fetch(u64, ctxt);
1320 static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
1324 if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
1325 mask = ~((long)ctxt->dst.bytes * 8 - 1);
1327 if (ctxt->src.bytes == 2)
1328 sv = (s16)ctxt->src.val & (s16)mask;
1329 else if (ctxt->src.bytes == 4)
1330 sv = (s32)ctxt->src.val & (s32)mask;
1332 sv = (s64)ctxt->src.val & (s64)mask;
1334 ctxt->dst.addr.mem.ea = address_mask(ctxt,
1335 ctxt->dst.addr.mem.ea + (sv >> 3));
1338 /* only subword offset */
1339 ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
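/*
 * Worked example for a 16-bit BT/BTS/BTR/BTC with a register bit index:
 * dst.bytes == 2 gives mask == ~15, so a source value of 36 contributes a
 * byte offset of (36 & ~15) >> 3 == 4 to the memory address, leaving a
 * residual bit number of 36 & 15 == 4 within that word.
 */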
1342 static int read_emulated(struct x86_emulate_ctxt *ctxt,
1343 unsigned long addr, void *dest, unsigned size)
1346 struct read_cache *mc = &ctxt->mem_read;
1348 if (mc->pos < mc->end)
1351 WARN_ON((mc->end + size) >= sizeof(mc->data));
1353 rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
1355 if (rc != X86EMUL_CONTINUE)
1361 memcpy(dest, mc->data + mc->pos, size);
1363 return X86EMUL_CONTINUE;
1366 static int segmented_read(struct x86_emulate_ctxt *ctxt,
1367 struct segmented_address addr,
1374 rc = linearize(ctxt, addr, size, false, &linear);
1375 if (rc != X86EMUL_CONTINUE)
1377 return read_emulated(ctxt, linear, data, size);
1380 static int segmented_write(struct x86_emulate_ctxt *ctxt,
1381 struct segmented_address addr,
1388 rc = linearize(ctxt, addr, size, true, &linear);
1389 if (rc != X86EMUL_CONTINUE)
1391 return ctxt->ops->write_emulated(ctxt, linear, data, size,
1395 static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
1396 struct segmented_address addr,
1397 const void *orig_data, const void *data,
1403 rc = linearize(ctxt, addr, size, true, &linear);
1404 if (rc != X86EMUL_CONTINUE)
1406 return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
1407 size, &ctxt->exception);
1410 static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
1411 unsigned int size, unsigned short port,
1414 struct read_cache *rc = &ctxt->io_read;
1416 if (rc->pos == rc->end) { /* refill pio read ahead */
1417 unsigned int in_page, n;
1418 unsigned int count = ctxt->rep_prefix ?
1419 address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
1420 in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
1421 offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
1422 PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
1423 n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
1426 rc->pos = rc->end = 0;
1427 if (!ctxt->ops->pio_in_emulated(ctxt, size, port, rc->data, n))
1432 if (ctxt->rep_prefix && (ctxt->d & String) &&
1433 !(ctxt->eflags & X86_EFLAGS_DF)) {
1434 ctxt->dst.data = rc->data + rc->pos;
1435 ctxt->dst.type = OP_MEM_STR;
1436 ctxt->dst.count = (rc->end - rc->pos) / size;
1439 memcpy(dest, rc->data + rc->pos, size);
1445 static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
1446 u16 index, struct desc_struct *desc)
1451 ctxt->ops->get_idt(ctxt, &dt);
1453 if (dt.size < index * 8 + 7)
1454 return emulate_gp(ctxt, index << 3 | 0x2);
1456 addr = dt.address + index * 8;
1457 return linear_read_system(ctxt, addr, desc, sizeof(*desc));
1460 static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
1461 u16 selector, struct desc_ptr *dt)
1463 const struct x86_emulate_ops *ops = ctxt->ops;
1466 if (selector & 1 << 2) {
1467 struct desc_struct desc;
1470 memset(dt, 0, sizeof(*dt));
1471 if (!ops->get_segment(ctxt, &sel, &desc, &base3,
1475 dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
1476 dt->address = get_desc_base(&desc) | ((u64)base3 << 32);
1478 ops->get_gdt(ctxt, dt);
1481 static int get_descriptor_ptr(struct x86_emulate_ctxt *ctxt,
1482 u16 selector, ulong *desc_addr_p)
1485 u16 index = selector >> 3;
1488 get_descriptor_table_ptr(ctxt, selector, &dt);
1490 if (dt.size < index * 8 + 7)
1491 return emulate_gp(ctxt, selector & 0xfffc);
1493 addr = dt.address + index * 8;
1495 #ifdef CONFIG_X86_64
1496 if (addr >> 32 != 0) {
1499 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1500 if (!(efer & EFER_LMA))
1505 *desc_addr_p = addr;
1506 return X86EMUL_CONTINUE;
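/*
 * Selector layout used by the helpers above: bits 15:3 are the table index
 * (hence "selector >> 3" and "index * 8"), bit 2 is the table indicator
 * (1 = LDT, 0 = GDT, tested as "selector & 1 << 2"), and bits 1:0 are the
 * requested privilege level, which is why error codes are masked with
 * 0xfffc.
 */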
1509 /* allowed just for 8-byte segment descriptors */
1510 static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1511 u16 selector, struct desc_struct *desc,
1516 rc = get_descriptor_ptr(ctxt, selector, desc_addr_p);
1517 if (rc != X86EMUL_CONTINUE)
1520 return linear_read_system(ctxt, *desc_addr_p, desc, sizeof(*desc));
1523 /* allowed just for 8-byte segment descriptors */
1524 static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1525 u16 selector, struct desc_struct *desc)
1530 rc = get_descriptor_ptr(ctxt, selector, &addr);
1531 if (rc != X86EMUL_CONTINUE)
1534 return linear_write_system(ctxt, addr, desc, sizeof(*desc));
1537 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1538 u16 selector, int seg, u8 cpl,
1539 enum x86_transfer_type transfer,
1540 struct desc_struct *desc)
1542 struct desc_struct seg_desc, old_desc;
1544 unsigned err_vec = GP_VECTOR;
1546 bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
1552 memset(&seg_desc, 0, sizeof(seg_desc));
1554 if (ctxt->mode == X86EMUL_MODE_REAL) {
1555 /* set real mode segment descriptor (keep limit etc. for unreal mode) */
1557 ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
1558 set_desc_base(&seg_desc, selector << 4);
1560 } else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
1561 /* VM86 needs a clean new segment descriptor */
1562 set_desc_base(&seg_desc, selector << 4);
1563 set_desc_limit(&seg_desc, 0xffff);
1573 /* TR should be in GDT only */
1574 if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
1577 /* NULL selector is not valid for TR, CS and (except for long mode) SS */
1578 if (null_selector) {
1579 if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR)
1582 if (seg == VCPU_SREG_SS) {
1583 if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)
1587 * ctxt->ops->set_segment expects the CPL to be in
1588 * SS.DPL, so fake an expand-up 32-bit data segment.
1598 /* Skip all following checks */
1602 ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
1603 if (ret != X86EMUL_CONTINUE)
1606 err_code = selector & 0xfffc;
1607 err_vec = (transfer == X86_TRANSFER_TASK_SWITCH) ? TS_VECTOR :
1610 /* can't load system descriptor into segment selector */
1611 if (seg <= VCPU_SREG_GS && !seg_desc.s) {
1612 if (transfer == X86_TRANSFER_CALL_JMP)
1613 return X86EMUL_UNHANDLEABLE;
1618 err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
1627 * segment is not a writable data segment, or the segment
1628 * selector's RPL != CPL, or the descriptor's DPL != CPL
1630 if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
1634 if (!(seg_desc.type & 8))
1637 if (seg_desc.type & 4) {
1643 if (rpl > cpl || dpl != cpl)
1646 /* in long-mode d/b must be clear if l is set */
1647 if (seg_desc.d && seg_desc.l) {
1650 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1651 if (efer & EFER_LMA)
1655 /* CS(RPL) <- CPL */
1656 selector = (selector & 0xfffc) | cpl;
1659 if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
1661 old_desc = seg_desc;
1662 seg_desc.type |= 2; /* busy */
1663 ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
1664 sizeof(seg_desc), &ctxt->exception);
1665 if (ret != X86EMUL_CONTINUE)
1668 case VCPU_SREG_LDTR:
1669 if (seg_desc.s || seg_desc.type != 2)
1672 default: /* DS, ES, FS, or GS */
1674 * segment is not a data or readable code segment or
1675 * ((segment is a data or nonconforming code segment)
1676 * and (both RPL and CPL > DPL))
1678 if ((seg_desc.type & 0xa) == 0x8 ||
1679 (((seg_desc.type & 0xc) != 0xc) &&
1680 (rpl > dpl && cpl > dpl)))
1686 /* mark segment as accessed */
1687 if (!(seg_desc.type & 1)) {
1689 ret = write_segment_descriptor(ctxt, selector,
1691 if (ret != X86EMUL_CONTINUE)
1694 } else if (ctxt->mode == X86EMUL_MODE_PROT64) {
1695 ret = linear_read_system(ctxt, desc_addr+8, &base3, sizeof(base3));
1696 if (ret != X86EMUL_CONTINUE)
1698 if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
1699 ((u64)base3 << 32), ctxt))
1700 return emulate_gp(ctxt, 0);
1703 ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
1706 return X86EMUL_CONTINUE;
1708 return emulate_exception(ctxt, err_vec, err_code, true);
1711 static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1712 u16 selector, int seg)
1714 u8 cpl = ctxt->ops->cpl(ctxt);
1717 * None of MOV, POP and LSS can load a NULL selector in CPL=3, but
1718 * they can load it at CPL<3 (Intel's manual says only LSS can, but it is silent about the other segments in 64-bit mode).
1721 * However, the Intel manual says that putting IST=1/DPL=3 in
1722 * an interrupt gate will result in SS=3 (the AMD manual instead
1723 * says it doesn't), so allow SS=3 in __load_segment_descriptor
1724 * and only forbid it here.
1726 if (seg == VCPU_SREG_SS && selector == 3 &&
1727 ctxt->mode == X86EMUL_MODE_PROT64)
1728 return emulate_exception(ctxt, GP_VECTOR, 0, true);
1730 return __load_segment_descriptor(ctxt, selector, seg, cpl,
1731 X86_TRANSFER_NONE, NULL);
1734 static void write_register_operand(struct operand *op)
1736 return assign_register(op->addr.reg, op->val, op->bytes);
1739 static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
1743 write_register_operand(op);
1746 if (ctxt->lock_prefix)
1747 return segmented_cmpxchg(ctxt,
1753 return segmented_write(ctxt,
1759 return segmented_write(ctxt,
1762 op->bytes * op->count);
1765 kvm_write_sse_reg(op->addr.xmm, &op->vec_val);
1768 kvm_write_mmx_reg(op->addr.mm, &op->mm_val);
1776 return X86EMUL_CONTINUE;
1779 static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes)
1781 struct segmented_address addr;
1783 rsp_increment(ctxt, -bytes);
1784 addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1785 addr.seg = VCPU_SREG_SS;
1787 return segmented_write(ctxt, addr, data, bytes);
1790 static int em_push(struct x86_emulate_ctxt *ctxt)
1792 /* Disable writeback. */
1793 ctxt->dst.type = OP_NONE;
1794 return push(ctxt, &ctxt->src.val, ctxt->op_bytes);
1797 static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1798 void *dest, int len)
1801 struct segmented_address addr;
1803 addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1804 addr.seg = VCPU_SREG_SS;
1805 rc = segmented_read(ctxt, addr, dest, len);
1806 if (rc != X86EMUL_CONTINUE)
1809 rsp_increment(ctxt, len);
1813 static int em_pop(struct x86_emulate_ctxt *ctxt)
1815 return emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1818 static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1819 void *dest, int len)
1822 unsigned long val, change_mask;
1823 int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
1824 int cpl = ctxt->ops->cpl(ctxt);
1826 rc = emulate_pop(ctxt, &val, len);
1827 if (rc != X86EMUL_CONTINUE)
1830 change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
1831 X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
1832 X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
1833 X86_EFLAGS_AC | X86_EFLAGS_ID;
1835 switch(ctxt->mode) {
1836 case X86EMUL_MODE_PROT64:
1837 case X86EMUL_MODE_PROT32:
1838 case X86EMUL_MODE_PROT16:
1840 change_mask |= X86_EFLAGS_IOPL;
1842 change_mask |= X86_EFLAGS_IF;
1844 case X86EMUL_MODE_VM86:
1846 return emulate_gp(ctxt, 0);
1847 change_mask |= X86_EFLAGS_IF;
1849 default: /* real mode */
1850 change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
1854 *(unsigned long *)dest =
1855 (ctxt->eflags & ~change_mask) | (val & change_mask);
1860 static int em_popf(struct x86_emulate_ctxt *ctxt)
1862 ctxt->dst.type = OP_REG;
1863 ctxt->dst.addr.reg = &ctxt->eflags;
1864 ctxt->dst.bytes = ctxt->op_bytes;
1865 return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1868 static int em_enter(struct x86_emulate_ctxt *ctxt)
1871 unsigned frame_size = ctxt->src.val;
1872 unsigned nesting_level = ctxt->src2.val & 31;
1876 return X86EMUL_UNHANDLEABLE;
1878 rbp = reg_read(ctxt, VCPU_REGS_RBP);
1879 rc = push(ctxt, &rbp, stack_size(ctxt));
1880 if (rc != X86EMUL_CONTINUE)
1882 assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
1884 assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
1885 reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
1887 return X86EMUL_CONTINUE;
1890 static int em_leave(struct x86_emulate_ctxt *ctxt)
1892 assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
1894 return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
1897 static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
1899 int seg = ctxt->src2.val;
1901 ctxt->src.val = get_segment_selector(ctxt, seg);
1902 if (ctxt->op_bytes == 4) {
1903 rsp_increment(ctxt, -2);
1907 return em_push(ctxt);
1910 static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
1912 int seg = ctxt->src2.val;
1913 unsigned long selector;
1916 rc = emulate_pop(ctxt, &selector, 2);
1917 if (rc != X86EMUL_CONTINUE)
1920 if (ctxt->modrm_reg == VCPU_SREG_SS)
1921 ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
1922 if (ctxt->op_bytes > 2)
1923 rsp_increment(ctxt, ctxt->op_bytes - 2);
1925 rc = load_segment_descriptor(ctxt, (u16)selector, seg);
1929 static int em_pusha(struct x86_emulate_ctxt *ctxt)
1931 unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
1932 int rc = X86EMUL_CONTINUE;
1933 int reg = VCPU_REGS_RAX;
1935 while (reg <= VCPU_REGS_RDI) {
1936 ctxt->src.val = (reg == VCPU_REGS_RSP) ? old_esp : reg_read(ctxt, reg);
1940 if (rc != X86EMUL_CONTINUE)
1949 static int em_pushf(struct x86_emulate_ctxt *ctxt)
1951 ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
1952 return em_push(ctxt);
1955 static int em_popa(struct x86_emulate_ctxt *ctxt)
1957 int rc = X86EMUL_CONTINUE;
1958 int reg = VCPU_REGS_RDI;
1961 while (reg >= VCPU_REGS_RAX) {
1962 if (reg == VCPU_REGS_RSP) {
1963 rsp_increment(ctxt, ctxt->op_bytes);
1967 rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
1968 if (rc != X86EMUL_CONTINUE)
1970 assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
1976 static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
1978 const struct x86_emulate_ops *ops = ctxt->ops;
1985 /* TODO: Add limit checks */
1986 ctxt->src.val = ctxt->eflags;
1988 if (rc != X86EMUL_CONTINUE)
1991 ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);
1993 ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
1995 if (rc != X86EMUL_CONTINUE)
1998 ctxt->src.val = ctxt->_eip;
2000 if (rc != X86EMUL_CONTINUE)
2003 ops->get_idt(ctxt, &dt);
2005 eip_addr = dt.address + (irq << 2);
2006 cs_addr = dt.address + (irq << 2) + 2;
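/*
 * Real-mode IVT entries are 4 bytes: offset in the low word, segment in the
 * high word.  Vector 'irq' therefore lives at dt.address + irq * 4 with the
 * CS word 2 bytes further on, matching eip_addr/cs_addr above (e.g.
 * irq 0x10 reads offsets 0x40 and 0x42 from the IDT base).
 */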
2008 rc = linear_read_system(ctxt, cs_addr, &cs, 2);
2009 if (rc != X86EMUL_CONTINUE)
2012 rc = linear_read_system(ctxt, eip_addr, &eip, 2);
2013 if (rc != X86EMUL_CONTINUE)
2016 rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
2017 if (rc != X86EMUL_CONTINUE)
2025 int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
2029 invalidate_registers(ctxt);
2030 rc = __emulate_int_real(ctxt, irq);
2031 if (rc == X86EMUL_CONTINUE)
2032 writeback_registers(ctxt);
2036 static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
2038 switch(ctxt->mode) {
2039 case X86EMUL_MODE_REAL:
2040 return __emulate_int_real(ctxt, irq);
2041 case X86EMUL_MODE_VM86:
2042 case X86EMUL_MODE_PROT16:
2043 case X86EMUL_MODE_PROT32:
2044 case X86EMUL_MODE_PROT64:
2046 /* Emulation of INT in protected mode is not implemented yet */
2047 return X86EMUL_UNHANDLEABLE;
2051 static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
2053 int rc = X86EMUL_CONTINUE;
2054 unsigned long temp_eip = 0;
2055 unsigned long temp_eflags = 0;
2056 unsigned long cs = 0;
2057 unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
2058 X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
2059 X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
2060 X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
2061 X86_EFLAGS_AC | X86_EFLAGS_ID |
2063 unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
2066 /* TODO: Add stack limit check */
2068 rc = emulate_pop(ctxt, &temp_eip, ctxt->op_bytes);
2070 if (rc != X86EMUL_CONTINUE)
2073 if (temp_eip & ~0xffff)
2074 return emulate_gp(ctxt, 0);
2076 rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2078 if (rc != X86EMUL_CONTINUE)
2081 rc = emulate_pop(ctxt, &temp_eflags, ctxt->op_bytes);
2083 if (rc != X86EMUL_CONTINUE)
2086 rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
2088 if (rc != X86EMUL_CONTINUE)
2091 ctxt->_eip = temp_eip;
2093 if (ctxt->op_bytes == 4)
2094 ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
2095 else if (ctxt->op_bytes == 2) {
2096 ctxt->eflags &= ~0xffff;
2097 ctxt->eflags |= temp_eflags;
2100 ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
2101 ctxt->eflags |= X86_EFLAGS_FIXED;
2102 ctxt->ops->set_nmi_mask(ctxt, false);
2107 static int em_iret(struct x86_emulate_ctxt *ctxt)
2109 switch(ctxt->mode) {
2110 case X86EMUL_MODE_REAL:
2111 return emulate_iret_real(ctxt);
2112 case X86EMUL_MODE_VM86:
2113 case X86EMUL_MODE_PROT16:
2114 case X86EMUL_MODE_PROT32:
2115 case X86EMUL_MODE_PROT64:
2117 /* iret from protected mode is not implemented yet */
2118 return X86EMUL_UNHANDLEABLE;
2122 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
2126 struct desc_struct new_desc;
2127 u8 cpl = ctxt->ops->cpl(ctxt);
2129 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2131 rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
2132 X86_TRANSFER_CALL_JMP,
2134 if (rc != X86EMUL_CONTINUE)
2137 rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
2138 /* Error handling is not implemented. */
2139 if (rc != X86EMUL_CONTINUE)
2140 return X86EMUL_UNHANDLEABLE;
2145 static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
2147 return assign_eip_near(ctxt, ctxt->src.val);
2150 static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
2155 old_eip = ctxt->_eip;
2156 rc = assign_eip_near(ctxt, ctxt->src.val);
2157 if (rc != X86EMUL_CONTINUE)
2159 ctxt->src.val = old_eip;
2164 static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
2166 u64 old = ctxt->dst.orig_val64;
2168 if (ctxt->dst.bytes == 16)
2169 return X86EMUL_UNHANDLEABLE;
2171 if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
2172 ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
2173 *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
2174 *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
2175 ctxt->eflags &= ~X86_EFLAGS_ZF;
2177 ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
2178 (u32) reg_read(ctxt, VCPU_REGS_RBX);
2180 ctxt->eflags |= X86_EFLAGS_ZF;
2182 return X86EMUL_CONTINUE;
2185 static int em_ret(struct x86_emulate_ctxt *ctxt)
2190 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2191 if (rc != X86EMUL_CONTINUE)
2194 return assign_eip_near(ctxt, eip);
2197 static int em_ret_far(struct x86_emulate_ctxt *ctxt)
2200 unsigned long eip, cs;
2201 int cpl = ctxt->ops->cpl(ctxt);
2202 struct desc_struct new_desc;
2204 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2205 if (rc != X86EMUL_CONTINUE)
2207 rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2208 if (rc != X86EMUL_CONTINUE)
2210 /* Outer-privilege level return is not implemented */
2211 if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
2212 return X86EMUL_UNHANDLEABLE;
2213 rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl,
2216 if (rc != X86EMUL_CONTINUE)
2218 rc = assign_eip_far(ctxt, eip, &new_desc);
2219 /* Error handling is not implemented. */
2220 if (rc != X86EMUL_CONTINUE)
2221 return X86EMUL_UNHANDLEABLE;
2226 static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
2230 rc = em_ret_far(ctxt);
2231 if (rc != X86EMUL_CONTINUE)
2233 rsp_increment(ctxt, ctxt->src.val);
2234 return X86EMUL_CONTINUE;
2237 static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
2239 /* Save real source value, then compare EAX against destination. */
2240 ctxt->dst.orig_val = ctxt->dst.val;
2241 ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
2242 ctxt->src.orig_val = ctxt->src.val;
2243 ctxt->src.val = ctxt->dst.orig_val;
2244 fastop(ctxt, em_cmp);
2246 if (ctxt->eflags & X86_EFLAGS_ZF) {
2247 /* Success: write back to memory; no update of EAX */
2248 ctxt->src.type = OP_NONE;
2249 ctxt->dst.val = ctxt->src.orig_val;
2251 /* Failure: write the value we saw to EAX. */
2252 ctxt->src.type = OP_REG;
2253 ctxt->src.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
2254 ctxt->src.val = ctxt->dst.orig_val;
2255 /* Create write-cycle to dest by writing the same value */
2256 ctxt->dst.val = ctxt->dst.orig_val;
2258 return X86EMUL_CONTINUE;
2261 static int em_lseg(struct x86_emulate_ctxt *ctxt)
2263 int seg = ctxt->src2.val;
2267 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2269 rc = load_segment_descriptor(ctxt, sel, seg);
2270 if (rc != X86EMUL_CONTINUE)
2273 ctxt->dst.val = ctxt->src.val;
2277 static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
2279 #ifdef CONFIG_X86_64
2280 return ctxt->ops->guest_has_long_mode(ctxt);
2286 static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
2288 desc->g = (flags >> 23) & 1;
2289 desc->d = (flags >> 22) & 1;
2290 desc->l = (flags >> 21) & 1;
2291 desc->avl = (flags >> 20) & 1;
2292 desc->p = (flags >> 15) & 1;
2293 desc->dpl = (flags >> 13) & 3;
2294 desc->s = (flags >> 12) & 1;
2295 desc->type = (flags >> 8) & 15;
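/*
 * 'flags' here is assumed to use the same bit positions as bits 8..23 of
 * the second dword of a descriptor (type at 8, S at 12, DPL at 13, P at 15,
 * AVL/L/D-B/G at 20..23), i.e. the packed attribute format stored in the
 * SMM state-save area.
 */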
2298 static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
2301 struct desc_struct desc;
2305 selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
2308 offset = 0x7f84 + n * 12;
2310 offset = 0x7f2c + (n - 3) * 12;
2312 set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
2313 set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
2314 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
2315 ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
2316 return X86EMUL_CONTINUE;
2319 #ifdef CONFIG_X86_64
2320 static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
2323 struct desc_struct desc;
2328 offset = 0x7e00 + n * 16;
2330 selector = GET_SMSTATE(u16, smstate, offset);
2331 rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
2332 set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
2333 set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
2334 base3 = GET_SMSTATE(u32, smstate, offset + 12);
2336 ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
2337 return X86EMUL_CONTINUE;
2341 static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
2342 u64 cr0, u64 cr3, u64 cr4)
2347 /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
2349 if (cr4 & X86_CR4_PCIDE) {
2354 bad = ctxt->ops->set_cr(ctxt, 3, cr3);
2356 return X86EMUL_UNHANDLEABLE;
2359 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
2360 * Then enable protected mode. However, PCID cannot be enabled
2361 * if EFER.LMA=0, so set it separately.
2363 bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
2365 return X86EMUL_UNHANDLEABLE;
2367 bad = ctxt->ops->set_cr(ctxt, 0, cr0);
2369 return X86EMUL_UNHANDLEABLE;
2371 if (cr4 & X86_CR4_PCIDE) {
2372 bad = ctxt->ops->set_cr(ctxt, 4, cr4);
2374 return X86EMUL_UNHANDLEABLE;
2376 bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
2378 return X86EMUL_UNHANDLEABLE;
2383 return X86EMUL_CONTINUE;
2386 static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
2387 const char *smstate)
2389 struct desc_struct desc;
2392 u32 val, cr0, cr3, cr4;
2395 cr0 = GET_SMSTATE(u32, smstate, 0x7ffc);
2396 cr3 = GET_SMSTATE(u32, smstate, 0x7ff8);
2397 ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
2398 ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0);
2400 for (i = 0; i < 8; i++)
2401 *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
2403 val = GET_SMSTATE(u32, smstate, 0x7fcc);
2405 if (ctxt->ops->set_dr(ctxt, 6, val))
2406 return X86EMUL_UNHANDLEABLE;
2408 val = GET_SMSTATE(u32, smstate, 0x7fc8);
2410 if (ctxt->ops->set_dr(ctxt, 7, val))
2411 return X86EMUL_UNHANDLEABLE;
2413 selector = GET_SMSTATE(u32, smstate, 0x7fc4);
2414 set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64));
2415 set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60));
2416 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c));
2417 ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
2419 selector = GET_SMSTATE(u32, smstate, 0x7fc0);
2420 set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f80));
2421 set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c));
2422 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78));
2423 ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
2425 dt.address = GET_SMSTATE(u32, smstate, 0x7f74);
2426 dt.size = GET_SMSTATE(u32, smstate, 0x7f70);
2427 ctxt->ops->set_gdt(ctxt, &dt);
2429 dt.address = GET_SMSTATE(u32, smstate, 0x7f58);
2430 dt.size = GET_SMSTATE(u32, smstate, 0x7f54);
2431 ctxt->ops->set_idt(ctxt, &dt);
2433 for (i = 0; i < 6; i++) {
2434 int r = rsm_load_seg_32(ctxt, smstate, i);
2435 if (r != X86EMUL_CONTINUE)
2439 cr4 = GET_SMSTATE(u32, smstate, 0x7f14);
2441 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
2443 return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
2446 #ifdef CONFIG_X86_64
2447 static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
2448 const char *smstate)
2450 struct desc_struct desc;
2452 u64 val, cr0, cr3, cr4;
2457 for (i = 0; i < 16; i++)
2458 *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
2460 ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78);
2461 ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
2463 val = GET_SMSTATE(u64, smstate, 0x7f68);
2465 if (ctxt->ops->set_dr(ctxt, 6, val))
2466 return X86EMUL_UNHANDLEABLE;
2468 val = GET_SMSTATE(u64, smstate, 0x7f60);
2470 if (ctxt->ops->set_dr(ctxt, 7, val))
2471 return X86EMUL_UNHANDLEABLE;
2473 cr0 = GET_SMSTATE(u64, smstate, 0x7f58);
2474 cr3 = GET_SMSTATE(u64, smstate, 0x7f50);
2475 cr4 = GET_SMSTATE(u64, smstate, 0x7f48);
2476 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
2477 val = GET_SMSTATE(u64, smstate, 0x7ed0);
2479 if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
2480 return X86EMUL_UNHANDLEABLE;
2482 selector = GET_SMSTATE(u32, smstate, 0x7e90);
2483 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8);
2484 set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94));
2485 set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e98));
2486 base3 = GET_SMSTATE(u32, smstate, 0x7e9c);
2487 ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
2489 dt.size = GET_SMSTATE(u32, smstate, 0x7e84);
2490 dt.address = GET_SMSTATE(u64, smstate, 0x7e88);
2491 ctxt->ops->set_idt(ctxt, &dt);
2493 selector = GET_SMSTATE(u32, smstate, 0x7e70);
2494 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8);
2495 set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74));
2496 set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78));
2497 base3 = GET_SMSTATE(u32, smstate, 0x7e7c);
2498 ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
2500 dt.size = GET_SMSTATE(u32, smstate, 0x7e64);
2501 dt.address = GET_SMSTATE(u64, smstate, 0x7e68);
2502 ctxt->ops->set_gdt(ctxt, &dt);
2504 r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
2505 if (r != X86EMUL_CONTINUE)
2508 for (i = 0; i < 6; i++) {
2509 r = rsm_load_seg_64(ctxt, smstate, i);
2510 if (r != X86EMUL_CONTINUE)
2514 return X86EMUL_CONTINUE;
2518 static int em_rsm(struct x86_emulate_ctxt *ctxt)
2520 unsigned long cr0, cr4, efer;
2525 if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0)
2526 return emulate_ud(ctxt);
2528 smbase = ctxt->ops->get_smbase(ctxt);
2530 ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
2531 if (ret != X86EMUL_CONTINUE)
2532 return X86EMUL_UNHANDLEABLE;
2534 if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
2535 ctxt->ops->set_nmi_mask(ctxt, false);
2537 ctxt->ops->exiting_smm(ctxt);
2540 * Get back to real mode, to prepare a safe state in which to load
2541 * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
2542 * supports long mode.
2544 if (emulator_has_longmode(ctxt)) {
2545 struct desc_struct cs_desc;
2547 /* Zero CR4.PCIDE before CR0.PG. */
2548 cr4 = ctxt->ops->get_cr(ctxt, 4);
2549 if (cr4 & X86_CR4_PCIDE)
2550 ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
2552 /* A 32-bit code segment is required to clear EFER.LMA. */
2553 memset(&cs_desc, 0, sizeof(cs_desc));
2555 cs_desc.s = cs_desc.g = cs_desc.p = 1;
2556 ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
2559 /* For the 64-bit case, this will clear EFER.LMA. */
2560 cr0 = ctxt->ops->get_cr(ctxt, 0);
2561 if (cr0 & X86_CR0_PE)
2562 ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
2564 if (emulator_has_longmode(ctxt)) {
2565 /* Clear CR4.PAE before clearing EFER.LME. */
2566 cr4 = ctxt->ops->get_cr(ctxt, 4);
2567 if (cr4 & X86_CR4_PAE)
2568 ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
2570 /* And finally go back to 32-bit mode. */
2572 ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
2576 * Give leave_smm() a chance to make ISA-specific changes to the vCPU
2577 * state (e.g. enter guest mode) before loading state from the SMM state-save area.
2580 if (ctxt->ops->leave_smm(ctxt, buf))
2581 goto emulate_shutdown;
2583 #ifdef CONFIG_X86_64
2584 if (emulator_has_longmode(ctxt))
2585 ret = rsm_load_state_64(ctxt, buf);
2588 ret = rsm_load_state_32(ctxt, buf);
2590 if (ret != X86EMUL_CONTINUE)
2591 goto emulate_shutdown;
2594 * Note, the ctxt->ops callbacks are responsible for handling side
2595 * effects when writing MSRs and CRs, e.g. MMU context resets, CPUID
2596 * runtime updates, etc... If that changes, e.g. this flow is moved
2597 * out of the emulator to make it look more like enter_smm(), then
2598 * those side effects need to be explicitly handled for both success and failure.
2601 return X86EMUL_CONTINUE;
2604 ctxt->ops->triple_fault(ctxt);
2605 return X86EMUL_CONTINUE;
2609 setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
2610 struct desc_struct *cs, struct desc_struct *ss)
2612 cs->l = 0; /* will be adjusted later */
2613 set_desc_base(cs, 0); /* flat segment */
2614 cs->g = 1; /* 4KB granularity */
2615 set_desc_limit(cs, 0xfffff); /* 4GB limit */
2616 cs->type = 0x0b; /* Read, Execute, Accessed */
2618 cs->dpl = 0; /* will be adjusted later */
2623 set_desc_base(ss, 0); /* flat segment */
2624 set_desc_limit(ss, 0xfffff); /* 4GB limit */
2625 ss->g = 1; /* 4KB granularity */
2627 ss->type = 0x03; /* Read/Write, Accessed */
2628 ss->d = 1; /* 32-bit stack segment */
2635 static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
2637 u32 eax, ebx, ecx, edx;
2640 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
2641 return is_guest_vendor_intel(ebx, ecx, edx);
2644 static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
2646 const struct x86_emulate_ops *ops = ctxt->ops;
2647 u32 eax, ebx, ecx, edx;
2650 * syscall should always be enabled in longmode - so only become
2651 * vendor specific (cpuid) if other modes are active...
2653 if (ctxt->mode == X86EMUL_MODE_PROT64)
2658 ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
2660 * remark: Intel CPUs only support "syscall" in 64-bit long mode. A
2661 * 64-bit guest running a 32-bit compat app will therefore #UD. While
2662 * that behaviour could be fixed up by emulating the AMD response,
2663 * AMD CPUs can't be made to behave like Intel ones.
2665 if (is_guest_vendor_intel(ebx, ecx, edx))
2668 if (is_guest_vendor_amd(ebx, ecx, edx) ||
2669 is_guest_vendor_hygon(ebx, ecx, edx))
2673 * default: (not Intel, not AMD, not Hygon), apply Intel's stricter rules.
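/*
 * Rough SYSCALL flow implemented by the handler below:
 *   CS.sel = MSR_STAR[47:32] & ~3, SS.sel = CS.sel + 8
 *   RCX    = return RIP, R11 = RFLAGS            (long mode only)
 *   RIP    = MSR_LSTAR (64-bit) or MSR_CSTAR (compat mode)
 *   RFLAGS &= ~MSR_SYSCALL_MASK, then force the fixed bit (long mode)
 * In legacy (non-LMA) mode, EIP comes from MSR_STAR[31:0] and only the
 * VM and IF flags are cleared in EFLAGS.
 */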
2679 static int em_syscall(struct x86_emulate_ctxt *ctxt)
2681 const struct x86_emulate_ops *ops = ctxt->ops;
2682 struct desc_struct cs, ss;
2687 /* syscall is not available in real mode */
2688 if (ctxt->mode == X86EMUL_MODE_REAL ||
2689 ctxt->mode == X86EMUL_MODE_VM86)
2690 return emulate_ud(ctxt);
2692 if (!(em_syscall_is_enabled(ctxt)))
2693 return emulate_ud(ctxt);
2695 ops->get_msr(ctxt, MSR_EFER, &efer);
2696 if (!(efer & EFER_SCE))
2697 return emulate_ud(ctxt);
2699 setup_syscalls_segments(ctxt, &cs, &ss);
2700 ops->get_msr(ctxt, MSR_STAR, &msr_data);
2702 cs_sel = (u16)(msr_data & 0xfffc);
2703 ss_sel = (u16)(msr_data + 8);
2705 if (efer & EFER_LMA) {
2709 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2710 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2712 *reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
2713 if (efer & EFER_LMA) {
2714 #ifdef CONFIG_X86_64
2715 *reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags;
2718 ctxt->mode == X86EMUL_MODE_PROT64 ?
2719 MSR_LSTAR : MSR_CSTAR, &msr_data);
2720 ctxt->_eip = msr_data;
2722 ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
2723 ctxt->eflags &= ~msr_data;
2724 ctxt->eflags |= X86_EFLAGS_FIXED;
2728 ops->get_msr(ctxt, MSR_STAR, &msr_data);
2729 ctxt->_eip = (u32)msr_data;
2731 ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2734 ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
2735 return X86EMUL_CONTINUE;
2738 static int em_sysenter(struct x86_emulate_ctxt *ctxt)
2740 const struct x86_emulate_ops *ops = ctxt->ops;
2741 struct desc_struct cs, ss;
2746 ops->get_msr(ctxt, MSR_EFER, &efer);
2747 /* inject #GP if in real mode */
2748 if (ctxt->mode == X86EMUL_MODE_REAL)
2749 return emulate_gp(ctxt, 0);
2752 * Not recognized on AMD in compat mode (but is recognized in legacy mode).
2755 if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA)
2756 && !vendor_intel(ctxt))
2757 return emulate_ud(ctxt);
2759 /* sysenter/sysexit have not been tested in 64bit mode. */
2760 if (ctxt->mode == X86EMUL_MODE_PROT64)
2761 return X86EMUL_UNHANDLEABLE;
2763 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2764 if ((msr_data & 0xfffc) == 0x0)
2765 return emulate_gp(ctxt, 0);
2767 setup_syscalls_segments(ctxt, &cs, &ss);
2768 ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2769 cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
2770 ss_sel = cs_sel + 8;
2771 if (efer & EFER_LMA) {
2776 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2777 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2779 ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
2780 ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data;
2782 ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
2783 *reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
2785 if (efer & EFER_LMA)
2786 ctxt->mode = X86EMUL_MODE_PROT64;
2788 return X86EMUL_CONTINUE;
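/*
 * SYSEXIT selector math, per the SDM: with IA32_SYSENTER_CS as the base,
 * the 32-bit return path uses CS = base + 16 and SS = base + 24, while
 * the 64-bit path uses CS = base + 32 and SS = CS + 8.  Both selectors
 * are forced to RPL 3, RIP comes from RDX and RSP from RCX (see below).
 */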
2791 static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2793 const struct x86_emulate_ops *ops = ctxt->ops;
2794 struct desc_struct cs, ss;
2795 u64 msr_data, rcx, rdx;
2797 u16 cs_sel = 0, ss_sel = 0;
2799 /* inject #GP if in real mode or Virtual 8086 mode */
2800 if (ctxt->mode == X86EMUL_MODE_REAL ||
2801 ctxt->mode == X86EMUL_MODE_VM86)
2802 return emulate_gp(ctxt, 0);
2804 setup_syscalls_segments(ctxt, &cs, &ss);
2806 if ((ctxt->rex_prefix & 0x8) != 0x0)
2807 usermode = X86EMUL_MODE_PROT64;
2809 usermode = X86EMUL_MODE_PROT32;
2811 rcx = reg_read(ctxt, VCPU_REGS_RCX);
2812 rdx = reg_read(ctxt, VCPU_REGS_RDX);
2816 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2818 case X86EMUL_MODE_PROT32:
2819 cs_sel = (u16)(msr_data + 16);
2820 if ((msr_data & 0xfffc) == 0x0)
2821 return emulate_gp(ctxt, 0);
2822 ss_sel = (u16)(msr_data + 24);
2826 case X86EMUL_MODE_PROT64:
2827 cs_sel = (u16)(msr_data + 32);
2828 if (msr_data == 0x0)
2829 return emulate_gp(ctxt, 0);
2830 ss_sel = cs_sel + 8;
2833 if (emul_is_noncanonical_address(rcx, ctxt) ||
2834 emul_is_noncanonical_address(rdx, ctxt))
2835 return emulate_gp(ctxt, 0);
2838 cs_sel |= SEGMENT_RPL_MASK;
2839 ss_sel |= SEGMENT_RPL_MASK;
2841 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2842 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2845 *reg_write(ctxt, VCPU_REGS_RSP) = rcx;
2847 return X86EMUL_CONTINUE;
2850 static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
2853 if (ctxt->mode == X86EMUL_MODE_REAL)
2855 if (ctxt->mode == X86EMUL_MODE_VM86)
2857 iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
2858 return ctxt->ops->cpl(ctxt) > iopl;
2861 #define VMWARE_PORT_VMPORT (0x5658)
2862 #define VMWARE_PORT_VMRPC (0x5659)
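/*
 * TSS I/O permission bitmap check: the 16-bit bitmap offset lives at
 * byte 102 of the 32-bit TSS, and each I/O port is one bit.  A len-byte
 * access to 'port' is allowed only if all 'len' consecutive bits
 * starting at bit (port & 7) of the byte at offset (bitmap + port/8)
 * are clear.  E.g. a two-byte access to port 0x71 tests bits 1-2 of the
 * byte at bitmap + 0x0e.
 */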
2864 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
2867 const struct x86_emulate_ops *ops = ctxt->ops;
2868 struct desc_struct tr_seg;
2871 u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
2872 unsigned mask = (1 << len) - 1;
2876 * VMware allows access to these ports even if denied
2877 * by TSS I/O permission bitmap. Mimic behavior.
2879 if (enable_vmware_backdoor &&
2880 ((port == VMWARE_PORT_VMPORT) || (port == VMWARE_PORT_VMRPC)))
2883 ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
2886 if (desc_limit_scaled(&tr_seg) < 103)
2888 base = get_desc_base(&tr_seg);
2889 #ifdef CONFIG_X86_64
2890 base |= ((u64)base3) << 32;
2892 r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL, true);
2893 if (r != X86EMUL_CONTINUE)
2895 if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
2897 r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL, true);
2898 if (r != X86EMUL_CONTINUE)
2900 if ((perm >> bit_idx) & mask)
2905 static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
2911 if (emulator_bad_iopl(ctxt))
2912 if (!emulator_io_port_access_allowed(ctxt, port, len))
2915 ctxt->perm_ok = true;
2920 static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
2923 * Intel CPUs mask the counter and pointers in quite a strange
2924 * manner when ECX is zero, due to REP-string optimizations.
2926 #ifdef CONFIG_X86_64
2927 if (ctxt->ad_bytes != 4 || !vendor_intel(ctxt))
2930 *reg_write(ctxt, VCPU_REGS_RCX) = 0;
2933 case 0xa4: /* movsb */
2934 case 0xa5: /* movsd/w */
2935 *reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1;
2937 case 0xaa: /* stosb */
2938 case 0xab: /* stosd/w */
2939 *reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1;
2944 static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
2945 struct tss_segment_16 *tss)
2947 tss->ip = ctxt->_eip;
2948 tss->flag = ctxt->eflags;
2949 tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
2950 tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
2951 tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
2952 tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
2953 tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
2954 tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
2955 tss->si = reg_read(ctxt, VCPU_REGS_RSI);
2956 tss->di = reg_read(ctxt, VCPU_REGS_RDI);
2958 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
2959 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2960 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
2961 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
2962 tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
2965 static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
2966 struct tss_segment_16 *tss)
2971 ctxt->_eip = tss->ip;
2972 ctxt->eflags = tss->flag | 2;
2973 *reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
2974 *reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
2975 *reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
2976 *reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
2977 *reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
2978 *reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
2979 *reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
2980 *reg_write(ctxt, VCPU_REGS_RDI) = tss->di;
2983 * SDM says that segment selectors are loaded before segment descriptors.
2986 set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
2987 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
2988 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
2989 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2990 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2995 * Now load the segment descriptors. If a fault happens at this stage,
2996 * it is handled in the context of the new task.
2998 ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
2999 X86_TRANSFER_TASK_SWITCH, NULL);
3000 if (ret != X86EMUL_CONTINUE)
3002 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
3003 X86_TRANSFER_TASK_SWITCH, NULL);
3004 if (ret != X86EMUL_CONTINUE)
3006 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
3007 X86_TRANSFER_TASK_SWITCH, NULL);
3008 if (ret != X86EMUL_CONTINUE)
3010 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
3011 X86_TRANSFER_TASK_SWITCH, NULL);
3012 if (ret != X86EMUL_CONTINUE)
3014 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
3015 X86_TRANSFER_TASK_SWITCH, NULL);
3016 if (ret != X86EMUL_CONTINUE)
3019 return X86EMUL_CONTINUE;
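/*
 * 16-bit task switch: dump the current register state into the old TSS,
 * read the new TSS, optionally write the back link (previous task
 * selector) into it, and finally load the new state.  Faults while
 * loading are taken in the context of the new task.
 */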
3022 static int task_switch_16(struct x86_emulate_ctxt *ctxt,
3023 u16 tss_selector, u16 old_tss_sel,
3024 ulong old_tss_base, struct desc_struct *new_desc)
3026 struct tss_segment_16 tss_seg;
3028 u32 new_tss_base = get_desc_base(new_desc);
3030 ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
3031 if (ret != X86EMUL_CONTINUE)
3034 save_state_to_tss16(ctxt, &tss_seg);
3036 ret = linear_write_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
3037 if (ret != X86EMUL_CONTINUE)
3040 ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
3041 if (ret != X86EMUL_CONTINUE)
3044 if (old_tss_sel != 0xffff) {
3045 tss_seg.prev_task_link = old_tss_sel;
3047 ret = linear_write_system(ctxt, new_tss_base,
3048 &tss_seg.prev_task_link,
3049 sizeof(tss_seg.prev_task_link));
3050 if (ret != X86EMUL_CONTINUE)
3054 return load_state_from_tss16(ctxt, &tss_seg);
3057 static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
3058 struct tss_segment_32 *tss)
3060 /* CR3 and the LDT selector are intentionally not saved */
3061 tss->eip = ctxt->_eip;
3062 tss->eflags = ctxt->eflags;
3063 tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
3064 tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
3065 tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
3066 tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
3067 tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
3068 tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
3069 tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
3070 tss->edi = reg_read(ctxt, VCPU_REGS_RDI);
3072 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
3073 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
3074 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
3075 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
3076 tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
3077 tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
3080 static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
3081 struct tss_segment_32 *tss)
3086 if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
3087 return emulate_gp(ctxt, 0);
3088 ctxt->_eip = tss->eip;
3089 ctxt->eflags = tss->eflags | 2;
3091 /* General purpose registers */
3092 *reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
3093 *reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
3094 *reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
3095 *reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
3096 *reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
3097 *reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
3098 *reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
3099 *reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;
3102 * SDM says that segment selectors are loaded before segment
3103 * descriptors. This is important because CPL checks will use CS.RPL.
3106 set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
3107 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
3108 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
3109 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
3110 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
3111 set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
3112 set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
3115 * If we're switching between Protected Mode and VM86, we need to make
3116 * sure to update the mode before loading the segment descriptors so
3117 * that the selectors are interpreted correctly.
3119 if (ctxt->eflags & X86_EFLAGS_VM) {
3120 ctxt->mode = X86EMUL_MODE_VM86;
3123 ctxt->mode = X86EMUL_MODE_PROT32;
3128 * Now load the segment descriptors. If a fault happens at this stage,
3129 * it is handled in the context of the new task.
3131 ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
3132 cpl, X86_TRANSFER_TASK_SWITCH, NULL);
3133 if (ret != X86EMUL_CONTINUE)
3135 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
3136 X86_TRANSFER_TASK_SWITCH, NULL);
3137 if (ret != X86EMUL_CONTINUE)
3139 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
3140 X86_TRANSFER_TASK_SWITCH, NULL);
3141 if (ret != X86EMUL_CONTINUE)
3143 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
3144 X86_TRANSFER_TASK_SWITCH, NULL);
3145 if (ret != X86EMUL_CONTINUE)
3147 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
3148 X86_TRANSFER_TASK_SWITCH, NULL);
3149 if (ret != X86EMUL_CONTINUE)
3151 ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
3152 X86_TRANSFER_TASK_SWITCH, NULL);
3153 if (ret != X86EMUL_CONTINUE)
3155 ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
3156 X86_TRANSFER_TASK_SWITCH, NULL);
3161 static int task_switch_32(struct x86_emulate_ctxt *ctxt,
3162 u16 tss_selector, u16 old_tss_sel,
3163 ulong old_tss_base, struct desc_struct *new_desc)
3165 struct tss_segment_32 tss_seg;
3167 u32 new_tss_base = get_desc_base(new_desc);
3168 u32 eip_offset = offsetof(struct tss_segment_32, eip);
3169 u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
3171 ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
3172 if (ret != X86EMUL_CONTINUE)
3175 save_state_to_tss32(ctxt, &tss_seg);
3177 /* Only GP registers and segment selectors are saved */
3178 ret = linear_write_system(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
3179 ldt_sel_offset - eip_offset);
3180 if (ret != X86EMUL_CONTINUE)
3183 ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
3184 if (ret != X86EMUL_CONTINUE)
3187 if (old_tss_sel != 0xffff) {
3188 tss_seg.prev_task_link = old_tss_sel;
3190 ret = linear_write_system(ctxt, new_tss_base,
3191 &tss_seg.prev_task_link,
3192 sizeof(tss_seg.prev_task_link));
3193 if (ret != X86EMUL_CONTINUE)
3197 return load_state_from_tss32(ctxt, &tss_seg);
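/*
 * Common task-switch driver for CALL/JMP/IRET/exceptions: it reads the
 * old and new TSS descriptors, performs the gate DPL check for software
 * interrupts, validates the new TSS limit (at least 0x67 for a 32-bit
 * TSS, 0x2b for a 16-bit one), clears/sets the busy bit and EFLAGS.NT
 * as the switch type requires, hands off to task_switch_16/32, sets
 * CR0.TS and pushes the error code of a faulting exception if there is
 * one.
 */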
3200 static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
3201 u16 tss_selector, int idt_index, int reason,
3202 bool has_error_code, u32 error_code)
3204 const struct x86_emulate_ops *ops = ctxt->ops;
3205 struct desc_struct curr_tss_desc, next_tss_desc;
3207 u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
3208 ulong old_tss_base =
3209 ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
3211 ulong desc_addr, dr7;
3213 /* FIXME: old_tss_base == ~0 ? */
3215 ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc, &desc_addr);
3216 if (ret != X86EMUL_CONTINUE)
3218 ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc, &desc_addr);
3219 if (ret != X86EMUL_CONTINUE)
3222 /* FIXME: check that next_tss_desc is tss */
3225 * Check privileges. The three cases are task switch caused by...
3227 * 1. jmp/call/int to task gate: Check against DPL of the task gate
3228 * 2. Exception/IRQ/iret: No check is performed
3229 * 3. jmp/call to TSS/task-gate: No check is performed since the
3230 * hardware checks it before exiting.
3232 if (reason == TASK_SWITCH_GATE) {
3233 if (idt_index != -1) {
3234 /* Software interrupts */
3235 struct desc_struct task_gate_desc;
3238 ret = read_interrupt_descriptor(ctxt, idt_index,
3240 if (ret != X86EMUL_CONTINUE)
3243 dpl = task_gate_desc.dpl;
3244 if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
3245 return emulate_gp(ctxt, (idt_index << 3) | 0x2);
3249 desc_limit = desc_limit_scaled(&next_tss_desc);
3250 if (!next_tss_desc.p ||
3251 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
3252 desc_limit < 0x2b)) {
3253 return emulate_ts(ctxt, tss_selector & 0xfffc);
3256 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
3257 curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
3258 write_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc);
3261 if (reason == TASK_SWITCH_IRET)
3262 ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
3264 /* Set the back link to the previous task only if the NT bit is set in
3265 EFLAGS; note that old_tss_sel is not used after this point. */
3266 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
3267 old_tss_sel = 0xffff;
3269 if (next_tss_desc.type & 8)
3270 ret = task_switch_32(ctxt, tss_selector, old_tss_sel,
3271 old_tss_base, &next_tss_desc);
3273 ret = task_switch_16(ctxt, tss_selector, old_tss_sel,
3274 old_tss_base, &next_tss_desc);
3275 if (ret != X86EMUL_CONTINUE)
3278 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
3279 ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;
3281 if (reason != TASK_SWITCH_IRET) {
3282 next_tss_desc.type |= (1 << 1); /* set busy flag */
3283 write_segment_descriptor(ctxt, tss_selector, &next_tss_desc);
3286 ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS);
3287 ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);
3289 if (has_error_code) {
3290 ctxt->op_bytes = ctxt->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
3291 ctxt->lock_prefix = 0;
3292 ctxt->src.val = (unsigned long) error_code;
3293 ret = em_push(ctxt);
3296 ops->get_dr(ctxt, 7, &dr7);
3297 ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN));
3302 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
3303 u16 tss_selector, int idt_index, int reason,
3304 bool has_error_code, u32 error_code)
3308 invalidate_registers(ctxt);
3309 ctxt->_eip = ctxt->eip;
3310 ctxt->dst.type = OP_NONE;
3312 rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
3313 has_error_code, error_code);
3315 if (rc == X86EMUL_CONTINUE) {
3316 ctxt->eip = ctxt->_eip;
3317 writeback_registers(ctxt);
3320 return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
3323 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
3326 int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;
3328 register_address_increment(ctxt, reg, df * op->bytes);
3329 op->addr.mem.ea = register_address(ctxt, reg);
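/*
 * DAS (decimal adjust AL after subtraction), as architected: if the low
 * nibble of AL is > 9 or AF is set, subtract 6 from AL and set AF (CF
 * also picks up any borrow from that subtraction); if the original AL
 * was > 0x99 or CF was set, additionally subtract 0x60 and set CF.
 * E.g. AL = 0x2b with AF set becomes 0x25.
 */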
3332 static int em_das(struct x86_emulate_ctxt *ctxt)
3335 bool af, cf, old_cf;
3337 cf = ctxt->eflags & X86_EFLAGS_CF;
3343 af = ctxt->eflags & X86_EFLAGS_AF;
3344 if ((al & 0x0f) > 9 || af) {
3346 cf = old_cf | (al >= 250);
3351 if (old_al > 0x99 || old_cf) {
3357 /* Set PF, ZF, SF */
3358 ctxt->src.type = OP_IMM;
3360 ctxt->src.bytes = 1;
3361 fastop(ctxt, em_or);
3362 ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
3364 ctxt->eflags |= X86_EFLAGS_CF;
3366 ctxt->eflags |= X86_EFLAGS_AF;
3367 return X86EMUL_CONTINUE;
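/*
 * AAM divides AL by the immediate base (10 for the plain "aam"
 * encoding): AH gets the quotient, AL the remainder, and a base of 0
 * raises #DE.  E.g. AL = 0x2f (47) with base 10 yields AH = 4, AL = 7.
 */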
3370 static int em_aam(struct x86_emulate_ctxt *ctxt)
3374 if (ctxt->src.val == 0)
3375 return emulate_de(ctxt);
3377 al = ctxt->dst.val & 0xff;
3378 ah = al / ctxt->src.val;
3379 al %= ctxt->src.val;
3381 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al | (ah << 8);
3383 /* Set PF, ZF, SF */
3384 ctxt->src.type = OP_IMM;
3386 ctxt->src.bytes = 1;
3387 fastop(ctxt, em_or);
3389 return X86EMUL_CONTINUE;
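/*
 * AAD is the inverse of AAM: AL = (AL + AH * base) & 0xff and AH is
 * cleared.  E.g. AH = 4, AL = 7 with base 10 gives AX = 0x002f (47).
 */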
3392 static int em_aad(struct x86_emulate_ctxt *ctxt)
3394 u8 al = ctxt->dst.val & 0xff;
3395 u8 ah = (ctxt->dst.val >> 8) & 0xff;
3397 al = (al + (ah * ctxt->src.val)) & 0xff;
3399 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;
3401 /* Set PF, ZF, SF */
3402 ctxt->src.type = OP_IMM;
3404 ctxt->src.bytes = 1;
3405 fastop(ctxt, em_or);
3407 return X86EMUL_CONTINUE;
3410 static int em_call(struct x86_emulate_ctxt *ctxt)
3413 long rel = ctxt->src.val;
3415 ctxt->src.val = (unsigned long)ctxt->_eip;
3416 rc = jmp_rel(ctxt, rel);
3417 if (rc != X86EMUL_CONTINUE)
3419 return em_push(ctxt);
3422 static int em_call_far(struct x86_emulate_ctxt *ctxt)
3427 struct desc_struct old_desc, new_desc;
3428 const struct x86_emulate_ops *ops = ctxt->ops;
3429 int cpl = ctxt->ops->cpl(ctxt);
3430 enum x86emul_mode prev_mode = ctxt->mode;
3432 old_eip = ctxt->_eip;
3433 ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
3435 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
3436 rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
3437 X86_TRANSFER_CALL_JMP, &new_desc);
3438 if (rc != X86EMUL_CONTINUE)
3441 rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
3442 if (rc != X86EMUL_CONTINUE)
3445 ctxt->src.val = old_cs;
3447 if (rc != X86EMUL_CONTINUE)
3450 ctxt->src.val = old_eip;
3452 /* If we failed, we tainted the memory, but the very least we should restore is the old CS and the previous mode. */
3454 if (rc != X86EMUL_CONTINUE) {
3455 pr_warn_once("faulting far call emulation tainted memory\n");
3460 ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
3461 ctxt->mode = prev_mode;
3466 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
3471 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
3472 if (rc != X86EMUL_CONTINUE)
3474 rc = assign_eip_near(ctxt, eip);
3475 if (rc != X86EMUL_CONTINUE)
3477 rsp_increment(ctxt, ctxt->src.val);
3478 return X86EMUL_CONTINUE;
3481 static int em_xchg(struct x86_emulate_ctxt *ctxt)
3483 /* Write back the register source. */
3484 ctxt->src.val = ctxt->dst.val;
3485 write_register_operand(&ctxt->src);
3487 /* Write back the memory destination with implicit LOCK prefix. */
3488 ctxt->dst.val = ctxt->src.orig_val;
3489 ctxt->lock_prefix = 1;
3490 return X86EMUL_CONTINUE;
3493 static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
3495 ctxt->dst.val = ctxt->src2.val;
3496 return fastop(ctxt, em_imul);
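/*
 * CWD/CDQ/CQO below replicate the sign bit of the accumulator into
 * DX/EDX/RDX: (src >> (bits - 1)) is 0 or 1, so ~((src >> (bits-1)) - 1)
 * is all zeroes or all ones.  E.g. CWD with AX = 0x8000 sets DX = 0xffff.
 */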
3499 static int em_cwd(struct x86_emulate_ctxt *ctxt)
3501 ctxt->dst.type = OP_REG;
3502 ctxt->dst.bytes = ctxt->src.bytes;
3503 ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
3504 ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);
3506 return X86EMUL_CONTINUE;
3509 static int em_rdpid(struct x86_emulate_ctxt *ctxt)
3513 if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux))
3514 return emulate_ud(ctxt);
3515 ctxt->dst.val = tsc_aux;
3516 return X86EMUL_CONTINUE;
3519 static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
3523 ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
3524 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
3525 *reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
3526 return X86EMUL_CONTINUE;
3529 static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
3533 if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
3534 return emulate_gp(ctxt, 0);
3535 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
3536 *reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
3537 return X86EMUL_CONTINUE;
3540 static int em_mov(struct x86_emulate_ctxt *ctxt)
3542 memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
3543 return X86EMUL_CONTINUE;
3546 static int em_movbe(struct x86_emulate_ctxt *ctxt)
3550 if (!ctxt->ops->guest_has_movbe(ctxt))
3551 return emulate_ud(ctxt);
3553 switch (ctxt->op_bytes) {
3556 * From MOVBE definition: "...When the operand size is 16 bits,
3557 * the upper word of the destination register remains unchanged ..."
3560 * Both casting ->valptr and ->val to u16 breaks strict aliasing
3561 * rules, so we have to do the operation almost by hand.
3563 tmp = (u16)ctxt->src.val;
3564 ctxt->dst.val &= ~0xffffUL;
3565 ctxt->dst.val |= (unsigned long)swab16(tmp);
3568 ctxt->dst.val = swab32((u32)ctxt->src.val);
3571 ctxt->dst.val = swab64(ctxt->src.val);
3576 return X86EMUL_CONTINUE;
3579 static int em_cr_write(struct x86_emulate_ctxt *ctxt)
3581 if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
3582 return emulate_gp(ctxt, 0);
3584 /* Disable writeback. */
3585 ctxt->dst.type = OP_NONE;
3586 return X86EMUL_CONTINUE;
3589 static int em_dr_write(struct x86_emulate_ctxt *ctxt)
3593 if (ctxt->mode == X86EMUL_MODE_PROT64)
3594 val = ctxt->src.val & ~0ULL;
3596 val = ctxt->src.val & ~0U;
3598 /* #UD condition is already handled. */
3599 if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0)
3600 return emulate_gp(ctxt, 0);
3602 /* Disable writeback. */
3603 ctxt->dst.type = OP_NONE;
3604 return X86EMUL_CONTINUE;
3607 static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
3609 u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
3613 msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
3614 | ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
3615 r = ctxt->ops->set_msr(ctxt, msr_index, msr_data);
3617 if (r == X86EMUL_IO_NEEDED)
3621 return emulate_gp(ctxt, 0);
3623 return r < 0 ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE;
3626 static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
3628 u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
3632 r = ctxt->ops->get_msr(ctxt, msr_index, &msr_data);
3634 if (r == X86EMUL_IO_NEEDED)
3638 return emulate_gp(ctxt, 0);
3640 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
3641 *reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
3642 return X86EMUL_CONTINUE;
3645 static int em_store_sreg(struct x86_emulate_ctxt *ctxt, int segment)
3647 if (segment > VCPU_SREG_GS &&
3648 (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3649 ctxt->ops->cpl(ctxt) > 0)
3650 return emulate_gp(ctxt, 0);
3652 ctxt->dst.val = get_segment_selector(ctxt, segment);
3653 if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
3654 ctxt->dst.bytes = 2;
3655 return X86EMUL_CONTINUE;
3658 static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
3660 if (ctxt->modrm_reg > VCPU_SREG_GS)
3661 return emulate_ud(ctxt);
3663 return em_store_sreg(ctxt, ctxt->modrm_reg);
3666 static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
3668 u16 sel = ctxt->src.val;
3670 if (ctxt->modrm_reg == VCPU_SREG_CS || ctxt->modrm_reg > VCPU_SREG_GS)
3671 return emulate_ud(ctxt);
3673 if (ctxt->modrm_reg == VCPU_SREG_SS)
3674 ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
3676 /* Disable writeback. */
3677 ctxt->dst.type = OP_NONE;
3678 return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
3681 static int em_sldt(struct x86_emulate_ctxt *ctxt)
3683 return em_store_sreg(ctxt, VCPU_SREG_LDTR);
3686 static int em_lldt(struct x86_emulate_ctxt *ctxt)
3688 u16 sel = ctxt->src.val;
3690 /* Disable writeback. */
3691 ctxt->dst.type = OP_NONE;
3692 return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
3695 static int em_str(struct x86_emulate_ctxt *ctxt)
3697 return em_store_sreg(ctxt, VCPU_SREG_TR);
3700 static int em_ltr(struct x86_emulate_ctxt *ctxt)
3702 u16 sel = ctxt->src.val;
3704 /* Disable writeback. */
3705 ctxt->dst.type = OP_NONE;
3706 return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR);
3709 static int em_invlpg(struct x86_emulate_ctxt *ctxt)
3714 rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear);
3715 if (rc == X86EMUL_CONTINUE)
3716 ctxt->ops->invlpg(ctxt, linear);
3717 /* Disable writeback. */
3718 ctxt->dst.type = OP_NONE;
3719 return X86EMUL_CONTINUE;
3722 static int em_clts(struct x86_emulate_ctxt *ctxt)
3726 cr0 = ctxt->ops->get_cr(ctxt, 0);
3728 ctxt->ops->set_cr(ctxt, 0, cr0);
3729 return X86EMUL_CONTINUE;
3732 static int em_hypercall(struct x86_emulate_ctxt *ctxt)
3734 int rc = ctxt->ops->fix_hypercall(ctxt);
3736 if (rc != X86EMUL_CONTINUE)
3739 /* Let the processor re-execute the fixed hypercall */
3740 ctxt->_eip = ctxt->eip;
3741 /* Disable writeback. */
3742 ctxt->dst.type = OP_NONE;
3743 return X86EMUL_CONTINUE;
3746 static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
3747 void (*get)(struct x86_emulate_ctxt *ctxt,
3748 struct desc_ptr *ptr))
3750 struct desc_ptr desc_ptr;
3752 if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3753 ctxt->ops->cpl(ctxt) > 0)
3754 return emulate_gp(ctxt, 0);
3756 if (ctxt->mode == X86EMUL_MODE_PROT64)
3758 get(ctxt, &desc_ptr);
3759 if (ctxt->op_bytes == 2) {
3761 desc_ptr.address &= 0x00ffffff;
3763 /* Disable writeback. */
3764 ctxt->dst.type = OP_NONE;
3765 return segmented_write_std(ctxt, ctxt->dst.addr.mem,
3766 &desc_ptr, 2 + ctxt->op_bytes);
3769 static int em_sgdt(struct x86_emulate_ctxt *ctxt)
3771 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_gdt);
3774 static int em_sidt(struct x86_emulate_ctxt *ctxt)
3776 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
3779 static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
3781 struct desc_ptr desc_ptr;
3784 if (ctxt->mode == X86EMUL_MODE_PROT64)
3786 rc = read_descriptor(ctxt, ctxt->src.addr.mem,
3787 &desc_ptr.size, &desc_ptr.address,
3789 if (rc != X86EMUL_CONTINUE)
3791 if (ctxt->mode == X86EMUL_MODE_PROT64 &&
3792 emul_is_noncanonical_address(desc_ptr.address, ctxt))
3793 return emulate_gp(ctxt, 0);
3795 ctxt->ops->set_gdt(ctxt, &desc_ptr);
3797 ctxt->ops->set_idt(ctxt, &desc_ptr);
3798 /* Disable writeback. */
3799 ctxt->dst.type = OP_NONE;
3800 return X86EMUL_CONTINUE;
3803 static int em_lgdt(struct x86_emulate_ctxt *ctxt)
3805 return em_lgdt_lidt(ctxt, true);
3808 static int em_lidt(struct x86_emulate_ctxt *ctxt)
3810 return em_lgdt_lidt(ctxt, false);
3813 static int em_smsw(struct x86_emulate_ctxt *ctxt)
3815 if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3816 ctxt->ops->cpl(ctxt) > 0)
3817 return emulate_gp(ctxt, 0);
3819 if (ctxt->dst.type == OP_MEM)
3820 ctxt->dst.bytes = 2;
3821 ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
3822 return X86EMUL_CONTINUE;
3825 static int em_lmsw(struct x86_emulate_ctxt *ctxt)
3827 ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
3828 | (ctxt->src.val & 0x0f));
3829 ctxt->dst.type = OP_NONE;
3830 return X86EMUL_CONTINUE;
3833 static int em_loop(struct x86_emulate_ctxt *ctxt)
3835 int rc = X86EMUL_CONTINUE;
3837 register_address_increment(ctxt, VCPU_REGS_RCX, -1);
3838 if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
3839 (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
3840 rc = jmp_rel(ctxt, ctxt->src.val);
3845 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
3847 int rc = X86EMUL_CONTINUE;
3849 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
3850 rc = jmp_rel(ctxt, ctxt->src.val);
3855 static int em_in(struct x86_emulate_ctxt *ctxt)
3857 if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
3859 return X86EMUL_IO_NEEDED;
3861 return X86EMUL_CONTINUE;
3864 static int em_out(struct x86_emulate_ctxt *ctxt)
3866 ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
3868 /* Disable writeback. */
3869 ctxt->dst.type = OP_NONE;
3870 return X86EMUL_CONTINUE;
3873 static int em_cli(struct x86_emulate_ctxt *ctxt)
3875 if (emulator_bad_iopl(ctxt))
3876 return emulate_gp(ctxt, 0);
3878 ctxt->eflags &= ~X86_EFLAGS_IF;
3879 return X86EMUL_CONTINUE;
3882 static int em_sti(struct x86_emulate_ctxt *ctxt)
3884 if (emulator_bad_iopl(ctxt))
3885 return emulate_gp(ctxt, 0);
3887 ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
3888 ctxt->eflags |= X86_EFLAGS_IF;
3889 return X86EMUL_CONTINUE;
3892 static int em_cpuid(struct x86_emulate_ctxt *ctxt)
3894 u32 eax, ebx, ecx, edx;
3897 ctxt->ops->get_msr(ctxt, MSR_MISC_FEATURES_ENABLES, &msr);
3898 if (msr & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
3899 ctxt->ops->cpl(ctxt)) {
3900 return emulate_gp(ctxt, 0);
3903 eax = reg_read(ctxt, VCPU_REGS_RAX);
3904 ecx = reg_read(ctxt, VCPU_REGS_RCX);
3905 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
3906 *reg_write(ctxt, VCPU_REGS_RAX) = eax;
3907 *reg_write(ctxt, VCPU_REGS_RBX) = ebx;
3908 *reg_write(ctxt, VCPU_REGS_RCX) = ecx;
3909 *reg_write(ctxt, VCPU_REGS_RDX) = edx;
3910 return X86EMUL_CONTINUE;
3913 static int em_sahf(struct x86_emulate_ctxt *ctxt)
3917 flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
3919 flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
3921 ctxt->eflags &= ~0xffUL;
3922 ctxt->eflags |= flags | X86_EFLAGS_FIXED;
3923 return X86EMUL_CONTINUE;
3926 static int em_lahf(struct x86_emulate_ctxt *ctxt)
3928 *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
3929 *reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
3930 return X86EMUL_CONTINUE;
3933 static int em_bswap(struct x86_emulate_ctxt *ctxt)
3935 switch (ctxt->op_bytes) {
3936 #ifdef CONFIG_X86_64
3938 asm("bswap %0" : "+r"(ctxt->dst.val));
3942 asm("bswap %0" : "+r"(*(u32 *)&ctxt->dst.val));
3945 return X86EMUL_CONTINUE;
3948 static int em_clflush(struct x86_emulate_ctxt *ctxt)
3950 /* emulating clflush regardless of cpuid */
3951 return X86EMUL_CONTINUE;
3954 static int em_clflushopt(struct x86_emulate_ctxt *ctxt)
3956 /* emulating clflushopt regardless of cpuid */
3957 return X86EMUL_CONTINUE;
3960 static int em_movsxd(struct x86_emulate_ctxt *ctxt)
3962 ctxt->dst.val = (s32) ctxt->src.val;
3963 return X86EMUL_CONTINUE;
3966 static int check_fxsr(struct x86_emulate_ctxt *ctxt)
3968 if (!ctxt->ops->guest_has_fxsr(ctxt))
3969 return emulate_ud(ctxt);
3971 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
3972 return emulate_nm(ctxt);
3975 * Don't emulate a case that should never be hit, instead of working
3976 * around a lack of fxsave64/fxrstor64 on old compilers.
3978 if (ctxt->mode >= X86EMUL_MODE_PROT64)
3979 return X86EMUL_UNHANDLEABLE;
3981 return X86EMUL_CONTINUE;
3985 * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but does save
3986 * and restore MXCSR.
3988 static size_t __fxstate_size(int nregs)
3990 return offsetof(struct fxregs_state, xmm_space[0]) + nregs * 16;
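/*
 * Size of the FXSAVE image that actually needs to be transferred: the
 * legacy x87/MXCSR header up to xmm_space[] plus 16 bytes per XMM
 * register -- 16 registers in 64-bit mode, 8 in 32-bit mode with
 * CR4.OSFXSR set, and none when OSFXSR is clear.
 */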
3993 static inline size_t fxstate_size(struct x86_emulate_ctxt *ctxt)
3996 if (ctxt->mode == X86EMUL_MODE_PROT64)
3997 return __fxstate_size(16);
3999 cr4_osfxsr = ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR;
4000 return __fxstate_size(cr4_osfxsr ? 8 : 0);
4004 * FXSAVE and FXRSTOR have 4 different formats depending on execution mode,
4005 * 1) 16-bit mode
4006 * 2) 32-bit mode
4007 *  - like (1), but FIP and FDP (foo) are only 16 bit. At least Intel CPUs
4008 *    preserve whole 32 bit values, though, so (1) and (2) are the same wrt.
4009 *    FPU state.
4010 * 3) 64-bit mode with REX.W prefix
4011 * - like (2), but XMM 8-15 are being saved and restored
4012 * 4) 64-bit mode without REX.W prefix
4013 * - like (3), but FIP and FDP are 64 bit
4015 * Emulation uses (3) for (1) and (2) and preserves XMM 8-15 to reach the
4016 * desired result. (4) is not emulated.
4018 * Note: Guest and host CPUID.(EAX=07H,ECX=0H):EBX[bit 13] (deprecate FPU CS
4019 * and FPU DS) should match.
4021 static int em_fxsave(struct x86_emulate_ctxt *ctxt)
4023 struct fxregs_state fx_state;
4026 rc = check_fxsr(ctxt);
4027 if (rc != X86EMUL_CONTINUE)
4032 rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
4036 if (rc != X86EMUL_CONTINUE)
4039 return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state,
4040 fxstate_size(ctxt));
4044 * FXRSTOR might restore XMM registers not provided by the guest. Fill
4045 * in the host registers (via FXSAVE) instead, so they won't be modified.
4046 * (preemption has to stay disabled until FXRSTOR).
4048 * Use noinline to keep the stack for other functions called by callers small.
4050 static noinline int fxregs_fixup(struct fxregs_state *fx_state,
4051 const size_t used_size)
4053 struct fxregs_state fx_tmp;
4056 rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp));
4057 memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size,
4058 __fxstate_size(16) - used_size);
4063 static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
4065 struct fxregs_state fx_state;
4069 rc = check_fxsr(ctxt);
4070 if (rc != X86EMUL_CONTINUE)
4073 size = fxstate_size(ctxt);
4074 rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
4075 if (rc != X86EMUL_CONTINUE)
4080 if (size < __fxstate_size(16)) {
4081 rc = fxregs_fixup(&fx_state, size);
4082 if (rc != X86EMUL_CONTINUE)
4086 if (fx_state.mxcsr >> 16) {
4087 rc = emulate_gp(ctxt, 0);
4091 if (rc == X86EMUL_CONTINUE)
4092 rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
4100 static int em_xsetbv(struct x86_emulate_ctxt *ctxt)
4104 eax = reg_read(ctxt, VCPU_REGS_RAX);
4105 edx = reg_read(ctxt, VCPU_REGS_RDX);
4106 ecx = reg_read(ctxt, VCPU_REGS_RCX);
4108 if (ctxt->ops->set_xcr(ctxt, ecx, ((u64)edx << 32) | eax))
4109 return emulate_gp(ctxt, 0);
4111 return X86EMUL_CONTINUE;
4114 static bool valid_cr(int nr)
4126 static int check_cr_access(struct x86_emulate_ctxt *ctxt)
4128 if (!valid_cr(ctxt->modrm_reg))
4129 return emulate_ud(ctxt);
4131 return X86EMUL_CONTINUE;
4134 static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
4138 ctxt->ops->get_dr(ctxt, 7, &dr7);
4140 /* Check if DR7.GD (general detect enable) is set */
4141 return dr7 & (1 << 13);
4144 static int check_dr_read(struct x86_emulate_ctxt *ctxt)
4146 int dr = ctxt->modrm_reg;
4150 return emulate_ud(ctxt);
4152 cr4 = ctxt->ops->get_cr(ctxt, 4);
4153 if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
4154 return emulate_ud(ctxt);
4156 if (check_dr7_gd(ctxt)) {
4159 ctxt->ops->get_dr(ctxt, 6, &dr6);
4160 dr6 &= ~DR_TRAP_BITS;
4161 dr6 |= DR6_BD | DR6_ACTIVE_LOW;
4162 ctxt->ops->set_dr(ctxt, 6, dr6);
4163 return emulate_db(ctxt);
4166 return X86EMUL_CONTINUE;
4169 static int check_dr_write(struct x86_emulate_ctxt *ctxt)
4171 u64 new_val = ctxt->src.val64;
4172 int dr = ctxt->modrm_reg;
4174 if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
4175 return emulate_gp(ctxt, 0);
4177 return check_dr_read(ctxt);
4180 static int check_svme(struct x86_emulate_ctxt *ctxt)
4184 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
4186 if (!(efer & EFER_SVME))
4187 return emulate_ud(ctxt);
4189 return X86EMUL_CONTINUE;
4192 static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
4194 u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
4196 /* Valid physical address? */
4197 if (rax & 0xffff000000000000ULL)
4198 return emulate_gp(ctxt, 0);
4200 return check_svme(ctxt);
4203 static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
4205 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
4207 if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
4208 return emulate_gp(ctxt, 0);
4210 return X86EMUL_CONTINUE;
4213 static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
4215 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
4216 u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
4219 * VMware allows access to these Pseudo-PMCs even when read via RDPMC
4220 * in Ring3 when CR4.PCE=0.
4222 if (enable_vmware_backdoor && is_vmware_backdoor_pmc(rcx))
4223 return X86EMUL_CONTINUE;
4225 if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
4226 ctxt->ops->check_pmc(ctxt, rcx))
4227 return emulate_gp(ctxt, 0);
4229 return X86EMUL_CONTINUE;
4232 static int check_perm_in(struct x86_emulate_ctxt *ctxt)
4234 ctxt->dst.bytes = min(ctxt->dst.bytes, 4u);
4235 if (!emulator_io_permited(ctxt, ctxt->src.val, ctxt->dst.bytes))
4236 return emulate_gp(ctxt, 0);
4238 return X86EMUL_CONTINUE;
4241 static int check_perm_out(struct x86_emulate_ctxt *ctxt)
4243 ctxt->src.bytes = min(ctxt->src.bytes, 4u);
4244 if (!emulator_io_permited(ctxt, ctxt->dst.val, ctxt->src.bytes))
4245 return emulate_gp(ctxt, 0);
4247 return X86EMUL_CONTINUE;
4250 #define D(_y) { .flags = (_y) }
4251 #define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
4252 #define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
4253 .intercept = x86_intercept_##_i, .check_perm = (_p) }
4254 #define N D(NotImpl)
4255 #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
4256 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
4257 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
4258 #define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
4259 #define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) }
4260 #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
4261 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
4262 #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
4263 #define II(_f, _e, _i) \
4264 { .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
4265 #define IIP(_f, _e, _i, _p) \
4266 { .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
4267 .intercept = x86_intercept_##_i, .check_perm = (_p) }
4268 #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
4270 #define D2bv(_f) D((_f) | ByteOp), D(_f)
4271 #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
4272 #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e)
4273 #define F2bv(_f, _e) F((_f) | ByteOp, _e), F(_f, _e)
4274 #define I2bvIP(_f, _e, _i, _p) \
4275 IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)
4277 #define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \
4278 F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
4279 F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
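/*
 * Quick legend for the decode tables below: D() is a decode-only entry,
 * I()/F() attach an emulation callback (F via the fastop machinery),
 * DI()/II() additionally name an intercept, and the *IP forms add a
 * ->check_perm hook.  G/GD/EXT/E/GP/ID/MD redirect decoding through a
 * group (ModRM.reg), group-dual (memory vs. register form), RM-extended
 * group (ModRM.rm), escape, mandatory-prefix, instruction-dual or
 * mode-dual table.  The 2bv and F6ALU helpers expand one definition
 * into the byte and word/dword variants of the same instruction.
 */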
4281 static const struct opcode group7_rm0[] = {
4283 I(SrcNone | Priv | EmulateOnUD, em_hypercall),
4287 static const struct opcode group7_rm1[] = {
4288 DI(SrcNone | Priv, monitor),
4289 DI(SrcNone | Priv, mwait),
4293 static const struct opcode group7_rm2[] = {
4295 II(ImplicitOps | Priv, em_xsetbv, xsetbv),
4299 static const struct opcode group7_rm3[] = {
4300 DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
4301 II(SrcNone | Prot | EmulateOnUD, em_hypercall, vmmcall),
4302 DIP(SrcNone | Prot | Priv, vmload, check_svme_pa),
4303 DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa),
4304 DIP(SrcNone | Prot | Priv, stgi, check_svme),
4305 DIP(SrcNone | Prot | Priv, clgi, check_svme),
4306 DIP(SrcNone | Prot | Priv, skinit, check_svme),
4307 DIP(SrcNone | Prot | Priv, invlpga, check_svme),
4310 static const struct opcode group7_rm7[] = {
4312 DIP(SrcNone, rdtscp, check_rdtsc),
4316 static const struct opcode group1[] = {
4318 F(Lock | PageTable, em_or),
4321 F(Lock | PageTable, em_and),
4327 static const struct opcode group1A[] = {
4328 I(DstMem | SrcNone | Mov | Stack | IncSP | TwoMemOp, em_pop), N, N, N, N, N, N, N,
4331 static const struct opcode group2[] = {
4332 F(DstMem | ModRM, em_rol),
4333 F(DstMem | ModRM, em_ror),
4334 F(DstMem | ModRM, em_rcl),
4335 F(DstMem | ModRM, em_rcr),
4336 F(DstMem | ModRM, em_shl),
4337 F(DstMem | ModRM, em_shr),
4338 F(DstMem | ModRM, em_shl),
4339 F(DstMem | ModRM, em_sar),
4342 static const struct opcode group3[] = {
4343 F(DstMem | SrcImm | NoWrite, em_test),
4344 F(DstMem | SrcImm | NoWrite, em_test),
4345 F(DstMem | SrcNone | Lock, em_not),
4346 F(DstMem | SrcNone | Lock, em_neg),
4347 F(DstXacc | Src2Mem, em_mul_ex),
4348 F(DstXacc | Src2Mem, em_imul_ex),
4349 F(DstXacc | Src2Mem, em_div_ex),
4350 F(DstXacc | Src2Mem, em_idiv_ex),
4353 static const struct opcode group4[] = {
4354 F(ByteOp | DstMem | SrcNone | Lock, em_inc),
4355 F(ByteOp | DstMem | SrcNone | Lock, em_dec),
4359 static const struct opcode group5[] = {
4360 F(DstMem | SrcNone | Lock, em_inc),
4361 F(DstMem | SrcNone | Lock, em_dec),
4362 I(SrcMem | NearBranch, em_call_near_abs),
4363 I(SrcMemFAddr | ImplicitOps, em_call_far),
4364 I(SrcMem | NearBranch, em_jmp_abs),
4365 I(SrcMemFAddr | ImplicitOps, em_jmp_far),
4366 I(SrcMem | Stack | TwoMemOp, em_push), D(Undefined),
4369 static const struct opcode group6[] = {
4370 II(Prot | DstMem, em_sldt, sldt),
4371 II(Prot | DstMem, em_str, str),
4372 II(Prot | Priv | SrcMem16, em_lldt, lldt),
4373 II(Prot | Priv | SrcMem16, em_ltr, ltr),
4377 static const struct group_dual group7 = { {
4378 II(Mov | DstMem, em_sgdt, sgdt),
4379 II(Mov | DstMem, em_sidt, sidt),
4380 II(SrcMem | Priv, em_lgdt, lgdt),
4381 II(SrcMem | Priv, em_lidt, lidt),
4382 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4383 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4384 II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
4390 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4391 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4395 static const struct opcode group8[] = {
4397 F(DstMem | SrcImmByte | NoWrite, em_bt),
4398 F(DstMem | SrcImmByte | Lock | PageTable, em_bts),
4399 F(DstMem | SrcImmByte | Lock, em_btr),
4400 F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
4404 * The "memory" destination is actually always a register, since we come
4405 * from the register case of group9.
4407 static const struct gprefix pfx_0f_c7_7 = {
4408 N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdpid),
4412 static const struct group_dual group9 = { {
4413 N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
4415 N, N, N, N, N, N, N,
4416 GP(0, &pfx_0f_c7_7),
4419 static const struct opcode group11[] = {
4420 I(DstMem | SrcImm | Mov | PageTable, em_mov),
4424 static const struct gprefix pfx_0f_ae_7 = {
4425 I(SrcMem | ByteOp, em_clflush), I(SrcMem | ByteOp, em_clflushopt), N, N,
4428 static const struct group_dual group15 = { {
4429 I(ModRM | Aligned16, em_fxsave),
4430 I(ModRM | Aligned16, em_fxrstor),
4431 N, N, N, N, N, GP(0, &pfx_0f_ae_7),
4433 N, N, N, N, N, N, N, N,
4436 static const struct gprefix pfx_0f_6f_0f_7f = {
4437 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
4440 static const struct instr_dual instr_dual_0f_2b = {
4444 static const struct gprefix pfx_0f_2b = {
4445 ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
4448 static const struct gprefix pfx_0f_10_0f_11 = {
4449 I(Unaligned, em_mov), I(Unaligned, em_mov), N, N,
4452 static const struct gprefix pfx_0f_28_0f_29 = {
4453 I(Aligned, em_mov), I(Aligned, em_mov), N, N,
4456 static const struct gprefix pfx_0f_e7 = {
4457 N, I(Sse, em_mov), N, N,
4460 static const struct escape escape_d9 = { {
4461 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstcw),
4464 N, N, N, N, N, N, N, N,
4466 N, N, N, N, N, N, N, N,
4468 N, N, N, N, N, N, N, N,
4470 N, N, N, N, N, N, N, N,
4472 N, N, N, N, N, N, N, N,
4474 N, N, N, N, N, N, N, N,
4476 N, N, N, N, N, N, N, N,
4478 N, N, N, N, N, N, N, N,
4481 static const struct escape escape_db = { {
4482 N, N, N, N, N, N, N, N,
4485 N, N, N, N, N, N, N, N,
4487 N, N, N, N, N, N, N, N,
4489 N, N, N, N, N, N, N, N,
4491 N, N, N, N, N, N, N, N,
4493 N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
4495 N, N, N, N, N, N, N, N,
4497 N, N, N, N, N, N, N, N,
4499 N, N, N, N, N, N, N, N,
4502 static const struct escape escape_dd = { {
4503 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstsw),
4506 N, N, N, N, N, N, N, N,
4508 N, N, N, N, N, N, N, N,
4510 N, N, N, N, N, N, N, N,
4512 N, N, N, N, N, N, N, N,
4514 N, N, N, N, N, N, N, N,
4516 N, N, N, N, N, N, N, N,
4518 N, N, N, N, N, N, N, N,
4520 N, N, N, N, N, N, N, N,
4523 static const struct instr_dual instr_dual_0f_c3 = {
4524 I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
4527 static const struct mode_dual mode_dual_63 = {
4528 N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd)
4531 static const struct opcode opcode_table[256] = {
4533 F6ALU(Lock, em_add),
4534 I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
4535 I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
4537 F6ALU(Lock | PageTable, em_or),
4538 I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
4541 F6ALU(Lock, em_adc),
4542 I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
4543 I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
4545 F6ALU(Lock, em_sbb),
4546 I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
4547 I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
4549 F6ALU(Lock | PageTable, em_and), N, N,
4551 F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
4553 F6ALU(Lock, em_xor), N, N,
4555 F6ALU(NoWrite, em_cmp), N, N,
4557 X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
4559 X8(I(SrcReg | Stack, em_push)),
4561 X8(I(DstReg | Stack, em_pop)),
4563 I(ImplicitOps | Stack | No64, em_pusha),
4564 I(ImplicitOps | Stack | No64, em_popa),
4565 N, MD(ModRM, &mode_dual_63),
4568 I(SrcImm | Mov | Stack, em_push),
4569 I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
4570 I(SrcImmByte | Mov | Stack, em_push),
4571 I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
4572 I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
4573 I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
4575 X16(D(SrcImmByte | NearBranch)),
4577 G(ByteOp | DstMem | SrcImm, group1),
4578 G(DstMem | SrcImm, group1),
4579 G(ByteOp | DstMem | SrcImm | No64, group1),
4580 G(DstMem | SrcImmByte, group1),
4581 F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
4582 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
4584 I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
4585 I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
4586 I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg),
4587 D(ModRM | SrcMem | NoAccess | DstReg),
4588 I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
4591 DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
4593 D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
4594 I(SrcImmFAddr | No64, em_call_far), N,
4595 II(ImplicitOps | Stack, em_pushf, pushf),
4596 II(ImplicitOps | Stack, em_popf, popf),
4597 I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
4599 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
4600 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
4601 I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov),
4602 F2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r),
4604 F2bv(DstAcc | SrcImm | NoWrite, em_test),
4605 I2bv(SrcAcc | DstDI | Mov | String, em_mov),
4606 I2bv(SrcSI | DstAcc | Mov | String, em_mov),
4607 F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
4609 X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
4611 X8(I(DstReg | SrcImm64 | Mov, em_mov)),
4613 G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
4614 I(ImplicitOps | NearBranch | SrcImmU16, em_ret_near_imm),
4615 I(ImplicitOps | NearBranch, em_ret),
4616 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
4617 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
4618 G(ByteOp, group11), G(0, group11),
4620 I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave),
4621 I(ImplicitOps | SrcImmU16, em_ret_far_imm),
4622 I(ImplicitOps, em_ret_far),
4623 D(ImplicitOps), DI(SrcImmByte, intn),
4624 D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
4626 G(Src2One | ByteOp, group2), G(Src2One, group2),
4627 G(Src2CL | ByteOp, group2), G(Src2CL, group2),
4628 I(DstAcc | SrcImmUByte | No64, em_aam),
4629 I(DstAcc | SrcImmUByte | No64, em_aad),
4630 F(DstAcc | ByteOp | No64, em_salc),
4631 I(DstAcc | SrcXLat | ByteOp, em_mov),
4633 N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
4635 X3(I(SrcImmByte | NearBranch, em_loop)),
4636 I(SrcImmByte | NearBranch, em_jcxz),
4637 I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in),
4638 I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
4640 I(SrcImm | NearBranch, em_call), D(SrcImm | ImplicitOps | NearBranch),
4641 I(SrcImmFAddr | No64, em_jmp_far),
4642 D(SrcImmByte | ImplicitOps | NearBranch),
4643 I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in),
4644 I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
4646 N, DI(ImplicitOps, icebp), N, N,
4647 DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
4648 G(ByteOp, group3), G(0, group3),
4650 D(ImplicitOps), D(ImplicitOps),
4651 I(ImplicitOps, em_cli), I(ImplicitOps, em_sti),
4652 D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
4655 static const struct opcode twobyte_table[256] = {
4657 G(0, group6), GD(0, &group7), N, N,
4658 N, I(ImplicitOps | EmulateOnUD, em_syscall),
4659 II(ImplicitOps | Priv, em_clts, clts), N,
4660 DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
4661 N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4663 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_10_0f_11),
4664 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_10_0f_11),
4666 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 4 * prefetch + 4 * reserved NOP */
4667 D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4668 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4669 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4670 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4671 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* NOP + 7 * reserved NOP */
4673 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_access),
4674 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
4675 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write,
4677 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write,
4680 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
4681 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
4682 N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b),
4685 II(ImplicitOps | Priv, em_wrmsr, wrmsr),
4686 IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
4687 II(ImplicitOps | Priv, em_rdmsr, rdmsr),
4688 IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
4689 I(ImplicitOps | EmulateOnUD, em_sysenter),
4690 I(ImplicitOps | Priv | EmulateOnUD, em_sysexit),
4692 N, N, N, N, N, N, N, N,
4694 X16(D(DstReg | SrcMem | ModRM)),
4696 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4701 N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
4706 N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
4708 X16(D(SrcImm | NearBranch)),
4710 X16(D(ByteOp | DstMem | SrcNone | ModRM | Mov)),
4712 I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
4713 II(ImplicitOps, em_cpuid, cpuid),
4714 F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
4715 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
4716 F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
4718 I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
4719 II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
4720 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
4721 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
4722 F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
4723 GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
4725 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg),
4726 I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
4727 F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
4728 I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
4729 I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
4730 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4734 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
4735 I(DstReg | SrcMem | ModRM, em_bsf_c),
4736 I(DstReg | SrcMem | ModRM, em_bsr_c),
4737 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4739 F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
4740 N, ID(0, &instr_dual_0f_c3),
4741 N, N, N, GD(0, &group9),
4743 X8(I(DstReg, em_bswap)),
4745 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4747 N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7),
4748 N, N, N, N, N, N, N, N,
4750 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
4753 static const struct instr_dual instr_dual_0f_38_f0 = {
4754 I(DstReg | SrcMem | Mov, em_movbe), N
4757 static const struct instr_dual instr_dual_0f_38_f1 = {
4758 I(DstMem | SrcReg | Mov, em_movbe), N
4761 static const struct gprefix three_byte_0f_38_f0 = {
4762 ID(0, &instr_dual_0f_38_f0), N, N, N
4765 static const struct gprefix three_byte_0f_38_f1 = {
4766 ID(0, &instr_dual_0f_38_f1), N, N, N
4770 * Insns below are selected by the prefix and indexed by the third opcode byte.
4773 static const struct opcode opcode_map_0f_38[256] = {
4775 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4777 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4779 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
4780 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
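/*
 * Only the MOVBE forms (0F 38 F0/F1) are wired up in the three-byte
 * map above; the remaining entries are left as N and fail emulation.
 */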
4801 static unsigned imm_size(struct x86_emulate_ctxt *ctxt)
4805 size = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4811 static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
4812 unsigned size, bool sign_extension)
4814 int rc = X86EMUL_CONTINUE;
4818 op->addr.mem.ea = ctxt->_eip;
4819 /* NB. Immediates are sign-extended as necessary. */
4820 switch (op->bytes) {
4822 op->val = insn_fetch(s8, ctxt);
4825 op->val = insn_fetch(s16, ctxt);
4828 op->val = insn_fetch(s32, ctxt);
4831 op->val = insn_fetch(s64, ctxt);
4834 if (!sign_extension) {
4835 switch (op->bytes) {
4843 op->val &= 0xffffffff;
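/*
 * decode_operand() turns one of the Op* selectors packed into ctxt->d
 * (e.g. (ctxt->d >> SrcShift) & OpMask, as passed by x86_decode_insn())
 * into a populated struct operand: type, width, register or effective
 * address, and a fetched value where appropriate.
 */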
4851 static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4854 int rc = X86EMUL_CONTINUE;
4858 decode_register_operand(ctxt, op);
4861 rc = decode_imm(ctxt, op, 1, false);
4864 ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4868 if (ctxt->d & BitOp)
4869 fetch_bit_operand(ctxt);
4870 op->orig_val = op->val;
4873 ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8;
4877 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4878 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4879 fetch_register_operand(op);
4880 op->orig_val = op->val;
4884 op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes;
4885 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4886 fetch_register_operand(op);
4887 op->orig_val = op->val;
4890 if (ctxt->d & ByteOp) {
4895 op->bytes = ctxt->op_bytes;
4896 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4897 fetch_register_operand(op);
4898 op->orig_val = op->val;
4902 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4904 register_address(ctxt, VCPU_REGS_RDI);
4905 op->addr.mem.seg = VCPU_SREG_ES;
4912 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4913 fetch_register_operand(op);
4918 op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
4921 rc = decode_imm(ctxt, op, 1, true);
4929 rc = decode_imm(ctxt, op, imm_size(ctxt), true);
4932 rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
4935 ctxt->memop.bytes = 1;
4936 if (ctxt->memop.type == OP_REG) {
4937 ctxt->memop.addr.reg = decode_register(ctxt,
4938 ctxt->modrm_rm, true);
4939 fetch_register_operand(&ctxt->memop);
4943 ctxt->memop.bytes = 2;
4946 ctxt->memop.bytes = 4;
4949 rc = decode_imm(ctxt, op, 2, false);
4952 rc = decode_imm(ctxt, op, imm_size(ctxt), false);
4956 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4958 register_address(ctxt, VCPU_REGS_RSI);
4959 op->addr.mem.seg = ctxt->seg_override;
4965 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4968 reg_read(ctxt, VCPU_REGS_RBX) +
4969 (reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
4970 op->addr.mem.seg = ctxt->seg_override;
4975 op->addr.mem.ea = ctxt->_eip;
4976 op->bytes = ctxt->op_bytes + 2;
4977 insn_fetch_arr(op->valptr, op->bytes, ctxt);
4980 ctxt->memop.bytes = ctxt->op_bytes + 2;
4984 op->val = VCPU_SREG_ES;
4988 op->val = VCPU_SREG_CS;
4992 op->val = VCPU_SREG_SS;
4996 op->val = VCPU_SREG_DS;
5000 op->val = VCPU_SREG_FS;
5004 op->val = VCPU_SREG_GS;
5007 /* Special instructions do their own operand decoding. */
5009 op->type = OP_NONE; /* Disable writeback. */
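/*
 * x86_decode_insn() is the front end of emulation: fetch legacy/REX
 * prefixes, look up the opcode in the tables above, resolve any
 * group/prefix/escape indirection and then decode the src, src2 and
 * dst operands.  A minimal caller sketch (not the exact KVM call site):
 *
 *	if (x86_decode_insn(ctxt, insn, insn_len, emulation_type) == EMULATION_OK)
 *		rc = x86_emulate_insn(ctxt);
 */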
5017 int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type)
5019 int rc = X86EMUL_CONTINUE;
5020 int mode = ctxt->mode;
5021 int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
5022 bool op_prefix = false;
5023 bool has_seg_override = false;
5024 struct opcode opcode;
5026 struct desc_struct desc;
5028 ctxt->memop.type = OP_NONE;
5029 ctxt->memopp = NULL;
5030 ctxt->_eip = ctxt->eip;
5031 ctxt->fetch.ptr = ctxt->fetch.data;
5032 ctxt->fetch.end = ctxt->fetch.data + insn_len;
5033 ctxt->opcode_len = 1;
5034 ctxt->intercept = x86_intercept_none;
5036 memcpy(ctxt->fetch.data, insn, insn_len);
5038 rc = __do_insn_fetch_bytes(ctxt, 1);
5039 if (rc != X86EMUL_CONTINUE)
5044 case X86EMUL_MODE_REAL:
5045 case X86EMUL_MODE_VM86:
5046 def_op_bytes = def_ad_bytes = 2;
5047 ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS);
5049 def_op_bytes = def_ad_bytes = 4;
5051 case X86EMUL_MODE_PROT16:
5052 def_op_bytes = def_ad_bytes = 2;
5054 case X86EMUL_MODE_PROT32:
5055 def_op_bytes = def_ad_bytes = 4;
5057 #ifdef CONFIG_X86_64
5058 case X86EMUL_MODE_PROT64:
5064 return EMULATION_FAILED;
5067 ctxt->op_bytes = def_op_bytes;
5068 ctxt->ad_bytes = def_ad_bytes;
5070 /* Legacy prefixes. */
5072 switch (ctxt->b = insn_fetch(u8, ctxt)) {
5073 case 0x66: /* operand-size override */
5075 /* switch between 2/4 bytes */
5076 ctxt->op_bytes = def_op_bytes ^ 6;
5078 case 0x67: /* address-size override */
5079 if (mode == X86EMUL_MODE_PROT64)
5080 /* switch between 4/8 bytes */
5081 ctxt->ad_bytes = def_ad_bytes ^ 12;
5083 /* switch between 2/4 bytes */
5084 ctxt->ad_bytes = def_ad_bytes ^ 6;
5086 case 0x26: /* ES override */
5087 has_seg_override = true;
5088 ctxt->seg_override = VCPU_SREG_ES;
5090 case 0x2e: /* CS override */
5091 has_seg_override = true;
5092 ctxt->seg_override = VCPU_SREG_CS;
5094 case 0x36: /* SS override */
5095 has_seg_override = true;
5096 ctxt->seg_override = VCPU_SREG_SS;
5098 case 0x3e: /* DS override */
5099 has_seg_override = true;
5100 ctxt->seg_override = VCPU_SREG_DS;
5102 case 0x64: /* FS override */
5103 has_seg_override = true;
5104 ctxt->seg_override = VCPU_SREG_FS;
5106 case 0x65: /* GS override */
5107 has_seg_override = true;
5108 ctxt->seg_override = VCPU_SREG_GS;
5110 case 0x40 ... 0x4f: /* REX */
5111 if (mode != X86EMUL_MODE_PROT64)
5113 ctxt->rex_prefix = ctxt->b;
5115 case 0xf0: /* LOCK */
5116 ctxt->lock_prefix = 1;
5118 case 0xf2: /* REPNE/REPNZ */
5119 case 0xf3: /* REP/REPE/REPZ */
5120 ctxt->rep_prefix = ctxt->b;
5126 /* Any legacy prefix after a REX prefix nullifies its effect. */
5128 ctxt->rex_prefix = 0;
5134 if (ctxt->rex_prefix & 8)
5135 ctxt->op_bytes = 8; /* REX.W */
5137 /* Opcode byte(s). */
5138 opcode = opcode_table[ctxt->b];
5139 /* Two-byte opcode? */
5140 if (ctxt->b == 0x0f) {
5141 ctxt->opcode_len = 2;
5142 ctxt->b = insn_fetch(u8, ctxt);
5143 opcode = twobyte_table[ctxt->b];
5145 /* 0F_38 opcode map */
5146 if (ctxt->b == 0x38) {
5147 ctxt->opcode_len = 3;
5148 ctxt->b = insn_fetch(u8, ctxt);
5149 opcode = opcode_map_0f_38[ctxt->b];
5152 ctxt->d = opcode.flags;
5154 if (ctxt->d & ModRM)
5155 ctxt->modrm = insn_fetch(u8, ctxt);
5157 /* VEX-prefixed instructions are not implemented */
5158 if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
5159 (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) {
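/*
 * Resolve table indirections: Group entries are indexed by the ModRM
 * reg field, GroupDual additionally by the mod bits, RMExt by the rm
 * field, Prefix entries by the mandatory SIMD prefix (none/66/F2/F3),
 * Escape entries cover the x87 opcodes, and InstrDual/ModeDual pick
 * one of two forms based on the mod bits or the CPU mode.
 */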
5163 while (ctxt->d & GroupMask) {
5164 switch (ctxt->d & GroupMask) {
5166 goffset = (ctxt->modrm >> 3) & 7;
5167 opcode = opcode.u.group[goffset];
5170 goffset = (ctxt->modrm >> 3) & 7;
5171 if ((ctxt->modrm >> 6) == 3)
5172 opcode = opcode.u.gdual->mod3[goffset];
5174 opcode = opcode.u.gdual->mod012[goffset];
5177 goffset = ctxt->modrm & 7;
5178 opcode = opcode.u.group[goffset];
5181 if (ctxt->rep_prefix && op_prefix)
5182 return EMULATION_FAILED;
5183 simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix;
5184 switch (simd_prefix) {
5185 case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
5186 case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
5187 case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
5188 case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
5192 if (ctxt->modrm > 0xbf) {
5193 size_t size = ARRAY_SIZE(opcode.u.esc->high);
5194 u32 index = array_index_nospec(
5195 ctxt->modrm - 0xc0, size);
5197 opcode = opcode.u.esc->high[index];
5199 opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
5203 if ((ctxt->modrm >> 6) == 3)
5204 opcode = opcode.u.idual->mod3;
5206 opcode = opcode.u.idual->mod012;
5209 if (ctxt->mode == X86EMUL_MODE_PROT64)
5210 opcode = opcode.u.mdual->mode64;
5212 opcode = opcode.u.mdual->mode32;
5215 return EMULATION_FAILED;
5218 ctxt->d &= ~(u64)GroupMask;
5219 ctxt->d |= opcode.flags;
5224 return EMULATION_FAILED;
5226 ctxt->execute = opcode.u.execute;
5228 if (unlikely(emulation_type & EMULTYPE_TRAP_UD) &&
5229 likely(!(ctxt->d & EmulateOnUD)))
5230 return EMULATION_FAILED;
5232 if (unlikely(ctxt->d &
5233 (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
5236 * These are copied unconditionally here, and checked unconditionally
5237 * in x86_emulate_insn.
5239 ctxt->check_perm = opcode.check_perm;
5240 ctxt->intercept = opcode.intercept;
5242 if (ctxt->d & NotImpl)
5243 return EMULATION_FAILED;
5245 if (mode == X86EMUL_MODE_PROT64) {
5246 if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
5248 else if (ctxt->d & NearBranch)
5252 if (ctxt->d & Op3264) {
5253 if (mode == X86EMUL_MODE_PROT64)
5259 if ((ctxt->d & No16) && ctxt->op_bytes == 2)
5263 ctxt->op_bytes = 16;
5264 else if (ctxt->d & Mmx)
5268 /* ModRM and SIB bytes. */
5269 if (ctxt->d & ModRM) {
5270 rc = decode_modrm(ctxt, &ctxt->memop);
5271 if (!has_seg_override) {
5272 has_seg_override = true;
5273 ctxt->seg_override = ctxt->modrm_seg;
5275 } else if (ctxt->d & MemAbs)
5276 rc = decode_abs(ctxt, &ctxt->memop);
5277 if (rc != X86EMUL_CONTINUE)
5280 if (!has_seg_override)
5281 ctxt->seg_override = VCPU_SREG_DS;
5283 ctxt->memop.addr.mem.seg = ctxt->seg_override;
5286 * Decode and fetch the source operand: register, memory or immediate.
5289 rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask);
5290 if (rc != X86EMUL_CONTINUE)
5294 * Decode and fetch the second source operand: register, memory or immediate.
5297 rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask);
5298 if (rc != X86EMUL_CONTINUE)
5301 /* Decode and fetch the destination operand: register or memory. */
5302 rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
5304 if (ctxt->rip_relative && likely(ctxt->memopp))
5305 ctxt->memopp->addr.mem.ea = address_mask(ctxt,
5306 ctxt->memopp->addr.mem.ea + ctxt->_eip);
5309 if (rc == X86EMUL_PROPAGATE_FAULT)
5310 ctxt->have_exception = true;
5311 return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
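/*
 * The PageTable flag tags instructions that guests commonly use to
 * update page tables (locked bit ops, cmpxchg, plain stores); the MMU
 * side of KVM is assumed to use this helper when deciding how to treat
 * emulated writes to shadowed page tables.
 */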
5314 bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
5316 return ctxt->d & PageTable;
5319 static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
5321 /* The second termination condition only applies to REPE
5322 * and REPNE. If the repeat string operation prefix is
5323 * REPE/REPZ or REPNE/REPNZ, check the corresponding
5324 * termination condition:
5325 * - if REPE/REPZ and ZF = 0 then done
5326 * - if REPNE/REPNZ and ZF = 1 then done
5328 if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
5329 (ctxt->b == 0xae) || (ctxt->b == 0xaf))
5330 && (((ctxt->rep_prefix == REPE_PREFIX) &&
5331 ((ctxt->eflags & X86_EFLAGS_ZF) == 0))
5332 || ((ctxt->rep_prefix == REPNE_PREFIX) &&
5333 ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
5339 static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
5344 rc = asm_safe("fwait");
5347 if (unlikely(rc != X86EMUL_CONTINUE))
5348 return emulate_exception(ctxt, MF_VECTOR, 0, false);
5350 return X86EMUL_CONTINUE;
5353 static void fetch_possible_mmx_operand(struct operand *op)
5355 if (op->type == OP_MM)
5356 kvm_read_mmx_reg(op->addr.mm, &op->mm_val);
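/*
 * fastop() dispatches to a table of small asm stubs.  Per the asm
 * constraints below, the destination value travels in RAX, the source
 * in RDX, src2 in RCX, the flags image in RDI and the stub pointer in
 * RSI; for non-ByteOp forms the stub for the right operand size is
 * selected by offsetting fop.  A stub that returns a NULL fop signals
 * an exception, which is surfaced as emulate_de().
 */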
5359 static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop)
5361 ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
5363 if (!(ctxt->d & ByteOp))
5364 fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
5366 asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
5367 : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
5368 [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
5369 : "c"(ctxt->src2.val));
5371 ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
5372 if (!fop) /* exception is returned in fop variable */
5373 return emulate_de(ctxt);
5374 return X86EMUL_CONTINUE;
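/*
 * init_decode_cache() relies on the per-instruction decode state being
 * laid out contiguously in struct x86_emulate_ctxt: everything from
 * rip_relative up to (but not including) modrm is cleared with a single
 * memset, and the read caches are reset separately.
 */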
5377 void init_decode_cache(struct x86_emulate_ctxt *ctxt)
5379 memset(&ctxt->rip_relative, 0,
5380 (void *)&ctxt->modrm - (void *)&ctxt->rip_relative);
5382 ctxt->io_read.pos = 0;
5383 ctxt->io_read.end = 0;
5384 ctxt->mem_read.end = 0;
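/*
 * x86_emulate_insn() executes an already-decoded instruction: validate
 * LOCK/privilege/mode restrictions, run intercept and permission
 * checks, fetch memory operands, invoke the ->execute or fastop
 * handler (falling back to the opcode switches below), write back the
 * results and advance RIP, restarting as needed for REP string
 * instructions.
 */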
5387 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
5389 const struct x86_emulate_ops *ops = ctxt->ops;
5390 int rc = X86EMUL_CONTINUE;
5391 int saved_dst_type = ctxt->dst.type;
5392 unsigned emul_flags;
5394 ctxt->mem_read.pos = 0;
5396 /* LOCK prefix is allowed only with some instructions */
5397 if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
5398 rc = emulate_ud(ctxt);
5402 if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) {
5403 rc = emulate_ud(ctxt);
5407 emul_flags = ctxt->ops->get_hflags(ctxt);
5408 if (unlikely(ctxt->d &
5409 (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
5410 if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
5411 (ctxt->d & Undefined)) {
5412 rc = emulate_ud(ctxt);
5416 if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
5417 || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
5418 rc = emulate_ud(ctxt);
5422 if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
5423 rc = emulate_nm(ctxt);
5427 if (ctxt->d & Mmx) {
5428 rc = flush_pending_x87_faults(ctxt);
5429 if (rc != X86EMUL_CONTINUE)
5432 * Now that we know the FPU is exception safe, we can fetch operands from it.
5435 fetch_possible_mmx_operand(&ctxt->src);
5436 fetch_possible_mmx_operand(&ctxt->src2);
5437 if (!(ctxt->d & Mov))
5438 fetch_possible_mmx_operand(&ctxt->dst);
5441 if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) {
5442 rc = emulator_check_intercept(ctxt, ctxt->intercept,
5443 X86_ICPT_PRE_EXCEPT);
5444 if (rc != X86EMUL_CONTINUE)
5448 /* Instruction can only be executed in protected mode */
5449 if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
5450 rc = emulate_ud(ctxt);
5454 /* Privileged instructions can be executed only at CPL 0 */
5455 if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
5456 if (ctxt->d & PrivUD)
5457 rc = emulate_ud(ctxt);
5459 rc = emulate_gp(ctxt, 0);
5463 /* Do instruction specific permission checks */
5464 if (ctxt->d & CheckPerm) {
5465 rc = ctxt->check_perm(ctxt);
5466 if (rc != X86EMUL_CONTINUE)
5470 if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
5471 rc = emulator_check_intercept(ctxt, ctxt->intercept,
5472 X86_ICPT_POST_EXCEPT);
5473 if (rc != X86EMUL_CONTINUE)
5477 if (ctxt->rep_prefix && (ctxt->d & String)) {
5478 /* All REP prefixes have the same first termination condition */
5479 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
5480 string_registers_quirk(ctxt);
5481 ctxt->eip = ctxt->_eip;
5482 ctxt->eflags &= ~X86_EFLAGS_RF;
5488 if ((ctxt->src.type == OP_MEM) && !(ctxt->d & NoAccess)) {
5489 rc = segmented_read(ctxt, ctxt->src.addr.mem,
5490 ctxt->src.valptr, ctxt->src.bytes);
5491 if (rc != X86EMUL_CONTINUE)
5493 ctxt->src.orig_val64 = ctxt->src.val64;
5496 if (ctxt->src2.type == OP_MEM) {
5497 rc = segmented_read(ctxt, ctxt->src2.addr.mem,
5498 &ctxt->src2.val, ctxt->src2.bytes);
5499 if (rc != X86EMUL_CONTINUE)
5503 if ((ctxt->d & DstMask) == ImplicitOps)
5507 if ((ctxt->dst.type == OP_MEM) && !(ctxt->d & Mov)) {
5508 /* optimisation - avoid slow emulated read if Mov */
5509 rc = segmented_read(ctxt, ctxt->dst.addr.mem,
5510 &ctxt->dst.val, ctxt->dst.bytes);
5511 if (rc != X86EMUL_CONTINUE) {
5512 if (!(ctxt->d & NoWrite) &&
5513 rc == X86EMUL_PROPAGATE_FAULT &&
5514 ctxt->exception.vector == PF_VECTOR)
5515 ctxt->exception.error_code |= PFERR_WRITE_MASK;
5519 /* Copy full 64-bit value for CMPXCHG8B. */
5520 ctxt->dst.orig_val64 = ctxt->dst.val64;
5524 if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
5525 rc = emulator_check_intercept(ctxt, ctxt->intercept,
5526 X86_ICPT_POST_MEMACCESS);
5527 if (rc != X86EMUL_CONTINUE)
5531 if (ctxt->rep_prefix && (ctxt->d & String))
5532 ctxt->eflags |= X86_EFLAGS_RF;
5534 ctxt->eflags &= ~X86_EFLAGS_RF;
5536 if (ctxt->execute) {
5537 if (ctxt->d & Fastop)
5538 rc = fastop(ctxt, ctxt->fop);
5540 rc = ctxt->execute(ctxt);
5541 if (rc != X86EMUL_CONTINUE)
5546 if (ctxt->opcode_len == 2)
5548 else if (ctxt->opcode_len == 3)
5549 goto threebyte_insn;
5552 case 0x70 ... 0x7f: /* jcc (short) */
5553 if (test_cc(ctxt->b, ctxt->eflags))
5554 rc = jmp_rel(ctxt, ctxt->src.val);
5556 case 0x8d: /* lea r16/r32, m */
5557 ctxt->dst.val = ctxt->src.addr.mem.ea;
5559 case 0x90 ... 0x97: /* nop / xchg reg, rax */
5560 if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
5561 ctxt->dst.type = OP_NONE;
5565 case 0x98: /* cbw/cwde/cdqe */
5566 switch (ctxt->op_bytes) {
5567 case 2: ctxt->dst.val = (s8)ctxt->dst.val; break;
5568 case 4: ctxt->dst.val = (s16)ctxt->dst.val; break;
5569 case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
5572 case 0xcc: /* int3 */
5573 rc = emulate_int(ctxt, 3);
5575 case 0xcd: /* int n */
5576 rc = emulate_int(ctxt, ctxt->src.val);
5578 case 0xce: /* into */
5579 if (ctxt->eflags & X86_EFLAGS_OF)
5580 rc = emulate_int(ctxt, 4);
5582 case 0xe9: /* jmp rel */
5583 case 0xeb: /* jmp rel short */
5584 rc = jmp_rel(ctxt, ctxt->src.val);
5585 ctxt->dst.type = OP_NONE; /* Disable writeback. */
5587 case 0xf4: /* hlt */
5588 ctxt->ops->halt(ctxt);
5590 case 0xf5: /* cmc */
5591 /* complement the carry flag in EFLAGS */
5592 ctxt->eflags ^= X86_EFLAGS_CF;
5594 case 0xf8: /* clc */
5595 ctxt->eflags &= ~X86_EFLAGS_CF;
5597 case 0xf9: /* stc */
5598 ctxt->eflags |= X86_EFLAGS_CF;
5600 case 0xfc: /* cld */
5601 ctxt->eflags &= ~X86_EFLAGS_DF;
5603 case 0xfd: /* std */
5604 ctxt->eflags |= X86_EFLAGS_DF;
5607 goto cannot_emulate;
5610 if (rc != X86EMUL_CONTINUE)
5614 if (ctxt->d & SrcWrite) {
5615 BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
5616 rc = writeback(ctxt, &ctxt->src);
5617 if (rc != X86EMUL_CONTINUE)
5620 if (!(ctxt->d & NoWrite)) {
5621 rc = writeback(ctxt, &ctxt->dst);
5622 if (rc != X86EMUL_CONTINUE)
5627 * restore dst type in case the decode is reused
5628 * (happens for string instructions).
5630 ctxt->dst.type = saved_dst_type;
5632 if ((ctxt->d & SrcMask) == SrcSI)
5633 string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);
5635 if ((ctxt->d & DstMask) == DstDI)
5636 string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);
5638 if (ctxt->rep_prefix && (ctxt->d & String)) {
5640 struct read_cache *r = &ctxt->io_read;
5641 if ((ctxt->d & SrcMask) == SrcSI)
5642 count = ctxt->src.count;
5644 count = ctxt->dst.count;
5645 register_address_increment(ctxt, VCPU_REGS_RCX, -count);
5647 if (!string_insn_completed(ctxt)) {
5649 * Re-enter the guest when the PIO read-ahead buffer is empty
5650 * or, if it is not used, after every 1024 iterations.
5652 if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
5653 (r->end == 0 || r->end != r->pos)) {
5655 * Reset the read cache. This usually happens before
5656 * decode, but since the instruction is restarted
5657 * we have to do it here.
5659 ctxt->mem_read.end = 0;
5660 writeback_registers(ctxt);
5661 return EMULATION_RESTART;
5663 goto done; /* skip rip writeback */
5665 ctxt->eflags &= ~X86_EFLAGS_RF;
5668 ctxt->eip = ctxt->_eip;
5669 if (ctxt->mode != X86EMUL_MODE_PROT64)
5670 ctxt->eip = (u32)ctxt->_eip;
5673 if (rc == X86EMUL_PROPAGATE_FAULT) {
5674 WARN_ON(ctxt->exception.vector > 0x1f);
5675 ctxt->have_exception = true;
5677 if (rc == X86EMUL_INTERCEPTED)
5678 return EMULATION_INTERCEPTED;
5680 if (rc == X86EMUL_CONTINUE)
5681 writeback_registers(ctxt);
5683 return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
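/*
 * Fallback for two-byte opcodes without an ->execute handler:
 * wbinvd/invd, the prefetch/nop groups, mov from CR/DR, cmov, jcc,
 * setcc and movzx/movsx are handled inline below.
 */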
5687 case 0x09: /* wbinvd */
5688 (ctxt->ops->wbinvd)(ctxt);
5690 case 0x08: /* invd */
5691 case 0x0d: /* GrpP (prefetch) */
5692 case 0x18: /* Grp16 (prefetch/nop) */
5693 case 0x1f: /* nop */
5695 case 0x20: /* mov cr, reg */
5696 ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
5698 case 0x21: /* mov from dr to reg */
5699 ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
5701 case 0x40 ... 0x4f: /* cmov */
5702 if (test_cc(ctxt->b, ctxt->eflags))
5703 ctxt->dst.val = ctxt->src.val;
5704 else if (ctxt->op_bytes != 4)
5705 ctxt->dst.type = OP_NONE; /* no writeback */
5707 case 0x80 ... 0x8f: /* jnz rel, etc*/
5708 if (test_cc(ctxt->b, ctxt->eflags))
5709 rc = jmp_rel(ctxt, ctxt->src.val);
5711 case 0x90 ... 0x9f: /* setcc r/m8 */
5712 ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
5714 case 0xb6 ... 0xb7: /* movzx */
5715 ctxt->dst.bytes = ctxt->op_bytes;
5716 ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
5717 : (u16) ctxt->src.val;
5719 case 0xbe ... 0xbf: /* movsx */
5720 ctxt->dst.bytes = ctxt->op_bytes;
5721 ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
5722 (s16) ctxt->src.val;
5725 goto cannot_emulate;
5730 if (rc != X86EMUL_CONTINUE)
5736 return EMULATION_FAILED;
5739 void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt)
5741 invalidate_registers(ctxt);
5744 void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
5746 writeback_registers(ctxt);
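/*
 * A cached guest physical address may only be reused if the linear
 * address cannot change: REP string instructions advance RSI/RDI on
 * every iteration and TwoMemOp instructions reference two distinct
 * memory operands, so both cases are declined here.
 */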
5749 bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt)
5751 if (ctxt->rep_prefix && (ctxt->d & String))
5754 if (ctxt->d & TwoMemOp)