/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "tcg-be-ldst.h"
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
    "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",

static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64

static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    /* 32-bit mode uses a stack-based calling convention (GCC default). */

static const int tcg_target_call_oarg_regs[] = {
#if TCG_TARGET_REG_BITS == 32
/* Registers used with L constraint, which are the first argument
   registers on x86_64, and two random call-clobbered registers on
   32-bit. */
#if TCG_TARGET_REG_BITS == 64
# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
#else
# define TCG_REG_L0 TCG_REG_EAX
# define TCG_REG_L1 TCG_REG_EDX
#endif
/* For 32-bit, we are going to attempt to determine at runtime whether cmov
   is available.  However, the host compiler must supply <cpuid.h>, as we're
   not going to go so far as our own inline assembly.  */
#if TCG_TARGET_REG_BITS == 64
#elif defined(CONFIG_CPUID_H)
static bool have_cmov;
static uint8_t *tb_ret_addr;

static void patch_reloc(uint8_t *code_ptr, int type,
                        intptr_t value, intptr_t addend)
        value -= (uintptr_t)code_ptr;
        if (value != (int32_t)value) {
        *(uint32_t *)code_ptr = value;
        value -= (uintptr_t)code_ptr;
        if (value != (int8_t)value) {
        *(uint8_t *)code_ptr = value;
/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
            tcg_regset_set32(ct->u.regs, 0, 0xf);
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, 0xf);
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
            tcg_regset_set32(ct->u.regs, 0, 0xff);

        /* qemu_ld/st address constraint */
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);

        ct->ct |= TCG_CT_CONST_S32;
        ct->ct |= TCG_CT_CONST_U32;
/* test if a constant matches the constraint */
static inline int tcg_target_const_match(tcg_target_long val,
                                         const TCGArgConstraint *arg_ct)
    if (ct & TCG_CT_CONST) {
    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {

#if TCG_TARGET_REG_BITS == 64
# define LOWREGMASK(x)  ((x) & 7)
#else
# define LOWREGMASK(x)  (x)
#endif

#define P_EXT      0x100      /* 0x0f opcode prefix */
#define P_DATA16   0x200      /* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32  0x400      /* 0x67 opcode prefix */
# define P_REXW    0x800      /* Set REX.W = 1 */
# define P_REXB_R  0x1000     /* REG field as byte register */
# define P_REXB_RM 0x2000     /* R/M field as byte register */
# define P_GS      0x4000     /* gs segment override */
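
/* Added note (illustration, not from the original source): these P_* flags
   occupy bits above the low opcode byte and are stripped before emission.
   E.g. OPC_MOVSLQ below is 0x63 | P_REXW; tcg_out_opc() turns P_REXW into
   the REX.W bit and ultimately emits only the low byte 0x63. */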
#define OPC_ARITH_EvIz  (0x81)
#define OPC_ARITH_EvIb  (0x83)
#define OPC_ARITH_GvEv  (0x03)          /* ... plus (ARITH_FOO << 3) */
#define OPC_ADD_GvEv    (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP       (0xc8 | P_EXT)
#define OPC_CALL_Jz     (0xe8)
#define OPC_CMOVCC      (0x40 | P_EXT)  /* ... plus condition code */
#define OPC_CMP_GvEv    (OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32     (0x48)
#define OPC_IMUL_GvEv   (0xaf | P_EXT)
#define OPC_IMUL_GvEvIb (0x6b)
#define OPC_IMUL_GvEvIz (0x69)
#define OPC_INC_r32     (0x40)
#define OPC_JCC_long    (0x80 | P_EXT)  /* ... plus condition code */
#define OPC_JCC_short   (0x70)          /* ... plus condition code */
#define OPC_JMP_long    (0xe9)
#define OPC_JMP_short   (0xeb)
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv   (0x88)          /* stores, more or less */
#define OPC_MOVL_EvGv   (0x89)          /* stores, more or less */
#define OPC_MOVL_GvEv   (0x8b)          /* loads, more or less */
#define OPC_MOVB_EvIz   (0xc6)
#define OPC_MOVL_EvIz   (0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVSBL      (0xbe | P_EXT)
#define OPC_MOVSWL      (0xbf | P_EXT)
#define OPC_MOVSLQ      (0x63 | P_REXW)
#define OPC_MOVZBL      (0xb6 | P_EXT)
#define OPC_MOVZWL      (0xb7 | P_EXT)
#define OPC_POP_r32     (0x58)
#define OPC_PUSH_r32    (0x50)
#define OPC_PUSH_Iv     (0x68)
#define OPC_PUSH_Ib     (0x6a)
#define OPC_RET         (0xc3)
#define OPC_SETCC       (0x90 | P_EXT | P_REXB_RM)  /* ... plus cc */
#define OPC_SHIFT_1     (0xd1)
#define OPC_SHIFT_Ib    (0xc1)
#define OPC_SHIFT_cl    (0xd3)
#define OPC_TESTL       (0x85)
#define OPC_XCHG_ax_r32 (0x90)

#define OPC_GRP3_Ev     (0xf7)
#define OPC_GRP5        (0xff)

/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH.  */

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3.  */

/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3.  */

/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */
#define EXT5_INC_Ev     0
#define EXT5_DEC_Ev     1
#define EXT5_CALLN_Ev   2
#define EXT5_JMPN_Ev    4
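
/* Worked example (added illustration): an indirect call "call *%eax" is
   encoded by placing the /2 extension in the ModRM reg field, i.e.
       tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, TCG_REG_EAX);
   emits the two bytes 0xff 0xd0. */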
/* Condition codes to be added to OPC_JCC_{long,short}.  */

static const uint8_t tcg_cond_to_jcc[] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
    if (opc & P_DATA16) {
        /* We should never be asking for both 16 and 64-bit operation.  */
        assert((opc & P_REXW) == 0);
    if (opc & P_ADDR32) {

    rex |= (opc & P_REXW) >> 8;         /* REX.W */
    rex |= (r & 8) >> 1;                /* REX.R */
    rex |= (x & 8) >> 2;                /* REX.X */
    rex |= (rm & 8) >> 3;               /* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  Note that the values
       that are ORed in merely indicate that the REX byte must be present;
       those bits get discarded in output.  */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

        tcg_out8(s, (uint8_t)(rex | 0x40));
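        /* Worked example (added illustration): for "addq %r8, %rax", via
           tcg_out_modrm(s, OPC_ADD_GvEv + P_REXW, TCG_REG_RAX, TCG_REG_R8),
           we arrive here with W=1 from P_REXW and B=1 from (rm & 8), so
           the byte emitted above is 0x40 | 0x08 | 0x01 = 0x49. */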
static void tcg_out_opc(TCGContext *s, int opc)
    if (opc & P_DATA16) {

/* Discard the register arguments to tcg_out_opc early, so as not to penalize
   the 32-bit compilation paths.  This method works with all versions of gcc,
   whereas relying on optimization may not be able to exclude them.  */
#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
#endif

static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
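
/* Added illustration: the register-direct ModRM byte built above packs
   mod=11, reg and r/m; e.g. tcg_out_modrm(s, OPC_MOVL_GvEv, TCG_REG_ECX,
   TCG_REG_EBX) emits 0x8b 0xcb, i.e. "movl %ebx, %ecx". */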
/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
   We handle either RM or INDEX missing with a negative value.  In 64-bit
   mode for absolute addresses, ~RM is the size of the immediate operand
   that will follow the instruction.  */

static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift, intptr_t offset)
    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute addressing encoding.  */
            intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
            intptr_t disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger than
               rip-relative addressing.  */
            if (offset == (int32_t)offset) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);

            /* ??? The memory isn't directly addressable.  */

        /* Absolute address.  */
        tcg_out_opc(s, opc, r, 0, 0);
        tcg_out8(s, (r << 3) | 5);
        tcg_out32(s, offset);

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing.  */
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
    } else if (offset == (int8_t)offset) {

    /* Use a single byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two byte form.  */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single byte MODRM format.  */
        tcg_out_opc(s, opc, r, rm, 0);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
        /* Two byte MODRM+SIB format.  */

        /* Note that the encoding that would place %esp into the index
           field indicates no index register.  In 64-bit mode, the REX.X
           bit counts, so %r12 can be used as the index.  */
        assert(index != TCG_REG_ESP);

        tcg_out_opc(s, opc, r, rm, index);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
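        /* Added illustration: the SIB byte packs scale(2) | index(3) |
           base(3); e.g. shift=2, index=%eax, rm=%esp gives
           (2 << 6) | (0 << 3) | 4 = 0x84, addressing (%esp,%eax,4). */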
    } else if (len == 4) {
        tcg_out32(s, offset);

/* A simplification of the above with no index or shift.  */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, intptr_t offset)
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);

/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
    /* Propagate an opcode prefix, such as P_REXW.  */
    int ext = subop & ~0x7;

    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);

static inline void tcg_out_mov(TCGContext *s, TCGType type,
                               TCGReg ret, TCGReg arg)
        int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
        tcg_out_modrm(s, opc, ret, arg);

static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg)
    tcg_target_long diff;

        tgen_arithr(s, ARITH_XOR, ret, ret);
    if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
    if (arg == (int32_t)arg) {
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);

    /* Try a 7 byte pc-relative lea before the 10 byte movq.  */
    diff = arg - ((uintptr_t)s->code_ptr + 7);
    if (diff == (int32_t)diff) {
        tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
        tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);

    tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);

static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
    } else if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);

static inline void tcg_out_push(TCGContext *s, int reg)
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);

static inline void tcg_out_pop(TCGContext *s, int reg)
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                              TCGReg arg1, intptr_t arg2)
    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, ret, arg1, arg2);

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, arg, arg1, arg2);

static inline void tcg_out_sti(TCGContext *s, TCGType type, TCGReg base,
                               tcg_target_long ofs, tcg_target_long val)
    int opc = OPC_MOVL_EvIz + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, 0, base, ofs);

static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
    /* Propagate an opcode prefix, such as P_DATA16.  */
    int ext = subopc & ~0x7;
        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);

static inline void tcg_out_bswap32(TCGContext *s, int reg)
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);

static inline void tcg_out_rolw_8(TCGContext *s, int reg)
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);

static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);

static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);

static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);

static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);

static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
    /* 32-bit mov zero extends.  */
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);

static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);

static inline void tcg_out_bswap64(TCGContext *s, int reg)
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);

static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
    if (TCG_TARGET_REG_BITS == 64) {
    /* ??? While INC is 2 bytes shorter than ADDL $1, it also induces
       partial-flags-update stalls on Pentium 4 and is not recommended
       by current Intel optimization manuals.  */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding.  */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);

    if (c == ARITH_AND) {
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation.  */
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);

    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);

static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);

/* Use SMALL != 0 to force a short forward branch.  */
static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
    TCGLabel *l = &s->labels[label_index];

        val = l->u.value - (intptr_t)s->code_ptr;
        if ((int8_t)val1 == val1) {
                tcg_out8(s, OPC_JMP_short);
                tcg_out8(s, OPC_JCC_short + opc);
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);
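                /* Added note: the rel32 displacement is relative to the end
                   of the instruction; "jmp rel32" (e9) is 5 bytes and
                   "jcc rel32" (0f 8x) is 6, hence the -5 and -6 above. */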
            tcg_out8(s, OPC_JMP_short);
            tcg_out8(s, OPC_JCC_short + opc);
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
            tcg_out8(s, OPC_JMP_long);
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);

static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);

static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
/* XXX: we implement it at the target level to avoid having to
   handle cross-basic-block temporaries */
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
    label_next = gen_new_label();
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
    tcg_out_label(s, label_next, s->code_ptr);

static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);

static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
    int label_true, label_over;

    memcpy(new_args, args+1, 5*sizeof(TCGArg));

    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* When the destination overlaps with one of the argument
           registers, don't do anything tricky.  */
        label_true = gen_new_label();
        label_over = gen_new_label();

        new_args[5] = label_true;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, s->code_ptr);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, s->code_ptr);
        /* When the destination does not overlap one of the arguments,
           clear the destination first, jump if cond false, and emit an
           increment in the true case.  This results in smaller code.  */
        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);

        label_over = gen_new_label();
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_over;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, s->code_ptr);

static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
    tcg_out_cmp(s, c1, c2, const_c2, 0);
        tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
        int over = gen_new_label();
        tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
        tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
        tcg_out_label(s, over, s->code_ptr);

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
    tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
    tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);

static void tcg_out_branch(TCGContext *s, int call, uintptr_t dest)
    intptr_t disp = dest - (intptr_t)s->code_ptr - 5;

    if (disp == (int32_t)disp) {
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
        tcg_out_modrm(s, OPC_GRP5,
                      call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);

static inline void tcg_out_calli(TCGContext *s, uintptr_t dest)
    tcg_out_branch(s, 1, dest);

static void tcg_out_jmp(TCGContext *s, uintptr_t dest)
    tcg_out_branch(s, 0, dest);
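
/* Added illustration: when the destination is within +/- 2GB, the 5-byte
   "e8/e9 rel32" form above is used; otherwise the address is materialized
   in a scratch register, e.g.
       49 ba <imm64>   movabsq $dest, %r10
       41 ff d2        call *%r10
   TCG_REG_R10 is a reasonable scratch here since it is call-clobbered and
   not an argument register. */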
#if defined(CONFIG_SOFTMMU)
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     int mmu_idx, uintptr_t ra)
 */
static const void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
 */
static const void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};

/* Perform the TLB load and compare.

   ADDRLO and ADDRHI contain the low and high part of the address.

   MEM_INDEX and S_BITS are the memory context and log2 size of the load.

   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
   This should be offsetof addr_read or addr_write.

   LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
   positions of the displacements of forward jumps to the TLB miss case.

   Second argument register is loaded with the low part of the address.
   In the TLB hit case, it has been adjusted as indicated by the TLB
   and so is a host address.  In the TLB miss case, it continues to
   hold a guest address.

   First argument register is clobbered.  */
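
/* A sketch of the fast path emitted below (added illustration, assuming a
   64-bit guest on a 64-bit host):
       mov   addrlo, %r0
       mov   addrlo, %r1
       shr   $(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), %r0
       and   $(TARGET_PAGE_MASK | ((1 << s_bits) - 1)), %r1
       and   $((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS), %r0
       lea   tlb_table_ofs+which(%areg0, %r0), %r0
       cmp   0(%r0), %r1
       mov   addrlo, %r1
       jne   miss
       add   addend_ofs-which(%r0), %r1
   where %r0/%r1 stand for TCG_REG_L0/TCG_REG_L1. */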
static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                                    int mem_index, TCGMemOp s_bits,
                                    uint8_t **label_ptr, int which)
    const TCGReg r0 = TCG_REG_L0;
    const TCGReg r1 = TCG_REG_L1;
    TCGType ttype = TCG_TYPE_I32;
    TCGType htype = TCG_TYPE_I32;
    int trexw = 0, hrexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        if (TARGET_LONG_BITS == 64) {
            ttype = TCG_TYPE_I64;
        if (TCG_TYPE_PTR == TCG_TYPE_I64) {
            htype = TCG_TYPE_I64;

    tcg_out_mov(s, htype, r0, addrlo);
    tcg_out_mov(s, ttype, r1, addrlo);

    tcg_out_shifti(s, SHIFT_SHR + hrexw, r0,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    tgen_arithi(s, ARITH_AND + trexw, r1,
                TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
    tgen_arithi(s, ARITH_AND + hrexw, r0,
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);

    tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
                             offsetof(CPUArchState, tlb_table[mem_index][0])

    tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);
    /* Prepare for both the fast path add of the tlb addend, and the slow
       path function argument setup.  There are two cases worth noting:
       For 32-bit guest and x86_64 host, MOVL zero-extends the guest address
       before the fastpath ADDQ below.  For 64-bit guest and x32 host, MOVQ
       copies the entire guest address for the slow path, while truncation
       for the 32-bit host happens with the fastpath ADDL below.  */
    tcg_out_mov(s, ttype, r1, addrlo);

    tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
    label_ptr[0] = s->code_ptr;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* cmp 4(r0), addrhi */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4);

        tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
        label_ptr[1] = s->code_ptr;

    /* add addend(r0), r1 */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
                         offsetof(CPUTLBEntry, addend) - which);
/*
 * Record the context of a call to the out-of-line helper code for the slow
 * path of a load or store, so that we can later generate the correct helper
 * code.
 */
static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOp opc,
                                TCGReg datalo, TCGReg datahi,
                                TCGReg addrlo, TCGReg addrhi,
                                int mem_index, uint8_t *raddr,
                                uint8_t **label_ptr)
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->datalo_reg = datalo;
    label->datahi_reg = datahi;
    label->addrlo_reg = addrlo;
    label->addrhi_reg = addrhi;
    label->mem_index = mem_index;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr[0];
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        label->label_ptr[1] = label_ptr[1];
/*
 * Generate code for the slow path for a load at the end of the block.
 */
static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
    TCGMemOp opc = l->opc;
    uint8_t **label_ptr = &l->label_ptr[0];

    /* resolve label address */
    *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);

    if (TCG_TARGET_REG_BITS == 32) {
        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);

        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);

        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, (uintptr_t)l->raddr);
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
        /* The second argument is already loaded with addrlo.  */
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
        tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
                     (uintptr_t)l->raddr);

    tcg_out_calli(s, (uintptr_t)qemu_ld_helpers[opc & ~MO_SIGN]);

    data_reg = l->datalo_reg;
    switch (opc & MO_SSIZE) {
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
#if TCG_TARGET_REG_BITS == 64
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);

        /* Note that the helpers have zero-extended to tcg_target_long.  */
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
        } else if (data_reg == TCG_REG_EDX) {
            /* xchg %edx, %eax */
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
    /* Jump back to the fast-path code following the qemu_ld.  */
    tcg_out_jmp(s, (uintptr_t)l->raddr);
/*
 * Generate code for the slow path for a store at the end of the block.
 */
static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
    TCGMemOp opc = l->opc;
    TCGMemOp s_bits = opc & MO_SIZE;
    uint8_t **label_ptr = &l->label_ptr[0];

    /* resolve label address */
    *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);

    if (TCG_TARGET_REG_BITS == 32) {
        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);

        tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);

        if (s_bits == MO_64) {
            tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);

        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);

        retaddr = TCG_REG_EAX;
        tcg_out_movi(s, TCG_TYPE_I32, retaddr, (uintptr_t)l->raddr);
        tcg_out_st(s, TCG_TYPE_I32, retaddr, TCG_REG_ESP, ofs);
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
        /* The second argument is already loaded with addrlo.  */
        tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                    tcg_target_call_iarg_regs[2], l->datalo_reg);
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],

        if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
            retaddr = tcg_target_call_iarg_regs[4];
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
            retaddr = TCG_REG_RAX;
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
            tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, 0);

    /* "Tail call" to the helper, with the return address back inline.  */
    tcg_out_push(s, retaddr);
    tcg_out_jmp(s, (uintptr_t)qemu_st_helpers[opc]);
#elif defined(__x86_64__) && defined(__linux__)
# include <asm/prctl.h>
# include <sys/prctl.h>

int arch_prctl(int code, unsigned long addr);

static int guest_base_flags;
static inline void setup_guest_base_seg(void)
    if (arch_prctl(ARCH_SET_GS, GUEST_BASE) == 0) {
        guest_base_flags = P_GS;

# define guest_base_flags 0
static inline void setup_guest_base_seg(void) { }
#endif /* SOFTMMU */
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg base, intptr_t ofs, int seg,
    const TCGMemOp bswap = memop & MO_BSWAP;

    switch (memop & MO_SSIZE) {
        tcg_out_modrm_offset(s, OPC_MOVZBL + seg, datalo, base, ofs);
        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, base, ofs);
        tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
            tcg_out_rolw_8(s, datalo);
            tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
            tcg_out_rolw_8(s, datalo);
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg,
        tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
            tcg_out_bswap32(s, datalo);
#if TCG_TARGET_REG_BITS == 64
            tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
            tcg_out_bswap32(s, datalo);
            tcg_out_ext32s(s, datalo, datalo);
            tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs);
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_modrm_offset(s, OPC_MOVL_GvEv + P_REXW + seg,
                tcg_out_bswap64(s, datalo);
            if (base != datalo) {
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                                     datahi, base, ofs + 4);
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                                     datahi, base, ofs + 4);
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                tcg_out_bswap32(s, datalo);
                tcg_out_bswap32(s, datahi);
/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
   EAX.  It will be useful once fixed registers globals are less
   common.  */
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
    TCGReg datalo, datahi, addrlo;
    TCGReg addrhi __attribute__((unused));
#if defined(CONFIG_SOFTMMU)
    uint8_t *label_ptr[2];

    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);

#if defined(CONFIG_SOFTMMU)
    mem_index = *args++;
    s_bits = opc & MO_SIZE;

    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
                     label_ptr, offsetof(CPUTLBEntry, addr_read));

    tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);

    /* Record the current context of a load into ldst label */
    add_qemu_ldst_label(s, 1, opc, datalo, datahi, addrlo, addrhi,
                        mem_index, s->code_ptr, label_ptr);

    int32_t offset = GUEST_BASE;
    TCGReg base = addrlo;
        /* ??? We assume all operations have left us with register contents
           that are zero extended.  So far this appears to be true.  If we
           want to enforce this, we can either do an explicit zero-extension
           here, or (if GUEST_BASE == 0, or a segment register is in use)
           use the ADDR32 prefix.  For now, do nothing.  */
        if (GUEST_BASE && guest_base_flags) {
            seg = guest_base_flags;
        } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
            tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);

        tcg_out_qemu_ld_direct(s, datalo, datahi, base, offset, seg, opc);
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg base, intptr_t ofs, int seg,
    const TCGMemOp bswap = memop & MO_BSWAP;

    /* ??? Ideally we wouldn't need a scratch register.  For user-only,
       we could perform the bswap twice to restore the original value
       instead of moving to the scratch.  But as it is, the L constraint
       means that TCG_REG_L0 is definitely free here.  */
    const TCGReg scratch = TCG_REG_L0;

    switch (memop & MO_SIZE) {
        /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
           Use the scratch register if necessary.  */
        if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_rolw_8(s, scratch);
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16 + seg,
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
        if (TCG_TARGET_REG_BITS == 64) {
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                tcg_out_bswap64(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_REXW + seg,
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datahi, base, ofs+4);

static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
    TCGReg datalo, datahi, addrlo;
    TCGReg addrhi __attribute__((unused));
#if defined(CONFIG_SOFTMMU)
    uint8_t *label_ptr[2];

    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);

#if defined(CONFIG_SOFTMMU)
    mem_index = *args++;
    s_bits = opc & MO_SIZE;

    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
                     label_ptr, offsetof(CPUTLBEntry, addr_write));

    tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);

    /* Record the current context of a store into ldst label */
    add_qemu_ldst_label(s, 0, opc, datalo, datahi, addrlo, addrhi,
                        mem_index, s->code_ptr, label_ptr);

    int32_t offset = GUEST_BASE;
    TCGReg base = addrlo;

        /* ??? We assume all operations have left us with register contents
           that are zero extended.  So far this appears to be true.  If we
           want to enforce this, we can either do an explicit zero-extension
           here, or (if GUEST_BASE == 0, or a segment register is in use)
           use the ADDR32 prefix.  For now, do nothing.  */
        if (GUEST_BASE && guest_base_flags) {
            seg = guest_base_flags;
        } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
            tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);

        tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                              const TCGArg *args, const int *const_args)
#if TCG_TARGET_REG_BITS == 64
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i64): \
            rexw = P_REXW; /* FALLTHRU */ \
        case glue(glue(INDEX_op_, x), _i32)
#else
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i32)
#endif

    case INDEX_op_exit_tb:
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
        tcg_out_jmp(s, (uintptr_t)tb_ret_addr);
    case INDEX_op_goto_tb:
        if (s->tb_jmp_offset) {
            /* direct jump method */
            tcg_out8(s, OPC_JMP_long); /* jmp im */
            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
            /* indirect jump method */
            tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
                                 (intptr_t)(s->tb_next + args[0]));
        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
        if (const_args[0]) {
            tcg_out_calli(s, args[0]);
            tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
        tcg_out_jxx(s, JCC_JMP, args[0], 0);
    case INDEX_op_movi_i32:
        tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_ld32u_i64:
#endif
    case INDEX_op_ld_i32:
        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);

        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
                                 0, args[1], args[2]);
            tcg_out8(s, args[0]);
            tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
                                 args[0], args[1], args[2]);
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
                                 0, args[1], args[2]);
            tcg_out16(s, args[0]);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
                                 args[0], args[1], args[2]);
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_st32_i64:
#endif
    case INDEX_op_st_i32:
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
            tcg_out32(s, args[0]);
            tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);

        /* For 3-operand addition, use LEA.  */
        if (args[0] != args[1]) {
            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;

            if (const_args[2]) {
            } else if (a0 == a2) {
                /* Watch out for dest = src + dest, since we've removed
                   the matching constraint on the add.  */
                tgen_arithr(s, ARITH_ADD + rexw, a0, a1);

            tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);

        if (const_args[2]) {
            tgen_arithi(s, c + rexw, args[0], args[2], 0);
            tgen_arithr(s, c + rexw, args[0], args[2]);

        if (const_args[2]) {
            if (val == (int8_t)val) {
                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);

        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);

        if (const_args[2]) {
            tcg_out_shifti(s, c + rexw, args[0], args[2]);
            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);

    case INDEX_op_brcond_i32:
        tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
    case INDEX_op_setcond_i32:
        tcg_out_setcond32(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
    case INDEX_op_movcond_i32:
        tcg_out_movcond32(s, args[5], args[0], args[1],
                          args[2], const_args[2], args[3]);

        tcg_out_rolw_8(s, args[0]);
        tcg_out_bswap32(s, args[0]);

        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);

        tcg_out_ext8s(s, args[0], args[1], rexw);
        tcg_out_ext16s(s, args[0], args[1], rexw);
        tcg_out_ext8u(s, args[0], args[1]);
        tcg_out_ext16u(s, args[0], args[1]);

    case INDEX_op_qemu_ld_i32:
        tcg_out_qemu_ld(s, args, 0);
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, args, 1);
    case INDEX_op_qemu_st_i32:
        tcg_out_qemu_st(s, args, 0);
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, args, 1);

        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);

        if (const_args[4]) {
            tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1);
            tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]);
        if (const_args[5]) {
            tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1);
            tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]);

        if (const_args[4]) {
            tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1);
            tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]);
        if (const_args[5]) {
            tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1);
            tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]);

#if TCG_TARGET_REG_BITS == 32
    case INDEX_op_brcond2_i32:
        tcg_out_brcond2(s, args, const_args, 0);
    case INDEX_op_setcond2_i32:
        tcg_out_setcond2(s, args, const_args);
#else /* TCG_TARGET_REG_BITS == 64 */
    case INDEX_op_movi_i64:
        tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
    case INDEX_op_ld32s_i64:
        tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
    case INDEX_op_ld_i64:
        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
    case INDEX_op_st_i64:
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
                                 0, args[1], args[2]);
            tcg_out32(s, args[0]);
            tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);

    case INDEX_op_brcond_i64:
        tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
    case INDEX_op_setcond_i64:
        tcg_out_setcond64(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
    case INDEX_op_movcond_i64:
        tcg_out_movcond64(s, args[5], args[0], args[1],
                          args[2], const_args[2], args[3]);

    case INDEX_op_bswap64_i64:
        tcg_out_bswap64(s, args[0]);
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, args[0], args[1]);
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, args[0], args[1]);

        if (args[3] == 0 && args[4] == 8) {
            /* load bits 0..7 */
            tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
        } else if (args[3] == 8 && args[4] == 8) {
            /* load bits 8..15 */
            tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
        } else if (args[3] == 0 && args[4] == 16) {
            /* load bits 0..15 */
            tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
static const TCGTargetOpDef x86_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_call, { "ri" } },
    { INDEX_op_br, { } },
    { INDEX_op_mov_i32, { "r", "r" } },
    { INDEX_op_movi_i32, { "r" } },
    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    { INDEX_op_st8_i32, { "qi", "r" } },
    { INDEX_op_st16_i32, { "ri", "r" } },
    { INDEX_op_st_i32, { "ri", "r" } },

    { INDEX_op_add_i32, { "r", "r", "ri" } },
    { INDEX_op_sub_i32, { "r", "0", "ri" } },
    { INDEX_op_mul_i32, { "r", "0", "ri" } },
    { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_and_i32, { "r", "0", "ri" } },
    { INDEX_op_or_i32, { "r", "0", "ri" } },
    { INDEX_op_xor_i32, { "r", "0", "ri" } },

    { INDEX_op_shl_i32, { "r", "0", "ci" } },
    { INDEX_op_shr_i32, { "r", "0", "ci" } },
    { INDEX_op_sar_i32, { "r", "0", "ci" } },
    { INDEX_op_rotl_i32, { "r", "0", "ci" } },
    { INDEX_op_rotr_i32, { "r", "0", "ci" } },

    { INDEX_op_brcond_i32, { "r", "ri" } },

    { INDEX_op_bswap16_i32, { "r", "0" } },
    { INDEX_op_bswap32_i32, { "r", "0" } },

    { INDEX_op_neg_i32, { "r", "0" } },

    { INDEX_op_not_i32, { "r", "0" } },

    { INDEX_op_ext8s_i32, { "r", "q" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext8u_i32, { "r", "q" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_setcond_i32, { "q", "r", "ri" } },

    { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
#if TCG_TARGET_HAS_movcond_i32
    { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
#endif

    { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_muls2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },

#if TCG_TARGET_REG_BITS == 32
    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
#else
    { INDEX_op_mov_i64, { "r", "r" } },
    { INDEX_op_movi_i64, { "r" } },
    { INDEX_op_ld8u_i64, { "r", "r" } },
    { INDEX_op_ld8s_i64, { "r", "r" } },
    { INDEX_op_ld16u_i64, { "r", "r" } },
    { INDEX_op_ld16s_i64, { "r", "r" } },
    { INDEX_op_ld32u_i64, { "r", "r" } },
    { INDEX_op_ld32s_i64, { "r", "r" } },
    { INDEX_op_ld_i64, { "r", "r" } },
    { INDEX_op_st8_i64, { "ri", "r" } },
    { INDEX_op_st16_i64, { "ri", "r" } },
    { INDEX_op_st32_i64, { "ri", "r" } },
    { INDEX_op_st_i64, { "re", "r" } },

    { INDEX_op_add_i64, { "r", "r", "re" } },
    { INDEX_op_mul_i64, { "r", "0", "re" } },
    { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_sub_i64, { "r", "0", "re" } },
    { INDEX_op_and_i64, { "r", "0", "reZ" } },
    { INDEX_op_or_i64, { "r", "0", "re" } },
    { INDEX_op_xor_i64, { "r", "0", "re" } },

    { INDEX_op_shl_i64, { "r", "0", "ci" } },
    { INDEX_op_shr_i64, { "r", "0", "ci" } },
    { INDEX_op_sar_i64, { "r", "0", "ci" } },
    { INDEX_op_rotl_i64, { "r", "0", "ci" } },
    { INDEX_op_rotr_i64, { "r", "0", "ci" } },

    { INDEX_op_brcond_i64, { "r", "re" } },
    { INDEX_op_setcond_i64, { "r", "r", "re" } },

    { INDEX_op_bswap16_i64, { "r", "0" } },
    { INDEX_op_bswap32_i64, { "r", "0" } },
    { INDEX_op_bswap64_i64, { "r", "0" } },
    { INDEX_op_neg_i64, { "r", "0" } },
    { INDEX_op_not_i64, { "r", "0" } },

    { INDEX_op_ext8s_i64, { "r", "r" } },
    { INDEX_op_ext16s_i64, { "r", "r" } },
    { INDEX_op_ext32s_i64, { "r", "r" } },
    { INDEX_op_ext8u_i64, { "r", "r" } },
    { INDEX_op_ext16u_i64, { "r", "r" } },
    { INDEX_op_ext32u_i64, { "r", "r" } },

    { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
    { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },

    { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } },
    { INDEX_op_muls2_i64, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } },
    { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } },
#endif

#if TCG_TARGET_REG_BITS == 64
    { INDEX_op_qemu_ld_i32, { "r", "L" } },
    { INDEX_op_qemu_st_i32, { "L", "L" } },
    { INDEX_op_qemu_ld_i64, { "r", "L" } },
    { INDEX_op_qemu_st_i64, { "L", "L" } },
#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
    { INDEX_op_qemu_ld_i32, { "r", "L" } },
    { INDEX_op_qemu_st_i32, { "L", "L" } },
    { INDEX_op_qemu_ld_i64, { "r", "r", "L" } },
    { INDEX_op_qemu_st_i64, { "L", "L", "L" } },
#else
    { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
    { INDEX_op_qemu_st_i32, { "L", "L", "L" } },
    { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } },
    { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
#endif
};
static int tcg_target_callee_save_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_R14, /* Currently used for the global env. */
    TCG_REG_EBP, /* Currently used for the global env. */

/* Compute frame size via macros, to share between tcg_target_qemu_prologue
   and tcg_register_jit.  */

#define PUSH_SIZE \
    ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
     * (TCG_TARGET_REG_BITS / 8))

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))
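
/* Worked example (added illustration; the constants are configuration
   dependent): with 6 callee-saved registers on x86_64, PUSH_SIZE is
   (1 + 6) * 8 = 56 (return address plus pushes).  Assuming
   TCG_STATIC_CALL_ARGS_SIZE = 128 and CPU_TEMP_BUF_NLONGS * sizeof(long)
   = 1024, the sum 56 + 128 + 1024 = 1208 rounds up to the 16-byte
   TCG_TARGET_STACK_ALIGN boundary, giving FRAME_SIZE = 1216. */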
/* Generate global QEMU prologue and epilogue code */
static void tcg_target_qemu_prologue(TCGContext *s)
    int i, stack_addend;

    /* Reserve some stack space, also for TCG temps.  */
    stack_addend = FRAME_SIZE - PUSH_SIZE;
    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    /* Save all callee saved registers.  */
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
        tcg_out_push(s, tcg_target_callee_save_regs[i]);

#if TCG_TARGET_REG_BITS == 32
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
               (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
    tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
                         (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);

    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);

    tb_ret_addr = s->code_ptr;

    tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);

    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
    tcg_out_opc(s, OPC_RET, 0, 0, 0);

#if !defined(CONFIG_SOFTMMU)
    /* Try to set up a segment register to point to GUEST_BASE.  */
        setup_guest_base_seg();
static void tcg_target_init(TCGContext *s)
    /* For 32-bit, 99% certainty that we're running on hardware that
       supports cmov, but we still need to check.  In case cmov is not
       available, we'll use a small forward branch.  */
        unsigned a, b, c, d;
        have_cmov = (__get_cpuid(1, &a, &b, &c, &d) && (d & bit_CMOV));

    if (TCG_TARGET_REG_BITS == 64) {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);

    tcg_regset_clear(tcg_target_call_clobber_regs);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
    if (TCG_TARGET_REG_BITS == 64) {
#if !defined(_WIN64)
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
#endif
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);

    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);

    tcg_add_target_add_op_defs(x86_op_defs);
    DebugFrameFDEHeader fde;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[14];
} DebugFrame;

/* We're expecting a 2-byte uleb128-encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
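
/* Added illustration: uleb128 stores 7 bits per byte, with the high bit set
   on all but the last byte.  E.g. a FRAME_SIZE of 1216 encodes as
   (1216 & 0x7f) | 0x80 = 0xc0 followed by 1216 >> 7 = 0x09, exactly the
   two-byte form that fde_def_cfa[] below assumes. */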
#if !defined(__ELF__)
    /* Host machine without ELF.  */
#elif TCG_TARGET_REG_BITS == 64
#define ELF_HOST_MACHINE EM_X86_64
static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.code_align = 1,
    .cie.data_align = 0x78,             /* sleb128 -8 */
    .cie.return_column = 16,

    /* Total FDE size does not include the "len" member.  */
    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),

        12, 7,                          /* DW_CFA_def_cfa %rsp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */

        0x90, 1,                        /* DW_CFA_offset, %rip, -8 */
        /* The following ordering must match tcg_target_callee_save_regs.  */
        0x86, 2,                        /* DW_CFA_offset, %rbp, -16 */
        0x83, 3,                        /* DW_CFA_offset, %rbx, -24 */
        0x8c, 4,                        /* DW_CFA_offset, %r12, -32 */
        0x8d, 5,                        /* DW_CFA_offset, %r13, -40 */
        0x8e, 6,                        /* DW_CFA_offset, %r14, -48 */
        0x8f, 7,                        /* DW_CFA_offset, %r15, -56 */

#define ELF_HOST_MACHINE EM_386
static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.code_align = 1,
    .cie.data_align = 0x7c,             /* sleb128 -4 */
    .cie.return_column = 8,

    /* Total FDE size does not include the "len" member.  */
    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),

        12, 4,                          /* DW_CFA_def_cfa %esp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */

        0x88, 1,                        /* DW_CFA_offset, %eip, -4 */
        /* The following ordering must match tcg_target_callee_save_regs.  */
        0x85, 2,                        /* DW_CFA_offset, %ebp, -8 */
        0x83, 3,                        /* DW_CFA_offset, %ebx, -12 */
        0x86, 4,                        /* DW_CFA_offset, %esi, -16 */
        0x87, 5,                        /* DW_CFA_offset, %edi, -20 */

#if defined(ELF_HOST_MACHINE)
void tcg_register_jit(void *buf, size_t buf_size)
    debug_frame.fde.func_start = (uintptr_t)buf;
    debug_frame.fde.func_len = buf_size;

    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));