4 * Copyright (c) 2003 Fabrice Bellard
5 * Copyright (c) 2005-2007 CodeSourcery
6 * Copyright (c) 2007 OpenedHand, Ltd.
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
28 #include "disas/disas.h"
/* Architecture feature predicates. Each one expands to an arm_feature()
   query and therefore needs a variable named env in scope where it is used. */
36 #define ENABLE_ARCH_4T arm_feature(env, ARM_FEATURE_V4T)
37 #define ENABLE_ARCH_5 arm_feature(env, ARM_FEATURE_V5)
38 /* currently all emulated v5 cores are also v5TE, so don't bother */
39 #define ENABLE_ARCH_5TE arm_feature(env, ARM_FEATURE_V5)
/* Hard-wired to 0, so ARCH(5J) always takes the illegal_op path. */
40 #define ENABLE_ARCH_5J 0
41 #define ENABLE_ARCH_6 arm_feature(env, ARM_FEATURE_V6)
42 #define ENABLE_ARCH_6K arm_feature(env, ARM_FEATURE_V6K)
43 #define ENABLE_ARCH_6T2 arm_feature(env, ARM_FEATURE_THUMB2)
44 #define ENABLE_ARCH_7 arm_feature(env, ARM_FEATURE_V7)
/* Jump to the illegal_op label of the enclosing function when the
   ENABLE_ARCH_<x> predicate is false. */
46 #define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
48 /* internal defines */
49 typedef struct DisasContext {
52 /* Nonzero if this instruction has been conditionally skipped. */
54 /* The label that will be jumped to when the instruction is skipped. */
56 /* Thumb-2 conditional execution bits. */
59 struct TranslationBlock *tb;
60 int singlestep_enabled;
63 #if !defined(CONFIG_USER_ONLY)
71 static uint32_t gen_opc_condexec_bits[OPC_BUF_SIZE];
73 #if defined(CONFIG_USER_ONLY)
76 #define IS_USER(s) (s->user)
79 /* These instructions trap after executing, so defer them until after the
80 conditional execution state has been updated. */
/* TCG pointer global for the CPU state; created in arm_translate_init(). */
84 static TCGv_ptr cpu_env;
85 /* We reuse the same 64-bit temporaries for efficiency. */
86 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
/* One TCG global per core register, backed by CPUARMState regs[]. */
87 static TCGv_i32 cpu_R[16];
/* Condition flag globals, backed by the CF/NF/VF/ZF fields of CPUARMState. */
88 static TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
/* Exclusive-access state, backed by the CPUARMState fields of the same
   names. */
89 static TCGv_i32 cpu_exclusive_addr;
90 static TCGv_i32 cpu_exclusive_val;
91 static TCGv_i32 cpu_exclusive_high;
92 #ifdef CONFIG_USER_ONLY
93 static TCGv_i32 cpu_exclusive_test;
94 static TCGv_i32 cpu_exclusive_info;
97 /* FIXME: These should be removed. */
98 static TCGv cpu_F0s, cpu_F1s;
99 static TCGv_i64 cpu_F0d, cpu_F1d;
101 #include "exec/gen-icount.h"
/* Printable names of the sixteen core registers, indexed by register
   number; entry 15 is "pc". */
103 static const char *regnames[] =
104 { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
105 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
107 /* Initialize TCG globals: bind cpu_env to TCG_AREG0 and create
   memory-backed globals (all addressed relative to TCG_AREG0) for the 16
   core registers, the four flag fields and the exclusive-access fields of
   CPUARMState. */
108 void arm_translate_init(void)
112 cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
/* One global per core register, located at regs[i]. */
114 for (i = 0; i < 16; i++) {
115 cpu_R[i] = tcg_global_mem_new_i32(TCG_AREG0,
116 offsetof(CPUARMState, regs[i]),
/* Condition flags are kept as four separate 32-bit fields. */
119 cpu_CF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, CF), "CF");
120 cpu_NF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, NF), "NF");
121 cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF), "VF");
122 cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF), "ZF");
124 cpu_exclusive_addr = tcg_global_mem_new_i32(TCG_AREG0,
125 offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
126 cpu_exclusive_val = tcg_global_mem_new_i32(TCG_AREG0,
127 offsetof(CPUARMState, exclusive_val), "exclusive_val");
128 cpu_exclusive_high = tcg_global_mem_new_i32(TCG_AREG0,
129 offsetof(CPUARMState, exclusive_high), "exclusive_high");
/* The two globals below exist only in user-mode builds. */
130 #ifdef CONFIG_USER_ONLY
131 cpu_exclusive_test = tcg_global_mem_new_i32(TCG_AREG0,
132 offsetof(CPUARMState, exclusive_test), "exclusive_test");
133 cpu_exclusive_info = tcg_global_mem_new_i32(TCG_AREG0,
134 offsetof(CPUARMState, exclusive_info), "exclusive_info");
/* Allocate a fresh 32-bit temporary and fill it with the word found at
   byte offset 'offset' from cpu_env. */
141 static inline TCGv load_cpu_offset(int offset)
143 TCGv tmp = tcg_temp_new_i32();
144 tcg_gen_ld_i32(tmp, cpu_env, offset);
/* Convenience wrapper: load the CPUARMState member 'name'. */
148 #define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))
/* Store var at byte offset 'offset' from cpu_env and then free var, so
   the caller must not use var afterwards. */
150 static inline void store_cpu_offset(TCGv var, int offset)
152 tcg_gen_st_i32(var, cpu_env, offset);
153 tcg_temp_free_i32(var);
/* Convenience wrapper: store var into the CPUARMState member 'name'
   (var is freed, see store_cpu_offset). */
156 #define store_cpu_field(var, name) \
157 store_cpu_offset(var, offsetof(CPUARMState, name))
159 /* Set a variable to the value of a CPU register.
   Two code paths are visible: one loads var with an immediate computed from
   s->pc (+2 or +4), the other copies cpu_R[reg] into var. The conditions
   selecting between them are not visible in this excerpt (presumably
   reg == 15 and the Thumb state -- confirm against the full source). */
160 static void load_reg_var(DisasContext *s, TCGv var, int reg)
164 /* normally, since we updated PC, we need only to add one insn */
166 addr = (long)s->pc + 2;
168 addr = (long)s->pc + 4;
169 tcg_gen_movi_i32(var, addr);
171 tcg_gen_mov_i32(var, cpu_R[reg]);
175 /* Create a new temporary and set it to the value of a CPU register.
   The value is obtained through load_reg_var(), so any special handling
   done there applies here as well. */
176 static inline TCGv load_reg(DisasContext *s, int reg)
178 TCGv tmp = tcg_temp_new_i32();
179 load_reg_var(s, tmp, reg);
183 /* Set a CPU register. The source must be a temporary and will be marked as dead. */
185 static void store_reg(DisasContext *s, int reg, TCGv var)
/* One path clears bit 0 of var and flags the translation state as
   DISAS_JUMP; its guard is not visible in this excerpt (presumably
   reg == 15 -- confirm). */
188 tcg_gen_andi_i32(var, var, ~1);
189 s->is_jmp = DISAS_JUMP;
/* Copy into the register global, then release the caller's temporary. */
191 tcg_gen_mov_i32(cpu_R[reg], var);
192 tcg_temp_free_i32(var);
195 /* Value extensions. All of these operate on var in place. */
/* Zero-/sign-extend the low byte or halfword using inline TCG ops. */
196 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
197 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
198 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
199 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
/* The dual-byte variants are implemented by out-of-line helpers. */
201 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
202 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
/* Emit a call to the cpsr_write helper with var as the value and mask as
   a constant operand. The constant temporary is freed here; var is not. */
205 static inline void gen_set_cpsr(TCGv var, uint32_t mask)
207 TCGv tmp_mask = tcg_const_i32(mask);
208 gen_helper_cpsr_write(cpu_env, var, tmp_mask);
209 tcg_temp_free_i32(tmp_mask);
211 /* Set NZCV flags from the high 4 bits of var. */
212 #define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
/* Emit a call to the exception helper, passing excp through a temporary
   that is allocated and freed locally. */
214 static void gen_exception(int excp)
216 TCGv tmp = tcg_temp_new_i32();
217 tcg_gen_movi_i32(tmp, excp);
218 gen_helper_exception(cpu_env, tmp);
219 tcg_temp_free_i32(tmp);
/* Dual signed 16x16 multiply. On exit a holds the product of the
   sign-extended low halfwords and b holds the product of the
   (arithmetically shifted) high halfwords. Both operands are overwritten. */
222 static void gen_smul_dual(TCGv a, TCGv b)
224 TCGv tmp1 = tcg_temp_new_i32();
225 TCGv tmp2 = tcg_temp_new_i32();
/* Low halfwords: tmp1 = sext16(a) * sext16(b). */
226 tcg_gen_ext16s_i32(tmp1, a);
227 tcg_gen_ext16s_i32(tmp2, b);
228 tcg_gen_mul_i32(tmp1, tmp1, tmp2);
229 tcg_temp_free_i32(tmp2);
/* High halfwords: b = (a >> 16) * (b >> 16), using arithmetic shifts. */
230 tcg_gen_sari_i32(a, a, 16);
231 tcg_gen_sari_i32(b, b, 16);
232 tcg_gen_mul_i32(b, b, a);
/* a can only be overwritten now that its high half has been consumed. */
233 tcg_gen_mov_i32(a, tmp1);
234 tcg_temp_free_i32(tmp1);
237 /* Byteswap each halfword. var is modified in place. */
238 static void gen_rev16(TCGv var)
240 TCGv tmp = tcg_temp_new_i32();
/* tmp = the two high bytes of each halfword moved down to the low bytes. */
241 tcg_gen_shri_i32(tmp, var, 8);
242 tcg_gen_andi_i32(tmp, tmp, 0x00ff00ff);
/* var = the two low bytes of each halfword moved up to the high bytes. */
243 tcg_gen_shli_i32(var, var, 8);
244 tcg_gen_andi_i32(var, var, 0xff00ff00);
245 tcg_gen_or_i32(var, var, tmp);
246 tcg_temp_free_i32(tmp);
249 /* Byteswap low halfword and sign extend. var is modified in place. */
250 static void gen_revsh(TCGv var)
/* The upper half is cleared before the 16-bit byteswap is applied. */
252 tcg_gen_ext16u_i32(var, var);
253 tcg_gen_bswap16_i32(var, var);
254 tcg_gen_ext16s_i32(var, var);
257 /* Unsigned bitfield extract. var is shifted right by 'shift' and then
   ANDed with 'mask', so mask describes the field in its right-aligned
   position. var is modified in place. */
258 static void gen_ubfx(TCGv var, int shift, uint32_t mask)
261 tcg_gen_shri_i32(var, var, shift);
262 tcg_gen_andi_i32(var, var, mask);
265 /* Signed bitfield extract of 'width' bits starting at bit 'shift'.
   var is modified in place. */
266 static void gen_sbfx(TCGv var, int shift, int width)
271 tcg_gen_sari_i32(var, var, shift);
/* If the field does not reach bit 31 the arithmetic shift alone leaves
   unrelated upper bits in place: mask to 'width' bits and sign-extend
   with the (x ^ signbit) - signbit identity. */
272 if (shift + width < 32) {
273 signbit = 1u << (width - 1);
274 tcg_gen_andi_i32(var, var, (1u << width) - 1);
275 tcg_gen_xori_i32(var, var, signbit);
276 tcg_gen_subi_i32(var, var, signbit);
280 /* Return (b << 32) + a. Mark inputs as dead.
   The sum is accumulated into a; b is freed here. */
281 static TCGv_i64 gen_addq_msw(TCGv_i64 a, TCGv b)
283 TCGv_i64 tmp64 = tcg_temp_new_i64();
/* Widen b (zero-extended) and move it into the upper 32 bits. */
285 tcg_gen_extu_i32_i64(tmp64, b);
286 tcg_temp_free_i32(b);
287 tcg_gen_shli_i64(tmp64, tmp64, 32);
288 tcg_gen_add_i64(a, tmp64, a);
290 tcg_temp_free_i64(tmp64);
294 /* Return (b << 32) - a. Mark inputs as dead.
   The difference is accumulated into a; b is freed here. */
295 static TCGv_i64 gen_subq_msw(TCGv_i64 a, TCGv b)
297 TCGv_i64 tmp64 = tcg_temp_new_i64();
/* Widen b (zero-extended) and move it into the upper 32 bits. */
299 tcg_gen_extu_i32_i64(tmp64, b);
300 tcg_temp_free_i32(b);
301 tcg_gen_shli_i64(tmp64, tmp64, 32);
302 tcg_gen_sub_i64(a, tmp64, a);
304 tcg_temp_free_i64(tmp64);
308 /* FIXME: Most targets have native widening multiplication.
309 It would be good to use that instead of a full wide multiply. */
310 /* 32x32->64 multiply. Marks inputs as dead.
   Unsigned variant: both operands are zero-extended to 64 bits before the
   multiply; the product is left in tmp1. */
311 static TCGv_i64 gen_mulu_i64_i32(TCGv a, TCGv b)
313 TCGv_i64 tmp1 = tcg_temp_new_i64();
314 TCGv_i64 tmp2 = tcg_temp_new_i64();
316 tcg_gen_extu_i32_i64(tmp1, a);
317 tcg_temp_free_i32(a);
318 tcg_gen_extu_i32_i64(tmp2, b);
319 tcg_temp_free_i32(b);
320 tcg_gen_mul_i64(tmp1, tmp1, tmp2);
321 tcg_temp_free_i64(tmp2);
/* Signed 32x32->64 multiply. Both operands are sign-extended to 64 bits
   before the multiply; a and b are freed here and the product is left in
   tmp1. */
325 static TCGv_i64 gen_muls_i64_i32(TCGv a, TCGv b)
327 TCGv_i64 tmp1 = tcg_temp_new_i64();
328 TCGv_i64 tmp2 = tcg_temp_new_i64();
330 tcg_gen_ext_i32_i64(tmp1, a);
331 tcg_temp_free_i32(a);
332 tcg_gen_ext_i32_i64(tmp2, b);
333 tcg_temp_free_i32(b);
334 tcg_gen_mul_i64(tmp1, tmp1, tmp2);
335 tcg_temp_free_i64(tmp2);
339 /* Swap low and high halfwords. var is modified in place; this is
   (var >> 16) | (var << 16) built from two shifts and an OR. */
340 static void gen_swap_half(TCGv var)
342 TCGv tmp = tcg_temp_new_i32();
343 tcg_gen_shri_i32(tmp, var, 16);
344 tcg_gen_shli_i32(var, var, 16);
345 tcg_gen_or_i32(var, var, tmp);
346 tcg_temp_free_i32(tmp);
349 /* Dual 16-bit add. Result placed in t0 and t1 is marked as dead.
350 tmp = (t0 ^ t1) & 0x8000;
t0 &= ~0x8000;
t1 &= ~0x8000;
353 t0 = (t0 + t1) ^ tmp;
*/
356 static void gen_add16(TCGv t0, TCGv t1)
358 TCGv tmp = tcg_temp_new_i32();
/* Remember the carry-less sum of the two bit-15 values. */
359 tcg_gen_xor_i32(tmp, t0, t1);
360 tcg_gen_andi_i32(tmp, tmp, 0x8000);
/* With bit 15 cleared in both operands, the 32-bit add cannot carry out
   of the low halfword into the high one. */
361 tcg_gen_andi_i32(t0, t0, ~0x8000);
362 tcg_gen_andi_i32(t1, t1, ~0x8000);
363 tcg_gen_add_i32(t0, t0, t1);
/* Fold the saved bit-15 sum back into the result. */
364 tcg_gen_xor_i32(t0, t0, tmp);
365 tcg_temp_free_i32(tmp);
366 tcg_temp_free_i32(t1);
369 /* Set CF to the top bit of var. The logical shift by 31 leaves cpu_CF
   holding exactly 0 or 1; var itself is not modified. */
370 static void gen_set_CF_bit31(TCGv var)
372 tcg_gen_shri_i32(cpu_CF, var, 31);
375 /* Set N and Z flags from var. Both cpu_NF and cpu_ZF simply receive a
   copy of the value; var is not modified. */
376 static inline void gen_logic_CC(TCGv var)
378 tcg_gen_mov_i32(cpu_NF, var);
379 tcg_gen_mov_i32(cpu_ZF, var);
/* t0 = t0 + t1 + CF. The flags are not updated and t1 is left intact. */
383 static void gen_adc(TCGv t0, TCGv t1)
385 tcg_gen_add_i32(t0, t0, t1);
386 tcg_gen_add_i32(t0, t0, cpu_CF);
389 /* dest = T0 + T1 + CF. The flags are not updated. */
390 static void gen_add_carry(TCGv dest, TCGv t0, TCGv t1)
392 tcg_gen_add_i32(dest, t0, t1);
393 tcg_gen_add_i32(dest, dest, cpu_CF);
396 /* dest = T0 - T1 + CF - 1. The flags are not updated. */
397 static void gen_sub_carry(TCGv dest, TCGv t0, TCGv t1)
399 tcg_gen_sub_i32(dest, t0, t1);
400 tcg_gen_add_i32(dest, dest, cpu_CF);
401 tcg_gen_subi_i32(dest, dest, 1);
404 /* dest = T0 + T1. Compute C, N, V and Z flags */
405 static void gen_add_CC(TCGv dest, TCGv t0, TCGv t1)
/* The sum is built in cpu_NF and mirrored into cpu_ZF. */
408 tcg_gen_add_i32(cpu_NF, t0, t1);
409 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
/* Carry out: the unsigned sum wrapped iff it is below an operand. */
410 tcg_gen_setcond_i32(TCG_COND_LTU, cpu_CF, cpu_NF, t0);
/* VF = (result ^ t0) & ~(t0 ^ t1): the sign bit is set when the operands
   share a sign that differs from the sign of the result. */
411 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
412 tmp = tcg_temp_new_i32();
413 tcg_gen_xor_i32(tmp, t0, t1);
414 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
415 tcg_temp_free_i32(tmp);
/* dest is written last, after the reads of t0 and t1 above. */
416 tcg_gen_mov_i32(dest, cpu_NF);
419 /* dest = T0 - T1. Compute C, N, V and Z flags */
420 static void gen_sub_CC(TCGv dest, TCGv t0, TCGv t1)
/* The difference is built in cpu_NF and mirrored into cpu_ZF. */
423 tcg_gen_sub_i32(cpu_NF, t0, t1);
424 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
/* CF is 1 when t0 >= t1 as unsigned values, i.e. when no borrow occurs. */
425 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
/* VF = (result ^ t0) & (t0 ^ t1): the sign bit is set when the operands
   differ in sign and the result's sign differs from t0's. */
426 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
427 tmp = tcg_temp_new_i32();
428 tcg_gen_xor_i32(tmp, t0, t1);
429 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
430 tcg_temp_free_i32(tmp);
/* dest is written last, after the reads of t0 and t1 above. */
431 tcg_gen_mov_i32(dest, cpu_NF);
/* Define gen_<name>(dest, t0, t1): shift t0 by the low byte of t1 using
   tcg_gen_<name>_i32. When that byte is greater than 31 the movcond
   replaces the value being shifted with zero, so the result is 0; the
   amount handed to the TCG op is always masked to 0..31. */
434 #define GEN_SHIFT(name) \
435 static void gen_##name(TCGv dest, TCGv t0, TCGv t1) \
437 TCGv tmp1, tmp2, tmp3; \
438 tmp1 = tcg_temp_new_i32(); \
439 tcg_gen_andi_i32(tmp1, t1, 0xff); \
440 tmp2 = tcg_const_i32(0); \
441 tmp3 = tcg_const_i32(0x1f); \
442 tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0); \
443 tcg_temp_free_i32(tmp3); \
444 tcg_gen_andi_i32(tmp1, tmp1, 0x1f); \
445 tcg_gen_##name##_i32(dest, tmp2, tmp1); \
446 tcg_temp_free_i32(tmp2); \
447 tcg_temp_free_i32(tmp1); \
/* dest = t0 arithmetically shifted right by the low byte of t1, with the
   shift amount clamped to 31. */
453 static void gen_sar(TCGv dest, TCGv t0, TCGv t1)
456 tmp1 = tcg_temp_new_i32();
457 tcg_gen_andi_i32(tmp1, t1, 0xff);
458 tmp2 = tcg_const_i32(0x1f);
/* tmp1 = (tmp1 > 31) ? 31 : tmp1 */
459 tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
460 tcg_temp_free_i32(tmp2);
461 tcg_gen_sar_i32(dest, t0, tmp1);
462 tcg_temp_free_i32(tmp1);
/* dest = (src > 0) ? src : -src, using a signed comparison against zero.
   The negation is a plain 32-bit tcg_gen_neg_i32. */
465 static void tcg_gen_abs_i32(TCGv dest, TCGv src)
467 TCGv c0 = tcg_const_i32(0);
468 TCGv tmp = tcg_temp_new_i32();
469 tcg_gen_neg_i32(tmp, src);
470 tcg_gen_movcond_i32(TCG_COND_GT, dest, src, c0, src, tmp);
471 tcg_temp_free_i32(c0);
472 tcg_temp_free_i32(tmp);
/* Copy one bit of var into cpu_CF. Two forms are visible: var & 1 written
   directly, or var >> shift followed by an & 1. The conditions choosing
   between them (and guarding the final mask) are not visible in this
   excerpt. var itself is not modified. */
475 static void shifter_out_im(TCGv var, int shift)
478 tcg_gen_andi_i32(cpu_CF, var, 1);
480 tcg_gen_shri_i32(cpu_CF, var, shift);
482 tcg_gen_andi_i32(cpu_CF, cpu_CF, 1);
487 /* Shift by immediate. Includes special handling for shift == 0.
   var is shifted in place according to shiftop. The shifter_out_im() calls
   and the direct write to cpu_CF update the carry flag; their guards are
   not visible in this excerpt (presumably they depend on 'flags' --
   confirm against the full source). */
488 static inline void gen_arm_shift_im(TCGv var, int shiftop, int shift, int flags)
/* Left shift: the carry-out candidate is bit (32 - shift). */
494 shifter_out_im(var, 32 - shift);
495 tcg_gen_shli_i32(var, var, shift);
/* Path that yields zero, with the carry taken from bit 31. */
501 tcg_gen_shri_i32(cpu_CF, var, 31);
503 tcg_gen_movi_i32(var, 0);
/* Logical right shift: the carry-out candidate is bit (shift - 1). */
506 shifter_out_im(var, shift - 1);
507 tcg_gen_shri_i32(var, var, shift);
/* Arithmetic right shift: same carry-out bit as above. */
514 shifter_out_im(var, shift - 1);
517 tcg_gen_sari_i32(var, var, shift);
519 case 3: /* ROR/RRX */
522 shifter_out_im(var, shift - 1);
523 tcg_gen_rotri_i32(var, var, shift); break;
/* Second form of case 3: the current CF is moved to bit 31 of the result
   and var is shifted right by one. */
525 TCGv tmp = tcg_temp_new_i32();
526 tcg_gen_shli_i32(tmp, cpu_CF, 31);
528 shifter_out_im(var, 0);
529 tcg_gen_shri_i32(var, var, 1);
530 tcg_gen_or_i32(var, var, tmp);
531 tcg_temp_free_i32(tmp);
/* Shift var in place by the register amount 'shift'; 'shift' is freed
   before returning. Two groups of implementations are visible: the *_cc
   helpers, which are given cpu_env, and the inline gen_shl/gen_shr/gen_sar
   and rotate forms. The condition selecting the group is not visible in
   this excerpt (presumably 'flags' -- confirm). */
536 static inline void gen_arm_shift_reg(TCGv var, int shiftop,
537 TCGv shift, int flags)
541 case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
542 case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
543 case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
544 case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
549 gen_shl(var, var, shift);
552 gen_shr(var, var, shift);
555 gen_sar(var, var, shift);
/* The inline rotate masks the amount to 0..31 before the TCG op. */
557 case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
558 tcg_gen_rotr_i32(var, var, shift); break;
561 tcg_temp_free_i32(shift);
/* ARM-encoding dispatch table for the parallel add/subtract helpers with
   name prefix pfx. Expands to case labels that invoke whatever
   gen_pas_helper is defined as at the point of use; the switch header is
   not visible in this excerpt. Selector values 5 and 6 have no case here. */
564 #define PAS_OP(pfx) \
566 case 0: gen_pas_helper(glue(pfx,add16)); break; \
567 case 1: gen_pas_helper(glue(pfx,addsubx)); break; \
568 case 2: gen_pas_helper(glue(pfx,subaddx)); break; \
569 case 3: gen_pas_helper(glue(pfx,sub16)); break; \
570 case 4: gen_pas_helper(glue(pfx,add8)); break; \
571 case 7: gen_pas_helper(glue(pfx,sub8)); break; \
/* Emit an ARM-encoded parallel add/subtract on a and b, leaving the
   result in a. gen_pas_helper is defined twice: first in a form that passes
   an extra pointer to the CPUARMState GE field, then in a form that takes
   only the operands. The PAS_OP() invocations and the switch on op1 that
   choose between them are not visible in this excerpt. */
573 static void gen_arm_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
578 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
/* tmp = address of the GE field, handed to the helper and freed after. */
580 tmp = tcg_temp_new_ptr();
581 tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
583 tcg_temp_free_ptr(tmp);
586 tmp = tcg_temp_new_ptr();
587 tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
589 tcg_temp_free_ptr(tmp);
591 #undef gen_pas_helper
592 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
605 #undef gen_pas_helper
610 /* For unknown reasons Arm and Thumb-2 use arbitrarily different encodings.
   This is the Thumb-2 dispatch table: same helpers as the ARM table but
   with different selector values (3 and 7 have no case here). */
611 #define PAS_OP(pfx) \
613 case 0: gen_pas_helper(glue(pfx,add8)); break; \
614 case 1: gen_pas_helper(glue(pfx,add16)); break; \
615 case 2: gen_pas_helper(glue(pfx,addsubx)); break; \
616 case 4: gen_pas_helper(glue(pfx,sub8)); break; \
617 case 5: gen_pas_helper(glue(pfx,sub16)); break; \
618 case 6: gen_pas_helper(glue(pfx,subaddx)); break; \
/* Emit a Thumb-2-encoded parallel add/subtract on a and b, leaving the
   result in a. As in the ARM variant, gen_pas_helper is first defined to
   pass a pointer to the CPUARMState GE field and then redefined without
   it. The PAS_OP() invocations and the selecting switch are not visible in
   this excerpt. */
620 static void gen_thumb2_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
625 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
/* tmp = address of the GE field, handed to the helper and freed after. */
627 tmp = tcg_temp_new_ptr();
628 tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
630 tcg_temp_free_ptr(tmp);
633 tmp = tcg_temp_new_ptr();
634 tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
636 tcg_temp_free_ptr(tmp);
638 #undef gen_pas_helper
639 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
652 #undef gen_pas_helper
/* Emit a conditional branch to 'label' that is taken when condition code
   cc holds. Flag representation, as used by the tests below:
     - Z is set when cpu_ZF == 0;
     - C is set when cpu_CF != 0;
     - N and V are the sign bits of cpu_NF and cpu_VF, so they are tested
       with signed LT/GE comparisons against 0, and N == V is tested via
       the sign of (VF ^ NF).
   Conditions that are a logical AND branch around the second test through
   a local 'inv' label. The case labels of the first eight tests are not
   visible in this excerpt. */
657 static void gen_test_cc(int cc, int label)
/* Single-flag tests on Z, C, N and V (each flag in both polarities). */
664 tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label);
667 tcg_gen_brcondi_i32(TCG_COND_NE, cpu_ZF, 0, label);
670 tcg_gen_brcondi_i32(TCG_COND_NE, cpu_CF, 0, label);
673 tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, label);
676 tcg_gen_brcondi_i32(TCG_COND_LT, cpu_NF, 0, label);
679 tcg_gen_brcondi_i32(TCG_COND_GE, cpu_NF, 0, label);
682 tcg_gen_brcondi_i32(TCG_COND_LT, cpu_VF, 0, label);
685 tcg_gen_brcondi_i32(TCG_COND_GE, cpu_VF, 0, label);
687 case 8: /* hi: C && !Z */
/* Skip to inv when C is clear, otherwise branch if Z is clear. */
688 inv = gen_new_label();
689 tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, inv);
690 tcg_gen_brcondi_i32(TCG_COND_NE, cpu_ZF, 0, label);
693 case 9: /* ls: !C || Z */
694 tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, label);
695 tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label);
697 case 10: /* ge: N == V -> N ^ V == 0 */
698 tmp = tcg_temp_new_i32();
699 tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
700 tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
701 tcg_temp_free_i32(tmp);
703 case 11: /* lt: N != V -> N ^ V != 0 */
704 tmp = tcg_temp_new_i32();
705 tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
706 tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
707 tcg_temp_free_i32(tmp);
709 case 12: /* gt: !Z && N == V */
/* Skip to inv when Z is set, otherwise apply the N == V test. */
710 inv = gen_new_label();
711 tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, inv);
712 tmp = tcg_temp_new_i32();
713 tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
714 tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
715 tcg_temp_free_i32(tmp);
718 case 13: /* le: Z || N != V */
719 tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label);
720 tmp = tcg_temp_new_i32();
721 tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
722 tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
723 tcg_temp_free_i32(tmp);
/* Diagnostic for a cc value with no handler (presumably the default case). */
726 fprintf(stderr, "Bad condition code 0x%x\n", cc);
731 static const uint8_t table_logic_cc[16] = {
750 /* Set PC and Thumb state from an immediate address.
   Bit 0 of addr is the target Thumb state; the remaining bits are the
   target PC. Marks the translation state as DISAS_UPDATE. */
751 static inline void gen_bx_im(DisasContext *s, uint32_t addr)
755 s->is_jmp = DISAS_UPDATE;
/* Only emit the store to the thumb field when the state really changes. */
756 if (s->thumb != (addr & 1)) {
757 tmp = tcg_temp_new_i32();
758 tcg_gen_movi_i32(tmp, addr & 1);
759 tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUARMState, thumb));
760 tcg_temp_free_i32(tmp);
762 tcg_gen_movi_i32(cpu_R[15], addr & ~1);
765 /* Set PC and Thumb state from var. var is marked as dead.
   Bit 0 of var goes to the thumb field, the remaining bits to r15.
   Marks the translation state as DISAS_UPDATE. */
766 static inline void gen_bx(DisasContext *s, TCGv var)
768 s->is_jmp = DISAS_UPDATE;
769 tcg_gen_andi_i32(cpu_R[15], var, ~1);
/* var is reduced to its bit 0 and consumed by store_cpu_field(). */
770 tcg_gen_andi_i32(var, var, 1);
771 store_cpu_field(var, thumb);
774 /* Variant of store_reg which uses branch&exchange logic when storing
775 to r15 in ARM architecture v7 and above. The source must be a temporary
776 and will be marked as dead.
   env is referenced only through the ENABLE_ARCH_7 macro. The statement
   executed when the test succeeds is not visible in this excerpt
   (presumably a gen_bx() call -- confirm). */
777 static inline void store_reg_bx(CPUARMState *env, DisasContext *s,
780 if (reg == 15 && ENABLE_ARCH_7) {
783 store_reg(s, reg, var);
787 /* Variant of store_reg which uses branch&exchange logic when storing
788 * to r15 in ARM architecture v5T and above. This is used for storing
789 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
790 * in the ARM ARM which use the LoadWritePC() pseudocode function.
 * env is referenced only through the ENABLE_ARCH_5 macro. The statement
 * executed when the test succeeds is not visible in this excerpt
 * (presumably a gen_bx() call -- confirm). */
791 static inline void store_reg_from_load(CPUARMState *env, DisasContext *s,
794 if (reg == 15 && ENABLE_ARCH_5) {
797 store_reg(s, reg, var);
/* Guest memory load helpers. Each one allocates a new temporary and emits
   a qemu load of the given width from addr; 'index' is passed straight
   through to the TCG op. addr is not freed. */
/* 8-bit load, sign-extended. */
801 static inline TCGv gen_ld8s(TCGv addr, int index)
803 TCGv tmp = tcg_temp_new_i32();
804 tcg_gen_qemu_ld8s(tmp, addr, index);
/* 8-bit load, zero-extended. */
807 static inline TCGv gen_ld8u(TCGv addr, int index)
809 TCGv tmp = tcg_temp_new_i32();
810 tcg_gen_qemu_ld8u(tmp, addr, index);
/* 16-bit load, sign-extended. */
813 static inline TCGv gen_ld16s(TCGv addr, int index)
815 TCGv tmp = tcg_temp_new_i32();
816 tcg_gen_qemu_ld16s(tmp, addr, index);
/* 16-bit load, zero-extended. */
819 static inline TCGv gen_ld16u(TCGv addr, int index)
821 TCGv tmp = tcg_temp_new_i32();
822 tcg_gen_qemu_ld16u(tmp, addr, index);
/* 32-bit load. */
825 static inline TCGv gen_ld32(TCGv addr, int index)
827 TCGv tmp = tcg_temp_new_i32();
828 tcg_gen_qemu_ld32u(tmp, addr, index);
/* 64-bit load into a new 64-bit temporary. */
831 static inline TCGv_i64 gen_ld64(TCGv addr, int index)
833 TCGv_i64 tmp = tcg_temp_new_i64();
834 tcg_gen_qemu_ld64(tmp, addr, index);
/* Guest memory store helpers. Each one emits a qemu store of the given
   width of val to addr and then frees val, so the caller must not use val
   afterwards. addr is not freed; 'index' is passed straight through. */
837 static inline void gen_st8(TCGv val, TCGv addr, int index)
839 tcg_gen_qemu_st8(val, addr, index);
840 tcg_temp_free_i32(val);
842 static inline void gen_st16(TCGv val, TCGv addr, int index)
844 tcg_gen_qemu_st16(val, addr, index);
845 tcg_temp_free_i32(val);
847 static inline void gen_st32(TCGv val, TCGv addr, int index)
849 tcg_gen_qemu_st32(val, addr, index);
850 tcg_temp_free_i32(val);
/* 64-bit variant: val is a 64-bit temporary. */
852 static inline void gen_st64(TCGv_i64 val, TCGv addr, int index)
854 tcg_gen_qemu_st64(val, addr, index);
855 tcg_temp_free_i64(val);
/* Load r15 with the immediate val. Nothing else is changed: neither the
   Thumb state nor any DisasContext field. */
858 static inline void gen_set_pc_im(uint32_t val)
860 tcg_gen_movi_i32(cpu_R[15], val);
863 /* Force a TB lookup after an instruction that changes the CPU state.
   r15 is set to s->pc with bit 0 cleared and the translation state is
   marked as DISAS_UPDATE. */
864 static inline void gen_lookup_tb(DisasContext *s)
866 tcg_gen_movi_i32(cpu_R[15], s->pc & ~1);
867 s->is_jmp = DISAS_UPDATE;
/* Apply the addressing offset encoded in insn to var, in place.
   Bit 25 clear selects an immediate offset, otherwise a register offset
   shifted by an immediate. Bit 23 clear means the offset is subtracted.
   The extraction of the immediate and of rm is not visible in this
   excerpt. */
870 static inline void gen_add_data_offset(DisasContext *s, unsigned int insn,
873 int val, rm, shift, shiftop;
876 if (!(insn & (1 << 25))) {
/* Immediate form. */
879 if (!(insn & (1 << 23)))
882 tcg_gen_addi_i32(var, var, val);
/* Shifted-register form: amount in bits 11..7, shift type in bits 6..5.
   The shift is applied with flags == 0. */
886 shift = (insn >> 7) & 0x1f;
887 shiftop = (insn >> 5) & 3;
888 offset = load_reg(s, rm);
889 gen_arm_shift_im(offset, shiftop, shift, 0);
890 if (!(insn & (1 << 23)))
891 tcg_gen_sub_i32(var, var, offset);
893 tcg_gen_add_i32(var, var, offset);
894 tcg_temp_free_i32(offset);
/* Apply the addressing offset encoded in insn to var, in place.
   Bit 22 set selects an 8-bit immediate split across bits 11..8 and 3..0;
   otherwise the offset comes from a register. Bit 23 clear means the
   offset is subtracted. An additional constant 'extra' is added on one
   path; its declaration and guard are not visible in this excerpt. */
898 static inline void gen_add_datah_offset(DisasContext *s, unsigned int insn,
904 if (insn & (1 << 22)) {
/* Reassemble the immediate: low nibble from bits 3..0, high nibble from
   bits 11..8. */
906 val = (insn & 0xf) | ((insn >> 4) & 0xf0);
907 if (!(insn & (1 << 23)))
911 tcg_gen_addi_i32(var, var, val);
915 tcg_gen_addi_i32(var, var, extra);
/* Register form. */
917 offset = load_reg(s, rm);
918 if (!(insn & (1 << 23)))
919 tcg_gen_sub_i32(var, var, offset);
921 tcg_gen_add_i32(var, var, offset);
922 tcg_temp_free_i32(offset);
/* Build a new pointer temporary addressing one of the two float status
   fields of CPUARMState: vfp.standard_fp_status or vfp.fp_status. The test
   choosing between them is not visible in this excerpt (presumably the
   standard status is used when neon is nonzero -- confirm). The callers
   visible in this file free the pointer with tcg_temp_free_ptr(). */
926 static TCGv_ptr get_fpstatus_ptr(int neon)
928 TCGv_ptr statusptr = tcg_temp_new_ptr();
931 offset = offsetof(CPUARMState, vfp.standard_fp_status);
933 offset = offsetof(CPUARMState, vfp.fp_status);
935 tcg_gen_addi_ptr(statusptr, cpu_env, offset);
/* Define gen_vfp_<name>(dp): F0 = F0 <op> F1 via the vfp_<name>d or
   vfp_<name>s helper, using the status pointer for neon == 0. The test
   choosing the double or single form is not visible in this excerpt
   (presumably dp). */
939 #define VFP_OP2(name) \
940 static inline void gen_vfp_##name(int dp) \
942 TCGv_ptr fpst = get_fpstatus_ptr(0); \
944 gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, cpu_F1d, fpst); \
946 gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, cpu_F1s, fpst); \
948 tcg_temp_free_ptr(fpst); \
/* F1 = F0 * F1, using the status pointer for neon == 0. The double or
   single form is presumably selected by dp (test not visible here). */
958 static inline void gen_vfp_F1_mul(int dp)
960 /* Like gen_vfp_mul() but put result in F1 */
961 TCGv_ptr fpst = get_fpstatus_ptr(0);
963 gen_helper_vfp_muld(cpu_F1d, cpu_F0d, cpu_F1d, fpst);
965 gen_helper_vfp_muls(cpu_F1s, cpu_F0s, cpu_F1s, fpst);
967 tcg_temp_free_ptr(fpst);
/* Unary VFP operations that take no status pointer. In each function the
   double or single form is presumably selected by dp (the test is not
   visible in this excerpt). */
/* F1 = -F0 */
970 static inline void gen_vfp_F1_neg(int dp)
972 /* Like gen_vfp_neg() but put result in F1 */
974 gen_helper_vfp_negd(cpu_F1d, cpu_F0d);
976 gen_helper_vfp_negs(cpu_F1s, cpu_F0s);
/* F0 = abs(F0) */
980 static inline void gen_vfp_abs(int dp)
983 gen_helper_vfp_absd(cpu_F0d, cpu_F0d);
985 gen_helper_vfp_abss(cpu_F0s, cpu_F0s);
/* F0 = -F0 */
988 static inline void gen_vfp_neg(int dp)
991 gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
993 gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
/* VFP operations whose helpers are given cpu_env rather than a status
   pointer. In each function the double or single form is presumably
   selected by dp (the test is not visible in this excerpt). */
/* F0 = sqrt(F0) */
996 static inline void gen_vfp_sqrt(int dp)
999 gen_helper_vfp_sqrtd(cpu_F0d, cpu_F0d, cpu_env);
1001 gen_helper_vfp_sqrts(cpu_F0s, cpu_F0s, cpu_env);
/* Compare F0 with F1; the helper produces no TCG result value. */
1004 static inline void gen_vfp_cmp(int dp)
1007 gen_helper_vfp_cmpd(cpu_F0d, cpu_F1d, cpu_env);
1009 gen_helper_vfp_cmps(cpu_F0s, cpu_F1s, cpu_env);
/* As gen_vfp_cmp() but using the cmpe helpers. */
1012 static inline void gen_vfp_cmpe(int dp)
1015 gen_helper_vfp_cmped(cpu_F0d, cpu_F1d, cpu_env);
1017 gen_helper_vfp_cmpes(cpu_F0s, cpu_F1s, cpu_env);
/* Load F1 with an all-zero bit pattern, as either a 64-bit or a 32-bit
   value (presumably selected by dp; the test is not visible here). */
1020 static inline void gen_vfp_F1_ld0(int dp)
1023 tcg_gen_movi_i64(cpu_F1d, 0);
1025 tcg_gen_movi_i32(cpu_F1s, 0);
/* Generator macros for VFP conversion wrappers. Each defines
   gen_vfp_<name>() which obtains a status pointer for 'neon', calls the
   d or s helper (presumably chosen by dp; the test is not visible in this
   excerpt) and frees the pointer.
   VFP_GEN_ITOF: the source is always the 32-bit cpu_F0s; the result goes
   to cpu_F0d or cpu_F0s. */
1028 #define VFP_GEN_ITOF(name) \
1029 static inline void gen_vfp_##name(int dp, int neon) \
1031 TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
1033 gen_helper_vfp_##name##d(cpu_F0d, cpu_F0s, statusptr); \
1035 gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
1037 tcg_temp_free_ptr(statusptr); \
/* VFP_GEN_FTOI: the result is always the 32-bit cpu_F0s; the source is
   cpu_F0d or cpu_F0s. */
1044 #define VFP_GEN_FTOI(name) \
1045 static inline void gen_vfp_##name(int dp, int neon) \
1047 TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
1049 gen_helper_vfp_##name##d(cpu_F0s, cpu_F0d, statusptr); \
1051 gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
1053 tcg_temp_free_ptr(statusptr); \
/* VFP_GEN_FIX: in-place conversion that also passes 'shift' to the helper
   as a constant temporary, freed afterwards. */
1062 #define VFP_GEN_FIX(name) \
1063 static inline void gen_vfp_##name(int dp, int shift, int neon) \
1065 TCGv tmp_shift = tcg_const_i32(shift); \
1066 TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
1068 gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, tmp_shift, statusptr); \
1070 gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, tmp_shift, statusptr); \
1072 tcg_temp_free_i32(tmp_shift); \
1073 tcg_temp_free_ptr(statusptr); \
/* Load F0 from guest memory at addr: a 64-bit load into cpu_F0d or a
   32-bit load into cpu_F0s (presumably selected by dp; the test is not
   visible here). IS_USER(s) is passed as the memory index. */
1085 static inline void gen_vfp_ld(DisasContext *s, int dp, TCGv addr)
1088 tcg_gen_qemu_ld64(cpu_F0d, addr, IS_USER(s));
1090 tcg_gen_qemu_ld32u(cpu_F0s, addr, IS_USER(s));
/* Store F0 to guest memory at addr; the counterpart of gen_vfp_ld(). */
1093 static inline void gen_vfp_st(DisasContext *s, int dp, TCGv addr)
1096 tcg_gen_qemu_st64(cpu_F0d, addr, IS_USER(s));
1098 tcg_gen_qemu_st32(cpu_F0s, addr, IS_USER(s));
/* Return the byte offset within CPUARMState of VFP register 'reg'.
   One form addresses vfp.regs[reg] as a whole. The other two address one
   32-bit word (l.upper or l.lower of CPU_DoubleU) of vfp.regs[reg >> 1],
   i.e. two single-precision registers share one array element. The tests
   choosing between the three returns are not visible in this excerpt
   (presumably dp, then reg & 1 -- confirm). */
1102 vfp_reg_offset (int dp, int reg)
1105 return offsetof(CPUARMState, vfp.regs[reg]);
1107 return offsetof(CPUARMState, vfp.regs[reg >> 1])
1108 + offsetof(CPU_DoubleU, l.upper);
1110 return offsetof(CPUARMState, vfp.regs[reg >> 1])
1111 + offsetof(CPU_DoubleU, l.lower);
1115 /* Return the offset of a 32-bit piece of a NEON register.
1116 zero is the least significant end of the register.
   The result is obtained from vfp_reg_offset() in its single-precision
   form (dp == 0); the computation of sreg from reg and n is not visible
   in this excerpt. */
1118 neon_reg_offset (int reg, int n)
1122 return vfp_reg_offset(0, sreg);
/* Load the 32-bit piece 'pass' of NEON register 'reg' into a newly
   allocated temporary. */
1125 static TCGv neon_load_reg(int reg, int pass)
1127 TCGv tmp = tcg_temp_new_i32();
1128 tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
/* Store var into the 32-bit piece 'pass' of NEON register 'reg', then
   free var. */
1132 static void neon_store_reg(int reg, int pass, TCGv var)
1134 tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1135 tcg_temp_free_i32(var);
/* 64-bit load of register 'reg' (addressed with dp == 1) into var. */
1138 static inline void neon_load_reg64(TCGv_i64 var, int reg)
1140 tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
/* 64-bit store of var to register 'reg'; var is not freed. */
1143 static inline void neon_store_reg64(TCGv_i64 var, int reg)
1145 tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
/* Float-named aliases that map directly onto the integer load/store ops
   of the same width. */
1148 #define tcg_gen_ld_f32 tcg_gen_ld_i32
1149 #define tcg_gen_ld_f64 tcg_gen_ld_i64
1150 #define tcg_gen_st_f32 tcg_gen_st_i32
1151 #define tcg_gen_st_f64 tcg_gen_st_i64
/* Moves between the F0/F1 working values and VFP register 'reg', located
   with vfp_reg_offset(dp, reg). In each function the 64-bit or 32-bit form
   is presumably selected by dp (the test is not visible in this excerpt). */
/* F0 = vreg */
1153 static inline void gen_mov_F0_vreg(int dp, int reg)
1156 tcg_gen_ld_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
1158 tcg_gen_ld_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
/* F1 = vreg */
1161 static inline void gen_mov_F1_vreg(int dp, int reg)
1164 tcg_gen_ld_f64(cpu_F1d, cpu_env, vfp_reg_offset(dp, reg));
1166 tcg_gen_ld_f32(cpu_F1s, cpu_env, vfp_reg_offset(dp, reg));
/* vreg = F0 */
1169 static inline void gen_mov_vreg_F0(int dp, int reg)
1172 tcg_gen_st_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
1174 tcg_gen_st_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
/* Bit 20 of a coprocessor instruction. In disas_iwmmxt_insn() a set bit
   selects the TMRRC and load forms, a clear bit the TMCRR and store
   forms. */
1177 #define ARM_CP_RW_BIT (1 << 20)
/* Accessors for the iwMMXt register file held in CPUARMState. */
/* var = iwmmxt.regs[reg] (64-bit). */
1179 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1181 tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
/* iwmmxt.regs[reg] = var (64-bit); var is not freed. */
1184 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1186 tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
/* Load control register iwmmxt.cregs[reg] into a new 32-bit temporary. */
1189 static inline TCGv iwmmxt_load_creg(int reg)
1191 TCGv var = tcg_temp_new_i32();
1192 tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
/* iwmmxt.cregs[reg] = var; var is freed afterwards. */
1196 static inline void iwmmxt_store_creg(int reg, TCGv var)
1198 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1199 tcg_temp_free_i32(var);
/* Moves and bitwise operations between the cpu_M0 accumulator and iwMMXt
   data register rn. The logic ops load the register into cpu_V1 first, so
   cpu_V1 is clobbered. */
/* wRn = M0 */
1202 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1204 iwmmxt_store_reg(cpu_M0, rn);
/* M0 = wRn */
1207 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1209 iwmmxt_load_reg(cpu_M0, rn);
/* M0 |= wRn */
1212 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1214 iwmmxt_load_reg(cpu_V1, rn);
1215 tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
/* M0 &= wRn */
1218 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1220 iwmmxt_load_reg(cpu_V1, rn);
1221 tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
/* M0 ^= wRn */
1224 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1226 iwmmxt_load_reg(cpu_V1, rn);
1227 tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
/* Generator macros for iwMMXt helper wrappers.
   IWMMXT_OP: define gen_op_iwmmxt_<name>_M0_wRn(rn), which loads wRn into
   cpu_V1 and computes M0 = helper(M0, V1). */
1230 #define IWMMXT_OP(name) \
1231 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1233 iwmmxt_load_reg(cpu_V1, rn); \
1234 gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
/* IWMMXT_OP_ENV: as IWMMXT_OP, but the helper also receives cpu_env. */
1237 #define IWMMXT_OP_ENV(name) \
1238 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1240 iwmmxt_load_reg(cpu_V1, rn); \
1241 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
/* IWMMXT_OP_ENV_SIZE: instantiate IWMMXT_OP_ENV for the b, w and l
   suffixed variants of name. */
1244 #define IWMMXT_OP_ENV_SIZE(name) \
1245 IWMMXT_OP_ENV(name##b) \
1246 IWMMXT_OP_ENV(name##w) \
1247 IWMMXT_OP_ENV(name##l)
/* IWMMXT_OP_ENV1: define gen_op_iwmmxt_<name>_M0(), a single-operand form
   computing M0 = helper(env, M0). */
1249 #define IWMMXT_OP_ENV1(name) \
1250 static inline void gen_op_iwmmxt_##name##_M0(void) \
1252 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
/* Instantiate the gen_op_iwmmxt_* wrappers. */
/* Two-operand unpack, in b/w/l sizes. */
1266 IWMMXT_OP_ENV_SIZE(unpackl)
1267 IWMMXT_OP_ENV_SIZE(unpackh)
/* Single-operand unpack forms (operate on M0 only). */
1269 IWMMXT_OP_ENV1(unpacklub)
1270 IWMMXT_OP_ENV1(unpackluw)
1271 IWMMXT_OP_ENV1(unpacklul)
1272 IWMMXT_OP_ENV1(unpackhub)
1273 IWMMXT_OP_ENV1(unpackhuw)
1274 IWMMXT_OP_ENV1(unpackhul)
1275 IWMMXT_OP_ENV1(unpacklsb)
1276 IWMMXT_OP_ENV1(unpacklsw)
1277 IWMMXT_OP_ENV1(unpacklsl)
1278 IWMMXT_OP_ENV1(unpackhsb)
1279 IWMMXT_OP_ENV1(unpackhsw)
1280 IWMMXT_OP_ENV1(unpackhsl)
/* Compares, in b/w/l sizes. */
1282 IWMMXT_OP_ENV_SIZE(cmpeq)
1283 IWMMXT_OP_ENV_SIZE(cmpgtu)
1284 IWMMXT_OP_ENV_SIZE(cmpgts)
/* Minimum / maximum, in b/w/l sizes. */
1286 IWMMXT_OP_ENV_SIZE(mins)
1287 IWMMXT_OP_ENV_SIZE(minu)
1288 IWMMXT_OP_ENV_SIZE(maxs)
1289 IWMMXT_OP_ENV_SIZE(maxu)
/* Add / subtract families, in b/w/l sizes. */
1291 IWMMXT_OP_ENV_SIZE(subn)
1292 IWMMXT_OP_ENV_SIZE(addn)
1293 IWMMXT_OP_ENV_SIZE(subu)
1294 IWMMXT_OP_ENV_SIZE(addu)
1295 IWMMXT_OP_ENV_SIZE(subs)
1296 IWMMXT_OP_ENV_SIZE(adds)
/* Averages. */
1298 IWMMXT_OP_ENV(avgb0)
1299 IWMMXT_OP_ENV(avgb1)
1300 IWMMXT_OP_ENV(avgw0)
1301 IWMMXT_OP_ENV(avgw1)
/* Packs. */
1305 IWMMXT_OP_ENV(packuw)
1306 IWMMXT_OP_ENV(packul)
1307 IWMMXT_OP_ENV(packuq)
1308 IWMMXT_OP_ENV(packsw)
1309 IWMMXT_OP_ENV(packsl)
1310 IWMMXT_OP_ENV(packsq)
/* Set bit 1 (value 2) in the wCon control register with a
   read-modify-write sequence. */
1312 static void gen_op_iwmmxt_set_mup(void)
1315 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1316 tcg_gen_ori_i32(tmp, tmp, 2);
1317 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
/* Set bit 0 (value 1) in the wCon control register with a
   read-modify-write sequence. */
1320 static void gen_op_iwmmxt_set_cup(void)
1323 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1324 tcg_gen_ori_i32(tmp, tmp, 1);
1325 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
/* Overwrite the wCASF control register with the value the setpsr_nz
   helper computes from cpu_M0. The temporary is consumed by
   store_cpu_field(). */
1328 static void gen_op_iwmmxt_setpsr_nz(void)
1330 TCGv tmp = tcg_temp_new_i32();
1331 gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1332 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
/* M0 += zero-extended low 32 bits of wRn. cpu_V1 is clobbered. */
1335 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1337 iwmmxt_load_reg(cpu_V1, rn);
1338 tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1339 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
/* Compute the effective address of an iwMMXt load/store into dest and
   perform any base-register writeback. The base register is taken from
   bits 19..16. Bit 24 selects the pre-indexed form, bit 21 requests
   writeback, and bit 23 selects add (set) or subtract (clear). The return
   statements are not visible in this excerpt; the caller in this file
   treats a nonzero return as failure. */
1342 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn, TCGv dest)
1348 rd = (insn >> 16) & 0xf;
1349 tmp = load_reg(s, rd);
/* 8-bit immediate; (insn >> 7) & 2 yields a shift of 2 when bit 8 is set
   and 0 otherwise, i.e. the offset is scaled by 4 when bit 8 is set. */
1351 offset = (insn & 0xff) << ((insn >> 7) & 2);
1352 if (insn & (1 << 24)) {
/* Pre-indexed: dest = base +/- offset; the base is updated only when
   bit 21 is set, otherwise the temporary is simply released. */
1354 if (insn & (1 << 23))
1355 tcg_gen_addi_i32(tmp, tmp, offset);
1357 tcg_gen_addi_i32(tmp, tmp, -offset);
1358 tcg_gen_mov_i32(dest, tmp);
1359 if (insn & (1 << 21))
1360 store_reg(s, rd, tmp);
1362 tcg_temp_free_i32(tmp);
1363 } else if (insn & (1 << 21)) {
/* Post-indexed: dest = base, then base +/- offset is written back. */
1365 tcg_gen_mov_i32(dest, tmp);
1366 if (insn & (1 << 23))
1367 tcg_gen_addi_i32(tmp, tmp, offset);
1369 tcg_gen_addi_i32(tmp, tmp, -offset);
1370 store_reg(s, rd, tmp);
/* Neither bit 24 nor bit 21 set, and bit 23 clear: the body of this branch
   is not visible in this excerpt. */
1371 } else if (!(insn & (1 << 23)))
/* Fetch the shift amount of an iwMMXt shift instruction into dest, ANDed
   with mask. The source register number is bits 3..0 of insn. When bit 8
   is set it names a control register and must lie in wCGR0..wCGR3;
   otherwise the amount is the low 32 bits of a data register (cpu_V0 is
   clobbered). The statement executed when the range check fails and the
   return statements are not visible in this excerpt. */
1376 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv dest)
1378 int rd = (insn >> 0) & 0xf;
1381 if (insn & (1 << 8)) {
1382 if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1385 tmp = iwmmxt_load_creg(rd);
1388 tmp = tcg_temp_new_i32();
1389 iwmmxt_load_reg(cpu_V0, rd);
1390 tcg_gen_trunc_i64_i32(tmp, cpu_V0);
1392 tcg_gen_andi_i32(tmp, tmp, mask);
1393 tcg_gen_mov_i32(dest, tmp);
1394 tcg_temp_free_i32(tmp);
1398 /* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred
1399 (ie. an undefined instruction). */
1400 static int disas_iwmmxt_insn(CPUARMState *env, DisasContext *s, uint32_t insn)
1403 int rdhi, rdlo, rd0, rd1, i;
1405 TCGv tmp, tmp2, tmp3;
1407 if ((insn & 0x0e000e00) == 0x0c000000) {
1408 if ((insn & 0x0fe00ff0) == 0x0c400000) {
1410 rdlo = (insn >> 12) & 0xf;
1411 rdhi = (insn >> 16) & 0xf;
1412 if (insn & ARM_CP_RW_BIT) { /* TMRRC */
1413 iwmmxt_load_reg(cpu_V0, wrd);
1414 tcg_gen_trunc_i64_i32(cpu_R[rdlo], cpu_V0);
1415 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
1416 tcg_gen_trunc_i64_i32(cpu_R[rdhi], cpu_V0);
1417 } else { /* TMCRR */
1418 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1419 iwmmxt_store_reg(cpu_V0, wrd);
1420 gen_op_iwmmxt_set_mup();
1425 wrd = (insn >> 12) & 0xf;
1426 addr = tcg_temp_new_i32();
1427 if (gen_iwmmxt_address(s, insn, addr)) {
1428 tcg_temp_free_i32(addr);
1431 if (insn & ARM_CP_RW_BIT) {
1432 if ((insn >> 28) == 0xf) { /* WLDRW wCx */
1433 tmp = tcg_temp_new_i32();
1434 tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s));
1435 iwmmxt_store_creg(wrd, tmp);
1438 if (insn & (1 << 8)) {
1439 if (insn & (1 << 22)) { /* WLDRD */
1440 tcg_gen_qemu_ld64(cpu_M0, addr, IS_USER(s));
1442 } else { /* WLDRW wRd */
1443 tmp = gen_ld32(addr, IS_USER(s));
1446 if (insn & (1 << 22)) { /* WLDRH */
1447 tmp = gen_ld16u(addr, IS_USER(s));
1448 } else { /* WLDRB */
1449 tmp = gen_ld8u(addr, IS_USER(s));
1453 tcg_gen_extu_i32_i64(cpu_M0, tmp);
1454 tcg_temp_free_i32(tmp);
1456 gen_op_iwmmxt_movq_wRn_M0(wrd);
1459 if ((insn >> 28) == 0xf) { /* WSTRW wCx */
1460 tmp = iwmmxt_load_creg(wrd);
1461 gen_st32(tmp, addr, IS_USER(s));
1463 gen_op_iwmmxt_movq_M0_wRn(wrd);
1464 tmp = tcg_temp_new_i32();
1465 if (insn & (1 << 8)) {
1466 if (insn & (1 << 22)) { /* WSTRD */
1467 tcg_temp_free_i32(tmp);
1468 tcg_gen_qemu_st64(cpu_M0, addr, IS_USER(s));
1469 } else { /* WSTRW wRd */
1470 tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1471 gen_st32(tmp, addr, IS_USER(s));
1474 if (insn & (1 << 22)) { /* WSTRH */
1475 tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1476 gen_st16(tmp, addr, IS_USER(s));
1477 } else { /* WSTRB */
1478 tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1479 gen_st8(tmp, addr, IS_USER(s));
1484 tcg_temp_free_i32(addr);
1488 if ((insn & 0x0f000000) != 0x0e000000)
1491 switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1492 case 0x000: /* WOR */
1493 wrd = (insn >> 12) & 0xf;
1494 rd0 = (insn >> 0) & 0xf;
1495 rd1 = (insn >> 16) & 0xf;
1496 gen_op_iwmmxt_movq_M0_wRn(rd0);
1497 gen_op_iwmmxt_orq_M0_wRn(rd1);
1498 gen_op_iwmmxt_setpsr_nz();
1499 gen_op_iwmmxt_movq_wRn_M0(wrd);
1500 gen_op_iwmmxt_set_mup();
1501 gen_op_iwmmxt_set_cup();
1503 case 0x011: /* TMCR */
1506 rd = (insn >> 12) & 0xf;
1507 wrd = (insn >> 16) & 0xf;
1509 case ARM_IWMMXT_wCID:
1510 case ARM_IWMMXT_wCASF:
1512 case ARM_IWMMXT_wCon:
1513 gen_op_iwmmxt_set_cup();
1515 case ARM_IWMMXT_wCSSF:
1516 tmp = iwmmxt_load_creg(wrd);
1517 tmp2 = load_reg(s, rd);
1518 tcg_gen_andc_i32(tmp, tmp, tmp2);
1519 tcg_temp_free_i32(tmp2);
1520 iwmmxt_store_creg(wrd, tmp);
1522 case ARM_IWMMXT_wCGR0:
1523 case ARM_IWMMXT_wCGR1:
1524 case ARM_IWMMXT_wCGR2:
1525 case ARM_IWMMXT_wCGR3:
1526 gen_op_iwmmxt_set_cup();
1527 tmp = load_reg(s, rd);
1528 iwmmxt_store_creg(wrd, tmp);
1534 case 0x100: /* WXOR */
1535 wrd = (insn >> 12) & 0xf;
1536 rd0 = (insn >> 0) & 0xf;
1537 rd1 = (insn >> 16) & 0xf;
1538 gen_op_iwmmxt_movq_M0_wRn(rd0);
1539 gen_op_iwmmxt_xorq_M0_wRn(rd1);
1540 gen_op_iwmmxt_setpsr_nz();
1541 gen_op_iwmmxt_movq_wRn_M0(wrd);
1542 gen_op_iwmmxt_set_mup();
1543 gen_op_iwmmxt_set_cup();
1545 case 0x111: /* TMRC */
1548 rd = (insn >> 12) & 0xf;
1549 wrd = (insn >> 16) & 0xf;
1550 tmp = iwmmxt_load_creg(wrd);
1551 store_reg(s, rd, tmp);
1553 case 0x300: /* WANDN */
/* WANDN stores (first source AND NOT second source) in wrd.  The operand
 * taken from insn[3:0] (rd0) is the one to be complemented and the operand
 * from insn[19:16] (rd1) is ANDed with the result.  The flags and the
 * MUP/CUP status bits are updated exactly as for WAND.
 */
1554 wrd = (insn >> 12) & 0xf;
1555 rd0 = (insn >> 0) & 0xf;
1556 rd1 = (insn >> 16) & 0xf;
1557 gen_op_iwmmxt_movq_M0_wRn(rd0);
/* The complement must be bitwise.  Arithmetic negation yields ~x + 1,
 * which gives the wrong AND result for every non-zero operand.
 */
1558 tcg_gen_not_i64(cpu_M0, cpu_M0);
1559 gen_op_iwmmxt_andq_M0_wRn(rd1);
1560 gen_op_iwmmxt_setpsr_nz();
1561 gen_op_iwmmxt_movq_wRn_M0(wrd);
1562 gen_op_iwmmxt_set_mup();
1563 gen_op_iwmmxt_set_cup();
1565 case 0x200: /* WAND */
1566 wrd = (insn >> 12) & 0xf;
1567 rd0 = (insn >> 0) & 0xf;
1568 rd1 = (insn >> 16) & 0xf;
1569 gen_op_iwmmxt_movq_M0_wRn(rd0);
1570 gen_op_iwmmxt_andq_M0_wRn(rd1);
1571 gen_op_iwmmxt_setpsr_nz();
1572 gen_op_iwmmxt_movq_wRn_M0(wrd);
1573 gen_op_iwmmxt_set_mup();
1574 gen_op_iwmmxt_set_cup();
1576 case 0x810: case 0xa10: /* WMADD */
1577 wrd = (insn >> 12) & 0xf;
1578 rd0 = (insn >> 0) & 0xf;
1579 rd1 = (insn >> 16) & 0xf;
1580 gen_op_iwmmxt_movq_M0_wRn(rd0);
1581 if (insn & (1 << 21))
1582 gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1584 gen_op_iwmmxt_madduq_M0_wRn(rd1);
1585 gen_op_iwmmxt_movq_wRn_M0(wrd);
1586 gen_op_iwmmxt_set_mup();
1588 case 0x10e: case 0x50e: case 0x90e: case 0xd0e: /* WUNPCKIL */
1589 wrd = (insn >> 12) & 0xf;
1590 rd0 = (insn >> 16) & 0xf;
1591 rd1 = (insn >> 0) & 0xf;
1592 gen_op_iwmmxt_movq_M0_wRn(rd0);
1593 switch ((insn >> 22) & 3) {
1595 gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1598 gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1601 gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1606 gen_op_iwmmxt_movq_wRn_M0(wrd);
1607 gen_op_iwmmxt_set_mup();
1608 gen_op_iwmmxt_set_cup();
1610 case 0x10c: case 0x50c: case 0x90c: case 0xd0c: /* WUNPCKIH */
1611 wrd = (insn >> 12) & 0xf;
1612 rd0 = (insn >> 16) & 0xf;
1613 rd1 = (insn >> 0) & 0xf;
1614 gen_op_iwmmxt_movq_M0_wRn(rd0);
1615 switch ((insn >> 22) & 3) {
1617 gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1620 gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1623 gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1628 gen_op_iwmmxt_movq_wRn_M0(wrd);
1629 gen_op_iwmmxt_set_mup();
1630 gen_op_iwmmxt_set_cup();
1632 case 0x012: case 0x112: case 0x412: case 0x512: /* WSAD */
1633 wrd = (insn >> 12) & 0xf;
1634 rd0 = (insn >> 16) & 0xf;
1635 rd1 = (insn >> 0) & 0xf;
1636 gen_op_iwmmxt_movq_M0_wRn(rd0);
1637 if (insn & (1 << 22))
1638 gen_op_iwmmxt_sadw_M0_wRn(rd1);
1640 gen_op_iwmmxt_sadb_M0_wRn(rd1);
1641 if (!(insn & (1 << 20)))
1642 gen_op_iwmmxt_addl_M0_wRn(wrd);
1643 gen_op_iwmmxt_movq_wRn_M0(wrd);
1644 gen_op_iwmmxt_set_mup();
1646 case 0x010: case 0x110: case 0x210: case 0x310: /* WMUL */
1647 wrd = (insn >> 12) & 0xf;
1648 rd0 = (insn >> 16) & 0xf;
1649 rd1 = (insn >> 0) & 0xf;
1650 gen_op_iwmmxt_movq_M0_wRn(rd0);
1651 if (insn & (1 << 21)) {
1652 if (insn & (1 << 20))
1653 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1655 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1657 if (insn & (1 << 20))
1658 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1660 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1662 gen_op_iwmmxt_movq_wRn_M0(wrd);
1663 gen_op_iwmmxt_set_mup();
1665 case 0x410: case 0x510: case 0x610: case 0x710: /* WMAC */
1666 wrd = (insn >> 12) & 0xf;
1667 rd0 = (insn >> 16) & 0xf;
1668 rd1 = (insn >> 0) & 0xf;
1669 gen_op_iwmmxt_movq_M0_wRn(rd0);
1670 if (insn & (1 << 21))
1671 gen_op_iwmmxt_macsw_M0_wRn(rd1);
1673 gen_op_iwmmxt_macuw_M0_wRn(rd1);
1674 if (!(insn & (1 << 20))) {
1675 iwmmxt_load_reg(cpu_V1, wrd);
1676 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1678 gen_op_iwmmxt_movq_wRn_M0(wrd);
1679 gen_op_iwmmxt_set_mup();
1681 case 0x006: case 0x406: case 0x806: case 0xc06: /* WCMPEQ */
1682 wrd = (insn >> 12) & 0xf;
1683 rd0 = (insn >> 16) & 0xf;
1684 rd1 = (insn >> 0) & 0xf;
1685 gen_op_iwmmxt_movq_M0_wRn(rd0);
1686 switch ((insn >> 22) & 3) {
1688 gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1691 gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1694 gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1699 gen_op_iwmmxt_movq_wRn_M0(wrd);
1700 gen_op_iwmmxt_set_mup();
1701 gen_op_iwmmxt_set_cup();
1703 case 0x800: case 0x900: case 0xc00: case 0xd00: /* WAVG2 */
1704 wrd = (insn >> 12) & 0xf;
1705 rd0 = (insn >> 16) & 0xf;
1706 rd1 = (insn >> 0) & 0xf;
1707 gen_op_iwmmxt_movq_M0_wRn(rd0);
1708 if (insn & (1 << 22)) {
1709 if (insn & (1 << 20))
1710 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1712 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1714 if (insn & (1 << 20))
1715 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1717 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1719 gen_op_iwmmxt_movq_wRn_M0(wrd);
1720 gen_op_iwmmxt_set_mup();
1721 gen_op_iwmmxt_set_cup();
1723 case 0x802: case 0x902: case 0xa02: case 0xb02: /* WALIGNR */
1724 wrd = (insn >> 12) & 0xf;
1725 rd0 = (insn >> 16) & 0xf;
1726 rd1 = (insn >> 0) & 0xf;
1727 gen_op_iwmmxt_movq_M0_wRn(rd0);
1728 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1729 tcg_gen_andi_i32(tmp, tmp, 7);
1730 iwmmxt_load_reg(cpu_V1, rd1);
1731 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1732 tcg_temp_free_i32(tmp);
1733 gen_op_iwmmxt_movq_wRn_M0(wrd);
1734 gen_op_iwmmxt_set_mup();
1736 case 0x601: case 0x605: case 0x609: case 0x60d: /* TINSR */
1737 if (((insn >> 6) & 3) == 3)
1739 rd = (insn >> 12) & 0xf;
1740 wrd = (insn >> 16) & 0xf;
1741 tmp = load_reg(s, rd);
1742 gen_op_iwmmxt_movq_M0_wRn(wrd);
1743 switch ((insn >> 6) & 3) {
1745 tmp2 = tcg_const_i32(0xff);
1746 tmp3 = tcg_const_i32((insn & 7) << 3);
1749 tmp2 = tcg_const_i32(0xffff);
1750 tmp3 = tcg_const_i32((insn & 3) << 4);
1753 tmp2 = tcg_const_i32(0xffffffff);
1754 tmp3 = tcg_const_i32((insn & 1) << 5);
1760 gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1761 tcg_temp_free(tmp3);
1762 tcg_temp_free(tmp2);
1763 tcg_temp_free_i32(tmp);
1764 gen_op_iwmmxt_movq_wRn_M0(wrd);
1765 gen_op_iwmmxt_set_mup();
1767 case 0x107: case 0x507: case 0x907: case 0xd07: /* TEXTRM */
1768 rd = (insn >> 12) & 0xf;
1769 wrd = (insn >> 16) & 0xf;
1770 if (rd == 15 || ((insn >> 22) & 3) == 3)
1772 gen_op_iwmmxt_movq_M0_wRn(wrd);
1773 tmp = tcg_temp_new_i32();
1774 switch ((insn >> 22) & 3) {
1776 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1777 tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1779 tcg_gen_ext8s_i32(tmp, tmp);
1781 tcg_gen_andi_i32(tmp, tmp, 0xff);
1785 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1786 tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1788 tcg_gen_ext16s_i32(tmp, tmp);
1790 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1794 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1795 tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1798 store_reg(s, rd, tmp);
1800 case 0x117: case 0x517: case 0x917: case 0xd17: /* TEXTRC */
1801 if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1803 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1804 switch ((insn >> 22) & 3) {
1806 tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1809 tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1812 tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1815 tcg_gen_shli_i32(tmp, tmp, 28);
1817 tcg_temp_free_i32(tmp);
1819 case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */
1820 if (((insn >> 6) & 3) == 3)
1822 rd = (insn >> 12) & 0xf;
1823 wrd = (insn >> 16) & 0xf;
1824 tmp = load_reg(s, rd);
1825 switch ((insn >> 6) & 3) {
1827 gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1830 gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1833 gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1836 tcg_temp_free_i32(tmp);
1837 gen_op_iwmmxt_movq_wRn_M0(wrd);
1838 gen_op_iwmmxt_set_mup();
1840 case 0x113: case 0x513: case 0x913: case 0xd13: /* TANDC */
1841 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1843 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1844 tmp2 = tcg_temp_new_i32();
1845 tcg_gen_mov_i32(tmp2, tmp);
1846 switch ((insn >> 22) & 3) {
1848 for (i = 0; i < 7; i ++) {
1849 tcg_gen_shli_i32(tmp2, tmp2, 4);
1850 tcg_gen_and_i32(tmp, tmp, tmp2);
1854 for (i = 0; i < 3; i ++) {
1855 tcg_gen_shli_i32(tmp2, tmp2, 8);
1856 tcg_gen_and_i32(tmp, tmp, tmp2);
1860 tcg_gen_shli_i32(tmp2, tmp2, 16);
1861 tcg_gen_and_i32(tmp, tmp, tmp2);
1865 tcg_temp_free_i32(tmp2);
1866 tcg_temp_free_i32(tmp);
1868 case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */
1869 wrd = (insn >> 12) & 0xf;
1870 rd0 = (insn >> 16) & 0xf;
1871 gen_op_iwmmxt_movq_M0_wRn(rd0);
1872 switch ((insn >> 22) & 3) {
1874 gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
1877 gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
1880 gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
1885 gen_op_iwmmxt_movq_wRn_M0(wrd);
1886 gen_op_iwmmxt_set_mup();
1888 case 0x115: case 0x515: case 0x915: case 0xd15: /* TORC */
1889 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1891 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1892 tmp2 = tcg_temp_new_i32();
1893 tcg_gen_mov_i32(tmp2, tmp);
1894 switch ((insn >> 22) & 3) {
1896 for (i = 0; i < 7; i ++) {
1897 tcg_gen_shli_i32(tmp2, tmp2, 4);
1898 tcg_gen_or_i32(tmp, tmp, tmp2);
1902 for (i = 0; i < 3; i ++) {
1903 tcg_gen_shli_i32(tmp2, tmp2, 8);
1904 tcg_gen_or_i32(tmp, tmp, tmp2);
1908 tcg_gen_shli_i32(tmp2, tmp2, 16);
1909 tcg_gen_or_i32(tmp, tmp, tmp2);
1913 tcg_temp_free_i32(tmp2);
1914 tcg_temp_free_i32(tmp);
1916 case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */
1917 rd = (insn >> 12) & 0xf;
1918 rd0 = (insn >> 16) & 0xf;
1919 if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
1921 gen_op_iwmmxt_movq_M0_wRn(rd0);
1922 tmp = tcg_temp_new_i32();
1923 switch ((insn >> 22) & 3) {
1925 gen_helper_iwmmxt_msbb(tmp, cpu_M0);
1928 gen_helper_iwmmxt_msbw(tmp, cpu_M0);
1931 gen_helper_iwmmxt_msbl(tmp, cpu_M0);
1934 store_reg(s, rd, tmp);
1936 case 0x106: case 0x306: case 0x506: case 0x706: /* WCMPGT */
1937 case 0x906: case 0xb06: case 0xd06: case 0xf06:
1938 wrd = (insn >> 12) & 0xf;
1939 rd0 = (insn >> 16) & 0xf;
1940 rd1 = (insn >> 0) & 0xf;
1941 gen_op_iwmmxt_movq_M0_wRn(rd0);
1942 switch ((insn >> 22) & 3) {
1944 if (insn & (1 << 21))
1945 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
1947 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
1950 if (insn & (1 << 21))
1951 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
1953 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
1956 if (insn & (1 << 21))
1957 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
1959 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
1964 gen_op_iwmmxt_movq_wRn_M0(wrd);
1965 gen_op_iwmmxt_set_mup();
1966 gen_op_iwmmxt_set_cup();
1968 case 0x00e: case 0x20e: case 0x40e: case 0x60e: /* WUNPCKEL */
1969 case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
1970 wrd = (insn >> 12) & 0xf;
1971 rd0 = (insn >> 16) & 0xf;
1972 gen_op_iwmmxt_movq_M0_wRn(rd0);
1973 switch ((insn >> 22) & 3) {
1975 if (insn & (1 << 21))
1976 gen_op_iwmmxt_unpacklsb_M0();
1978 gen_op_iwmmxt_unpacklub_M0();
1981 if (insn & (1 << 21))
1982 gen_op_iwmmxt_unpacklsw_M0();
1984 gen_op_iwmmxt_unpackluw_M0();
1987 if (insn & (1 << 21))
1988 gen_op_iwmmxt_unpacklsl_M0();
1990 gen_op_iwmmxt_unpacklul_M0();
1995 gen_op_iwmmxt_movq_wRn_M0(wrd);
1996 gen_op_iwmmxt_set_mup();
1997 gen_op_iwmmxt_set_cup();
1999 case 0x00c: case 0x20c: case 0x40c: case 0x60c: /* WUNPCKEH */
2000 case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2001 wrd = (insn >> 12) & 0xf;
2002 rd0 = (insn >> 16) & 0xf;
2003 gen_op_iwmmxt_movq_M0_wRn(rd0);
2004 switch ((insn >> 22) & 3) {
2006 if (insn & (1 << 21))
2007 gen_op_iwmmxt_unpackhsb_M0();
2009 gen_op_iwmmxt_unpackhub_M0();
2012 if (insn & (1 << 21))
2013 gen_op_iwmmxt_unpackhsw_M0();
2015 gen_op_iwmmxt_unpackhuw_M0();
2018 if (insn & (1 << 21))
2019 gen_op_iwmmxt_unpackhsl_M0();
2021 gen_op_iwmmxt_unpackhul_M0();
2026 gen_op_iwmmxt_movq_wRn_M0(wrd);
2027 gen_op_iwmmxt_set_mup();
2028 gen_op_iwmmxt_set_cup();
2030 case 0x204: case 0x604: case 0xa04: case 0xe04: /* WSRL */
2031 case 0x214: case 0x614: case 0xa14: case 0xe14:
2032 if (((insn >> 22) & 3) == 0)
2034 wrd = (insn >> 12) & 0xf;
2035 rd0 = (insn >> 16) & 0xf;
2036 gen_op_iwmmxt_movq_M0_wRn(rd0);
2037 tmp = tcg_temp_new_i32();
2038 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2039 tcg_temp_free_i32(tmp);
2042 switch ((insn >> 22) & 3) {
2044 gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2047 gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2050 gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2053 tcg_temp_free_i32(tmp);
2054 gen_op_iwmmxt_movq_wRn_M0(wrd);
2055 gen_op_iwmmxt_set_mup();
2056 gen_op_iwmmxt_set_cup();
2058 case 0x004: case 0x404: case 0x804: case 0xc04: /* WSRA */
2059 case 0x014: case 0x414: case 0x814: case 0xc14:
2060 if (((insn >> 22) & 3) == 0)
2062 wrd = (insn >> 12) & 0xf;
2063 rd0 = (insn >> 16) & 0xf;
2064 gen_op_iwmmxt_movq_M0_wRn(rd0);
2065 tmp = tcg_temp_new_i32();
2066 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2067 tcg_temp_free_i32(tmp);
2070 switch ((insn >> 22) & 3) {
2072 gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2075 gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2078 gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2081 tcg_temp_free_i32(tmp);
2082 gen_op_iwmmxt_movq_wRn_M0(wrd);
2083 gen_op_iwmmxt_set_mup();
2084 gen_op_iwmmxt_set_cup();
2086 case 0x104: case 0x504: case 0x904: case 0xd04: /* WSLL */
2087 case 0x114: case 0x514: case 0x914: case 0xd14:
2088 if (((insn >> 22) & 3) == 0)
2090 wrd = (insn >> 12) & 0xf;
2091 rd0 = (insn >> 16) & 0xf;
2092 gen_op_iwmmxt_movq_M0_wRn(rd0);
2093 tmp = tcg_temp_new_i32();
2094 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2095 tcg_temp_free_i32(tmp);
2098 switch ((insn >> 22) & 3) {
2100 gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2103 gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2106 gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2109 tcg_temp_free_i32(tmp);
2110 gen_op_iwmmxt_movq_wRn_M0(wrd);
2111 gen_op_iwmmxt_set_mup();
2112 gen_op_iwmmxt_set_cup();
2114 case 0x304: case 0x704: case 0xb04: case 0xf04: /* WROR */
2115 case 0x314: case 0x714: case 0xb14: case 0xf14:
2116 if (((insn >> 22) & 3) == 0)
2118 wrd = (insn >> 12) & 0xf;
2119 rd0 = (insn >> 16) & 0xf;
2120 gen_op_iwmmxt_movq_M0_wRn(rd0);
2121 tmp = tcg_temp_new_i32();
2122 switch ((insn >> 22) & 3) {
2124 if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2125 tcg_temp_free_i32(tmp);
2128 gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2131 if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2132 tcg_temp_free_i32(tmp);
2135 gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2138 if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2139 tcg_temp_free_i32(tmp);
2142 gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2145 tcg_temp_free_i32(tmp);
2146 gen_op_iwmmxt_movq_wRn_M0(wrd);
2147 gen_op_iwmmxt_set_mup();
2148 gen_op_iwmmxt_set_cup();
2150 case 0x116: case 0x316: case 0x516: case 0x716: /* WMIN */
2151 case 0x916: case 0xb16: case 0xd16: case 0xf16:
2152 wrd = (insn >> 12) & 0xf;
2153 rd0 = (insn >> 16) & 0xf;
2154 rd1 = (insn >> 0) & 0xf;
2155 gen_op_iwmmxt_movq_M0_wRn(rd0);
2156 switch ((insn >> 22) & 3) {
2158 if (insn & (1 << 21))
2159 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2161 gen_op_iwmmxt_minub_M0_wRn(rd1);
2164 if (insn & (1 << 21))
2165 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2167 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2170 if (insn & (1 << 21))
2171 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2173 gen_op_iwmmxt_minul_M0_wRn(rd1);
2178 gen_op_iwmmxt_movq_wRn_M0(wrd);
2179 gen_op_iwmmxt_set_mup();
2181 case 0x016: case 0x216: case 0x416: case 0x616: /* WMAX */
2182 case 0x816: case 0xa16: case 0xc16: case 0xe16:
2183 wrd = (insn >> 12) & 0xf;
2184 rd0 = (insn >> 16) & 0xf;
2185 rd1 = (insn >> 0) & 0xf;
2186 gen_op_iwmmxt_movq_M0_wRn(rd0);
2187 switch ((insn >> 22) & 3) {
2189 if (insn & (1 << 21))
2190 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2192 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2195 if (insn & (1 << 21))
2196 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2198 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2201 if (insn & (1 << 21))
2202 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2204 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2209 gen_op_iwmmxt_movq_wRn_M0(wrd);
2210 gen_op_iwmmxt_set_mup();
2212 case 0x002: case 0x102: case 0x202: case 0x302: /* WALIGNI */
2213 case 0x402: case 0x502: case 0x602: case 0x702:
2214 wrd = (insn >> 12) & 0xf;
2215 rd0 = (insn >> 16) & 0xf;
2216 rd1 = (insn >> 0) & 0xf;
2217 gen_op_iwmmxt_movq_M0_wRn(rd0);
2218 tmp = tcg_const_i32((insn >> 20) & 3);
2219 iwmmxt_load_reg(cpu_V1, rd1);
2220 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2222 gen_op_iwmmxt_movq_wRn_M0(wrd);
2223 gen_op_iwmmxt_set_mup();
2225 case 0x01a: case 0x11a: case 0x21a: case 0x31a: /* WSUB */
2226 case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2227 case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2228 case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2229 wrd = (insn >> 12) & 0xf;
2230 rd0 = (insn >> 16) & 0xf;
2231 rd1 = (insn >> 0) & 0xf;
2232 gen_op_iwmmxt_movq_M0_wRn(rd0);
2233 switch ((insn >> 20) & 0xf) {
2235 gen_op_iwmmxt_subnb_M0_wRn(rd1);
2238 gen_op_iwmmxt_subub_M0_wRn(rd1);
2241 gen_op_iwmmxt_subsb_M0_wRn(rd1);
2244 gen_op_iwmmxt_subnw_M0_wRn(rd1);
2247 gen_op_iwmmxt_subuw_M0_wRn(rd1);
2250 gen_op_iwmmxt_subsw_M0_wRn(rd1);
2253 gen_op_iwmmxt_subnl_M0_wRn(rd1);
2256 gen_op_iwmmxt_subul_M0_wRn(rd1);
2259 gen_op_iwmmxt_subsl_M0_wRn(rd1);
2264 gen_op_iwmmxt_movq_wRn_M0(wrd);
2265 gen_op_iwmmxt_set_mup();
2266 gen_op_iwmmxt_set_cup();
2268 case 0x01e: case 0x11e: case 0x21e: case 0x31e: /* WSHUFH */
2269 case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2270 case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2271 case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2272 wrd = (insn >> 12) & 0xf;
2273 rd0 = (insn >> 16) & 0xf;
2274 gen_op_iwmmxt_movq_M0_wRn(rd0);
2275 tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2276 gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2278 gen_op_iwmmxt_movq_wRn_M0(wrd);
2279 gen_op_iwmmxt_set_mup();
2280 gen_op_iwmmxt_set_cup();
2282 case 0x018: case 0x118: case 0x218: case 0x318: /* WADD */
2283 case 0x418: case 0x518: case 0x618: case 0x718:
2284 case 0x818: case 0x918: case 0xa18: case 0xb18:
2285 case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2286 wrd = (insn >> 12) & 0xf;
2287 rd0 = (insn >> 16) & 0xf;
2288 rd1 = (insn >> 0) & 0xf;
2289 gen_op_iwmmxt_movq_M0_wRn(rd0);
2290 switch ((insn >> 20) & 0xf) {
2292 gen_op_iwmmxt_addnb_M0_wRn(rd1);
2295 gen_op_iwmmxt_addub_M0_wRn(rd1);
2298 gen_op_iwmmxt_addsb_M0_wRn(rd1);
2301 gen_op_iwmmxt_addnw_M0_wRn(rd1);
2304 gen_op_iwmmxt_adduw_M0_wRn(rd1);
2307 gen_op_iwmmxt_addsw_M0_wRn(rd1);
2310 gen_op_iwmmxt_addnl_M0_wRn(rd1);
2313 gen_op_iwmmxt_addul_M0_wRn(rd1);
2316 gen_op_iwmmxt_addsl_M0_wRn(rd1);
2321 gen_op_iwmmxt_movq_wRn_M0(wrd);
2322 gen_op_iwmmxt_set_mup();
2323 gen_op_iwmmxt_set_cup();
2325 case 0x008: case 0x108: case 0x208: case 0x308: /* WPACK */
2326 case 0x408: case 0x508: case 0x608: case 0x708:
2327 case 0x808: case 0x908: case 0xa08: case 0xb08:
2328 case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2329 if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2331 wrd = (insn >> 12) & 0xf;
2332 rd0 = (insn >> 16) & 0xf;
2333 rd1 = (insn >> 0) & 0xf;
2334 gen_op_iwmmxt_movq_M0_wRn(rd0);
2335 switch ((insn >> 22) & 3) {
2337 if (insn & (1 << 21))
2338 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2340 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2343 if (insn & (1 << 21))
2344 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2346 gen_op_iwmmxt_packul_M0_wRn(rd1);
2349 if (insn & (1 << 21))
2350 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2352 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2355 gen_op_iwmmxt_movq_wRn_M0(wrd);
2356 gen_op_iwmmxt_set_mup();
2357 gen_op_iwmmxt_set_cup();
2359 case 0x201: case 0x203: case 0x205: case 0x207:
2360 case 0x209: case 0x20b: case 0x20d: case 0x20f:
2361 case 0x211: case 0x213: case 0x215: case 0x217:
2362 case 0x219: case 0x21b: case 0x21d: case 0x21f:
2363 wrd = (insn >> 5) & 0xf;
2364 rd0 = (insn >> 12) & 0xf;
2365 rd1 = (insn >> 0) & 0xf;
2366 if (rd0 == 0xf || rd1 == 0xf)
2368 gen_op_iwmmxt_movq_M0_wRn(wrd);
2369 tmp = load_reg(s, rd0);
2370 tmp2 = load_reg(s, rd1);
2371 switch ((insn >> 16) & 0xf) {
2372 case 0x0: /* TMIA */
2373 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2375 case 0x8: /* TMIAPH */
2376 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2378 case 0xc: case 0xd: case 0xe: case 0xf: /* TMIAxy */
2379 if (insn & (1 << 16))
2380 tcg_gen_shri_i32(tmp, tmp, 16);
2381 if (insn & (1 << 17))
2382 tcg_gen_shri_i32(tmp2, tmp2, 16);
2383 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2386 tcg_temp_free_i32(tmp2);
2387 tcg_temp_free_i32(tmp);
2390 tcg_temp_free_i32(tmp2);
2391 tcg_temp_free_i32(tmp);
2392 gen_op_iwmmxt_movq_wRn_M0(wrd);
2393 gen_op_iwmmxt_set_mup();
2402 /* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred
2403 (i.e. an undefined instruction).

   Two encodings are recognised:
     - (insn & 0x0ff00f10) == 0x0e200010: multiply with internal accumulate.
       The operands are read with load_reg(), insn[19:16] selects the
       helper, and cpu_M0 is stored to accumulator 'acc'.
     - (insn & 0x0fe00ff8) == 0x0c400000: accumulator access.  With
       ARM_CP_RW_BIT set the accumulator is split into the rdlo/rdhi core
       registers, otherwise rdlo/rdhi are concatenated into it.

   NOTE(review): the declarations of tmp/tmp2, the rd1 assignment, any
   validity check on acc, the first case label of the switch and the
   return statements are on lines not visible in this listing.  */
2404 static int disas_dsp_insn(CPUARMState *env, DisasContext *s, uint32_t insn)
2406 int acc, rd0, rd1, rdhi, rdlo;
2409 if ((insn & 0x0ff00f10) == 0x0e200010) {
2410 /* Multiply with Internal Accumulate Format */
2411 rd0 = (insn >> 12) & 0xf;
2413 acc = (insn >> 5) & 7;
2418 tmp = load_reg(s, rd0);
2419 tmp2 = load_reg(s, rd1);
2420 switch ((insn >> 16) & 0xf) {
2422 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2424 case 0x8: /* MIAPH */
2425 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2427 case 0xc: /* MIABB */
2428 case 0xd: /* MIABT */
2429 case 0xe: /* MIATB */
2430 case 0xf: /* MIATT */
2431 if (insn & (1 << 16)) /* bit 16: use the top halfword of tmp */
2432 tcg_gen_shri_i32(tmp, tmp, 16);
2433 if (insn & (1 << 17)) /* bit 17: use the top halfword of tmp2 */
2434 tcg_gen_shri_i32(tmp2, tmp2, 16);
2435 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2440 tcg_temp_free_i32(tmp2);
2441 tcg_temp_free_i32(tmp);
2443 gen_op_iwmmxt_movq_wRn_M0(acc);
2447 if ((insn & 0x0fe00ff8) == 0x0c400000) {
2448 /* Internal Accumulator Access Format */
2449 rdhi = (insn >> 16) & 0xf;
2450 rdlo = (insn >> 12) & 0xf;
2456 if (insn & ARM_CP_RW_BIT) { /* MRA */
2457 iwmmxt_load_reg(cpu_V0, acc);
2458 tcg_gen_trunc_i64_i32(cpu_R[rdlo], cpu_V0); /* low 32 bits -> rdlo */
2459 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
2460 tcg_gen_trunc_i64_i32(cpu_R[rdhi], cpu_V0); /* high 32 bits -> rdhi */
2461 tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1); /* mask is 0xff: keep accumulator bits 39:32 only */
2463 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2464 iwmmxt_store_reg(cpu_V0, acc);
/* VFP register-number decoding helpers.
 *
 * VFP_REG_SHR(x, n) shifts x right by n when n > 0, otherwise left by -n.
 *
 * VFP_SREG(insn, bigbit, smallbit) builds a 5-bit register number: the
 * four instruction bits starting at 'bigbit' become bits 4:1 and the
 * instruction bit at 'smallbit' becomes bit 0.
 *
 * VFP_DREG(reg, insn, bigbit, smallbit) assigns to 'reg'.  With
 * ARM_FEATURE_VFP3 the four bits at 'bigbit' form bits 3:0 and the bit at
 * 'smallbit' becomes bit 4.  Otherwise only the four bits at 'bigbit' are
 * used and the 'smallbit' bit is tested first.  The macro references a
 * variable named 'env' from the expansion site.
 * NOTE(review): the statement guarded by that test is on a line not
 * visible in this listing -- presumably it rejects the encoding; confirm.
 *
 * The _D, _N and _M variants fix (bigbit, smallbit) to (12, 22), (16, 7)
 * and (0, 5) respectively.
 */
2472 #define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
2473 #define VFP_SREG(insn, bigbit, smallbit) \
2474 ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1))
2475 #define VFP_DREG(reg, insn, bigbit, smallbit) do { \
2476 if (arm_feature(env, ARM_FEATURE_VFP3)) { \
2477 reg = (((insn) >> (bigbit)) & 0x0f) \
2478 | (((insn) >> ((smallbit) - 4)) & 0x10); \
2480 if (insn & (1 << (smallbit))) \
2482 reg = ((insn) >> (bigbit)) & 0x0f; \
2485 #define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22)
2486 #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
2487 #define VFP_SREG_N(insn) VFP_SREG(insn, 16, 7)
2488 #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
2489 #define VFP_SREG_M(insn) VFP_SREG(insn, 0, 5)
2490 #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
2492 /* Move between integer and VFP cores. */
/* Allocate a new 32-bit temporary and copy cpu_F0s into it.
 * NOTE(review): the return statement is not visible in this listing;
 * callers use the result as the temporary, so it presumably returns tmp.
 */
2493 static TCGv gen_vfp_mrs(void)
2495 TCGv tmp = tcg_temp_new_i32();
2496 tcg_gen_mov_i32(tmp, cpu_F0s);
/* Copy the 32-bit value tmp into cpu_F0s and free tmp; the caller must
 * not use tmp afterwards.
 */
2500 static void gen_vfp_msr(TCGv tmp)
2502 tcg_gen_mov_i32(cpu_F0s, tmp);
2503 tcg_temp_free_i32(tmp);
/* Replicate one byte of var into all four bytes of var, in place.
 * 'shift' is the bit position of the byte to replicate.
 */
2506 static void gen_neon_dup_u8(TCGv var, int shift)
2508 TCGv tmp = tcg_temp_new_i32();
2510 tcg_gen_shri_i32(var, var, shift); /* move the chosen byte to bits 7:0 */
2511 tcg_gen_ext8u_i32(var, var); /* clear bits 31:8 */
2512 tcg_gen_shli_i32(tmp, var, 8);
2513 tcg_gen_or_i32(var, var, tmp); /* byte now in both bytes of the low halfword */
2514 tcg_gen_shli_i32(tmp, var, 16);
2515 tcg_gen_or_i32(var, var, tmp); /* low halfword copied into the high halfword */
2516 tcg_temp_free_i32(tmp);
/* Replicate the low 16 bits of var into both halfwords of var, in place. */
2519 static void gen_neon_dup_low16(TCGv var)
2521 TCGv tmp = tcg_temp_new_i32();
2522 tcg_gen_ext16u_i32(var, var); /* clear the high halfword */
2523 tcg_gen_shli_i32(tmp, var, 16);
2524 tcg_gen_or_i32(var, var, tmp);
2525 tcg_temp_free_i32(tmp);
/* Replicate the high 16 bits of var into both halfwords of var, in place. */
2528 static void gen_neon_dup_high16(TCGv var)
2530 TCGv tmp = tcg_temp_new_i32();
2531 tcg_gen_andi_i32(var, var, 0xffff0000); /* clear the low halfword */
2532 tcg_gen_shri_i32(tmp, var, 16);
2533 tcg_gen_or_i32(var, var, tmp);
2534 tcg_temp_free_i32(tmp);
/* Load one element from addr and replicate it across a 32-bit temporary.
 * An 8-bit load is replicated with gen_neon_dup_u8(), a 16-bit load with
 * gen_neon_dup_low16(), and a 32-bit load is used unchanged.  The memory
 * access index passed to the loads is IS_USER(s).
 *
 * NOTE(review): the switch statement, its case labels, the body of the
 * default case and the return statement are on lines not visible in this
 * listing; 'size' presumably selects among the three widths -- confirm.
 */
2537 static TCGv gen_load_and_replicate(DisasContext *s, TCGv addr, int size)
2539 /* Load a single Neon element and replicate into a 32 bit TCG reg */
2543 tmp = gen_ld8u(addr, IS_USER(s));
2544 gen_neon_dup_u8(tmp, 0);
2547 tmp = gen_ld16u(addr, IS_USER(s));
2548 gen_neon_dup_low16(tmp);
2551 tmp = gen_ld32(addr, IS_USER(s));
2553 default: /* Avoid compiler warnings. */
2559 /* Disassemble a VFP instruction. Returns nonzero if an error occurred
2560 (i.e. an undefined instruction). */
2561 static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
2563 uint32_t rd, rn, rm, op, i, n, offset, delta_d, delta_m, bank_mask;
2569 if (!arm_feature(env, ARM_FEATURE_VFP))
2572 if (!s->vfp_enabled) {
2573 /* VFP disabled. Only allow fmxr/fmrx to/from some control regs. */
2574 if ((insn & 0x0fe00fff) != 0x0ee00a10)
2576 rn = (insn >> 16) & 0xf;
2577 if (rn != ARM_VFP_FPSID && rn != ARM_VFP_FPEXC
2578 && rn != ARM_VFP_MVFR1 && rn != ARM_VFP_MVFR0)
2581 dp = ((insn & 0xf00) == 0xb00);
2582 switch ((insn >> 24) & 0xf) {
2584 if (insn & (1 << 4)) {
2585 /* single register transfer */
2586 rd = (insn >> 12) & 0xf;
2591 VFP_DREG_N(rn, insn);
2594 if (insn & 0x00c00060
2595 && !arm_feature(env, ARM_FEATURE_NEON))
2598 pass = (insn >> 21) & 1;
2599 if (insn & (1 << 22)) {
2601 offset = ((insn >> 5) & 3) * 8;
2602 } else if (insn & (1 << 5)) {
2604 offset = (insn & (1 << 6)) ? 16 : 0;
2609 if (insn & ARM_CP_RW_BIT) {
2611 tmp = neon_load_reg(rn, pass);
2615 tcg_gen_shri_i32(tmp, tmp, offset);
2616 if (insn & (1 << 23))
2622 if (insn & (1 << 23)) {
2624 tcg_gen_shri_i32(tmp, tmp, 16);
2630 tcg_gen_sari_i32(tmp, tmp, 16);
2639 store_reg(s, rd, tmp);
2642 tmp = load_reg(s, rd);
2643 if (insn & (1 << 23)) {
2646 gen_neon_dup_u8(tmp, 0);
2647 } else if (size == 1) {
2648 gen_neon_dup_low16(tmp);
2650 for (n = 0; n <= pass * 2; n++) {
2651 tmp2 = tcg_temp_new_i32();
2652 tcg_gen_mov_i32(tmp2, tmp);
2653 neon_store_reg(rn, n, tmp2);
2655 neon_store_reg(rn, n, tmp);
2660 tmp2 = neon_load_reg(rn, pass);
2661 tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 8);
2662 tcg_temp_free_i32(tmp2);
2665 tmp2 = neon_load_reg(rn, pass);
2666 tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 16);
2667 tcg_temp_free_i32(tmp2);
2672 neon_store_reg(rn, pass, tmp);
2676 if ((insn & 0x6f) != 0x00)
2678 rn = VFP_SREG_N(insn);
2679 if (insn & ARM_CP_RW_BIT) {
2681 if (insn & (1 << 21)) {
2682 /* system register */
2687 /* VFP2 allows access to FSID from userspace.
2688 VFP3 restricts all id registers to privileged
2691 && arm_feature(env, ARM_FEATURE_VFP3))
2693 tmp = load_cpu_field(vfp.xregs[rn]);
2698 tmp = load_cpu_field(vfp.xregs[rn]);
2700 case ARM_VFP_FPINST:
2701 case ARM_VFP_FPINST2:
2702 /* Not present in VFP3. */
2704 || arm_feature(env, ARM_FEATURE_VFP3))
2706 tmp = load_cpu_field(vfp.xregs[rn]);
2710 tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
2711 tcg_gen_andi_i32(tmp, tmp, 0xf0000000);
2713 tmp = tcg_temp_new_i32();
2714 gen_helper_vfp_get_fpscr(tmp, cpu_env);
2720 || !arm_feature(env, ARM_FEATURE_MVFR))
2722 tmp = load_cpu_field(vfp.xregs[rn]);
2728 gen_mov_F0_vreg(0, rn);
2729 tmp = gen_vfp_mrs();
2732 /* Set the 4 flag bits in the CPSR. */
2734 tcg_temp_free_i32(tmp);
2736 store_reg(s, rd, tmp);
2740 tmp = load_reg(s, rd);
2741 if (insn & (1 << 21)) {
2743 /* system register */
2748 /* Writes are ignored. */
2751 gen_helper_vfp_set_fpscr(cpu_env, tmp);
2752 tcg_temp_free_i32(tmp);
2758 /* TODO: VFP subarchitecture support.
2759 * For now, keep the EN bit only */
2760 tcg_gen_andi_i32(tmp, tmp, 1 << 30);
2761 store_cpu_field(tmp, vfp.xregs[rn]);
2764 case ARM_VFP_FPINST:
2765 case ARM_VFP_FPINST2:
2766 store_cpu_field(tmp, vfp.xregs[rn]);
2773 gen_mov_vreg_F0(0, rn);
2778 /* data processing */
2779 /* The opcode is in bits 23, 21, 20 and 6. */
2780 op = ((insn >> 20) & 8) | ((insn >> 19) & 6) | ((insn >> 6) & 1);
2784 rn = ((insn >> 15) & 0x1e) | ((insn >> 7) & 1);
2786 /* rn is register number */
2787 VFP_DREG_N(rn, insn);
2790 if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18))) {
2791 /* Integer or single precision destination. */
2792 rd = VFP_SREG_D(insn);
2794 VFP_DREG_D(rd, insn);
2797 (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14))) {
2798 /* VCVT from int is always from S reg regardless of dp bit.
2799 * VCVT with immediate frac_bits has same format as SREG_M
2801 rm = VFP_SREG_M(insn);
2803 VFP_DREG_M(rm, insn);
2806 rn = VFP_SREG_N(insn);
2807 if (op == 15 && rn == 15) {
2808 /* Double precision destination. */
2809 VFP_DREG_D(rd, insn);
2811 rd = VFP_SREG_D(insn);
2813 /* NB that we implicitly rely on the encoding for the frac_bits
2814 * in VCVT of fixed to float being the same as that of an SREG_M
2816 rm = VFP_SREG_M(insn);
2819 veclen = s->vec_len;
2820 if (op == 15 && rn > 3)
2823 /* Shut up compiler warnings. */
2834 /* Figure out what type of vector operation this is. */
2835 if ((rd & bank_mask) == 0) {
2840 delta_d = (s->vec_stride >> 1) + 1;
2842 delta_d = s->vec_stride + 1;
2844 if ((rm & bank_mask) == 0) {
2845 /* mixed scalar/vector */
2854 /* Load the initial operands. */
2859 /* Integer source */
2860 gen_mov_F0_vreg(0, rm);
2865 gen_mov_F0_vreg(dp, rd);
2866 gen_mov_F1_vreg(dp, rm);
2870 /* Compare with zero */
2871 gen_mov_F0_vreg(dp, rd);
2882 /* Source and destination the same. */
2883 gen_mov_F0_vreg(dp, rd);
2889 /* VCVTB, VCVTT: only present with the halfprec extension,
2890 * UNPREDICTABLE if bit 8 is set (we choose to UNDEF)
2892 if (dp || !arm_feature(env, ARM_FEATURE_VFP_FP16)) {
2895 /* Otherwise fall through */
2897 /* One source operand. */
2898 gen_mov_F0_vreg(dp, rm);
2902 /* Two source operands. */
2903 gen_mov_F0_vreg(dp, rn);
2904 gen_mov_F1_vreg(dp, rm);
2908 /* Perform the calculation. */
2910 case 0: /* VMLA: fd + (fn * fm) */
2911 /* Note that order of inputs to the add matters for NaNs */
2913 gen_mov_F0_vreg(dp, rd);
2916 case 1: /* VMLS: fd + -(fn * fm) */
2919 gen_mov_F0_vreg(dp, rd);
2922 case 2: /* VNMLS: -fd + (fn * fm) */
2923 /* Note that it isn't valid to replace (-A + B) with (B - A)
2924 * or similar plausible looking simplifications
2925 * because this will give wrong results for NaNs.
2928 gen_mov_F0_vreg(dp, rd);
2932 case 3: /* VNMLA: -fd + -(fn * fm) */
2935 gen_mov_F0_vreg(dp, rd);
2939 case 4: /* mul: fn * fm */
2942 case 5: /* nmul: -(fn * fm) */
2946 case 6: /* add: fn + fm */
2949 case 7: /* sub: fn - fm */
2952 case 8: /* div: fn / fm */
2955 case 10: /* VFNMA : fd = muladd(-fd, fn, fm) */
2956 case 11: /* VFNMS : fd = muladd(-fd, -fn, fm) */
2957 case 12: /* VFMA : fd = muladd( fd, fn, fm) */
2958 case 13: /* VFMS : fd = muladd( fd, -fn, fm) */
2959 /* These are fused multiply-add, and must be done as one
2960 * floating point operation with no rounding between the
2961 * multiplication and addition steps.
2962 * NB that doing the negations here as separate steps is
2963 * correct : an input NaN should come out with its sign bit
2964 * flipped if it is a negated-input.
2966 if (!arm_feature(env, ARM_FEATURE_VFP4)) {
2974 gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
2976 frd = tcg_temp_new_i64();
2977 tcg_gen_ld_f64(frd, cpu_env, vfp_reg_offset(dp, rd));
2980 gen_helper_vfp_negd(frd, frd);
2982 fpst = get_fpstatus_ptr(0);
2983 gen_helper_vfp_muladdd(cpu_F0d, cpu_F0d,
2984 cpu_F1d, frd, fpst);
2985 tcg_temp_free_ptr(fpst);
2986 tcg_temp_free_i64(frd);
2992 gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
2994 frd = tcg_temp_new_i32();
2995 tcg_gen_ld_f32(frd, cpu_env, vfp_reg_offset(dp, rd));
2997 gen_helper_vfp_negs(frd, frd);
2999 fpst = get_fpstatus_ptr(0);
3000 gen_helper_vfp_muladds(cpu_F0s, cpu_F0s,
3001 cpu_F1s, frd, fpst);
3002 tcg_temp_free_ptr(fpst);
3003 tcg_temp_free_i32(frd);
3006 case 14: /* fconst */
3007 if (!arm_feature(env, ARM_FEATURE_VFP3))
3010 n = (insn << 12) & 0x80000000;
3011 i = ((insn >> 12) & 0x70) | (insn & 0xf);
3018 tcg_gen_movi_i64(cpu_F0d, ((uint64_t)n) << 32);
3025 tcg_gen_movi_i32(cpu_F0s, n);
3028 case 15: /* extension space */
3042 case 4: /* vcvtb.f32.f16 */
3043 tmp = gen_vfp_mrs();
3044 tcg_gen_ext16u_i32(tmp, tmp);
3045 gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env);
3046 tcg_temp_free_i32(tmp);
3048 case 5: /* vcvtt.f32.f16 */
3049 tmp = gen_vfp_mrs();
3050 tcg_gen_shri_i32(tmp, tmp, 16);
3051 gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env);
3052 tcg_temp_free_i32(tmp);
3054 case 6: /* vcvtb.f16.f32 */
3055 tmp = tcg_temp_new_i32();
3056 gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
3057 gen_mov_F0_vreg(0, rd);
3058 tmp2 = gen_vfp_mrs();
3059 tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
3060 tcg_gen_or_i32(tmp, tmp, tmp2);
3061 tcg_temp_free_i32(tmp2);
3064 case 7: /* vcvtt.f16.f32 */
3065 tmp = tcg_temp_new_i32();
3066 gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
3067 tcg_gen_shli_i32(tmp, tmp, 16);
3068 gen_mov_F0_vreg(0, rd);
3069 tmp2 = gen_vfp_mrs();
3070 tcg_gen_ext16u_i32(tmp2, tmp2);
3071 tcg_gen_or_i32(tmp, tmp, tmp2);
3072 tcg_temp_free_i32(tmp2);
3084 case 11: /* cmpez */
3088 case 15: /* single<->double conversion */
3090 gen_helper_vfp_fcvtsd(cpu_F0s, cpu_F0d, cpu_env);
3092 gen_helper_vfp_fcvtds(cpu_F0d, cpu_F0s, cpu_env);
3094 case 16: /* fuito */
3095 gen_vfp_uito(dp, 0);
3097 case 17: /* fsito */
3098 gen_vfp_sito(dp, 0);
3100 case 20: /* fshto */
3101 if (!arm_feature(env, ARM_FEATURE_VFP3))
3103 gen_vfp_shto(dp, 16 - rm, 0);
3105 case 21: /* fslto */
3106 if (!arm_feature(env, ARM_FEATURE_VFP3))
3108 gen_vfp_slto(dp, 32 - rm, 0);
3110 case 22: /* fuhto */
3111 if (!arm_feature(env, ARM_FEATURE_VFP3))
3113 gen_vfp_uhto(dp, 16 - rm, 0);
3115 case 23: /* fulto */
3116 if (!arm_feature(env, ARM_FEATURE_VFP3))
3118 gen_vfp_ulto(dp, 32 - rm, 0);
3120 case 24: /* ftoui */
3121 gen_vfp_toui(dp, 0);
3123 case 25: /* ftouiz */
3124 gen_vfp_touiz(dp, 0);
3126 case 26: /* ftosi */
3127 gen_vfp_tosi(dp, 0);
3129 case 27: /* ftosiz */
3130 gen_vfp_tosiz(dp, 0);
3132 case 28: /* ftosh */
3133 if (!arm_feature(env, ARM_FEATURE_VFP3))
3135 gen_vfp_tosh(dp, 16 - rm, 0);
3137 case 29: /* ftosl */
3138 if (!arm_feature(env, ARM_FEATURE_VFP3))
3140 gen_vfp_tosl(dp, 32 - rm, 0);
3142 case 30: /* ftouh */
3143 if (!arm_feature(env, ARM_FEATURE_VFP3))
3145 gen_vfp_touh(dp, 16 - rm, 0);
3147 case 31: /* ftoul */
3148 if (!arm_feature(env, ARM_FEATURE_VFP3))
3150 gen_vfp_toul(dp, 32 - rm, 0);
3152 default: /* undefined */
3156 default: /* undefined */
3160 /* Write back the result. */
3161 if (op == 15 && (rn >= 8 && rn <= 11))
3162 ; /* Comparison, do nothing. */
3163 else if (op == 15 && dp && ((rn & 0x1c) == 0x18))
3164 /* VCVT double to int: always integer result. */
3165 gen_mov_vreg_F0(0, rd);
3166 else if (op == 15 && rn == 15)
3168 gen_mov_vreg_F0(!dp, rd);
3170 gen_mov_vreg_F0(dp, rd);
3172 /* break out of the loop if we have finished */
3176 if (op == 15 && delta_m == 0) {
3177 /* single source one-many */
3179 rd = ((rd + delta_d) & (bank_mask - 1))
3181 gen_mov_vreg_F0(dp, rd);
3185 /* Setup the next operands. */
3187 rd = ((rd + delta_d) & (bank_mask - 1))
3191 /* One source operand. */
3192 rm = ((rm + delta_m) & (bank_mask - 1))
3194 gen_mov_F0_vreg(dp, rm);
3196 /* Two source operands. */
3197 rn = ((rn + delta_d) & (bank_mask - 1))
3199 gen_mov_F0_vreg(dp, rn);
3201 rm = ((rm + delta_m) & (bank_mask - 1))
3203 gen_mov_F1_vreg(dp, rm);
3211 if ((insn & 0x03e00000) == 0x00400000) {
3212 /* two-register transfer */
3213 rn = (insn >> 16) & 0xf;
3214 rd = (insn >> 12) & 0xf;
3216 VFP_DREG_M(rm, insn);
3218 rm = VFP_SREG_M(insn);
3221 if (insn & ARM_CP_RW_BIT) {
3224 gen_mov_F0_vreg(0, rm * 2);
3225 tmp = gen_vfp_mrs();
3226 store_reg(s, rd, tmp);
3227 gen_mov_F0_vreg(0, rm * 2 + 1);
3228 tmp = gen_vfp_mrs();
3229 store_reg(s, rn, tmp);
3231 gen_mov_F0_vreg(0, rm);
3232 tmp = gen_vfp_mrs();
3233 store_reg(s, rd, tmp);
3234 gen_mov_F0_vreg(0, rm + 1);
3235 tmp = gen_vfp_mrs();
3236 store_reg(s, rn, tmp);
3241 tmp = load_reg(s, rd);
3243 gen_mov_vreg_F0(0, rm * 2);
3244 tmp = load_reg(s, rn);
3246 gen_mov_vreg_F0(0, rm * 2 + 1);
3248 tmp = load_reg(s, rd);
3250 gen_mov_vreg_F0(0, rm);
3251 tmp = load_reg(s, rn);
3253 gen_mov_vreg_F0(0, rm + 1);
3258 rn = (insn >> 16) & 0xf;
3260 VFP_DREG_D(rd, insn);
3262 rd = VFP_SREG_D(insn);
3263 if ((insn & 0x01200000) == 0x01000000) {
3264 /* Single load/store */
3265 offset = (insn & 0xff) << 2;
3266 if ((insn & (1 << 23)) == 0)
3268 if (s->thumb && rn == 15) {
3269 /* This is actually UNPREDICTABLE */
3270 addr = tcg_temp_new_i32();
3271 tcg_gen_movi_i32(addr, s->pc & ~2);
3273 addr = load_reg(s, rn);
3275 tcg_gen_addi_i32(addr, addr, offset);
3276 if (insn & (1 << 20)) {
3277 gen_vfp_ld(s, dp, addr);
3278 gen_mov_vreg_F0(dp, rd);
3280 gen_mov_F0_vreg(dp, rd);
3281 gen_vfp_st(s, dp, addr);
3283 tcg_temp_free_i32(addr);
3285 /* load/store multiple */
3286 int w = insn & (1 << 21);
3288 n = (insn >> 1) & 0x7f;
3292 if (w && !(((insn >> 23) ^ (insn >> 24)) & 1)) {
3293 /* P == U , W == 1 => UNDEF */
3296 if (n == 0 || (rd + n) > 32 || (dp && n > 16)) {
3297 /* UNPREDICTABLE cases for bad immediates: we choose to
3298 * UNDEF to avoid generating huge numbers of TCG ops
3302 if (rn == 15 && w) {
3303 /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
3307 if (s->thumb && rn == 15) {
3308 /* This is actually UNPREDICTABLE */
3309 addr = tcg_temp_new_i32();
3310 tcg_gen_movi_i32(addr, s->pc & ~2);
3312 addr = load_reg(s, rn);
3314 if (insn & (1 << 24)) /* pre-decrement */
3315 tcg_gen_addi_i32(addr, addr, -((insn & 0xff) << 2));
3321 for (i = 0; i < n; i++) {
3322 if (insn & ARM_CP_RW_BIT) {
3324 gen_vfp_ld(s, dp, addr);
3325 gen_mov_vreg_F0(dp, rd + i);
3328 gen_mov_F0_vreg(dp, rd + i);
3329 gen_vfp_st(s, dp, addr);
3331 tcg_gen_addi_i32(addr, addr, offset);
3335 if (insn & (1 << 24))
3336 offset = -offset * n;
3337 else if (dp && (insn & 1))
3343 tcg_gen_addi_i32(addr, addr, offset);
3344 store_reg(s, rn, addr);
3346 tcg_temp_free_i32(addr);
3352 /* Should never happen. */
/* Emit code that transfers control to guest address 'dest'.
 * When 'dest' lies in the same target page as the current TB's pc, the
 * PC is set to 'dest' and the TB is exited with the value (tb + n);
 * 'n' presumably selects one of the TB's jump slots -- TODO confirm
 * against the TCG exit_tb contract.  Otherwise the PC is simply set to
 * 'dest'.
 */
3358 static inline void gen_goto_tb(DisasContext *s, int n, uint32_t dest)
3360 TranslationBlock *tb;
3363 if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)) {
3365 gen_set_pc_im(dest);
3366 tcg_gen_exit_tb((tcg_target_long)tb + n);
3368 gen_set_pc_im(dest);
/* Emit an unconditional jump to guest address 'dest'.
 * With single-stepping enabled an indirect jump is used so that the
 * debug exception is still raised; otherwise the jump goes through
 * gen_goto_tb() with n == 0 and the translation state is set to
 * DISAS_TB_JUMP.
 */
3373 static inline void gen_jmp (DisasContext *s, uint32_t dest)
3375 if (unlikely(s->singlestep_enabled)) {
3376 /* An indirect jump so that we still trigger the debug exception. */
3381 gen_goto_tb(s, 0, dest);
3382 s->is_jmp = DISAS_TB_JUMP;
/* Emit a halfword multiply: t0 = t0 * t1.
 * Before the multiply an operand may be arithmetically shifted right by
 * 16, which replaces it with its sign-extended top halfword; 'x' and 'y'
 * presumably select this for t0 and t1 respectively (TODO confirm).
 * t1 may be modified as well as t0; the product is left in t0.
 */
3386 static inline void gen_mulxy(TCGv t0, TCGv t1, int x, int y)
3389 tcg_gen_sari_i32(t0, t0, 16);
3393 tcg_gen_sari_i32(t1, t1, 16);
3396 tcg_gen_mul_i32(t0, t0, t1);
3399 /* Return the mask of PSR bits set by a MSR instruction.
 * Bits 0..3 of 'flags' are tested individually and each one contributes
 * to the mask.  CPSR_RESERVED is then cleared, and bits belonging to
 * features the CPU lacks are removed: CPSR_Q without V5, CPSR_E and
 * CPSR_GE without V6 (V4T and Thumb-2 are checked as well).  Finally
 * execution state bits and privileged bits are masked out where
 * applicable.
 */
3400 static uint32_t msr_mask(CPUARMState *env, DisasContext *s, int flags, int spsr) {
3404 if (flags & (1 << 0))
3406 if (flags & (1 << 1))
3408 if (flags & (1 << 2))
3410 if (flags & (1 << 3))
3413 /* Mask out undefined bits. */
3414 mask &= ~CPSR_RESERVED;
3415 if (!arm_feature(env, ARM_FEATURE_V4T))
3417 if (!arm_feature(env, ARM_FEATURE_V5))
3418 mask &= ~CPSR_Q; /* V5TE in reality*/
3419 if (!arm_feature(env, ARM_FEATURE_V6))
3420 mask &= ~(CPSR_E | CPSR_GE);
3421 if (!arm_feature(env, ARM_FEATURE_THUMB2))
3423 /* Mask out execution state bits. */
3426 /* Mask out privileged bits. */
3432 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead.
 * Writes the bits of t0 selected by 'mask' to a program status register.
 * For the SPSR this is a read-modify-write:
 *     spsr = (spsr & ~mask) | (t0 & mask)
 * For the CPSR the update is delegated to gen_set_cpsr(t0, mask).
 * t0 is freed before returning.
 */
3433 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv t0)
3437 /* ??? This is also undefined in system mode. */
3441 tmp = load_cpu_field(spsr);
3442 tcg_gen_andi_i32(tmp, tmp, ~mask);
3443 tcg_gen_andi_i32(t0, t0, mask);
3444 tcg_gen_or_i32(tmp, tmp, t0);
3445 store_cpu_field(tmp, spsr);
3447 gen_set_cpsr(t0, mask);
3449 tcg_temp_free_i32(t0);
3454 /* Returns nonzero if access to the PSR is not permitted.
 * Immediate form of gen_set_psr(): 'val' is materialised in a new
 * temporary, which is then handed to gen_set_psr() (and freed there).
 */
3455 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
3458 tmp = tcg_temp_new_i32();
3459 tcg_gen_movi_i32(tmp, val);
3460 return gen_set_psr(s, mask, spsr, tmp);
3463 /* Generate an old-style exception return. Marks pc as dead.
 * 'pc' is stored to r15, then the whole SPSR is copied into the CPSR
 * (mask 0xffffffff).  The translation state is set to DISAS_UPDATE.
 */
3464 static void gen_exception_return(DisasContext *s, TCGv pc)
3467 store_reg(s, 15, pc);
3468 tmp = load_cpu_field(spsr);
3469 gen_set_cpsr(tmp, 0xffffffff);
3470 tcg_temp_free_i32(tmp);
3471 s->is_jmp = DISAS_UPDATE;
3474 /* Generate a v6 exception return. Marks both values as dead.
 * All bits of the CPSR are set from 'cpsr' (mask 0xffffffff), which is
 * then freed; 'pc' is stored to r15 afterwards.  The translation state
 * is set to DISAS_UPDATE.
 */
3475 static void gen_rfe(DisasContext *s, TCGv pc, TCGv cpsr)
3477 gen_set_cpsr(cpsr, 0xffffffff);
3478 tcg_temp_free_i32(cpsr);
3479 store_reg(s, 15, pc);
3480 s->is_jmp = DISAS_UPDATE;
/* Write the translator's conditional-execution state back to the CPU.
 * Nothing is emitted when s->condexec_mask is zero.  Otherwise the value
 * (condexec_cond << 4) | (condexec_mask >> 1) is stored to the
 * condexec_bits CPU field.
 */
3484 gen_set_condexec (DisasContext *s)
3486 if (s->condexec_mask) {
3487 uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
3488 TCGv tmp = tcg_temp_new_i32();
3489 tcg_gen_movi_i32(tmp, val);
3490 store_cpu_field(tmp, condexec_bits);
/* Raise exception 'excp' for the instruction being translated.
 * The conditional-execution state is written back first, the PC is set
 * to (s->pc - offset), and then the exception is generated.  The
 * translation state is set to DISAS_JUMP.
 */
3494 static void gen_exception_insn(DisasContext *s, int offset, int excp)
3496 gen_set_condexec(s);
3497 gen_set_pc_im(s->pc - offset);
3498 gen_exception(excp);
3499 s->is_jmp = DISAS_JUMP;
/* Translate a hint instruction identified by 'val'.
 * For one hint (presumably WFI, given the DISAS_WFI state -- TODO
 * confirm which 'val' selects it) the PC is set to s->pc and the
 * translation state becomes DISAS_WFI.  SEV and WFE are not implemented.
 */
3502 static void gen_nop_hint(DisasContext *s, int val)
3506 gen_set_pc_im(s->pc);
3507 s->is_jmp = DISAS_WFI;
3511 /* TODO: Implement SEV and WFE. May help SMP performance. */
/* Argument-list shorthand: expands to "cpu_V0, cpu_V0, cpu_V1", i.e. an
 * operation of the form cpu_V0 = op(cpu_V0, cpu_V1).
 */
3517 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1
/* Emit an element-wise add: t0 = t0 + t1.
 * size 0 uses the 8-bit lane helper, size 1 the 16-bit lane helper, and
 * size 2 is a plain 32-bit add.
 */
3519 static inline void gen_neon_add(int size, TCGv t0, TCGv t1)
3522 case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
3523 case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
3524 case 2: tcg_gen_add_i32(t0, t0, t1); break;
/* Emit an element-wise reverse subtract: t0 = t1 - t0.
 * size 0 uses the 8-bit lane helper, size 1 the 16-bit lane helper, and
 * size 2 is a plain 32-bit subtract.
 */
3529 static inline void gen_neon_rsb(int size, TCGv t0, TCGv t1)
3532 case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
3533 case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
3534 case 2: tcg_gen_sub_i32(t0, t1, t0); break;
3539 /* 32-bit pairwise ops end up the same as the elementwise versions.
 * The pairwise 32-bit max/min helper names are therefore aliased to the
 * corresponding elementwise helpers, for both signed and unsigned.
 */
3540 #define gen_helper_neon_pmax_s32 gen_helper_neon_max_s32
3541 #define gen_helper_neon_pmax_u32 gen_helper_neon_max_u32
3542 #define gen_helper_neon_pmin_s32 gen_helper_neon_min_s32
3543 #define gen_helper_neon_pmin_u32 gen_helper_neon_min_u32
/* GEN_NEON_INTEGER_OP_ENV(name) and GEN_NEON_INTEGER_OP(name):
 * Emit a call to gen_helper_neon_<name>_{s8,u8,s16,u16,s32,u32}, chosen
 * by switching on ((size << 1) | u).  Both macros rely on 'size', 'u',
 * 'tmp' and 'tmp2' being in scope at the point of use; the helper
 * computes tmp = op(tmp, tmp2).  The _ENV variant additionally passes
 * cpu_env to the helper.  An unsupported size/u combination makes the
 * enclosing function return 1.
 */
3545 #define GEN_NEON_INTEGER_OP_ENV(name) do { \
3546 switch ((size << 1) | u) { \
3548 gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
3551 gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
3554 gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
3557 gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
3560 gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
3563 gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
3565 default: return 1; \
3568 #define GEN_NEON_INTEGER_OP(name) do { \
3569 switch ((size << 1) | u) { \
3571 gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
3574 gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
3577 gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
3580 gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
3583 gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
3586 gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
3588 default: return 1; \
/* Load the 32-bit scratch slot vfp.scratch[scratch] from the CPU state
 * into a newly allocated temporary.
 */
3591 static TCGv neon_load_scratch(int scratch)
3593 TCGv tmp = tcg_temp_new_i32();
3594 tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
/* Store 'var' into the scratch slot vfp.scratch[scratch] of the CPU
 * state.  'var' is freed, so the caller must not use it afterwards.
 */
3598 static void neon_store_scratch(int scratch, TCGv var)
3600 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3601 tcg_temp_free_i32(var);
/* Load a NEON scalar operand into a 32-bit temporary.
 * 'reg' is a packed scalar specifier.  In one form the word is loaded
 * from register (reg & 7), pass (reg >> 4), and one of its 16-bit
 * halves is duplicated across the word; in the other form the word is
 * loaded from register (reg & 15), pass (reg >> 4), and used as is.
 * Presumably 'size' chooses between the two forms and a further bit of
 * 'reg' chooses the high or low half -- TODO confirm.
 */
3604 static inline TCGv neon_get_scalar(int size, int reg)
3608 tmp = neon_load_reg(reg & 7, reg >> 4);
3610 gen_neon_dup_high16(tmp);
3612 gen_neon_dup_low16(tmp);
3615 tmp = neon_load_reg(reg & 15, reg >> 4);
/* Emit a NEON unzip of registers rd and rm through a helper call.
 * The register numbers are passed to the helper as constants, which are
 * freed afterwards.  The qunzip8/16/32 helpers and the unzip8/16 helpers
 * are selected by 'q' and 'size'.  The combination (!q && size == 2) is
 * tested up front and is presumably rejected through the int return
 * value -- TODO confirm.
 */
3620 static int gen_neon_unzip(int rd, int rm, int size, int q)
3623 if (!q && size == 2) {
3626 tmp = tcg_const_i32(rd);
3627 tmp2 = tcg_const_i32(rm);
3631 gen_helper_neon_qunzip8(cpu_env, tmp, tmp2);
3634 gen_helper_neon_qunzip16(cpu_env, tmp, tmp2);
3637 gen_helper_neon_qunzip32(cpu_env, tmp, tmp2);
3645 gen_helper_neon_unzip8(cpu_env, tmp, tmp2);
3648 gen_helper_neon_unzip16(cpu_env, tmp, tmp2);
3654 tcg_temp_free_i32(tmp);
3655 tcg_temp_free_i32(tmp2);
/* Emit a NEON zip of registers rd and rm through a helper call.
 * The register numbers are passed to the helper as constants, which are
 * freed afterwards.  The qzip8/16/32 helpers and the zip8/16 helpers are
 * selected by 'q' and 'size'.  The combination (!q && size == 2) is
 * tested up front and is presumably rejected through the int return
 * value -- TODO confirm.
 */
3659 static int gen_neon_zip(int rd, int rm, int size, int q)
3662 if (!q && size == 2) {
3665 tmp = tcg_const_i32(rd);
3666 tmp2 = tcg_const_i32(rm);
3670 gen_helper_neon_qzip8(cpu_env, tmp, tmp2);
3673 gen_helper_neon_qzip16(cpu_env, tmp, tmp2);
3676 gen_helper_neon_qzip32(cpu_env, tmp, tmp2);
3684 gen_helper_neon_zip8(cpu_env, tmp, tmp2);
3687 gen_helper_neon_zip16(cpu_env, tmp, tmp2);
3693 tcg_temp_free_i32(tmp);
3694 tcg_temp_free_i32(tmp2);
/* Byte-wise transpose step on two 32-bit values; both are updated:
 *     new t0 = ((t0 << 8) & 0xff00ff00) | (t1 & 0x00ff00ff)
 *     new t1 = ((t1 >> 8) & 0x00ff00ff) | (t0 & 0xff00ff00)
 * where the right-hand sides use the incoming t0 and t1.  Two scratch
 * temporaries are allocated and freed internally.
 */
3698 static void gen_neon_trn_u8(TCGv t0, TCGv t1)
3702 rd = tcg_temp_new_i32();
3703 tmp = tcg_temp_new_i32();
/* rd accumulates the new value of t0 so that the old t0 stays readable. */
3705 tcg_gen_shli_i32(rd, t0, 8);
3706 tcg_gen_andi_i32(rd, rd, 0xff00ff00);
3707 tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
3708 tcg_gen_or_i32(rd, rd, tmp);
/* The new t1 is built in place; it still needs the old t0. */
3710 tcg_gen_shri_i32(t1, t1, 8);
3711 tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
3712 tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
3713 tcg_gen_or_i32(t1, t1, tmp);
3714 tcg_gen_mov_i32(t0, rd);
3716 tcg_temp_free_i32(tmp);
3717 tcg_temp_free_i32(rd);
/* Halfword-wise transpose step on two 32-bit values; both are updated:
 *     new t0 = (t0 << 16) | (t1 & 0xffff)
 *     new t1 = (t1 >> 16) | (t0 & 0xffff0000)
 * where the right-hand sides use the incoming t0 and t1.  Two scratch
 * temporaries are allocated and freed internally.
 */
3720 static void gen_neon_trn_u16(TCGv t0, TCGv t1)
3724 rd = tcg_temp_new_i32();
3725 tmp = tcg_temp_new_i32();
/* rd accumulates the new value of t0 so that the old t0 stays readable. */
3727 tcg_gen_shli_i32(rd, t0, 16);
3728 tcg_gen_andi_i32(tmp, t1, 0xffff);
3729 tcg_gen_or_i32(rd, rd, tmp);
3730 tcg_gen_shri_i32(t1, t1, 16);
3731 tcg_gen_andi_i32(tmp, t0, 0xffff0000);
3732 tcg_gen_or_i32(t1, t1, tmp);
3733 tcg_gen_mov_i32(t0, rd);
3735 tcg_temp_free_i32(tmp);
3736 tcg_temp_free_i32(rd);
3744 } neon_ls_element_type[11] = {
3758 /* Translate a NEON load/store element instruction. Return nonzero if the
3759 instruction is invalid.
   Three forms are handled: load/store of all elements (bit 23 clear),
   load of a single element to all lanes, and load/store of a single
   element.  Each form leaves the amount to advance the base register
   in 'stride' for the writeback code at the end.  */
3760 static int disas_neon_ls_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
3779 if (!s->vfp_enabled)
3781 VFP_DREG_D(rd, insn);
3782 rn = (insn >> 16) & 0xf;
3784 load = (insn & (1 << 21)) != 0;
3785 if ((insn & (1 << 23)) == 0) {
3786 /* Load store all elements. */
3787 op = (insn >> 8) & 0xf;
3788 size = (insn >> 6) & 3;
3791 /* Catch UNDEF cases for bad values of align field */
3794 if (((insn >> 5) & 1) == 1) {
3799 if (((insn >> 4) & 3) == 3) {
/* Register count, interleave factor and register spacing for this op. */
3806 nregs = neon_ls_element_type[op].nregs;
3807 interleave = neon_ls_element_type[op].interleave;
3808 spacing = neon_ls_element_type[op].spacing;
3809 if (size == 3 && (interleave | spacing) != 1)
3811 addr = tcg_temp_new_i32();
3812 load_reg_var(s, addr, rn);
/* Address increment applied after each element access. */
3813 stride = (1 << size) * interleave;
3814 for (reg = 0; reg < nregs; reg++) {
/* For interleaved layouts the address is restarted from the base
 * register and offset to this register's first element. */
3815 if (interleave > 2 || (interleave == 2 && nregs == 2)) {
3816 load_reg_var(s, addr, rn);
3817 tcg_gen_addi_i32(addr, addr, (1 << size) * reg);
3818 } else if (interleave == 2 && nregs == 4 && reg == 2) {
3819 load_reg_var(s, addr, rn);
3820 tcg_gen_addi_i32(addr, addr, 1 << size);
3824 tmp64 = gen_ld64(addr, IS_USER(s));
3825 neon_store_reg64(tmp64, rd);
3826 tcg_temp_free_i64(tmp64);
3828 tmp64 = tcg_temp_new_i64();
3829 neon_load_reg64(tmp64, rd);
3830 gen_st64(tmp64, addr, IS_USER(s));
3832 tcg_gen_addi_i32(addr, addr, stride);
/* Smaller elements: each D register is handled as two 32-bit passes. */
3834 for (pass = 0; pass < 2; pass++) {
3837 tmp = gen_ld32(addr, IS_USER(s));
3838 neon_store_reg(rd, pass, tmp);
3840 tmp = neon_load_reg(rd, pass);
3841 gen_st32(tmp, addr, IS_USER(s));
3843 tcg_gen_addi_i32(addr, addr, stride);
3844 } else if (size == 1) {
/* Two 16-bit accesses per 32-bit pass; the second is the high half. */
3846 tmp = gen_ld16u(addr, IS_USER(s));
3847 tcg_gen_addi_i32(addr, addr, stride);
3848 tmp2 = gen_ld16u(addr, IS_USER(s));
3849 tcg_gen_addi_i32(addr, addr, stride);
3850 tcg_gen_shli_i32(tmp2, tmp2, 16);
3851 tcg_gen_or_i32(tmp, tmp, tmp2);
3852 tcg_temp_free_i32(tmp2);
3853 neon_store_reg(rd, pass, tmp);
3855 tmp = neon_load_reg(rd, pass);
3856 tmp2 = tcg_temp_new_i32();
3857 tcg_gen_shri_i32(tmp2, tmp, 16);
3858 gen_st16(tmp, addr, IS_USER(s));
3859 tcg_gen_addi_i32(addr, addr, stride);
3860 gen_st16(tmp2, addr, IS_USER(s));
3861 tcg_gen_addi_i32(addr, addr, stride);
3863 } else /* size == 0 */ {
/* Four byte accesses per 32-bit pass; byte n sits at bit n * 8. */
3866 for (n = 0; n < 4; n++) {
3867 tmp = gen_ld8u(addr, IS_USER(s));
3868 tcg_gen_addi_i32(addr, addr, stride);
3872 tcg_gen_shli_i32(tmp, tmp, n * 8);
3873 tcg_gen_or_i32(tmp2, tmp2, tmp);
3874 tcg_temp_free_i32(tmp);
3877 neon_store_reg(rd, pass, tmp2);
3879 tmp2 = neon_load_reg(rd, pass);
3880 for (n = 0; n < 4; n++) {
3881 tmp = tcg_temp_new_i32();
3883 tcg_gen_mov_i32(tmp, tmp2);
3885 tcg_gen_shri_i32(tmp, tmp2, n * 8);
3887 gen_st8(tmp, addr, IS_USER(s));
3888 tcg_gen_addi_i32(addr, addr, stride);
3890 tcg_temp_free_i32(tmp2);
3897 tcg_temp_free_i32(addr);
3900 size = (insn >> 10) & 3;
3902 /* Load single element to all lanes. */
3903 int a = (insn >> 4) & 1;
3907 size = (insn >> 6) & 3;
3908 nregs = ((insn >> 8) & 3) + 1;
3911 if (nregs != 4 || a == 0) {
3914 /* For VLD4 size==3 a == 1 means 32 bits at 16 byte alignment */
3917 if (nregs == 1 && a == 1 && size == 0) {
3920 if (nregs == 3 && a == 1) {
3923 addr = tcg_temp_new_i32();
3924 load_reg_var(s, addr, rn);
3926 /* VLD1 to all lanes: bit 5 indicates how many Dregs to write */
3927 tmp = gen_load_and_replicate(s, addr, size);
3928 tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0));
3929 tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1));
3930 if (insn & (1 << 5)) {
3931 tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 0));
3932 tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 1));
3934 tcg_temp_free_i32(tmp);
3936 /* VLD2/3/4 to all lanes: bit 5 indicates register stride */
3937 stride = (insn & (1 << 5)) ? 2 : 1;
3938 for (reg = 0; reg < nregs; reg++) {
3939 tmp = gen_load_and_replicate(s, addr, size);
3940 tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0));
3941 tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1));
3942 tcg_temp_free_i32(tmp);
3943 tcg_gen_addi_i32(addr, addr, 1 << size);
3947 tcg_temp_free_i32(addr);
/* From here on 'stride' is the number of bytes transferred. */
3948 stride = (1 << size) * nregs;
3950 /* Single element. */
3951 int idx = (insn >> 4) & 0xf;
3952 pass = (insn >> 7) & 1;
3955 shift = ((insn >> 5) & 3) * 8;
3959 shift = ((insn >> 6) & 1) * 16;
3960 stride = (insn & (1 << 5)) ? 2 : 1;
3964 stride = (insn & (1 << 6)) ? 2 : 1;
3969 nregs = ((insn >> 8) & 3) + 1;
3970 /* Catch the UNDEF cases. This is unavoidably a bit messy. */
3973 if (((idx & (1 << size)) != 0) ||
3974 (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) {
3979 if ((idx & 1) != 0) {
3984 if (size == 2 && (idx & 2) != 0) {
3989 if ((size == 2) && ((idx & 3) == 3)) {
3996 if ((rd + stride * (nregs - 1)) > 31) {
3997 /* Attempts to write off the end of the register file
3998 * are UNPREDICTABLE; we choose to UNDEF because otherwise
3999 * the neon_load_reg() would write off the end of the array.
4003 addr = tcg_temp_new_i32();
4004 load_reg_var(s, addr, rn);
4005 for (reg = 0; reg < nregs; reg++) {
4009 tmp = gen_ld8u(addr, IS_USER(s));
4012 tmp = gen_ld16u(addr, IS_USER(s));
4015 tmp = gen_ld32(addr, IS_USER(s));
4017 default: /* Avoid compiler warnings. */
/* Insert the loaded element into the existing register word at bit
 * offset 'shift', leaving the other bits of the word unchanged. */
4021 tmp2 = neon_load_reg(rd, pass);
4022 tcg_gen_deposit_i32(tmp, tmp2, tmp,
4023 shift, size ? 16 : 8);
4024 tcg_temp_free_i32(tmp2);
4026 neon_store_reg(rd, pass, tmp);
4027 } else { /* Store */
4028 tmp = neon_load_reg(rd, pass);
4030 tcg_gen_shri_i32(tmp, tmp, shift);
4033 gen_st8(tmp, addr, IS_USER(s));
4036 gen_st16(tmp, addr, IS_USER(s));
4039 gen_st32(tmp, addr, IS_USER(s));
4044 tcg_gen_addi_i32(addr, addr, 1 << size);
4046 tcg_temp_free_i32(addr);
/* From here on 'stride' is the number of bytes transferred. */
4047 stride = nregs * (1 << size);
/* Base register writeback: rn is advanced either by 'stride' or by the
 * value of register rm, then stored back. */
4053 base = load_reg(s, rn);
4055 tcg_gen_addi_i32(base, base, stride);
4058 index = load_reg(s, rm);
4059 tcg_gen_add_i32(base, base, index);
4060 tcg_temp_free_i32(index);
4062 store_reg(s, rn, base);
4067 /* Bitwise select. dest = c ? t : f. Clobbers T and F.
 * Computed per bit as dest = (t & c) | (f & ~c); the two intermediate
 * results are written into t and f themselves.
 */
4068 static void gen_neon_bsl(TCGv dest, TCGv t, TCGv f, TCGv c)
4070 tcg_gen_and_i32(t, t, c);
4071 tcg_gen_andc_i32(f, f, c);
4072 tcg_gen_or_i32(dest, t, f);
/* Narrow the 64-bit value 'src' into the 32-bit 'dest' without
 * saturation.  size 0 and 1 use the u8 and u16 narrowing helpers;
 * size 2 is a plain truncation of the 64-bit value to 32 bits.
 */
4075 static inline void gen_neon_narrow(int size, TCGv dest, TCGv_i64 src)
4078 case 0: gen_helper_neon_narrow_u8(dest, src); break;
4079 case 1: gen_helper_neon_narrow_u16(dest, src); break;
4080 case 2: tcg_gen_trunc_i64_i32(dest, src); break;
/* Narrow 'src' into 'dest' using the signed saturating narrow helpers
 * (s8, s16 or s32 for size 0, 1 or 2).  The helpers are passed cpu_env.
 */
4085 static inline void gen_neon_narrow_sats(int size, TCGv dest, TCGv_i64 src)
4088 case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
4089 case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
4090 case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
/* Narrow 'src' into 'dest' using the unsigned saturating narrow helpers
 * (u8, u16 or u32 for size 0, 1 or 2).  The helpers are passed cpu_env.
 */
4095 static inline void gen_neon_narrow_satu(int size, TCGv dest, TCGv_i64 src)
4098 case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
4099 case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
4100 case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
/* Narrow 'src' into 'dest' using the "unarrow_sat" helpers (8, 16 or 32
 * for size 0, 1 or 2).  The helpers are passed cpu_env.
 */
4105 static inline void gen_neon_unarrow_sats(int size, TCGv dest, TCGv_i64 src)
4108 case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
4109 case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
4110 case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
/* Emit the shift used by narrowing shifts: var = shift_op(var, shift).
 * One of four helper families is used -- rounding (rshl) or plain (shl),
 * each in unsigned and signed form -- for 16-bit (size 1) or 32-bit
 * (size 2) elements.  The family is presumably chosen by the remaining
 * parameters of this function -- TODO confirm.
 */
4115 static inline void gen_neon_shift_narrow(int size, TCGv var, TCGv shift,
4121 case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
4122 case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
4127 case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
4128 case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
4135 case 1: gen_helper_neon_shl_u16(var, var, shift); break;
4136 case 2: gen_helper_neon_shl_u32(var, var, shift); break;
4141 case 1: gen_helper_neon_shl_s16(var, var, shift); break;
4142 case 2: gen_helper_neon_shl_s32(var, var, shift); break;
/* Widen the 32-bit 'src' into the 64-bit 'dest'.
 * An unsigned group (u8/u16 helpers, or zero-extension for size 2) and a
 * signed group (s8/s16 helpers, or sign-extension for size 2) exist;
 * presumably 'u' selects the unsigned group -- TODO confirm.
 * 'src' is freed, so the caller must not use it afterwards.
 */
4149 static inline void gen_neon_widen(TCGv_i64 dest, TCGv src, int size, int u)
4153 case 0: gen_helper_neon_widen_u8(dest, src); break;
4154 case 1: gen_helper_neon_widen_u16(dest, src); break;
4155 case 2: tcg_gen_extu_i32_i64(dest, src); break;
4160 case 0: gen_helper_neon_widen_s8(dest, src); break;
4161 case 1: gen_helper_neon_widen_s16(dest, src); break;
4162 case 2: tcg_gen_ext_i32_i64(dest, src); break;
4166 tcg_temp_free_i32(src);
/* Long add on the 64-bit globals: cpu_V0 = cpu_V0 + cpu_V1.
 * size 0 and 1 use the addl_u16 and addl_u32 helpers; size 2 is a plain
 * 64-bit add.
 */
4169 static inline void gen_neon_addl(int size)
4172 case 0: gen_helper_neon_addl_u16(CPU_V001); break;
4173 case 1: gen_helper_neon_addl_u32(CPU_V001); break;
4174 case 2: tcg_gen_add_i64(CPU_V001); break;
/* Long subtract on the 64-bit globals: cpu_V0 = cpu_V0 - cpu_V1.
 * size 0 and 1 use the subl_u16 and subl_u32 helpers; size 2 is a plain
 * 64-bit subtract.
 */
4179 static inline void gen_neon_subl(int size)
4182 case 0: gen_helper_neon_subl_u16(CPU_V001); break;
4183 case 1: gen_helper_neon_subl_u32(CPU_V001); break;
4184 case 2: tcg_gen_sub_i64(CPU_V001); break;
/* Negate the 64-bit value 'var' in place.
 * size 0 and 1 use the negl_u16 and negl_u32 helpers; the remaining case
 * is a plain 64-bit negate.
 */
4189 static inline void gen_neon_negl(TCGv_i64 var, int size)
4192 case 0: gen_helper_neon_negl_u16(var, var); break;
4193 case 1: gen_helper_neon_negl_u32(var, var); break;
4195 tcg_gen_neg_i64(var, var);
/* Saturating long add: op0 = sat(op0 + op1).
 * size 1 uses the s32 helper and size 2 the s64 helper; both are passed
 * cpu_env.
 */
4201 static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
4204 case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
4205 case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
/* Widening multiply: dest (64-bit) = a * b.
 * The variant is chosen by (size << 1) | u: values 0..3 use the s8, u8,
 * s16 and u16 helpers.  The 32-bit variants go through
 * gen_muls_i64_i32() / gen_mulu_i64_i32(), whose 64-bit result is copied
 * into dest and then freed.  a and b are also freed here for the helper
 * variants, because those helpers do not free their parameters.
 */
4210 static inline void gen_neon_mull(TCGv_i64 dest, TCGv a, TCGv b, int size, int u)
4214 switch ((size << 1) | u) {
4215 case 0: gen_helper_neon_mull_s8(dest, a, b); break;
4216 case 1: gen_helper_neon_mull_u8(dest, a, b); break;
4217 case 2: gen_helper_neon_mull_s16(dest, a, b); break;
4218 case 3: gen_helper_neon_mull_u16(dest, a, b); break;
4220 tmp = gen_muls_i64_i32(a, b);
4221 tcg_gen_mov_i64(dest, tmp);
4222 tcg_temp_free_i64(tmp);
4225 tmp = gen_mulu_i64_i32(a, b);
4226 tcg_gen_mov_i64(dest, tmp);
4227 tcg_temp_free_i64(tmp);
4232 /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
4233 Don't forget to clean them now. */
4235 tcg_temp_free_i32(a);
4236 tcg_temp_free_i32(b);
/* Narrow 'src' into 'dest' using one of four flavours: gen_neon_unarrow_sats,
 * gen_neon_narrow (no saturation), gen_neon_narrow_satu or
 * gen_neon_narrow_sats.  The flavour is presumably chosen from 'op' and
 * 'u' -- TODO confirm the exact mapping.  'size' is passed through.
 */
4240 static void gen_neon_narrow_op(int op, int u, int size, TCGv dest, TCGv_i64 src)
4244 gen_neon_unarrow_sats(size, dest, src);
4246 gen_neon_narrow(size, dest, src);
4250 gen_neon_narrow_satu(size, dest, src);
4252 gen_neon_narrow_sats(size, dest, src);
4257 /* Symbolic constants for op fields for Neon 3-register same-length.
4258 * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B. */
4261 #define NEON_3R_VHADD 0
4262 #define NEON_3R_VQADD 1
4263 #define NEON_3R_VRHADD 2
4264 #define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
4265 #define NEON_3R_VHSUB 4
4266 #define NEON_3R_VQSUB 5
4267 #define NEON_3R_VCGT 6
4268 #define NEON_3R_VCGE 7
4269 #define NEON_3R_VSHL 8
4270 #define NEON_3R_VQSHL 9
4271 #define NEON_3R_VRSHL 10
4272 #define NEON_3R_VQRSHL 11
4273 #define NEON_3R_VMAX 12
4274 #define NEON_3R_VMIN 13
4275 #define NEON_3R_VABD 14
4276 #define NEON_3R_VABA 15
4277 #define NEON_3R_VADD_VSUB 16
4278 #define NEON_3R_VTST_VCEQ 17
4279 #define NEON_3R_VML 18 /* VMLA, VMLAL, VMLS, VMLSL */
4280 #define NEON_3R_VMUL 19
4281 #define NEON_3R_VPMAX 20
4282 #define NEON_3R_VPMIN 21
4283 #define NEON_3R_VQDMULH_VQRDMULH 22
4284 #define NEON_3R_VPADD 23
4285 #define NEON_3R_VFM 25 /* VFMA, VFMS : float fused multiply-add */
4286 #define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
4287 #define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
4288 #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
4289 #define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
4290 #define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
4291 #define NEON_3R_VRECPS_VRSQRTS 31 /* float VRECPS, VRSQRTS */
/* Allowed element sizes for each Neon 3-register same-length op, indexed
 * by the NEON_3R_* constants.  Bit n of an entry is set if size value n
 * is accepted; the decoder tests (neon_3r_sizes[op] & (1 << size)) and
 * treats a zero result as an invalid op/size combination (UNDEF).
 * Ops without an initialiser here have no bits set.
 */
4293 static const uint8_t neon_3r_sizes[] = {
4294 [NEON_3R_VHADD] = 0x7,
4295 [NEON_3R_VQADD] = 0xf,
4296 [NEON_3R_VRHADD] = 0x7,
4297 [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
4298 [NEON_3R_VHSUB] = 0x7,
4299 [NEON_3R_VQSUB] = 0xf,
4300 [NEON_3R_VCGT] = 0x7,
4301 [NEON_3R_VCGE] = 0x7,
4302 [NEON_3R_VSHL] = 0xf,
4303 [NEON_3R_VQSHL] = 0xf,
4304 [NEON_3R_VRSHL] = 0xf,
4305 [NEON_3R_VQRSHL] = 0xf,
4306 [NEON_3R_VMAX] = 0x7,
4307 [NEON_3R_VMIN] = 0x7,
4308 [NEON_3R_VABD] = 0x7,
4309 [NEON_3R_VABA] = 0x7,
4310 [NEON_3R_VADD_VSUB] = 0xf,
4311 [NEON_3R_VTST_VCEQ] = 0x7,
4312 [NEON_3R_VML] = 0x7,
4313 [NEON_3R_VMUL] = 0x7,
4314 [NEON_3R_VPMAX] = 0x7,
4315 [NEON_3R_VPMIN] = 0x7,
4316 [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
4317 [NEON_3R_VPADD] = 0x7,
4318 [NEON_3R_VFM] = 0x5, /* size bit 1 encodes op */
4319 [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
4320 [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
4321 [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
4322 [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
4323 [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
4324 [NEON_3R_VRECPS_VRSQRTS] = 0x5, /* size bit 1 encodes op */
4327 /* Symbolic constants for op fields for Neon 2-register miscellaneous.
4328 * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B. */
4331 #define NEON_2RM_VREV64 0
4332 #define NEON_2RM_VREV32 1
4333 #define NEON_2RM_VREV16 2
4334 #define NEON_2RM_VPADDL 4
4335 #define NEON_2RM_VPADDL_U 5
4336 #define NEON_2RM_VCLS 8
4337 #define NEON_2RM_VCLZ 9
4338 #define NEON_2RM_VCNT 10
4339 #define NEON_2RM_VMVN 11
4340 #define NEON_2RM_VPADAL 12
4341 #define NEON_2RM_VPADAL_U 13
4342 #define NEON_2RM_VQABS 14
4343 #define NEON_2RM_VQNEG 15
4344 #define NEON_2RM_VCGT0 16
4345 #define NEON_2RM_VCGE0 17
4346 #define NEON_2RM_VCEQ0 18
4347 #define NEON_2RM_VCLE0 19
4348 #define NEON_2RM_VCLT0 20
4349 #define NEON_2RM_VABS 22
4350 #define NEON_2RM_VNEG 23
4351 #define NEON_2RM_VCGT0_F 24
4352 #define NEON_2RM_VCGE0_F 25
4353 #define NEON_2RM_VCEQ0_F 26
4354 #define NEON_2RM_VCLE0_F 27
4355 #define NEON_2RM_VCLT0_F 28
4356 #define NEON_2RM_VABS_F 30
4357 #define NEON_2RM_VNEG_F 31
4358 #define NEON_2RM_VSWP 32
4359 #define NEON_2RM_VTRN 33
4360 #define NEON_2RM_VUZP 34
4361 #define NEON_2RM_VZIP 35
4362 #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
4363 #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
4364 #define NEON_2RM_VSHLL 38
4365 #define NEON_2RM_VCVT_F16_F32 44
4366 #define NEON_2RM_VCVT_F32_F16 46
4367 #define NEON_2RM_VRECPE 56
4368 #define NEON_2RM_VRSQRTE 57
4369 #define NEON_2RM_VRECPE_F 58
4370 #define NEON_2RM_VRSQRTE_F 59
4371 #define NEON_2RM_VCVT_FS 60
4372 #define NEON_2RM_VCVT_FU 61
4373 #define NEON_2RM_VCVT_SF 62
4374 #define NEON_2RM_VCVT_UF 63
/* Classify a Neon 2-register-misc op value (a NEON_2RM_* constant).
 * Returns nonzero for NEON_2RM_VABS_F, NEON_2RM_VNEG_F and every op
 * value from NEON_2RM_VRECPE_F upwards; zero otherwise.
 */
4376 static int neon_2rm_is_float_op(int op)
4378 /* Return true if this neon 2reg-misc op is float-to-float */
4379 return (op == NEON_2RM_VABS_F || op == NEON_2RM_VNEG_F ||
4380 op >= NEON_2RM_VRECPE_F);
4383 /* Each entry in this array has bit n set if the insn allows
4384 * size value n (otherwise it will UNDEF). Since unallocated
4385 * op values will have no bits set they always UNDEF. */
4387 static const uint8_t neon_2rm_sizes[] = {
4388 [NEON_2RM_VREV64] = 0x7,
4389 [NEON_2RM_VREV32] = 0x3,
4390 [NEON_2RM_VREV16] = 0x1,
4391 [NEON_2RM_VPADDL] = 0x7,
4392 [NEON_2RM_VPADDL_U] = 0x7,
4393 [NEON_2RM_VCLS] = 0x7,
4394 [NEON_2RM_VCLZ] = 0x7,
4395 [NEON_2RM_VCNT] = 0x1,
4396 [NEON_2RM_VMVN] = 0x1,
4397 [NEON_2RM_VPADAL] = 0x7,
4398 [NEON_2RM_VPADAL_U] = 0x7,
4399 [NEON_2RM_VQABS] = 0x7,
4400 [NEON_2RM_VQNEG] = 0x7,
4401 [NEON_2RM_VCGT0] = 0x7,
4402 [NEON_2RM_VCGE0] = 0x7,
4403 [NEON_2RM_VCEQ0] = 0x7,
4404 [NEON_2RM_VCLE0] = 0x7,
4405 [NEON_2RM_VCLT0] = 0x7,
4406 [NEON_2RM_VABS] = 0x7,
4407 [NEON_2RM_VNEG] = 0x7,
4408 [NEON_2RM_VCGT0_F] = 0x4,
4409 [NEON_2RM_VCGE0_F] = 0x4,
4410 [NEON_2RM_VCEQ0_F] = 0x4,
4411 [NEON_2RM_VCLE0_F] = 0x4,
4412 [NEON_2RM_VCLT0_F] = 0x4,
4413 [NEON_2RM_VABS_F] = 0x4,
4414 [NEON_2RM_VNEG_F] = 0x4,
4415 [NEON_2RM_VSWP] = 0x1,
4416 [NEON_2RM_VTRN] = 0x7,
4417 [NEON_2RM_VUZP] = 0x7,
4418 [NEON_2RM_VZIP] = 0x7,
4419 [NEON_2RM_VMOVN] = 0x7,
4420 [NEON_2RM_VQMOVN] = 0x7,
4421 [NEON_2RM_VSHLL] = 0x7,
4422 [NEON_2RM_VCVT_F16_F32] = 0x2,
4423 [NEON_2RM_VCVT_F32_F16] = 0x2,
4424 [NEON_2RM_VRECPE] = 0x4,
4425 [NEON_2RM_VRSQRTE] = 0x4,
4426 [NEON_2RM_VRECPE_F] = 0x4,
4427 [NEON_2RM_VRSQRTE_F] = 0x4,
4428 [NEON_2RM_VCVT_FS] = 0x4,
4429 [NEON_2RM_VCVT_FU] = 0x4,
4430 [NEON_2RM_VCVT_SF] = 0x4,
4431 [NEON_2RM_VCVT_UF] = 0x4,
4434 /* Translate a NEON data processing instruction. Return nonzero if the
4435 instruction is invalid.
4436 We process data in a mixture of 32-bit and 64-bit chunks.
4437 Mostly we use 32-bit chunks so we can use normal scalar instructions. */
/*
 * Decode the NEON data-processing instruction in 'insn' and emit the
 * corresponding TCG operations.
 *
 *   env  - CPU state; consulted through arm_feature() for optional
 *          extensions (VFP4, half-precision conversion).
 *   s    - translation context; s->vfp_enabled is tested before any decode.
 *   insn - the instruction word.
 *
 * Returns nonzero when the instruction is invalid.
 *
 * Top-level decode, keyed on insn bits 23 and 4:
 *   bit 23 clear            three registers of the same length
 *   bit 23 set, bit 4 set   two registers and a shift, or one register
 *                           and an immediate
 *   otherwise               three registers of different lengths, two
 *                           registers and a scalar, and (size == 3) the
 *                           byte-extract, two-register-misc, table-lookup
 *                           and dup forms
 */
4439 static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
4451 TCGv tmp, tmp2, tmp3, tmp4, tmp5;
4454 if (!s->vfp_enabled)
/* Fields common to every form: q is insn bit 6, u is insn bit 24, the
 * three register numbers come from the VFP_DREG_* macros, and size is
 * insn bits [21:20] (some forms recompute size later). */
4456 q = (insn & (1 << 6)) != 0;
4457 u = (insn >> 24) & 1;
4458 VFP_DREG_D(rd, insn);
4459 VFP_DREG_N(rn, insn);
4460 VFP_DREG_M(rm, insn);
4461 size = (insn >> 20) & 3;
4462 if ((insn & (1 << 23)) == 0) {
4463 /* Three register same length. */
4464 op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
4465 /* Catch invalid op and bad size combinations: UNDEF */
4466 if ((neon_3r_sizes[op] & (1 << size)) == 0) {
4469 /* All insns of this form UNDEF for either this condition or the
4470 * superset of cases "Q==1"; we catch the latter later.
4472 if (q && ((rd | rn | rm) & 1)) {
4475 if (size == 3 && op != NEON_3R_LOGIC) {
4476 /* 64-bit element instructions. */
/* One 64-bit pass per register: two passes when q is set, otherwise one.
 * Operands are loaded into cpu_V0 (from rn) and cpu_V1 (from rm). */
4477 for (pass = 0; pass < (q ? 2 : 1); pass++) {
4478 neon_load_reg64(cpu_V0, rn + pass);
4479 neon_load_reg64(cpu_V1, rm + pass);
4483 gen_helper_neon_qadd_u64(cpu_V0, cpu_env,
4486 gen_helper_neon_qadd_s64(cpu_V0, cpu_env,
4492 gen_helper_neon_qsub_u64(cpu_V0, cpu_env,
4495 gen_helper_neon_qsub_s64(cpu_V0, cpu_env,
4501 gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
4503 gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0);
4508 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
4511 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
4517 gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
4519 gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
4522 case NEON_3R_VQRSHL:
4524 gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
4527 gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
4531 case NEON_3R_VADD_VSUB:
4533 tcg_gen_sub_i64(CPU_V001);
4535 tcg_gen_add_i64(CPU_V001);
4541 neon_store_reg64(cpu_V0, rd + pass);
4550 case NEON_3R_VQRSHL:
4553 /* Shift instruction operands are reversed. */
4568 case NEON_3R_FLOAT_ARITH:
4569 pairwise = (u && size < 2); /* if VPADD (float) */
4571 case NEON_3R_FLOAT_MINMAX:
4572 pairwise = u; /* if VPMIN/VPMAX (float) */
4574 case NEON_3R_FLOAT_CMP:
4576 /* no encoding for U=0 C=1x */
4580 case NEON_3R_FLOAT_ACMP:
4585 case NEON_3R_VRECPS_VRSQRTS:
4591 if (u && (size != 0)) {
4592 /* UNDEF on invalid size for polynomial subcase */
4597 if (!arm_feature(env, ARM_FEATURE_VFP4) || u) {
4605 if (pairwise && q) {
4606 /* All the pairwise insns UNDEF if Q is set */
/* Everything that did not take the 64-bit path above is processed in
 * 32-bit passes: four when q is set, otherwise two. */
4610 for (pass = 0; pass < (q ? 4 : 2); pass++) {
4615 tmp = neon_load_reg(rn, 0);
4616 tmp2 = neon_load_reg(rn, 1);
4618 tmp = neon_load_reg(rm, 0);
4619 tmp2 = neon_load_reg(rm, 1);
4623 tmp = neon_load_reg(rn, pass);
4624 tmp2 = neon_load_reg(rm, pass);
4628 GEN_NEON_INTEGER_OP(hadd);
4631 GEN_NEON_INTEGER_OP_ENV(qadd);
4633 case NEON_3R_VRHADD:
4634 GEN_NEON_INTEGER_OP(rhadd);
4636 case NEON_3R_LOGIC: /* Logic ops. */
4637 switch ((u << 2) | size) {
4639 tcg_gen_and_i32(tmp, tmp, tmp2);
4642 tcg_gen_andc_i32(tmp, tmp, tmp2);
4645 tcg_gen_or_i32(tmp, tmp, tmp2);
4648 tcg_gen_orc_i32(tmp, tmp, tmp2);
4651 tcg_gen_xor_i32(tmp, tmp, tmp2);
4654 tmp3 = neon_load_reg(rd, pass);
4655 gen_neon_bsl(tmp, tmp, tmp2, tmp3);
4656 tcg_temp_free_i32(tmp3);
4659 tmp3 = neon_load_reg(rd, pass);
4660 gen_neon_bsl(tmp, tmp, tmp3, tmp2);
4661 tcg_temp_free_i32(tmp3);
4664 tmp3 = neon_load_reg(rd, pass);
4665 gen_neon_bsl(tmp, tmp3, tmp, tmp2);
4666 tcg_temp_free_i32(tmp3);
4671 GEN_NEON_INTEGER_OP(hsub);
4674 GEN_NEON_INTEGER_OP_ENV(qsub);
4677 GEN_NEON_INTEGER_OP(cgt);
4680 GEN_NEON_INTEGER_OP(cge);
4683 GEN_NEON_INTEGER_OP(shl);
4686 GEN_NEON_INTEGER_OP_ENV(qshl);
4689 GEN_NEON_INTEGER_OP(rshl);
4691 case NEON_3R_VQRSHL:
4692 GEN_NEON_INTEGER_OP_ENV(qrshl);
4695 GEN_NEON_INTEGER_OP(max);
4698 GEN_NEON_INTEGER_OP(min);
4701 GEN_NEON_INTEGER_OP(abd);
4704 GEN_NEON_INTEGER_OP(abd);
4705 tcg_temp_free_i32(tmp2);
4706 tmp2 = neon_load_reg(rd, pass);
4707 gen_neon_add(size, tmp, tmp2);
4709 case NEON_3R_VADD_VSUB:
4710 if (!u) { /* VADD */
4711 gen_neon_add(size, tmp, tmp2);
4714 case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break;
4715 case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break;
4716 case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break;
4721 case NEON_3R_VTST_VCEQ:
4722 if (!u) { /* VTST */
4724 case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break;
4725 case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break;
4726 case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break;
4731 case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
4732 case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
4733 case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
4738 case NEON_3R_VML: /* VMLA, VMLAL, VMLS,VMLSL */
4740 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
4741 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
4742 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
4745 tcg_temp_free_i32(tmp2);
4746 tmp2 = neon_load_reg(rd, pass);
4748 gen_neon_rsb(size, tmp, tmp2);
4750 gen_neon_add(size, tmp, tmp2);
4754 if (u) { /* polynomial */
4755 gen_helper_neon_mul_p8(tmp, tmp, tmp2);
4756 } else { /* Integer */
4758 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
4759 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
4760 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
4766 GEN_NEON_INTEGER_OP(pmax);
4769 GEN_NEON_INTEGER_OP(pmin);
4771 case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high. */
4772 if (!u) { /* VQDMULH */
4775 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
4778 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
4782 } else { /* VQRDMULH */
4785 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
4788 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
4796 case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
4797 case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
4798 case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
4802 case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
4804 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
4805 switch ((u << 2) | size) {
4808 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
4811 gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus);
4814 gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus);
4819 tcg_temp_free_ptr(fpstatus);
4822 case NEON_3R_FLOAT_MULTIPLY:
4824 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
4825 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
4827 tcg_temp_free_i32(tmp2);
4828 tmp2 = neon_load_reg(rd, pass);
4830 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
4832 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
4835 tcg_temp_free_ptr(fpstatus);
4838 case NEON_3R_FLOAT_CMP:
4840 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
4842 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
4845 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
4847 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
4850 tcg_temp_free_ptr(fpstatus);
4853 case NEON_3R_FLOAT_ACMP:
4855 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
4857 gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus);
4859 gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus);
4861 tcg_temp_free_ptr(fpstatus);
4864 case NEON_3R_FLOAT_MINMAX:
4866 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
4868 gen_helper_neon_max_f32(tmp, tmp, tmp2, fpstatus);
4870 gen_helper_neon_min_f32(tmp, tmp, tmp2, fpstatus);
4872 tcg_temp_free_ptr(fpstatus);
4875 case NEON_3R_VRECPS_VRSQRTS:
4877 gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
4879 gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
4883 /* VFMA, VFMS: fused multiply-add */
4884 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
4885 TCGv_i32 tmp3 = neon_load_reg(rd, pass);
4888 gen_helper_vfp_negs(tmp, tmp);
4890 gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
4891 tcg_temp_free_i32(tmp3);
4892 tcg_temp_free_ptr(fpstatus);
4898 tcg_temp_free_i32(tmp2);
4900 /* Save the result. For elementwise operations we can put it
4901 straight into the destination register. For pairwise operations
4902 we have to be careful to avoid clobbering the source operands. */
4903 if (pairwise && rd == rm) {
4904 neon_store_scratch(pass, tmp);
4906 neon_store_reg(rd, pass, tmp);
/* Pairwise results that were parked in scratch storage are copied into rd
 * only now, after every pass has finished reading its sources. */
4910 if (pairwise && rd == rm) {
4911 for (pass = 0; pass < (q ? 4 : 2); pass++) {
4912 tmp = neon_load_scratch(pass);
4913 neon_store_reg(rd, pass, tmp);
4916 /* End of 3 register same size operations. */
4917 } else if (insn & (1 << 4)) {
/* 0x00380080 covers insn bits [21:19] and bit 7: any of them set selects
 * the shift forms, all clear selects one-register-and-immediate. */
4918 if ((insn & 0x00380080) != 0) {
4919 /* Two registers and shift. */
4920 op = (insn >> 8) & 0xf;
4921 if (insn & (1 << 7)) {
4929 while ((insn & (1 << (size + 19))) == 0)
4932 shift = (insn >> 16) & ((1 << (3 + size)) - 1);
4933 /* To avoid excessive duplication of ops we implement shift
4934 by immediate using the variable shift operations. */
4936 /* Shift by immediate:
4937 VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */
4938 if (q && ((rd | rm) & 1)) {
4941 if (!u && (op == 4 || op == 6)) {
4944 /* Right shifts are encoded as N - shift, where N is the
4945 element size in bits. */
4947 shift = shift - (1 << (size + 3));
4955 imm = (uint8_t) shift;
4960 imm = (uint16_t) shift;
4971 for (pass = 0; pass < count; pass++) {
4973 neon_load_reg64(cpu_V0, rm + pass);
4974 tcg_gen_movi_i64(cpu_V1, imm);
4979 gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
4981 gen_helper_neon_shl_s64(cpu_V0, cpu_V0, cpu_V1);
4986 gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1);
4988 gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
4991 case 5: /* VSHL, VSLI */
4992 gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
4994 case 6: /* VQSHLU */
4995 gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
5000 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5003 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5008 if (op == 1 || op == 3) {
5010 neon_load_reg64(cpu_V1, rd + pass);
5011 tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
5012 } else if (op == 4 || (op == 5 && u)) {
5014 neon_load_reg64(cpu_V1, rd + pass);
5016 if (shift < -63 || shift > 63) {
5020 mask = 0xffffffffffffffffull >> -shift;
5022 mask = 0xffffffffffffffffull << shift;
5025 tcg_gen_andi_i64(cpu_V1, cpu_V1, ~mask);
5026 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
5028 neon_store_reg64(cpu_V0, rd + pass);
5029 } else { /* size < 3 */
5030 /* Operands in T0 and T1. */
5031 tmp = neon_load_reg(rm, pass);
5032 tmp2 = tcg_temp_new_i32();
5033 tcg_gen_movi_i32(tmp2, imm);
5037 GEN_NEON_INTEGER_OP(shl);
5041 GEN_NEON_INTEGER_OP(rshl);
5044 case 5: /* VSHL, VSLI */
5046 case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break;
5047 case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break;
5048 case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break;
5052 case 6: /* VQSHLU */
5055 gen_helper_neon_qshlu_s8(tmp, cpu_env,
5059 gen_helper_neon_qshlu_s16(tmp, cpu_env,
5063 gen_helper_neon_qshlu_s32(tmp, cpu_env,
5071 GEN_NEON_INTEGER_OP_ENV(qshl);
5074 tcg_temp_free_i32(tmp2);
5076 if (op == 1 || op == 3) {
5078 tmp2 = neon_load_reg(rd, pass);
5079 gen_neon_add(size, tmp, tmp2);
5080 tcg_temp_free_i32(tmp2);
5081 } else if (op == 4 || (op == 5 && u)) {
5086 mask = 0xff >> -shift;
5088 mask = (uint8_t)(0xff << shift);
5094 mask = 0xffff >> -shift;
5096 mask = (uint16_t)(0xffff << shift);
5100 if (shift < -31 || shift > 31) {
5104 mask = 0xffffffffu >> -shift;
5106 mask = 0xffffffffu << shift;
5112 tmp2 = neon_load_reg(rd, pass);
5113 tcg_gen_andi_i32(tmp, tmp, mask);
5114 tcg_gen_andi_i32(tmp2, tmp2, ~mask);
5115 tcg_gen_or_i32(tmp, tmp, tmp2);
5116 tcg_temp_free_i32(tmp2);
5118 neon_store_reg(rd, pass, tmp);
5121 } else if (op < 10) {
5122 /* Shift by immediate and narrow:
5123 VSHRN, VRSHRN, VQSHRN, VQRSHRN. */
5124 int input_unsigned = (op == 8) ? !u : u;
5128 shift = shift - (1 << (size + 3));
5131 tmp64 = tcg_const_i64(shift);
5132 neon_load_reg64(cpu_V0, rm);
5133 neon_load_reg64(cpu_V1, rm + 1);
5134 for (pass = 0; pass < 2; pass++) {
5142 if (input_unsigned) {
5143 gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
5145 gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
5148 if (input_unsigned) {
5149 gen_helper_neon_shl_u64(cpu_V0, in, tmp64);
5151 gen_helper_neon_shl_s64(cpu_V0, in, tmp64);
5154 tmp = tcg_temp_new_i32();
5155 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5156 neon_store_reg(rd, pass, tmp);
5158 tcg_temp_free_i64(tmp64);
5161 imm = (uint16_t)shift;
5165 imm = (uint32_t)shift;
5167 tmp2 = tcg_const_i32(imm);
5168 tmp4 = neon_load_reg(rm + 1, 0);
5169 tmp5 = neon_load_reg(rm + 1, 1);
5170 for (pass = 0; pass < 2; pass++) {
5172 tmp = neon_load_reg(rm, 0);
5176 gen_neon_shift_narrow(size, tmp, tmp2, q,
5179 tmp3 = neon_load_reg(rm, 1);
5183 gen_neon_shift_narrow(size, tmp3, tmp2, q,
5185 tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
5186 tcg_temp_free_i32(tmp);
5187 tcg_temp_free_i32(tmp3);
5188 tmp = tcg_temp_new_i32();
5189 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5190 neon_store_reg(rd, pass, tmp);
5192 tcg_temp_free_i32(tmp2);
5194 } else if (op == 10) {
5196 if (q || (rd & 1)) {
5199 tmp = neon_load_reg(rm, 0);
5200 tmp2 = neon_load_reg(rm, 1);
5201 for (pass = 0; pass < 2; pass++) {
5205 gen_neon_widen(cpu_V0, tmp, size, u);
5208 /* The shift is less than the width of the source
5209 type, so we can just shift the whole register. */
5210 tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
5211 /* Widen the result of shift: we need to clear
5212 * the potential overflow bits resulting from
5213 * left bits of the narrow input appearing as
5214 * right bits of left the neighbour narrow
5216 if (size < 2 || !u) {
5219 imm = (0xffu >> (8 - shift));
5221 } else if (size == 1) {
5222 imm = 0xffff >> (16 - shift);
5225 imm = 0xffffffff >> (32 - shift);
5228 imm64 = imm | (((uint64_t)imm) << 32);
5232 tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
5235 neon_store_reg64(cpu_V0, rd + pass);
5237 } else if (op >= 14) {
5238 /* VCVT fixed-point. */
5239 if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
5242 /* We have already masked out the must-be-1 top bit of imm6,
5243 * hence this 32-shift where the ARM ARM has 64-imm6.
5246 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5247 tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass));
5250 gen_vfp_ulto(0, shift, 1);
5252 gen_vfp_slto(0, shift, 1);
5255 gen_vfp_toul(0, shift, 1);
5257 gen_vfp_tosl(0, shift, 1);
5259 tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass));
5264 } else { /* (insn & 0x00380080) == 0 */
5266 if (q && (rd & 1)) {
5270 op = (insn >> 8) & 0xf;
5271 /* One register and immediate. */
5272 imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
5273 invert = (insn & (1 << 5)) != 0;
5274 /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
5275 * We choose to not special-case this and will behave as if a
5276 * valid constant encoding of 0 had been given.
5295 imm = (imm << 8) | (imm << 24);
5298 imm = (imm << 8) | 0xff;
5301 imm = (imm << 16) | 0xffff;
5304 imm |= (imm << 8) | (imm << 16) | (imm << 24);
5312 imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
5313 | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
5319 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5320 if (op & 1 && op < 12) {
5321 tmp = neon_load_reg(rd, pass);
5323 /* The immediate value has already been inverted, so
5325 tcg_gen_andi_i32(tmp, tmp, imm);
5327 tcg_gen_ori_i32(tmp, tmp, imm);
5331 tmp = tcg_temp_new_i32();
5332 if (op == 14 && invert) {
5336 for (n = 0; n < 4; n++) {
5337 if (imm & (1 << (n + (pass & 1) * 4)))
5338 val |= 0xff << (n * 8);
5340 tcg_gen_movi_i32(tmp, val);
5342 tcg_gen_movi_i32(tmp, imm);
5345 neon_store_reg(rd, pass, tmp);
5348 } else { /* (insn & 0x00800010 == 0x00800000) */
5350 op = (insn >> 8) & 0xf;
5351 if ((insn & (1 << 6)) == 0) {
5352 /* Three registers of different lengths. */
5356 /* undefreq: bit 0 : UNDEF if size != 0
5357 * bit 1 : UNDEF if size == 0
5358 * bit 2 : UNDEF if U == 1
5359 * Note that [1:0] set implies 'always UNDEF'
5362 /* prewiden, src1_wide, src2_wide, undefreq */
5363 static const int neon_3reg_wide[16][4] = {
5364 {1, 0, 0, 0}, /* VADDL */
5365 {1, 1, 0, 0}, /* VADDW */
5366 {1, 0, 0, 0}, /* VSUBL */
5367 {1, 1, 0, 0}, /* VSUBW */
5368 {0, 1, 1, 0}, /* VADDHN */
5369 {0, 0, 0, 0}, /* VABAL */
5370 {0, 1, 1, 0}, /* VSUBHN */
5371 {0, 0, 0, 0}, /* VABDL */
5372 {0, 0, 0, 0}, /* VMLAL */
5373 {0, 0, 0, 6}, /* VQDMLAL */
5374 {0, 0, 0, 0}, /* VMLSL */
5375 {0, 0, 0, 6}, /* VQDMLSL */
5376 {0, 0, 0, 0}, /* Integer VMULL */
5377 {0, 0, 0, 2}, /* VQDMULL */
5378 {0, 0, 0, 5}, /* Polynomial VMULL */
5379 {0, 0, 0, 3}, /* Reserved: always UNDEF */
5382 prewiden = neon_3reg_wide[op][0];
5383 src1_wide = neon_3reg_wide[op][1];
5384 src2_wide = neon_3reg_wide[op][2];
5385 undefreq = neon_3reg_wide[op][3];
5387 if (((undefreq & 1) && (size != 0)) ||
5388 ((undefreq & 2) && (size == 0)) ||
5389 ((undefreq & 4) && u)) {
5392 if ((src1_wide && (rn & 1)) ||
5393 (src2_wide && (rm & 1)) ||
5394 (!src2_wide && (rd & 1))) {
5398 /* Avoid overlapping operands. Wide source operands are
5399 always aligned so will never overlap with wide
5400 destinations in problematic ways. */
5401 if (rd == rm && !src2_wide) {
5402 tmp = neon_load_reg(rm, 1);
5403 neon_store_scratch(2, tmp);
5404 } else if (rd == rn && !src1_wide) {
5405 tmp = neon_load_reg(rn, 1);
5406 neon_store_scratch(2, tmp);
5409 for (pass = 0; pass < 2; pass++) {
5411 neon_load_reg64(cpu_V0, rn + pass);
5414 if (pass == 1 && rd == rn) {
5415 tmp = neon_load_scratch(2);
5417 tmp = neon_load_reg(rn, pass);
5420 gen_neon_widen(cpu_V0, tmp, size, u);
5424 neon_load_reg64(cpu_V1, rm + pass);
5427 if (pass == 1 && rd == rm) {
5428 tmp2 = neon_load_scratch(2);
5430 tmp2 = neon_load_reg(rm, pass);
5433 gen_neon_widen(cpu_V1, tmp2, size, u);
5437 case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
5438 gen_neon_addl(size);
5440 case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
5441 gen_neon_subl(size);
5443 case 5: case 7: /* VABAL, VABDL */
5444 switch ((size << 1) | u) {
5446 gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
5449 gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
5452 gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
5455 gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
5458 gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
5461 gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
5465 tcg_temp_free_i32(tmp2);
5466 tcg_temp_free_i32(tmp);
5468 case 8: case 9: case 10: case 11: case 12: case 13:
5469 /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
5470 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
5472 case 14: /* Polynomial VMULL */
5473 gen_helper_neon_mull_p8(cpu_V0, tmp, tmp2);
5474 tcg_temp_free_i32(tmp2);
5475 tcg_temp_free_i32(tmp);
5477 default: /* 15 is RESERVED: caught earlier */
5482 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5483 neon_store_reg64(cpu_V0, rd + pass);
5484 } else if (op == 5 || (op >= 8 && op <= 11)) {
5486 neon_load_reg64(cpu_V1, rd + pass);
5488 case 10: /* VMLSL */
5489 gen_neon_negl(cpu_V0, size);
5491 case 5: case 8: /* VABAL, VMLAL */
5492 gen_neon_addl(size);
5494 case 9: case 11: /* VQDMLAL, VQDMLSL */
5495 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5497 gen_neon_negl(cpu_V0, size);
5499 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
5504 neon_store_reg64(cpu_V0, rd + pass);
5505 } else if (op == 4 || op == 6) {
5506 /* Narrowing operation. */
5507 tmp = tcg_temp_new_i32();
5511 gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
5514 gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
5517 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
5518 tcg_gen_trunc_i64_i32(tmp, cpu_V0);
5525 gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
5528 gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
5531 tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
5532 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
5533 tcg_gen_trunc_i64_i32(tmp, cpu_V0);
5541 neon_store_reg(rd, 0, tmp3);
5542 neon_store_reg(rd, 1, tmp);
5545 /* Write back the result. */
5546 neon_store_reg64(cpu_V0, rd + pass);
5550 /* Two registers and a scalar. NB that for ops of this form
5551 * the ARM ARM labels bit 24 as Q, but it is in our variable
5558 case 1: /* Float VMLA scalar */
5559 case 5: /* Floating point VMLS scalar */
5560 case 9: /* Floating point VMUL scalar */
5565 case 0: /* Integer VMLA scalar */
5566 case 4: /* Integer VMLS scalar */
5567 case 8: /* Integer VMUL scalar */
5568 case 12: /* VQDMULH scalar */
5569 case 13: /* VQRDMULH scalar */
5570 if (u && ((rd | rn) & 1)) {
/* Fetch the scalar once and park it in scratch slot 0; each pass reloads
 * a fresh temporary from there. Pass count here is selected by u. */
5573 tmp = neon_get_scalar(size, rm);
5574 neon_store_scratch(0, tmp);
5575 for (pass = 0; pass < (u ? 4 : 2); pass++) {
5576 tmp = neon_load_scratch(0);
5577 tmp2 = neon_load_reg(rn, pass);
5580 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
5582 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
5584 } else if (op == 13) {
5586 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
5588 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
5590 } else if (op & 1) {
5591 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5592 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
5593 tcg_temp_free_ptr(fpstatus);
5596 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
5597 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
5598 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
5602 tcg_temp_free_i32(tmp2);
5605 tmp2 = neon_load_reg(rd, pass);
5608 gen_neon_add(size, tmp, tmp2);
5612 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5613 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5614 tcg_temp_free_ptr(fpstatus);
5618 gen_neon_rsb(size, tmp, tmp2);
5622 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5623 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
5624 tcg_temp_free_ptr(fpstatus);
5630 tcg_temp_free_i32(tmp2);
5632 neon_store_reg(rd, pass, tmp);
5635 case 3: /* VQDMLAL scalar */
5636 case 7: /* VQDMLSL scalar */
5637 case 11: /* VQDMULL scalar */
5642 case 2: /* VMLAL scalar */
5643 case 6: /* VMLSL scalar */
5644 case 10: /* VMULL scalar */
5648 tmp2 = neon_get_scalar(size, rm);
5649 /* We need a copy of tmp2 because gen_neon_mull
5650 * deletes it during pass 0. */
5651 tmp4 = tcg_temp_new_i32();
5652 tcg_gen_mov_i32(tmp4, tmp2);
5653 tmp3 = neon_load_reg(rn, 1);
5655 for (pass = 0; pass < 2; pass++) {
5657 tmp = neon_load_reg(rn, 0);
5662 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
5664 neon_load_reg64(cpu_V1, rd + pass);
5668 gen_neon_negl(cpu_V0, size);
5671 gen_neon_addl(size);
5674 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5676 gen_neon_negl(cpu_V0, size);
5678 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
5684 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5689 neon_store_reg64(cpu_V0, rd + pass);
5694 default: /* 14 and 15 are RESERVED */
5698 } else { /* size == 3 */
/* imm (insn bits [11:8]) is used as a byte offset: the result is assembled
 * from 64-bit pieces of rn and rm with shifts by imm * 8 bits and ORs.
 * NOTE(review): this looks like the VEXT byte-extract form -- confirm. */
5701 imm = (insn >> 8) & 0xf;
5706 if (q && ((rd | rn | rm) & 1)) {
5711 neon_load_reg64(cpu_V0, rn);
5713 neon_load_reg64(cpu_V1, rn + 1);
5715 } else if (imm == 8) {
5716 neon_load_reg64(cpu_V0, rn + 1);
5718 neon_load_reg64(cpu_V1, rm);
5721 tmp64 = tcg_temp_new_i64();
5723 neon_load_reg64(cpu_V0, rn);
5724 neon_load_reg64(tmp64, rn + 1);
5726 neon_load_reg64(cpu_V0, rn + 1);
5727 neon_load_reg64(tmp64, rm);
5729 tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
5730 tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
5731 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
5733 neon_load_reg64(cpu_V1, rm);
5735 neon_load_reg64(cpu_V1, rm + 1);
5738 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
5739 tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
5740 tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
5741 tcg_temp_free_i64(tmp64);
5744 neon_load_reg64(cpu_V0, rn);
5745 tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
5746 neon_load_reg64(cpu_V1, rm);
5747 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
5748 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
5750 neon_store_reg64(cpu_V0, rd);
5752 neon_store_reg64(cpu_V1, rd + 1);
5754 } else if ((insn & (1 << 11)) == 0) {
5755 /* Two register misc. */
5756 op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
5757 size = (insn >> 18) & 3;
5758 /* UNDEF for unknown op values and bad op-size combinations */
5759 if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
5762 if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
5763 q && ((rm | rd) & 1)) {
5767 case NEON_2RM_VREV64:
5768 for (pass = 0; pass < (q ? 2 : 1); pass++) {
5769 tmp = neon_load_reg(rm, pass * 2);
5770 tmp2 = neon_load_reg(rm, pass * 2 + 1);
5772 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
5773 case 1: gen_swap_half(tmp); break;
5774 case 2: /* no-op */ break;
5777 neon_store_reg(rd, pass * 2 + 1, tmp);
5779 neon_store_reg(rd, pass * 2, tmp2);
5782 case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
5783 case 1: gen_swap_half(tmp2); break;
5786 neon_store_reg(rd, pass * 2, tmp2);
/* Pairwise add long: q + 1 passes; each widens the two 32-bit halves of one
 * 64-bit chunk of rm into cpu_V0/cpu_V1 and adds them pairwise. The VPADAL
 * variants additionally accumulate the existing contents of rd. */
5790 case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
5791 case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
5792 for (pass = 0; pass < q + 1; pass++) {
5793 tmp = neon_load_reg(rm, pass * 2);
5794 gen_neon_widen(cpu_V0, tmp, size, op & 1);
5795 tmp = neon_load_reg(rm, pass * 2 + 1);
5796 gen_neon_widen(cpu_V1, tmp, size, op & 1);
5798 case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
5799 case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
5800 case 2: tcg_gen_add_i64(CPU_V001); break;
5803 if (op >= NEON_2RM_VPADAL) {
5805 neon_load_reg64(cpu_V1, rd + pass);
5806 gen_neon_addl(size);
5808 neon_store_reg64(cpu_V0, rd + pass);
5814 for (n = 0; n < (q ? 4 : 2); n += 2) {
5815 tmp = neon_load_reg(rm, n);
5816 tmp2 = neon_load_reg(rd, n + 1);
5817 neon_store_reg(rm, n, tmp2);
5818 neon_store_reg(rd, n + 1, tmp);
5825 if (gen_neon_unzip(rd, rm, size, q)) {
5830 if (gen_neon_zip(rd, rm, size, q)) {
5834 case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
5835 /* also VQMOVUN; op field and mnemonics don't line up */
5840 for (pass = 0; pass < 2; pass++) {
5841 neon_load_reg64(cpu_V0, rm + pass);
5842 tmp = tcg_temp_new_i32();
5843 gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
5848 neon_store_reg(rd, 0, tmp2);
5849 neon_store_reg(rd, 1, tmp);
5853 case NEON_2RM_VSHLL:
5854 if (q || (rd & 1)) {
5857 tmp = neon_load_reg(rm, 0);
5858 tmp2 = neon_load_reg(rm, 1);
5859 for (pass = 0; pass < 2; pass++) {
5862 gen_neon_widen(cpu_V0, tmp, size, 1);
5863 tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
5864 neon_store_reg64(cpu_V0, rd + pass);
/* f32 -> f16: convert 32-bit elements 0..3 of rm and pack the results two
 * per word (second of each pair in the upper 16 bits) into elements 0 and 1
 * of rd. */
5867 case NEON_2RM_VCVT_F16_F32:
5868 if (!arm_feature(env, ARM_FEATURE_VFP_FP16) ||
5872 tmp = tcg_temp_new_i32();
5873 tmp2 = tcg_temp_new_i32();
5874 tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0));
5875 gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
5876 tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1));
5877 gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
5878 tcg_gen_shli_i32(tmp2, tmp2, 16);
5879 tcg_gen_or_i32(tmp2, tmp2, tmp);
5880 tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2));
5881 gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
5882 tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3));
5883 neon_store_reg(rd, 0, tmp2);
5884 tmp2 = tcg_temp_new_i32();
5885 gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
5886 tcg_gen_shli_i32(tmp2, tmp2, 16);
5887 tcg_gen_or_i32(tmp2, tmp2, tmp);
5888 neon_store_reg(rd, 1, tmp2);
5889 tcg_temp_free_i32(tmp);
/* f16 -> f32: both source words of rm are read up front, then each 16-bit
 * half (low via ext16u, high via shift right by 16) is converted and stored
 * to elements 0..3 of rd. */
5891 case NEON_2RM_VCVT_F32_F16:
5892 if (!arm_feature(env, ARM_FEATURE_VFP_FP16) ||
5896 tmp3 = tcg_temp_new_i32();
5897 tmp = neon_load_reg(rm, 0);
5898 tmp2 = neon_load_reg(rm, 1);
5899 tcg_gen_ext16u_i32(tmp3, tmp);
5900 gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
5901 tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0));
5902 tcg_gen_shri_i32(tmp3, tmp, 16);
5903 gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
5904 tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1));
5905 tcg_temp_free_i32(tmp);
5906 tcg_gen_ext16u_i32(tmp3, tmp2);
5907 gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
5908 tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2));
5909 tcg_gen_shri_i32(tmp3, tmp2, 16);
5910 gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
5911 tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3));
5912 tcg_temp_free_i32(tmp2);
5913 tcg_temp_free_i32(tmp3);
/* Remaining ops are handled one 32-bit element per pass. Ops for which
 * neon_2rm_is_float_op() is true go through cpu_F0s; the rest use tmp. */
5917 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5918 if (neon_2rm_is_float_op(op)) {
5919 tcg_gen_ld_f32(cpu_F0s, cpu_env,
5920 neon_reg_offset(rm, pass));
5923 tmp = neon_load_reg(rm, pass);
5926 case NEON_2RM_VREV32:
5928 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
5929 case 1: gen_swap_half(tmp); break;
5933 case NEON_2RM_VREV16:
5938 case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
5939 case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
5940 case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
5946 case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
5947 case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
5948 case 2: gen_helper_clz(tmp, tmp); break;
5953 gen_helper_neon_cnt_u8(tmp, tmp);
5956 tcg_gen_not_i32(tmp, tmp);
5958 case NEON_2RM_VQABS:
5961 gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
5964 gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
5967 gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
5972 case NEON_2RM_VQNEG:
5975 gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
5978 gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
5981 gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
5986 case NEON_2RM_VCGT0: case NEON_2RM_VCLE0:
5987 tmp2 = tcg_const_i32(0);
5989 case 0: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break;
5990 case 1: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break;
5991 case 2: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break;
5994 tcg_temp_free(tmp2);
5995 if (op == NEON_2RM_VCLE0) {
5996 tcg_gen_not_i32(tmp, tmp);
5999 case NEON_2RM_VCGE0: case NEON_2RM_VCLT0:
6000 tmp2 = tcg_const_i32(0);
6002 case 0: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break;
6003 case 1: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break;
6004 case 2: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break;
6007 tcg_temp_free(tmp2);
6008 if (op == NEON_2RM_VCLT0) {
6009 tcg_gen_not_i32(tmp, tmp);
6012 case NEON_2RM_VCEQ0:
6013 tmp2 = tcg_const_i32(0);
6015 case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
6016 case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
6017 case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
6020 tcg_temp_free(tmp2);
6024 case 0: gen_helper_neon_abs_s8(tmp, tmp); break;
6025 case 1: gen_helper_neon_abs_s16(tmp, tmp); break;
6026 case 2: tcg_gen_abs_i32(tmp, tmp); break;
6031 tmp2 = tcg_const_i32(0);
6032 gen_neon_rsb(size, tmp, tmp2);
6033 tcg_temp_free(tmp2);
6035 case NEON_2RM_VCGT0_F:
6037 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6038 tmp2 = tcg_const_i32(0);
6039 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
6040 tcg_temp_free(tmp2);
6041 tcg_temp_free_ptr(fpstatus);
6044 case NEON_2RM_VCGE0_F:
6046 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6047 tmp2 = tcg_const_i32(0);
6048 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
6049 tcg_temp_free(tmp2);
6050 tcg_temp_free_ptr(fpstatus);
6053 case NEON_2RM_VCEQ0_F:
6055 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6056 tmp2 = tcg_const_i32(0);
6057 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
6058 tcg_temp_free(tmp2);
6059 tcg_temp_free_ptr(fpstatus);
6062 case NEON_2RM_VCLE0_F:
6064 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6065 tmp2 = tcg_const_i32(0);
6066 gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
6067 tcg_temp_free(tmp2);
6068 tcg_temp_free_ptr(fpstatus);
6071 case NEON_2RM_VCLT0_F:
6073 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6074 tmp2 = tcg_const_i32(0);
6075 gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
6076 tcg_temp_free(tmp2);
6077 tcg_temp_free_ptr(fpstatus);
6080 case NEON_2RM_VABS_F:
6083 case NEON_2RM_VNEG_F:
6087 tmp2 = neon_load_reg(rd, pass);
6088 neon_store_reg(rm, pass, tmp2);
6091 tmp2 = neon_load_reg(rd, pass);
6093 case 0: gen_neon_trn_u8(tmp, tmp2); break;
6094 case 1: gen_neon_trn_u16(tmp, tmp2); break;
6097 neon_store_reg(rm, pass, tmp2);
6099 case NEON_2RM_VRECPE:
6100 gen_helper_recpe_u32(tmp, tmp, cpu_env);
6102 case NEON_2RM_VRSQRTE:
6103 gen_helper_rsqrte_u32(tmp, tmp, cpu_env);
6105 case NEON_2RM_VRECPE_F:
6106 gen_helper_recpe_f32(cpu_F0s, cpu_F0s, cpu_env);
6108 case NEON_2RM_VRSQRTE_F:
6109 gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, cpu_env);
6111 case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
6114 case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
6117 case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
6118 gen_vfp_tosiz(0, 1);
6120 case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
6121 gen_vfp_touiz(0, 1);
6124 /* Reserved op values were caught by the
6125 * neon_2rm_sizes[] check earlier.
6129 if (neon_2rm_is_float_op(op)) {
6130 tcg_gen_st_f32(cpu_F0s, cpu_env,
6131 neon_reg_offset(rd, pass));
6133 neon_store_reg(rd, pass, tmp);
6138 } else if ((insn & (1 << 10)) == 0) {
6140 int n = ((insn >> 8) & 3) + 1;
6141 if ((rn + n) > 32) {
6142 /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
6143 * helper function running off the end of the register file.
6148 if (insn & (1 << 6)) {
6149 tmp = neon_load_reg(rd, 0);
6151 tmp = tcg_temp_new_i32();
6152 tcg_gen_movi_i32(tmp, 0);
6154 tmp2 = neon_load_reg(rm, 0);
6155 tmp4 = tcg_const_i32(rn);
6156 tmp5 = tcg_const_i32(n);
6157 gen_helper_neon_tbl(tmp2, cpu_env, tmp2, tmp, tmp4, tmp5);
6158 tcg_temp_free_i32(tmp);
6159 if (insn & (1 << 6)) {
6160 tmp = neon_load_reg(rd, 1);
6162 tmp = tcg_temp_new_i32();
6163 tcg_gen_movi_i32(tmp, 0);
6165 tmp3 = neon_load_reg(rm, 1);
6166 gen_helper_neon_tbl(tmp3, cpu_env, tmp3, tmp, tmp4, tmp5);
6167 tcg_temp_free_i32(tmp5);
6168 tcg_temp_free_i32(tmp4);
6169 neon_store_reg(rd, 0, tmp2);
6170 neon_store_reg(rd, 1, tmp3);
6171 tcg_temp_free_i32(tmp);
6172 } else if ((insn & 0x380) == 0) {
6174 if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
6177 if (insn & (1 << 19)) {
6178 tmp = neon_load_reg(rm, 1);
6180 tmp = neon_load_reg(rm, 0);
6182 if (insn & (1 << 16)) {
6183 gen_neon_dup_u8(tmp, ((insn >> 17) & 3) * 8);
6184 } else if (insn & (1 << 17)) {
6185 if ((insn >> 18) & 1)
6186 gen_neon_dup_high16(tmp);
6188 gen_neon_dup_low16(tmp);
6190 for (pass = 0; pass < (q ? 4 : 2); pass++) {
6191 tmp2 = tcg_temp_new_i32();
6192 tcg_gen_mov_i32(tmp2, tmp);
6193 neon_store_reg(rd, pass, tmp2);
6195 tcg_temp_free_i32(tmp);
/* Translate one coprocessor instruction.
 * The coprocessor number is taken from insn bits [11:8].  The visible code
 * hands the instruction to the iwMMXt, XScale DSP or VFP decoders, or else
 * treats it as a generic coprocessor register access: the register is
 * looked up with get_arm_cp_reginfo(), permissions are checked with
 * cp_access_ok(), and a read or write is generated for either a 32-bit
 * transfer (rt) or a 64-bit transfer (rt and rt2).
 * NOTE(review): callers test the result as a boolean, so nonzero presumably
 * means "not handled, treat as undefined" -- confirm.
 */
6204 static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
6206 int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
6207 const ARMCPRegInfo *ri;
6208 ARMCPU *cpu = arm_env_get_cpu(env);
6210 cpnum = (insn >> 8) & 0xf;
/* On XScale the test below is true when bit cpnum of c15_cpar is clear
 * (for the 14 bits covered by the 0x3fff mask).
 */
6211 if (arm_feature(env, ARM_FEATURE_XSCALE)
6212 && ((env->cp15.c15_cpar ^ 0x3fff) & (1 << cpnum)))
6215 /* First check for coprocessor space used for actual instructions */
6219 if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
6220 return disas_iwmmxt_insn(env, s, insn);
6221 } else if (arm_feature(env, ARM_FEATURE_XSCALE)) {
6222 return disas_dsp_insn(env, s, insn);
6227 return disas_vfp_insn (env, s, insn);
6232 /* Otherwise treat as a generic register access */
/* Bit 25 clear selects the 64-bit (two register) form. */
6233 is64 = (insn & (1 << 25)) == 0;
6234 if (!is64 && ((insn & (1 << 4)) == 0)) {
/* Field extraction: the first two assignments belong to the 64-bit form,
 * the next three to the 32-bit form.
 */
6242 opc1 = (insn >> 4) & 0xf;
6244 rt2 = (insn >> 16) & 0xf;
6246 crn = (insn >> 16) & 0xf;
6247 opc1 = (insn >> 21) & 7;
6248 opc2 = (insn >> 5) & 7;
/* Bit 20 distinguishes reads from writes; rt is the (first) core register. */
6251 isread = (insn >> 20) & 1;
6252 rt = (insn >> 12) & 0xf;
6254 ri = get_arm_cp_reginfo(cpu,
6255 ENCODE_CP_REG(cpnum, is64, crn, crm, opc1, opc2));
6257 /* Check access permissions */
6258 if (!cp_access_ok(env, ri, isread)) {
6262 /* Handle special cases first */
6263 switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
/* Update the PC to s->pc and mark the translation state as DISAS_WFI. */
6270 gen_set_pc_im(s->pc);
6271 s->is_jmp = DISAS_WFI;
/* 64-bit read: a constant register yields ri->resetvalue, a register with
 * a readfn goes through gen_helper_get_cp_reg64, anything else is loaded
 * directly from ri->fieldoffset in the CPU state.
 */
6282 if (ri->type & ARM_CP_CONST) {
6283 tmp64 = tcg_const_i64(ri->resetvalue);
6284 } else if (ri->readfn) {
/* NOTE(review): the PC is written before calling the helper, presumably so
 * that it is correct if the helper raises an exception -- confirm.
 */
6286 gen_set_pc_im(s->pc);
6287 tmp64 = tcg_temp_new_i64();
6288 tmpptr = tcg_const_ptr(ri);
6289 gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
6290 tcg_temp_free_ptr(tmpptr);
6292 tmp64 = tcg_temp_new_i64();
6293 tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
/* Split the 64-bit result: low word to rt, high word to rt2. */
6295 tmp = tcg_temp_new_i32();
6296 tcg_gen_trunc_i64_i32(tmp, tmp64);
6297 store_reg(s, rt, tmp);
6298 tcg_gen_shri_i64(tmp64, tmp64, 32);
6299 tmp = tcg_temp_new_i32();
6300 tcg_gen_trunc_i64_i32(tmp, tmp64);
6301 tcg_temp_free_i64(tmp64);
6302 store_reg(s, rt2, tmp);
/* 32-bit read: same three sources as the 64-bit case. */
6305 if (ri->type & ARM_CP_CONST) {
6306 tmp = tcg_const_i32(ri->resetvalue);
6307 } else if (ri->readfn) {
6309 gen_set_pc_im(s->pc);
6310 tmp = tcg_temp_new_i32();
6311 tmpptr = tcg_const_ptr(ri);
6312 gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
6313 tcg_temp_free_ptr(tmpptr);
6315 tmp = load_cpu_offset(ri->fieldoffset);
6318 /* Destination register of r15 for 32 bit loads sets
6319 * the condition codes from the high 4 bits of the value
6322 tcg_temp_free_i32(tmp);
6324 store_reg(s, rt, tmp);
6329 if (ri->type & ARM_CP_CONST) {
6330 /* If not forbidden by access permissions, treat as WI */
/* 64-bit write: rt supplies the low half and rt2 the high half of the
 * value, which goes to gen_helper_set_cp_reg64 or straight to
 * ri->fieldoffset.
 */
6336 TCGv_i64 tmp64 = tcg_temp_new_i64();
6337 tmplo = load_reg(s, rt);
6338 tmphi = load_reg(s, rt2);
6339 tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
6340 tcg_temp_free_i32(tmplo);
6341 tcg_temp_free_i32(tmphi);
6343 TCGv_ptr tmpptr = tcg_const_ptr(ri);
6344 gen_set_pc_im(s->pc);
6345 gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
6346 tcg_temp_free_ptr(tmpptr);
6348 tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
6350 tcg_temp_free_i64(tmp64);
/* 32-bit write: via gen_helper_set_cp_reg, or directly with
 * store_cpu_offset() to ri->fieldoffset.
 */
6355 gen_set_pc_im(s->pc);
6356 tmp = load_reg(s, rt);
6357 tmpptr = tcg_const_ptr(ri);
6358 gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
6359 tcg_temp_free_ptr(tmpptr);
6360 tcg_temp_free_i32(tmp);
6362 TCGv tmp = load_reg(s, rt);
6363 store_cpu_offset(tmp, ri->fieldoffset);
6366 /* We default to ending the TB on a coprocessor register write,
6367 * but allow this to be suppressed by the register definition
6368 * (usually only necessary to work around guest bugs).
6370 if (!(ri->type & ARM_CP_SUPPRESS_TB_END)) {
/* gen_storeq_reg: write the 64-bit TCG value val to two 32-bit registers.
 * The low 32 bits are truncated into a fresh temporary and handed to
 * store_reg() for rlow.  val is then shifted right by 32 in place (which is
 * why it is clobbered) and the truncated result is stored to rhigh.
 */
6381 /* Store a 64-bit value to a register pair. Clobbers val. */
6382 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
6385 tmp = tcg_temp_new_i32();
6386 tcg_gen_trunc_i64_i32(tmp, val);
6387 store_reg(s, rlow, tmp);
/* A second temporary is allocated for the high half; the first one was
 * passed to store_reg() above.
 */
6388 tmp = tcg_temp_new_i32();
6389 tcg_gen_shri_i64(val, val, 32);
6390 tcg_gen_trunc_i64_i32(tmp, val);
6391 store_reg(s, rhigh, tmp);
/* gen_addq_lo: val += zero_extend(register rlow).
 * The register is read with load_reg(), widened with tcg_gen_extu_i32_i64()
 * (unsigned extension) and added into val; both temporaries are freed here.
 */
6394 /* load a 32-bit value from a register and perform a 64-bit accumulate. */
6395 static void gen_addq_lo(DisasContext *s, TCGv_i64 val, int rlow)
6400 /* Load value and extend to 64 bits. */
6401 tmp = tcg_temp_new_i64();
6402 tmp2 = load_reg(s, rlow);
6403 tcg_gen_extu_i32_i64(tmp, tmp2);
6404 tcg_temp_free_i32(tmp2);
6405 tcg_gen_add_i64(val, val, tmp);
6406 tcg_temp_free_i64(tmp);
/* gen_addq: val += the 64-bit value held in the register pair rlow/rhigh.
 * Both registers are read with load_reg() and combined with
 * tcg_gen_concat_i32_i64(tmp, tmpl, tmph) before the 64-bit add; all
 * temporaries created here are freed before returning.
 * NOTE(review): the concat is assumed to place tmpl in the low word and
 * tmph in the high word -- confirm against the TCG op definition.
 */
6409 /* load and add a 64-bit value from a register pair. */
6410 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
6416 /* Load 64-bit value rd:rn. */
6417 tmpl = load_reg(s, rlow);
6418 tmph = load_reg(s, rhigh);
6419 tmp = tcg_temp_new_i64();
6420 tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
6421 tcg_temp_free_i32(tmpl);
6422 tcg_temp_free_i32(tmph);
6423 tcg_gen_add_i64(val, val, tmp);
6424 tcg_temp_free_i64(tmp);
/* gen_logicq_cc: derive condition flags from the 64-bit value val.
 * gen_helper_logicq_cc() reduces val to a 32-bit temporary, which is freed
 * before returning.
 * NOTE(review): the step that turns that temporary into the N and Z flags
 * is expected between the helper call and the free -- confirm.
 */
6427 /* Set N and Z flags from a 64-bit value. */
6428 static void gen_logicq_cc(TCGv_i64 val)
6430 TCGv tmp = tcg_temp_new_i32();
6431 gen_helper_logicq_cc(tmp, val);
6433 tcg_temp_free_i32(tmp);
6436 /* Load/Store exclusive instructions are implemented by remembering
6437 the value/address loaded, and seeing if these are the same
6438 when the store is performed. This should be sufficient to implement
6439 the architecturally mandated semantics, and avoids having to monitor
6442 In system emulation mode only one CPU will be running at once, so
6443 this sequence is effectively atomic. In user emulation mode we
6444 throw an exception and handle the atomic operation elsewhere. */
/* gen_load_exclusive: emit the load for a load-exclusive and record the
 * monitor state.
 *   rt, rt2 - destination registers (rt2 receives the word at addr + 4)
 *   addr    - TCG value holding the address; it is not modified here
 *   size    - selects between the 8-bit, 16-bit and 32-bit load helpers
 * The loaded value is copied to cpu_exclusive_val (and the second word to
 * cpu_exclusive_high), and addr is copied to cpu_exclusive_addr last.
 * NOTE(review): callers pass size 0/1/2/3 for byte/halfword/word/doubleword,
 * with the addr + 4 load presumably only emitted for size 3 -- confirm.
 */
6445 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
6446 TCGv addr, int size)
6452 tmp = gen_ld8u(addr, IS_USER(s));
6455 tmp = gen_ld16u(addr, IS_USER(s));
6459 tmp = gen_ld32(addr, IS_USER(s));
/* Remember the loaded value before handing tmp over to store_reg(). */
6464 tcg_gen_mov_i32(cpu_exclusive_val, tmp);
6465 store_reg(s, rt, tmp);
/* Second word: a scratch temporary holds addr + 4 so addr stays intact. */
6467 TCGv tmp2 = tcg_temp_new_i32();
6468 tcg_gen_addi_i32(tmp2, addr, 4);
6469 tmp = gen_ld32(tmp2, IS_USER(s));
6470 tcg_temp_free_i32(tmp2);
6471 tcg_gen_mov_i32(cpu_exclusive_high, tmp);
6472 store_reg(s, rt2, tmp);
6474 tcg_gen_mov_i32(cpu_exclusive_addr, addr);
/* gen_clrex: set cpu_exclusive_addr to -1.
 * NOTE(review): -1 is presumably a value no store-exclusive address can
 * match, which clears the exclusive monitor -- confirm.
 */
6477 static void gen_clrex(DisasContext *s)
6479 tcg_gen_movi_i32(cpu_exclusive_addr, -1);
6482 #ifdef CONFIG_USER_ONLY
/* gen_store_exclusive (user-only variant): no store is emitted inline.
 * addr is copied to cpu_exclusive_test, the operands are packed into
 * cpu_exclusive_info as size | rd << 4 | rt << 8 | rt2 << 12, and an
 * EXCP_STREX exception is generated with gen_exception_insn(s, 4, ...).
 */
6483 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
6484 TCGv addr, int size)
6486 tcg_gen_mov_i32(cpu_exclusive_test, addr);
6487 tcg_gen_movi_i32(cpu_exclusive_info,
6488 size | (rd << 4) | (rt << 8) | (rt2 << 12));
6489 gen_exception_insn(s, 4, EXCP_STREX);
/* gen_store_exclusive (system emulation variant): emitted inline.
 *   rd      - register that receives the status (0 on success, 1 on failure)
 *   rt, rt2 - registers holding the data to store (rt2 goes to addr + 4)
 *   addr    - address value; it is advanced by 4 in place on the two-word path
 *   size    - selects the 8-bit, 16-bit or 32-bit load/store helpers
 * The generated code branches to fail_label if addr differs from
 * cpu_exclusive_addr or if the value re-read from memory differs from
 * cpu_exclusive_val (and, for the second word, cpu_exclusive_high).
 * Otherwise the store is performed.  On both paths cpu_exclusive_addr is
 * finally set to -1.
 */
6492 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
6493 TCGv addr, int size)
6499 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
6505 fail_label = gen_new_label();
6506 done_label = gen_new_label();
6507 tcg_gen_brcond_i32(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
6510 tmp = gen_ld8u(addr, IS_USER(s));
6513 tmp = gen_ld16u(addr, IS_USER(s));
6517 tmp = gen_ld32(addr, IS_USER(s));
6522 tcg_gen_brcond_i32(TCG_COND_NE, tmp, cpu_exclusive_val, fail_label);
6523 tcg_temp_free_i32(tmp);
6525 TCGv tmp2 = tcg_temp_new_i32();
6526 tcg_gen_addi_i32(tmp2, addr, 4);
6527 tmp = gen_ld32(tmp2, IS_USER(s));
6528 tcg_temp_free_i32(tmp2);
6529 tcg_gen_brcond_i32(TCG_COND_NE, tmp, cpu_exclusive_high, fail_label);
6530 tcg_temp_free_i32(tmp);
6532 tmp = load_reg(s, rt);
6535 gen_st8(tmp, addr, IS_USER(s));
6538 gen_st16(tmp, addr, IS_USER(s));
6542 gen_st32(tmp, addr, IS_USER(s));
6548 tcg_gen_addi_i32(addr, addr, 4);
6549 tmp = load_reg(s, rt2);
6550 gen_st32(tmp, addr, IS_USER(s));
6552 tcg_gen_movi_i32(cpu_R[rd], 0);
6553 tcg_gen_br(done_label);
6554 gen_set_label(fail_label);
6555 tcg_gen_movi_i32(cpu_R[rd], 1);
6556 gen_set_label(done_label);
6557 tcg_gen_movi_i32(cpu_exclusive_addr, -1);
/* Decode one 32-bit ARM-mode instruction and emit TCG ops for it.
 * The instruction word is fetched from s->pc.  The visible decode order is:
 *  - the unconditional space: NEON data processing and load/store, preload
 *    and other memory hints, the endianness-setting instruction, no-op
 *    encodings, SRS, RFE, BLX (immediate), iWMMXt and coprocessor transfers,
 *    and CPS;
 *  - for conditional instructions, a branch to s->condlabel generated with
 *    gen_test_cc(cond ^ 1, ...) so the body is skipped when the condition
 *    fails;
 *  - 16-bit immediate moves and "CPSR = immediate", miscellaneous
 *    instructions, data processing, multiplies and extra load/stores, media
 *    instructions, single and multiple load/store, branches, coprocessor
 *    instructions and SWI.
 * The last statement raises EXCP_UDEF.
 * NOTE(review): that is presumably the target of the illegal_op label used
 * by the ARCH() macro -- confirm.
 */
6561 static void disas_arm_insn(CPUARMState * env, DisasContext *s)
6563 unsigned int cond, insn, val, op1, i, shift, rm, rs, rn, rd, sh;
/* Fetch the instruction word at s->pc (s->bswap_code is passed to the
 * loader).
 */
6570 insn = arm_ldl_code(env, s->pc, s->bswap_code);
6573 /* M variants do not implement ARM mode. */
6578 /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
6579 * choose to UNDEF. In ARMv5 and above the space is used
6580 * for miscellaneous unconditional instructions.
6584 /* Unconditional instructions. */
6585 if (((insn >> 25) & 7) == 1) {
6586 /* NEON Data processing. */
6587 if (!arm_feature(env, ARM_FEATURE_NEON))
6590 if (disas_neon_data_insn(env, s, insn))
6594 if ((insn & 0x0f100000) == 0x04000000) {
6595 /* NEON load/store. */
6596 if (!arm_feature(env, ARM_FEATURE_NEON))
6599 if (disas_neon_ls_insn(env, s, insn))
6603 if (((insn & 0x0f30f000) == 0x0510f000) ||
6604 ((insn & 0x0f30f010) == 0x0710f000)) {
6605 if ((insn & (1 << 22)) == 0) {
6607 if (!arm_feature(env, ARM_FEATURE_V7MP)) {
6611 /* Otherwise PLD; v5TE+ */
6615 if (((insn & 0x0f70f000) == 0x0450f000) ||
6616 ((insn & 0x0f70f010) == 0x0650f000)) {
6618 return; /* PLI; V7 */
6620 if (((insn & 0x0f700000) == 0x04100000) ||
6621 ((insn & 0x0f700010) == 0x06100000)) {
6622 if (!arm_feature(env, ARM_FEATURE_V7MP)) {
6625 return; /* v7MP: Unallocated memory hint: must NOP */
/* Endianness-setting instruction: bit 9 is compared with s->bswap_code. */
6628 if ((insn & 0x0ffffdff) == 0x01010000) {
6631 if (((insn >> 9) & 1) != s->bswap_code) {
6632 /* Dynamic endianness switching not implemented. */
6636 } else if ((insn & 0x0fffff00) == 0x057ff000) {
6637 switch ((insn >> 4) & 0xf) {
6646 /* We don't emulate caches so these are a no-op. */
/* Store r14 and the SPSR at an address derived from the banked r13 of the
 * mode given in insn[4:0]; insn[24:23] selects the addressing variant and
 * bit 21 requests writeback to that banked r13.
 */
6651 } else if ((insn & 0x0e5fffe0) == 0x084d0500) {
6657 op1 = (insn & 0x1f);
6658 addr = tcg_temp_new_i32();
6659 tmp = tcg_const_i32(op1);
6660 gen_helper_get_r13_banked(addr, cpu_env, tmp);
6661 tcg_temp_free_i32(tmp);
6662 i = (insn >> 23) & 3;
6664 case 0: offset = -4; break; /* DA */
6665 case 1: offset = 0; break; /* IA */
6666 case 2: offset = -8; break; /* DB */
6667 case 3: offset = 4; break; /* IB */
6671 tcg_gen_addi_i32(addr, addr, offset);
6672 tmp = load_reg(s, 14);
6673 gen_st32(tmp, addr, 0);
6674 tmp = load_cpu_field(spsr);
6675 tcg_gen_addi_i32(addr, addr, 4);
6676 gen_st32(tmp, addr, 0);
6677 if (insn & (1 << 21)) {
6678 /* Base writeback. */
6680 case 0: offset = -8; break;
6681 case 1: offset = 4; break;
6682 case 2: offset = -4; break;
6683 case 3: offset = 0; break;
6687 tcg_gen_addi_i32(addr, addr, offset);
6688 tmp = tcg_const_i32(op1);
6689 gen_helper_set_r13_banked(cpu_env, tmp, addr);
6690 tcg_temp_free_i32(tmp);
6691 tcg_temp_free_i32(addr);
6693 tcg_temp_free_i32(addr);
/* Load two words from the address in rn and pass them to gen_rfe(); the
 * addressing variants and writeback offsets mirror the store form above.
 */
6696 } else if ((insn & 0x0e50ffe0) == 0x08100a00) {
6702 rn = (insn >> 16) & 0xf;
6703 addr = load_reg(s, rn);
6704 i = (insn >> 23) & 3;
6706 case 0: offset = -4; break; /* DA */
6707 case 1: offset = 0; break; /* IA */
6708 case 2: offset = -8; break; /* DB */
6709 case 3: offset = 4; break; /* IB */
6713 tcg_gen_addi_i32(addr, addr, offset);
6714 /* Load PC into tmp and CPSR into tmp2. */
6715 tmp = gen_ld32(addr, 0);
6716 tcg_gen_addi_i32(addr, addr, 4);
6717 tmp2 = gen_ld32(addr, 0);
6718 if (insn & (1 << 21)) {
6719 /* Base writeback. */
6721 case 0: offset = -8; break;
6722 case 1: offset = 4; break;
6723 case 2: offset = -4; break;
6724 case 3: offset = 0; break;
6728 tcg_gen_addi_i32(addr, addr, offset);
6729 store_reg(s, rn, addr);
6731 tcg_temp_free_i32(addr);
6733 gen_rfe(s, tmp, tmp2);
6735 } else if ((insn & 0x0e000000) == 0x0a000000) {
6736 /* branch link and change to thumb (blx <offset>) */
6739 val = (uint32_t)s->pc;
6740 tmp = tcg_temp_new_i32();
6741 tcg_gen_movi_i32(tmp, val);
6742 store_reg(s, 14, tmp);
6743 /* Sign-extend the 24-bit offset */
6744 offset = (((int32_t)insn) << 8) >> 8;
6745 /* offset * 4 + bit24 * 2 + (thumb bit) */
6746 val += (offset << 2) | ((insn >> 23) & 2) | 1;
6747 /* pipeline offset */
6749 /* protected by ARCH(5); above, near the start of uncond block */
6752 } else if ((insn & 0x0e000f00) == 0x0c000100) {
6753 if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
6754 /* iWMMXt register transfer. */
6755 if (env->cp15.c15_cpar & (1 << 1))
6756 if (!disas_iwmmxt_insn(env, s, insn))
6759 } else if ((insn & 0x0fe00000) == 0x0c400000) {
6760 /* Coprocessor double register transfer. */
6762 } else if ((insn & 0x0f000010) == 0x0e000010) {
6763 /* Additional coprocessor register transfer. */
6764 } else if ((insn & 0x0ff10020) == 0x01000000) {
6767 /* cps (privileged) */
6771 if (insn & (1 << 19)) {
6772 if (insn & (1 << 8))
6774 if (insn & (1 << 7))
6776 if (insn & (1 << 6))
6778 if (insn & (1 << 18))
6781 if (insn & (1 << 17)) {
6783 val |= (insn & 0x1f);
6786 gen_set_psr_im(s, mask, 0, val);
6793 /* if not always execute, we generate a conditional jump to
6795 s->condlabel = gen_new_label();
6796 gen_test_cc(cond ^ 1, s->condlabel);
6799 if ((insn & 0x0f900000) == 0x03000000) {
6800 if ((insn & (1 << 21)) == 0) {
6802 rd = (insn >> 12) & 0xf;
6803 val = ((insn >> 4) & 0xf000) | (insn & 0xfff);
6804 if ((insn & (1 << 22)) == 0) {
6806 tmp = tcg_temp_new_i32();
6807 tcg_gen_movi_i32(tmp, val);
6810 tmp = load_reg(s, rd);
6811 tcg_gen_ext16u_i32(tmp, tmp);
6812 tcg_gen_ori_i32(tmp, tmp, val << 16);
6814 store_reg(s, rd, tmp);
6816 if (((insn >> 12) & 0xf) != 0xf)
6818 if (((insn >> 16) & 0xf) == 0) {
6819 gen_nop_hint(s, insn & 0xff);
6821 /* CPSR = immediate */
6823 shift = ((insn >> 8) & 0xf) * 2;
6825 val = (val >> shift) | (val << (32 - shift));
6826 i = ((insn & (1 << 22)) != 0);
6827 if (gen_set_psr_im(s, msr_mask(env, s, (insn >> 16) & 0xf, i), i, val))
6831 } else if ((insn & 0x0f900000) == 0x01000000
6832 && (insn & 0x00000090) != 0x00000090) {
6833 /* miscellaneous instructions */
6834 op1 = (insn >> 21) & 3;
6835 sh = (insn >> 4) & 0xf;
6838 case 0x0: /* move program status register */
6841 tmp = load_reg(s, rm);
6842 i = ((op1 & 2) != 0);
6843 if (gen_set_psr(s, msr_mask(env, s, (insn >> 16) & 0xf, i), i, tmp))
6847 rd = (insn >> 12) & 0xf;
6851 tmp = load_cpu_field(spsr);
6853 tmp = tcg_temp_new_i32();
6854 gen_helper_cpsr_read(tmp, cpu_env);
6856 store_reg(s, rd, tmp);
6861 /* branch/exchange thumb (bx). */
6863 tmp = load_reg(s, rm);
6865 } else if (op1 == 3) {
6868 rd = (insn >> 12) & 0xf;
6869 tmp = load_reg(s, rm);
6870 gen_helper_clz(tmp, tmp);
6871 store_reg(s, rd, tmp);
6879 /* Trivial implementation equivalent to bx. */
6880 tmp = load_reg(s, rm);
6891 /* branch link/exchange thumb (blx) */
6892 tmp = load_reg(s, rm);
6893 tmp2 = tcg_temp_new_i32();
6894 tcg_gen_movi_i32(tmp2, s->pc);
6895 store_reg(s, 14, tmp2);
6898 case 0x5: /* saturating add/subtract */
6900 rd = (insn >> 12) & 0xf;
6901 rn = (insn >> 16) & 0xf;
6902 tmp = load_reg(s, rm);
6903 tmp2 = load_reg(s, rn);
6905 gen_helper_double_saturate(tmp2, cpu_env, tmp2);
6907 gen_helper_sub_saturate(tmp, cpu_env, tmp, tmp2);
6909 gen_helper_add_saturate(tmp, cpu_env, tmp, tmp2);
6910 tcg_temp_free_i32(tmp2);
6911 store_reg(s, rd, tmp);
6914 /* SMC instruction (op1 == 3)
6915 and undefined instructions (op1 == 0 || op1 == 2)
6922 gen_exception_insn(s, 4, EXCP_BKPT);
6924 case 0x8: /* signed multiply */
6929 rs = (insn >> 8) & 0xf;
6930 rn = (insn >> 12) & 0xf;
6931 rd = (insn >> 16) & 0xf;
6933 /* (32 * 16) >> 16 */
6934 tmp = load_reg(s, rm);
6935 tmp2 = load_reg(s, rs);
6937 tcg_gen_sari_i32(tmp2, tmp2, 16);
6940 tmp64 = gen_muls_i64_i32(tmp, tmp2);
6941 tcg_gen_shri_i64(tmp64, tmp64, 16);
6942 tmp = tcg_temp_new_i32();
6943 tcg_gen_trunc_i64_i32(tmp, tmp64);
6944 tcg_temp_free_i64(tmp64);
6945 if ((sh & 2) == 0) {
6946 tmp2 = load_reg(s, rn);
6947 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
6948 tcg_temp_free_i32(tmp2);
6950 store_reg(s, rd, tmp);
6953 tmp = load_reg(s, rm);
6954 tmp2 = load_reg(s, rs);
6955 gen_mulxy(tmp, tmp2, sh & 2, sh & 4);
6956 tcg_temp_free_i32(tmp2);
6958 tmp64 = tcg_temp_new_i64();
6959 tcg_gen_ext_i32_i64(tmp64, tmp);
6960 tcg_temp_free_i32(tmp);
6961 gen_addq(s, tmp64, rn, rd);
6962 gen_storeq_reg(s, rn, rd, tmp64);
6963 tcg_temp_free_i64(tmp64);
6966 tmp2 = load_reg(s, rn);
6967 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
6968 tcg_temp_free_i32(tmp2);
6970 store_reg(s, rd, tmp);
6977 } else if (((insn & 0x0e000000) == 0 &&
6978 (insn & 0x00000090) != 0x90) ||
6979 ((insn & 0x0e000000) == (1 << 25))) {
6980 int set_cc, logic_cc, shiftop;
6982 op1 = (insn >> 21) & 0xf;
6983 set_cc = (insn >> 20) & 1;
6984 logic_cc = table_logic_cc[op1] & set_cc;
6986 /* data processing instruction */
6987 if (insn & (1 << 25)) {
6988 /* immediate operand */
6990 shift = ((insn >> 8) & 0xf) * 2;
6992 val = (val >> shift) | (val << (32 - shift));
6994 tmp2 = tcg_temp_new_i32();
6995 tcg_gen_movi_i32(tmp2, val);
6996 if (logic_cc && shift) {
6997 gen_set_CF_bit31(tmp2);
7002 tmp2 = load_reg(s, rm);
7003 shiftop = (insn >> 5) & 3;
7004 if (!(insn & (1 << 4))) {
7005 shift = (insn >> 7) & 0x1f;
7006 gen_arm_shift_im(tmp2, shiftop, shift, logic_cc);
7008 rs = (insn >> 8) & 0xf;
7009 tmp = load_reg(s, rs);
7010 gen_arm_shift_reg(tmp2, shiftop, tmp, logic_cc);
7013 if (op1 != 0x0f && op1 != 0x0d) {
7014 rn = (insn >> 16) & 0xf;
7015 tmp = load_reg(s, rn);
7019 rd = (insn >> 12) & 0xf;
7022 tcg_gen_and_i32(tmp, tmp, tmp2);
7026 store_reg_bx(env, s, rd, tmp);
7029 tcg_gen_xor_i32(tmp, tmp, tmp2);
7033 store_reg_bx(env, s, rd, tmp);
7036 if (set_cc && rd == 15) {
7037 /* SUBS r15, ... is used for exception return. */
7041 gen_sub_CC(tmp, tmp, tmp2);
7042 gen_exception_return(s, tmp);
7045 gen_sub_CC(tmp, tmp, tmp2);
7047 tcg_gen_sub_i32(tmp, tmp, tmp2);
7049 store_reg_bx(env, s, rd, tmp);
7054 gen_sub_CC(tmp, tmp2, tmp);
7056 tcg_gen_sub_i32(tmp, tmp2, tmp);
7058 store_reg_bx(env, s, rd, tmp);
7062 gen_add_CC(tmp, tmp, tmp2);
7064 tcg_gen_add_i32(tmp, tmp, tmp2);
7066 store_reg_bx(env, s, rd, tmp);
7070 gen_helper_adc_cc(tmp, cpu_env, tmp, tmp2);
7072 gen_add_carry(tmp, tmp, tmp2);
7074 store_reg_bx(env, s, rd, tmp);
7078 gen_helper_sbc_cc(tmp, cpu_env, tmp, tmp2);
7080 gen_sub_carry(tmp, tmp, tmp2);
7082 store_reg_bx(env, s, rd, tmp);
7086 gen_helper_sbc_cc(tmp, cpu_env, tmp2, tmp);
7088 gen_sub_carry(tmp, tmp2, tmp);
7090 store_reg_bx(env, s, rd, tmp);
7094 tcg_gen_and_i32(tmp, tmp, tmp2);
7097 tcg_temp_free_i32(tmp);
7101 tcg_gen_xor_i32(tmp, tmp, tmp2);
7104 tcg_temp_free_i32(tmp);
7108 gen_sub_CC(tmp, tmp, tmp2);
7110 tcg_temp_free_i32(tmp);
7114 gen_add_CC(tmp, tmp, tmp2);
7116 tcg_temp_free_i32(tmp);
7119 tcg_gen_or_i32(tmp, tmp, tmp2);
7123 store_reg_bx(env, s, rd, tmp);
7126 if (logic_cc && rd == 15) {
7127 /* MOVS r15, ... is used for exception return. */
7131 gen_exception_return(s, tmp2);
7136 store_reg_bx(env, s, rd, tmp2);
7140 tcg_gen_andc_i32(tmp, tmp, tmp2);
7144 store_reg_bx(env, s, rd, tmp);
7148 tcg_gen_not_i32(tmp2, tmp2);
7152 store_reg_bx(env, s, rd, tmp2);
7155 if (op1 != 0x0f && op1 != 0x0d) {
7156 tcg_temp_free_i32(tmp2);
7159 /* other instructions */
7160 op1 = (insn >> 24) & 0xf;
7164 /* multiplies, extra load/stores */
7165 sh = (insn >> 5) & 3;
7168 rd = (insn >> 16) & 0xf;
7169 rn = (insn >> 12) & 0xf;
7170 rs = (insn >> 8) & 0xf;
7172 op1 = (insn >> 20) & 0xf;
7174 case 0: case 1: case 2: case 3: case 6:
7176 tmp = load_reg(s, rs);
7177 tmp2 = load_reg(s, rm);
7178 tcg_gen_mul_i32(tmp, tmp, tmp2);
7179 tcg_temp_free_i32(tmp2);
7180 if (insn & (1 << 22)) {
7181 /* Subtract (mls) */
7183 tmp2 = load_reg(s, rn);
7184 tcg_gen_sub_i32(tmp, tmp2, tmp);
7185 tcg_temp_free_i32(tmp2);
7186 } else if (insn & (1 << 21)) {
7188 tmp2 = load_reg(s, rn);
7189 tcg_gen_add_i32(tmp, tmp, tmp2);
7190 tcg_temp_free_i32(tmp2);
7192 if (insn & (1 << 20))
7194 store_reg(s, rd, tmp);
7197 /* 64 bit mul double accumulate (UMAAL) */
7199 tmp = load_reg(s, rs);
7200 tmp2 = load_reg(s, rm);
7201 tmp64 = gen_mulu_i64_i32(tmp, tmp2);
7202 gen_addq_lo(s, tmp64, rn);
7203 gen_addq_lo(s, tmp64, rd);
7204 gen_storeq_reg(s, rn, rd, tmp64);
7205 tcg_temp_free_i64(tmp64);
7207 case 8: case 9: case 10: case 11:
7208 case 12: case 13: case 14: case 15:
7209 /* 64 bit mul: UMULL, UMLAL, SMULL, SMLAL. */
7210 tmp = load_reg(s, rs);
7211 tmp2 = load_reg(s, rm);
7212 if (insn & (1 << 22)) {
7213 tmp64 = gen_muls_i64_i32(tmp, tmp2);
7215 tmp64 = gen_mulu_i64_i32(tmp, tmp2);
7217 if (insn & (1 << 21)) { /* mult accumulate */
7218 gen_addq(s, tmp64, rn, rd);
7220 if (insn & (1 << 20)) {
7221 gen_logicq_cc(tmp64);
7223 gen_storeq_reg(s, rn, rd, tmp64);
7224 tcg_temp_free_i64(tmp64);
7230 rn = (insn >> 16) & 0xf;
7231 rd = (insn >> 12) & 0xf;
7232 if (insn & (1 << 23)) {
7233 /* load/store exclusive */
7234 op1 = (insn >> 21) & 0x3;
/* NOTE(review): addr is allocated with tcg_temp_local_new_i32() rather than
 * a plain temporary, presumably because the store-exclusive sequence
 * contains branches and labels that the value must survive -- confirm.
 */
7239 addr = tcg_temp_local_new_i32();
7240 load_reg_var(s, addr, rn);
7241 if (insn & (1 << 20)) {
7244 gen_load_exclusive(s, rd, 15, addr, 2);
7246 case 1: /* ldrexd */
7247 gen_load_exclusive(s, rd, rd + 1, addr, 3);
7249 case 2: /* ldrexb */
7250 gen_load_exclusive(s, rd, 15, addr, 0);
7252 case 3: /* ldrexh */
7253 gen_load_exclusive(s, rd, 15, addr, 1);
7262 gen_store_exclusive(s, rd, rm, 15, addr, 2);
7264 case 1: /* strexd */
7265 gen_store_exclusive(s, rd, rm, rm + 1, addr, 3);
7267 case 2: /* strexb */
7268 gen_store_exclusive(s, rd, rm, 15, addr, 0);
7270 case 3: /* strexh */
7271 gen_store_exclusive(s, rd, rm, 15, addr, 1);
7277 tcg_temp_free(addr);
7279 /* SWP instruction */
7282 /* ??? This is not really atomic. However we know
7283 we never have multiple CPUs running in parallel,
7284 so it is good enough. */
7285 addr = load_reg(s, rn);
7286 tmp = load_reg(s, rm);
7287 if (insn & (1 << 22)) {
7288 tmp2 = gen_ld8u(addr, IS_USER(s));
7289 gen_st8(tmp, addr, IS_USER(s));
7291 tmp2 = gen_ld32(addr, IS_USER(s));
7292 gen_st32(tmp, addr, IS_USER(s));
7294 tcg_temp_free_i32(addr);
7295 store_reg(s, rd, tmp2);
7301 /* Misc load/store */
7302 rn = (insn >> 16) & 0xf;
7303 rd = (insn >> 12) & 0xf;
7304 addr = load_reg(s, rn);
7305 if (insn & (1 << 24))
7306 gen_add_datah_offset(s, insn, 0, addr);
7308 if (insn & (1 << 20)) {
7312 tmp = gen_ld16u(addr, IS_USER(s));
7315 tmp = gen_ld8s(addr, IS_USER(s));
7319 tmp = gen_ld16s(addr, IS_USER(s));
7323 } else if (sh & 2) {
7328 tmp = load_reg(s, rd);
7329 gen_st32(tmp, addr, IS_USER(s));
7330 tcg_gen_addi_i32(addr, addr, 4);
7331 tmp = load_reg(s, rd + 1);
7332 gen_st32(tmp, addr, IS_USER(s));
7336 tmp = gen_ld32(addr, IS_USER(s));
7337 store_reg(s, rd, tmp);
7338 tcg_gen_addi_i32(addr, addr, 4);
7339 tmp = gen_ld32(addr, IS_USER(s));
7343 address_offset = -4;
7346 tmp = load_reg(s, rd);
7347 gen_st16(tmp, addr, IS_USER(s));
7350 /* Perform base writeback before the loaded value to
7351 ensure correct behavior with overlapping index registers.
7352 ldrd with base writeback is undefined if the
7353 destination and index registers overlap. */
7354 if (!(insn & (1 << 24))) {
7355 gen_add_datah_offset(s, insn, address_offset, addr);
7356 store_reg(s, rn, addr);
7357 } else if (insn & (1 << 21)) {
7359 tcg_gen_addi_i32(addr, addr, address_offset);
7360 store_reg(s, rn, addr);
7362 tcg_temp_free_i32(addr);
7365 /* Complete the load. */
7366 store_reg(s, rd, tmp);
7375 if (insn & (1 << 4)) {
7377 /* Armv6 Media instructions. */
7379 rn = (insn >> 16) & 0xf;
7380 rd = (insn >> 12) & 0xf;
7381 rs = (insn >> 8) & 0xf;
7382 switch ((insn >> 23) & 3) {
7383 case 0: /* Parallel add/subtract. */
7384 op1 = (insn >> 20) & 7;
7385 tmp = load_reg(s, rn);
7386 tmp2 = load_reg(s, rm);
7387 sh = (insn >> 5) & 7;
7388 if ((op1 & 3) == 0 || sh == 5 || sh == 6)
7390 gen_arm_parallel_addsub(op1, sh, tmp, tmp2);
7391 tcg_temp_free_i32(tmp2);
7392 store_reg(s, rd, tmp);
7395 if ((insn & 0x00700020) == 0) {
7396 /* Halfword pack. */
7397 tmp = load_reg(s, rn);
7398 tmp2 = load_reg(s, rm);
7399 shift = (insn >> 7) & 0x1f;
7400 if (insn & (1 << 6)) {
7404 tcg_gen_sari_i32(tmp2, tmp2, shift);
7405 tcg_gen_andi_i32(tmp, tmp, 0xffff0000);
7406 tcg_gen_ext16u_i32(tmp2, tmp2);
7410 tcg_gen_shli_i32(tmp2, tmp2, shift);
7411 tcg_gen_ext16u_i32(tmp, tmp);
7412 tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
7414 tcg_gen_or_i32(tmp, tmp, tmp2);
7415 tcg_temp_free_i32(tmp2);
7416 store_reg(s, rd, tmp);
7417 } else if ((insn & 0x00200020) == 0x00200000) {
7419 tmp = load_reg(s, rm);
7420 shift = (insn >> 7) & 0x1f;
7421 if (insn & (1 << 6)) {
7424 tcg_gen_sari_i32(tmp, tmp, shift);
7426 tcg_gen_shli_i32(tmp, tmp, shift);
7428 sh = (insn >> 16) & 0x1f;
7429 tmp2 = tcg_const_i32(sh);
7430 if (insn & (1 << 22))
7431 gen_helper_usat(tmp, cpu_env, tmp, tmp2);
7433 gen_helper_ssat(tmp, cpu_env, tmp, tmp2);
7434 tcg_temp_free_i32(tmp2);
7435 store_reg(s, rd, tmp);
7436 } else if ((insn & 0x00300fe0) == 0x00200f20) {
7438 tmp = load_reg(s, rm);
7439 sh = (insn >> 16) & 0x1f;
7440 tmp2 = tcg_const_i32(sh);
7441 if (insn & (1 << 22))
7442 gen_helper_usat16(tmp, cpu_env, tmp, tmp2);
7444 gen_helper_ssat16(tmp, cpu_env, tmp, tmp2);
7445 tcg_temp_free_i32(tmp2);
7446 store_reg(s, rd, tmp);
7447 } else if ((insn & 0x00700fe0) == 0x00000fa0) {
7449 tmp = load_reg(s, rn);
7450 tmp2 = load_reg(s, rm);
7451 tmp3 = tcg_temp_new_i32();
7452 tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUARMState, GE));
7453 gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
7454 tcg_temp_free_i32(tmp3);
7455 tcg_temp_free_i32(tmp2);
7456 store_reg(s, rd, tmp);
7457 } else if ((insn & 0x000003e0) == 0x00000060) {
7458 tmp = load_reg(s, rm);
7459 shift = (insn >> 10) & 3;
7460 /* ??? In many cases it's not necessary to do a
7461 rotate, a shift is sufficient. */
7463 tcg_gen_rotri_i32(tmp, tmp, shift * 8);
7464 op1 = (insn >> 20) & 7;
7466 case 0: gen_sxtb16(tmp); break;
7467 case 2: gen_sxtb(tmp); break;
7468 case 3: gen_sxth(tmp); break;
7469 case 4: gen_uxtb16(tmp); break;
7470 case 6: gen_uxtb(tmp); break;
7471 case 7: gen_uxth(tmp); break;
7472 default: goto illegal_op;
7475 tmp2 = load_reg(s, rn);
7476 if ((op1 & 3) == 0) {
7477 gen_add16(tmp, tmp2);
7479 tcg_gen_add_i32(tmp, tmp, tmp2);
7480 tcg_temp_free_i32(tmp2);
7483 store_reg(s, rd, tmp);
7484 } else if ((insn & 0x003f0f60) == 0x003f0f20) {
7486 tmp = load_reg(s, rm);
7487 if (insn & (1 << 22)) {
7488 if (insn & (1 << 7)) {
7492 gen_helper_rbit(tmp, tmp);
7495 if (insn & (1 << 7))
7498 tcg_gen_bswap32_i32(tmp, tmp);
7500 store_reg(s, rd, tmp);
7505 case 2: /* Multiplies (Type 3). */
7506 switch ((insn >> 20) & 0x7) {
7508 if (((insn >> 6) ^ (insn >> 7)) & 1) {
7509 /* op2 not 00x or 11x : UNDEF */
7512 /* Signed multiply most significant [accumulate].
7513 (SMMUL, SMMLA, SMMLS) */
7514 tmp = load_reg(s, rm);
7515 tmp2 = load_reg(s, rs);
7516 tmp64 = gen_muls_i64_i32(tmp, tmp2);
7519 tmp = load_reg(s, rd);
7520 if (insn & (1 << 6)) {
7521 tmp64 = gen_subq_msw(tmp64, tmp);
7523 tmp64 = gen_addq_msw(tmp64, tmp);
7526 if (insn & (1 << 5)) {
7527 tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
7529 tcg_gen_shri_i64(tmp64, tmp64, 32);
7530 tmp = tcg_temp_new_i32();
7531 tcg_gen_trunc_i64_i32(tmp, tmp64);
7532 tcg_temp_free_i64(tmp64);
7533 store_reg(s, rn, tmp);
7537 /* SMLAD, SMUAD, SMLSD, SMUSD, SMLALD, SMLSLD */
7538 if (insn & (1 << 7)) {
7541 tmp = load_reg(s, rm);
7542 tmp2 = load_reg(s, rs);
7543 if (insn & (1 << 5))
7544 gen_swap_half(tmp2);
7545 gen_smul_dual(tmp, tmp2);
7546 if (insn & (1 << 6)) {
7547 /* This subtraction cannot overflow. */
7548 tcg_gen_sub_i32(tmp, tmp, tmp2);
7550 /* This addition cannot overflow 32 bits;
7551 * however it may overflow considered as a signed
7552 * operation, in which case we must set the Q flag.
7554 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
7556 tcg_temp_free_i32(tmp2);
7557 if (insn & (1 << 22)) {
7558 /* smlald, smlsld */
7559 tmp64 = tcg_temp_new_i64();
7560 tcg_gen_ext_i32_i64(tmp64, tmp);
7561 tcg_temp_free_i32(tmp);
7562 gen_addq(s, tmp64, rd, rn);
7563 gen_storeq_reg(s, rd, rn, tmp64);
7564 tcg_temp_free_i64(tmp64);
7566 /* smuad, smusd, smlad, smlsd */
7569 tmp2 = load_reg(s, rd);
7570 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
7571 tcg_temp_free_i32(tmp2);
7573 store_reg(s, rn, tmp);
7579 if (!arm_feature(env, ARM_FEATURE_ARM_DIV)) {
7582 if (((insn >> 5) & 7) || (rd != 15)) {
7585 tmp = load_reg(s, rm);
7586 tmp2 = load_reg(s, rs);
7587 if (insn & (1 << 21)) {
7588 gen_helper_udiv(tmp, tmp, tmp2);
7590 gen_helper_sdiv(tmp, tmp, tmp2);
7592 tcg_temp_free_i32(tmp2);
7593 store_reg(s, rn, tmp);
7600 op1 = ((insn >> 17) & 0x38) | ((insn >> 5) & 7);
7602 case 0: /* Unsigned sum of absolute differences. */
7604 tmp = load_reg(s, rm);
7605 tmp2 = load_reg(s, rs);
7606 gen_helper_usad8(tmp, tmp, tmp2);
7607 tcg_temp_free_i32(tmp2);
7609 tmp2 = load_reg(s, rd);
7610 tcg_gen_add_i32(tmp, tmp, tmp2);
7611 tcg_temp_free_i32(tmp2);
7613 store_reg(s, rn, tmp);
7615 case 0x20: case 0x24: case 0x28: case 0x2c:
7616 /* Bitfield insert/clear. */
7618 shift = (insn >> 7) & 0x1f;
7619 i = (insn >> 16) & 0x1f;
7622 tmp = tcg_temp_new_i32();
7623 tcg_gen_movi_i32(tmp, 0);
7625 tmp = load_reg(s, rm);
7628 tmp2 = load_reg(s, rd);
7629 tcg_gen_deposit_i32(tmp, tmp2, tmp, shift, i);
7630 tcg_temp_free_i32(tmp2);
7632 store_reg(s, rd, tmp);
7634 case 0x12: case 0x16: case 0x1a: case 0x1e: /* sbfx */
7635 case 0x32: case 0x36: case 0x3a: case 0x3e: /* ubfx */
7637 tmp = load_reg(s, rm);
7638 shift = (insn >> 7) & 0x1f;
7639 i = ((insn >> 16) & 0x1f) + 1;
7644 gen_ubfx(tmp, shift, (1u << i) - 1);
7646 gen_sbfx(tmp, shift, i);
7649 store_reg(s, rd, tmp);
7659 /* Check for undefined extension instructions
7660 * per the ARM Bible IE:
7661 * xxxx 0111 1111 xxxx xxxx xxxx 1111 xxxx
7663 sh = (0xf << 20) | (0xf << 4);
7664 if (op1 == 0x7 && ((insn & sh) == sh))
7668 /* load/store byte/word */
7669 rn = (insn >> 16) & 0xf;
7670 rd = (insn >> 12) & 0xf;
7671 tmp2 = load_reg(s, rn);
7672 i = (IS_USER(s) || (insn & 0x01200000) == 0x00200000);
7673 if (insn & (1 << 24))
7674 gen_add_data_offset(s, insn, tmp2);
7675 if (insn & (1 << 20)) {
7677 if (insn & (1 << 22)) {
7678 tmp = gen_ld8u(tmp2, i);
7680 tmp = gen_ld32(tmp2, i);
7684 tmp = load_reg(s, rd);
7685 if (insn & (1 << 22))
7686 gen_st8(tmp, tmp2, i);
7688 gen_st32(tmp, tmp2, i);
7690 if (!(insn & (1 << 24))) {
7691 gen_add_data_offset(s, insn, tmp2);
7692 store_reg(s, rn, tmp2);
7693 } else if (insn & (1 << 21)) {
7694 store_reg(s, rn, tmp2);
7696 tcg_temp_free_i32(tmp2);
7698 if (insn & (1 << 20)) {
7699 /* Complete the load. */
7700 store_reg_from_load(env, s, rd, tmp);
7706 int j, n, user, loaded_base;
7708 /* load/store multiple words */
7709 /* XXX: store correct base if write back */
7711 if (insn & (1 << 22)) {
7713 goto illegal_op; /* only usable in supervisor mode */
7715 if ((insn & (1 << 15)) == 0)
7718 rn = (insn >> 16) & 0xf;
7719 addr = load_reg(s, rn);
7721 /* compute total size */
7723 TCGV_UNUSED(loaded_var);
7726 if (insn & (1 << i))
7729 /* XXX: test invalid n == 0 case ? */
7730 if (insn & (1 << 23)) {
7731 if (insn & (1 << 24)) {
7733 tcg_gen_addi_i32(addr, addr, 4);
7735 /* post increment */
7738 if (insn & (1 << 24)) {
7740 tcg_gen_addi_i32(addr, addr, -(n * 4));
7742 /* post decrement */
7744 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7749 if (insn & (1 << i)) {
7750 if (insn & (1 << 20)) {
7752 tmp = gen_ld32(addr, IS_USER(s));
7754 tmp2 = tcg_const_i32(i);
7755 gen_helper_set_user_reg(cpu_env, tmp2, tmp);
7756 tcg_temp_free_i32(tmp2);
7757 tcg_temp_free_i32(tmp);
7758 } else if (i == rn) {
7762 store_reg_from_load(env, s, i, tmp);
7767 /* special case: r15 = PC + 8 */
7768 val = (long)s->pc + 4;
7769 tmp = tcg_temp_new_i32();
7770 tcg_gen_movi_i32(tmp, val);
7772 tmp = tcg_temp_new_i32();
7773 tmp2 = tcg_const_i32(i);
7774 gen_helper_get_user_reg(tmp, cpu_env, tmp2);
7775 tcg_temp_free_i32(tmp2);
7777 tmp = load_reg(s, i);
7779 gen_st32(tmp, addr, IS_USER(s));
7782 /* no need to add after the last transfer */
7784 tcg_gen_addi_i32(addr, addr, 4);
7787 if (insn & (1 << 21)) {
7789 if (insn & (1 << 23)) {
7790 if (insn & (1 << 24)) {
7793 /* post increment */
7794 tcg_gen_addi_i32(addr, addr, 4);
7797 if (insn & (1 << 24)) {
7800 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7802 /* post decrement */
7803 tcg_gen_addi_i32(addr, addr, -(n * 4));
7806 store_reg(s, rn, addr);
7808 tcg_temp_free_i32(addr);
7811 store_reg(s, rn, loaded_var);
7813 if ((insn & (1 << 22)) && !user) {
7814 /* Restore CPSR from SPSR. */
7815 tmp = load_cpu_field(spsr);
7816 gen_set_cpsr(tmp, 0xffffffff);
7817 tcg_temp_free_i32(tmp);
7818 s->is_jmp = DISAS_UPDATE;
7827 /* branch (and link) */
7828 val = (int32_t)s->pc;
7829 if (insn & (1 << 24)) {
7830 tmp = tcg_temp_new_i32();
7831 tcg_gen_movi_i32(tmp, val);
7832 store_reg(s, 14, tmp);
7834 offset = (((int32_t)insn << 8) >> 8);
7835 val += (offset << 2) + 4;
7843 if (disas_coproc_insn(env, s, insn))
/* Software interrupt: write s->pc to the PC and finish with DISAS_SWI. */
7848 gen_set_pc_im(s->pc);
7849 s->is_jmp = DISAS_SWI;
/* Raise the undefined-instruction exception (EXCP_UDEF). */
7853 gen_exception_insn(s, 4, EXCP_UDEF);
7859 /* Return true if this is a Thumb-2 logical op. */
/* OP is the Thumb-2 data-processing opcode; the caller visible in this file
   uses the result to decide whether a shift should also update the carry
   flag (logic_cc).
   NOTE(review): the return type line and the function body are not present
   in this listing (the embedded numbering skips them), so the exact opcode
   range treated as "logical" cannot be confirmed from here. */
7861 thumb2_logic_op(int op)
7866 /* Generate code for a Thumb-2 data processing operation. If CONDS is nonzero
7867 then set condition code flags based on the result of the operation.
7868 If SHIFTER_OUT is nonzero then set the carry flag for logical operations
7869 to the high bit of T1.
7870 Returns zero if the opcode is valid. */
/* In every visible operation T0 is both the first source operand and the
   destination, and T1 is the second source operand.
   NOTE(review): the `switch (op)`, the individual `case` labels and the tests
   that choose between the flag-setting form (gen_*_CC / *_cc helpers) and the
   plain tcg_gen_* form are not present in this listing; presumably CONDS
   selects between each such pair -- confirm against the full file. */
7873 gen_thumb2_data_op(DisasContext *s, int op, int conds, uint32_t shifter_out, TCGv t0, TCGv t1)
/* Bitwise group: and, and-with-complement, or, or-with-complement, xor. */
7880 tcg_gen_and_i32(t0, t0, t1);
7884 tcg_gen_andc_i32(t0, t0, t1);
7888 tcg_gen_or_i32(t0, t0, t1);
7892 tcg_gen_orc_i32(t0, t0, t1);
7896 tcg_gen_xor_i32(t0, t0, t1);
/* Add: flag-setting form first, plain form second. */
7901 gen_add_CC(t0, t0, t1);
7903 tcg_gen_add_i32(t0, t0, t1);
/* Add / subtract with carry. */
7907 gen_helper_adc_cc(t0, cpu_env, t0, t1);
7913 gen_helper_sbc_cc(t0, cpu_env, t0, t1);
7915 gen_sub_carry(t0, t0, t1);
/* Subtract: flag-setting form first, plain form second. */
7919 gen_sub_CC(t0, t0, t1);
7921 tcg_gen_sub_i32(t0, t0, t1);
/* Same subtract pair with the two source operands swapped (T1 first, T0 second). */
7925 gen_sub_CC(t0, t1, t0);
7927 tcg_gen_sub_i32(t0, t1, t0);
7929 default: /* 5, 6, 7, 9, 12, 15. */
/* Carry is taken from bit 31 of T1, matching the SHIFTER_OUT description in
   the header comment. */
7935 gen_set_CF_bit31(t1);
/*
 * disas_thumb2_insn: decode one 32-bit Thumb instruction and emit TCG ops
 * for it.
 *
 *   env      - CPU state; used in the visible code for arm_feature() checks
 *              and for fetching code via arm_lduw_code().
 *   s        - translation context (s->pc, s->bswap_code, s->condlabel,
 *              s->is_jmp are referenced below).
 *   insn_hw1 - first halfword of the instruction; it is placed in bits
 *              [31:16] of the combined instruction word.
 *
 * NOTE(review): the embedded line numbers in this listing skip values, so a
 * number of statements (braces, else arms, breaks, some case labels, the
 * target of `goto illegal_op`, and the terminators of a few comments) are not
 * visible here.  The comments added below describe only the lines that are
 * present; the original comments are left exactly as found.
 */
7940 /* Translate a 32-bit thumb instruction. Returns nonzero if the instruction
7942 static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw1)
7944 uint32_t insn, imm, shift, offset;
7945 uint32_t rd, rn, rm, rs;
7956 if (!(arm_feature(env, ARM_FEATURE_THUMB2)
7957 || arm_feature (env, ARM_FEATURE_M))) {
7958 /* Thumb-1 cores may need to treat bl and blx as a pair of
7959 16-bit instructions to get correct prefetch abort behavior. */
7961 if ((insn & (1 << 12)) == 0) {
7963 /* Second half of blx. */
7964 offset = ((insn & 0x7ff) << 1);
7965 tmp = load_reg(s, 14);
7966 tcg_gen_addi_i32(tmp, tmp, offset);
7967 tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
7969 tmp2 = tcg_temp_new_i32();
7970 tcg_gen_movi_i32(tmp2, s->pc | 1);
7971 store_reg(s, 14, tmp2);
7975 if (insn & (1 << 11)) {
7976 /* Second half of bl. */
7977 offset = ((insn & 0x7ff) << 1) | 1;
7978 tmp = load_reg(s, 14);
7979 tcg_gen_addi_i32(tmp, tmp, offset);
7981 tmp2 = tcg_temp_new_i32();
7982 tcg_gen_movi_i32(tmp2, s->pc | 1);
7983 store_reg(s, 14, tmp2);
7987 if ((s->pc & ~TARGET_PAGE_MASK) == 0) {
7988 /* Instruction spans a page boundary. Implement it as two
7989 16-bit instructions in case the second half causes an
7991 offset = ((int32_t)insn << 21) >> 9;
7992 tcg_gen_movi_i32(cpu_R[14], s->pc + 2 + offset);
7995 /* Fall through to 32-bit decode. */
/* Load the next code unit at s->pc (presumably the second halfword -- confirm
   against arm_lduw_code) and place insn_hw1 in bits [31:16]. */
7998 insn = arm_lduw_code(env, s->pc, s->bswap_code);
8000 insn |= (uint32_t)insn_hw1 << 16;
8002 if ((insn & 0xf800e800) != 0xf000e800) {
/* Register fields extracted up front: rn = insn[19:16], rs = insn[15:12],
   rd = insn[11:8]. */
8006 rn = (insn >> 16) & 0xf;
8007 rs = (insn >> 12) & 0xf;
8008 rd = (insn >> 8) & 0xf;
/* Top-level dispatch on insn[28:25]. */
8010 switch ((insn >> 25) & 0xf) {
8011 case 0: case 1: case 2: case 3:
8012 /* 16-bit instructions. Should never happen. */
8015 if (insn & (1 << 22)) {
8016 /* Other load/store, table branch. */
8017 if (insn & 0x01200000) {
8018 /* Load/store doubleword. */
8020 addr = tcg_temp_new_i32();
8021 tcg_gen_movi_i32(addr, s->pc & ~3);
8023 addr = load_reg(s, rn);
8025 offset = (insn & 0xff) * 4;
8026 if ((insn & (1 << 23)) == 0)
8028 if (insn & (1 << 24)) {
8029 tcg_gen_addi_i32(addr, addr, offset);
8032 if (insn & (1 << 20)) {
8034 tmp = gen_ld32(addr, IS_USER(s));
8035 store_reg(s, rs, tmp);
8036 tcg_gen_addi_i32(addr, addr, 4);
8037 tmp = gen_ld32(addr, IS_USER(s));
8038 store_reg(s, rd, tmp);
8041 tmp = load_reg(s, rs);
8042 gen_st32(tmp, addr, IS_USER(s));
8043 tcg_gen_addi_i32(addr, addr, 4);
8044 tmp = load_reg(s, rd);
8045 gen_st32(tmp, addr, IS_USER(s));
8047 if (insn & (1 << 21)) {
8048 /* Base writeback. */
8051 tcg_gen_addi_i32(addr, addr, offset - 4);
8052 store_reg(s, rn, addr);
8054 tcg_temp_free_i32(addr);
8056 } else if ((insn & (1 << 23)) == 0) {
8057 /* Load/store exclusive word. */
8058 addr = tcg_temp_local_new();
8059 load_reg_var(s, addr, rn);
8060 tcg_gen_addi_i32(addr, addr, (insn & 0xff) << 2);
8061 if (insn & (1 << 20)) {
8062 gen_load_exclusive(s, rs, 15, addr, 2);
8064 gen_store_exclusive(s, rd, rs, 15, addr, 2);
8066 tcg_temp_free(addr);
8067 } else if ((insn & (1 << 6)) == 0) {
8070 addr = tcg_temp_new_i32();
8071 tcg_gen_movi_i32(addr, s->pc);
8073 addr = load_reg(s, rn);
8075 tmp = load_reg(s, rm);
8076 tcg_gen_add_i32(addr, addr, tmp);
8077 if (insn & (1 << 4)) {
8079 tcg_gen_add_i32(addr, addr, tmp);
8080 tcg_temp_free_i32(tmp);
8081 tmp = gen_ld16u(addr, IS_USER(s));
8083 tcg_temp_free_i32(tmp);
8084 tmp = gen_ld8u(addr, IS_USER(s));
8086 tcg_temp_free_i32(addr);
8087 tcg_gen_shli_i32(tmp, tmp, 1);
8088 tcg_gen_addi_i32(tmp, tmp, s->pc);
8089 store_reg(s, 15, tmp);
8091 /* Load/store exclusive byte/halfword/doubleword. */
8093 op = (insn >> 4) & 0x3;
8097 addr = tcg_temp_local_new();
8098 load_reg_var(s, addr, rn);
8099 if (insn & (1 << 20)) {
8100 gen_load_exclusive(s, rs, rd, addr, op);
8102 gen_store_exclusive(s, rm, rs, rd, addr, op);
8104 tcg_temp_free(addr);
8107 /* Load/store multiple, RFE, SRS. */
8108 if (((insn >> 23) & 1) == ((insn >> 24) & 1)) {
8109 /* Not available in user mode. */
8112 if (insn & (1 << 20)) {
8114 addr = load_reg(s, rn);
8115 if ((insn & (1 << 24)) == 0)
8116 tcg_gen_addi_i32(addr, addr, -8);
8117 /* Load PC into tmp and CPSR into tmp2. */
8118 tmp = gen_ld32(addr, 0);
8119 tcg_gen_addi_i32(addr, addr, 4);
8120 tmp2 = gen_ld32(addr, 0);
8121 if (insn & (1 << 21)) {
8122 /* Base writeback. */
8123 if (insn & (1 << 24)) {
8124 tcg_gen_addi_i32(addr, addr, 4);
8126 tcg_gen_addi_i32(addr, addr, -4);
8128 store_reg(s, rn, addr);
8130 tcg_temp_free_i32(addr);
8132 gen_rfe(s, tmp, tmp2);
8136 addr = tcg_temp_new_i32();
8137 tmp = tcg_const_i32(op);
8138 gen_helper_get_r13_banked(addr, cpu_env, tmp);
8139 tcg_temp_free_i32(tmp);
8140 if ((insn & (1 << 24)) == 0) {
8141 tcg_gen_addi_i32(addr, addr, -8);
8143 tmp = load_reg(s, 14);
8144 gen_st32(tmp, addr, 0);
8145 tcg_gen_addi_i32(addr, addr, 4);
8146 tmp = tcg_temp_new_i32();
8147 gen_helper_cpsr_read(tmp, cpu_env);
8148 gen_st32(tmp, addr, 0);
8149 if (insn & (1 << 21)) {
8150 if ((insn & (1 << 24)) == 0) {
8151 tcg_gen_addi_i32(addr, addr, -4);
8153 tcg_gen_addi_i32(addr, addr, 4);
8155 tmp = tcg_const_i32(op);
8156 gen_helper_set_r13_banked(cpu_env, tmp, addr);
8157 tcg_temp_free_i32(tmp);
8159 tcg_temp_free_i32(addr);
8163 int i, loaded_base = 0;
8165 /* Load/store multiple. */
8166 addr = load_reg(s, rn);
8168 for (i = 0; i < 16; i++) {
8169 if (insn & (1 << i))
8172 if (insn & (1 << 24)) {
8173 tcg_gen_addi_i32(addr, addr, -offset);
8176 TCGV_UNUSED(loaded_var);
8177 for (i = 0; i < 16; i++) {
8178 if ((insn & (1 << i)) == 0)
8180 if (insn & (1 << 20)) {
8182 tmp = gen_ld32(addr, IS_USER(s));
8185 } else if (i == rn) {
8189 store_reg(s, i, tmp);
8193 tmp = load_reg(s, i);
8194 gen_st32(tmp, addr, IS_USER(s));
8196 tcg_gen_addi_i32(addr, addr, 4);
8199 store_reg(s, rn, loaded_var);
8201 if (insn & (1 << 21)) {
8202 /* Base register writeback. */
8203 if (insn & (1 << 24)) {
8204 tcg_gen_addi_i32(addr, addr, -offset);
8206 /* Fault if writeback register is in register list. */
8207 if (insn & (1 << rn))
8209 store_reg(s, rn, addr);
8211 tcg_temp_free_i32(addr);
8218 op = (insn >> 21) & 0xf;
8220 /* Halfword pack. */
8221 tmp = load_reg(s, rn);
8222 tmp2 = load_reg(s, rm);
8223 shift = ((insn >> 10) & 0x1c) | ((insn >> 6) & 0x3);
8224 if (insn & (1 << 5)) {
8228 tcg_gen_sari_i32(tmp2, tmp2, shift);
8229 tcg_gen_andi_i32(tmp, tmp, 0xffff0000);
8230 tcg_gen_ext16u_i32(tmp2, tmp2);
8234 tcg_gen_shli_i32(tmp2, tmp2, shift);
8235 tcg_gen_ext16u_i32(tmp, tmp);
8236 tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
8238 tcg_gen_or_i32(tmp, tmp, tmp2);
8239 tcg_temp_free_i32(tmp2);
8240 store_reg(s, rd, tmp);
8242 /* Data processing register constant shift. */
8244 tmp = tcg_temp_new_i32();
8245 tcg_gen_movi_i32(tmp, 0);
8247 tmp = load_reg(s, rn);
8249 tmp2 = load_reg(s, rm);
8251 shiftop = (insn >> 4) & 3;
8252 shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
8253 conds = (insn & (1 << 20)) != 0;
8254 logic_cc = (conds && thumb2_logic_op(op));
8255 gen_arm_shift_im(tmp2, shiftop, shift, logic_cc);
8256 if (gen_thumb2_data_op(s, op, conds, 0, tmp, tmp2))
8258 tcg_temp_free_i32(tmp2);
8260 store_reg(s, rd, tmp);
8262 tcg_temp_free_i32(tmp);
8266 case 13: /* Misc data processing. */
/* Sub-opcode: op[2:1] = insn[24:23], op[0] = insn[7]. */
8267 op = ((insn >> 22) & 6) | ((insn >> 7) & 1);
8268 if (op < 4 && (insn & 0xf000) != 0xf000)
8271 case 0: /* Register controlled shift. */
8272 tmp = load_reg(s, rn);
8273 tmp2 = load_reg(s, rm);
8274 if ((insn & 0x70) != 0)
8276 op = (insn >> 21) & 3;
8277 logic_cc = (insn & (1 << 20)) != 0;
8278 gen_arm_shift_reg(tmp, op, tmp2, logic_cc);
8281 store_reg_bx(env, s, rd, tmp);
8283 case 1: /* Sign/zero extend. */
8284 tmp = load_reg(s, rm);
8285 shift = (insn >> 4) & 3;
8286 /* ??? In many cases it's not necessary to do a
8287 rotate, a shift is sufficient. */
8289 tcg_gen_rotri_i32(tmp, tmp, shift * 8);
8290 op = (insn >> 20) & 7;
8292 case 0: gen_sxth(tmp); break;
8293 case 1: gen_uxth(tmp); break;
8294 case 2: gen_sxtb16(tmp); break;
8295 case 3: gen_uxtb16(tmp); break;
8296 case 4: gen_sxtb(tmp); break;
8297 case 5: gen_uxtb(tmp); break;
8298 default: goto illegal_op;
8301 tmp2 = load_reg(s, rn);
8302 if ((op >> 1) == 1) {
8303 gen_add16(tmp, tmp2);
8305 tcg_gen_add_i32(tmp, tmp, tmp2);
8306 tcg_temp_free_i32(tmp2);
8309 store_reg(s, rd, tmp);
8311 case 2: /* SIMD add/subtract. */
8312 op = (insn >> 20) & 7;
8313 shift = (insn >> 4) & 7;
8314 if ((op & 3) == 3 || (shift & 3) == 3)
8316 tmp = load_reg(s, rn);
8317 tmp2 = load_reg(s, rm);
8318 gen_thumb2_parallel_addsub(op, shift, tmp, tmp2);
8319 tcg_temp_free_i32(tmp2);
8320 store_reg(s, rd, tmp);
8322 case 3: /* Other data processing. */
/* Sub-opcode: op[5:3] = insn[22:20], op[2:0] = insn[6:4]. */
8323 op = ((insn >> 17) & 0x38) | ((insn >> 4) & 7);
8325 /* Saturating add/subtract. */
8326 tmp = load_reg(s, rn);
8327 tmp2 = load_reg(s, rm);
8329 gen_helper_double_saturate(tmp, cpu_env, tmp);
8331 gen_helper_sub_saturate(tmp, cpu_env, tmp2, tmp);
8333 gen_helper_add_saturate(tmp, cpu_env, tmp, tmp2);
8334 tcg_temp_free_i32(tmp2);
8336 tmp = load_reg(s, rn);
8338 case 0x0a: /* rbit */
8339 gen_helper_rbit(tmp, tmp);
8341 case 0x08: /* rev */
8342 tcg_gen_bswap32_i32(tmp, tmp);
8344 case 0x09: /* rev16 */
8347 case 0x0b: /* revsh */
8350 case 0x10: /* sel */
8351 tmp2 = load_reg(s, rm);
8352 tmp3 = tcg_temp_new_i32();
8353 tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUARMState, GE));
8354 gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
8355 tcg_temp_free_i32(tmp3);
8356 tcg_temp_free_i32(tmp2);
8358 case 0x18: /* clz */
8359 gen_helper_clz(tmp, tmp);
8365 store_reg(s, rd, tmp);
8367 case 4: case 5: /* 32-bit multiply. Sum of absolute differences. */
8368 op = (insn >> 4) & 0xf;
8369 tmp = load_reg(s, rn);
8370 tmp2 = load_reg(s, rm);
8371 switch ((insn >> 20) & 7) {
8372 case 0: /* 32 x 32 -> 32 */
8373 tcg_gen_mul_i32(tmp, tmp, tmp2);
8374 tcg_temp_free_i32(tmp2);
8376 tmp2 = load_reg(s, rs);
8378 tcg_gen_sub_i32(tmp, tmp2, tmp);
8380 tcg_gen_add_i32(tmp, tmp, tmp2);
8381 tcg_temp_free_i32(tmp2);
8384 case 1: /* 16 x 16 -> 32 */
8385 gen_mulxy(tmp, tmp2, op & 2, op & 1);
8386 tcg_temp_free_i32(tmp2);
8388 tmp2 = load_reg(s, rs);
8389 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
8390 tcg_temp_free_i32(tmp2);
8393 case 2: /* Dual multiply add. */
8394 case 4: /* Dual multiply subtract. */
8396 gen_swap_half(tmp2);
8397 gen_smul_dual(tmp, tmp2);
8398 if (insn & (1 << 22)) {
8399 /* This subtraction cannot overflow. */
8400 tcg_gen_sub_i32(tmp, tmp, tmp2);
8402 /* This addition cannot overflow 32 bits;
8403 * however it may overflow considered as a signed
8404 * operation, in which case we must set the Q flag.
8406 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
8408 tcg_temp_free_i32(tmp2);
8411 tmp2 = load_reg(s, rs);
8412 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
8413 tcg_temp_free_i32(tmp2);
8416 case 3: /* 32 * 16 -> 32msb */
8418 tcg_gen_sari_i32(tmp2, tmp2, 16);
8421 tmp64 = gen_muls_i64_i32(tmp, tmp2);
8422 tcg_gen_shri_i64(tmp64, tmp64, 16);
8423 tmp = tcg_temp_new_i32();
8424 tcg_gen_trunc_i64_i32(tmp, tmp64);
8425 tcg_temp_free_i64(tmp64);
8428 tmp2 = load_reg(s, rs);
8429 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
8430 tcg_temp_free_i32(tmp2);
8433 case 5: case 6: /* 32 * 32 -> 32msb (SMMUL, SMMLA, SMMLS) */
8434 tmp64 = gen_muls_i64_i32(tmp, tmp2);
8436 tmp = load_reg(s, rs);
8437 if (insn & (1 << 20)) {
8438 tmp64 = gen_addq_msw(tmp64, tmp);
8440 tmp64 = gen_subq_msw(tmp64, tmp);
8443 if (insn & (1 << 4)) {
8444 tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
8446 tcg_gen_shri_i64(tmp64, tmp64, 32);
8447 tmp = tcg_temp_new_i32();
8448 tcg_gen_trunc_i64_i32(tmp, tmp64);
8449 tcg_temp_free_i64(tmp64);
8451 case 7: /* Unsigned sum of absolute differences. */
8452 gen_helper_usad8(tmp, tmp, tmp2);
8453 tcg_temp_free_i32(tmp2);
8455 tmp2 = load_reg(s, rs);
8456 tcg_gen_add_i32(tmp, tmp, tmp2);
8457 tcg_temp_free_i32(tmp2);
8461 store_reg(s, rd, tmp);
8463 case 6: case 7: /* 64-bit multiply, Divide. */
/* Sub-opcode: op[3:0] = insn[7:4], op[6:4] = insn[22:20]. */
8464 op = ((insn >> 4) & 0xf) | ((insn >> 16) & 0x70);
8465 tmp = load_reg(s, rn);
8466 tmp2 = load_reg(s, rm);
8467 if ((op & 0x50) == 0x10) {
8469 if (!arm_feature(env, ARM_FEATURE_THUMB_DIV)) {
8473 gen_helper_udiv(tmp, tmp, tmp2);
8475 gen_helper_sdiv(tmp, tmp, tmp2);
8476 tcg_temp_free_i32(tmp2);
8477 store_reg(s, rd, tmp);
8478 } else if ((op & 0xe) == 0xc) {
8479 /* Dual multiply accumulate long. */
8481 gen_swap_half(tmp2);
8482 gen_smul_dual(tmp, tmp2);
8484 tcg_gen_sub_i32(tmp, tmp, tmp2);
8486 tcg_gen_add_i32(tmp, tmp, tmp2);
8488 tcg_temp_free_i32(tmp2);
8490 tmp64 = tcg_temp_new_i64();
8491 tcg_gen_ext_i32_i64(tmp64, tmp);
8492 tcg_temp_free_i32(tmp);
8493 gen_addq(s, tmp64, rs, rd);
8494 gen_storeq_reg(s, rs, rd, tmp64);
8495 tcg_temp_free_i64(tmp64);
8498 /* Unsigned 64-bit multiply */
8499 tmp64 = gen_mulu_i64_i32(tmp, tmp2);
8503 gen_mulxy(tmp, tmp2, op & 2, op & 1);
8504 tcg_temp_free_i32(tmp2);
8505 tmp64 = tcg_temp_new_i64();
8506 tcg_gen_ext_i32_i64(tmp64, tmp);
8507 tcg_temp_free_i32(tmp);
8509 /* Signed 64-bit multiply */
8510 tmp64 = gen_muls_i64_i32(tmp, tmp2);
8515 gen_addq_lo(s, tmp64, rs);
8516 gen_addq_lo(s, tmp64, rd);
8517 } else if (op & 0x40) {
8518 /* 64-bit accumulate. */
8519 gen_addq(s, tmp64, rs, rd);
8521 gen_storeq_reg(s, rs, rd, tmp64);
8522 tcg_temp_free_i64(tmp64);
8527 case 6: case 7: case 14: case 15:
8529 if (((insn >> 24) & 3) == 3) {
8530 /* Translate into the equivalent ARM encoding. */
8531 insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
8532 if (disas_neon_data_insn(env, s, insn))
8535 if (insn & (1 << 28))
8537 if (disas_coproc_insn (env, s, insn))
8541 case 8: case 9: case 10: case 11:
8542 if (insn & (1 << 15)) {
8543 /* Branches, misc control. */
8544 if (insn & 0x5000) {
8545 /* Unconditional branch. */
8546 /* signextend(hw1[10:0]) -> offset[:12]. */
8547 offset = ((int32_t)insn << 5) >> 9 & ~(int32_t)0xfff;
8548 /* hw1[10:0] -> offset[11:1]. */
8549 offset |= (insn & 0x7ff) << 1;
8550 /* (~hw2[13, 11] ^ offset[24]) -> offset[23,22]
8551 offset[24:22] already have the same value because of the
8552 sign extension above. */
8553 offset ^= ((~insn) & (1 << 13)) << 10;
8554 offset ^= ((~insn) & (1 << 11)) << 11;
8556 if (insn & (1 << 14)) {
8557 /* Branch and link. */
8558 tcg_gen_movi_i32(cpu_R[14], s->pc | 1);
8562 if (insn & (1 << 12)) {
8567 offset &= ~(uint32_t)2;
8568 /* thumb2 bx, no need to check */
8569 gen_bx_im(s, offset);
8571 } else if (((insn >> 23) & 7) == 7) {
8573 if (insn & (1 << 13))
8576 if (insn & (1 << 26)) {
8577 /* Secure monitor call (v6Z) */
8578 goto illegal_op; /* not implemented. */
8580 op = (insn >> 20) & 7;
8582 case 0: /* msr cpsr. */
8584 tmp = load_reg(s, rn);
8585 addr = tcg_const_i32(insn & 0xff);
8586 gen_helper_v7m_msr(cpu_env, addr, tmp);
8587 tcg_temp_free_i32(addr);
8588 tcg_temp_free_i32(tmp);
8593 case 1: /* msr spsr. */
8596 tmp = load_reg(s, rn);
8598 msr_mask(env, s, (insn >> 8) & 0xf, op == 1),
8602 case 2: /* cps, nop-hint. */
8603 if (((insn >> 8) & 7) == 0) {
8604 gen_nop_hint(s, insn & 0xff);
8606 /* Implemented as NOP in user mode. */
8611 if (insn & (1 << 10)) {
8612 if (insn & (1 << 7))
8614 if (insn & (1 << 6))
8616 if (insn & (1 << 5))
8618 if (insn & (1 << 9))
8619 imm = CPSR_A | CPSR_I | CPSR_F;
8621 if (insn & (1 << 8)) {
8623 imm |= (insn & 0x1f);
8626 gen_set_psr_im(s, offset, 0, imm);
8629 case 3: /* Special control operations. */
8631 op = (insn >> 4) & 0xf;
8639 /* These execute as NOPs. */
8646 /* Trivial implementation equivalent to bx. */
8647 tmp = load_reg(s, rn);
8650 case 5: /* Exception return. */
8654 if (rn != 14 || rd != 15) {
8657 tmp = load_reg(s, rn);
8658 tcg_gen_subi_i32(tmp, tmp, insn & 0xff);
8659 gen_exception_return(s, tmp);
8661 case 6: /* mrs cpsr. */
8662 tmp = tcg_temp_new_i32();
8664 addr = tcg_const_i32(insn & 0xff);
8665 gen_helper_v7m_mrs(tmp, cpu_env, addr);
8666 tcg_temp_free_i32(addr);
8668 gen_helper_cpsr_read(tmp, cpu_env);
8670 store_reg(s, rd, tmp);
8672 case 7: /* mrs spsr. */
8673 /* Not accessible in user mode. */
8674 if (IS_USER(s) || IS_M(env))
8676 tmp = load_cpu_field(spsr);
8677 store_reg(s, rd, tmp);
8682 /* Conditional branch. */
8683 op = (insn >> 22) & 0xf;
8684 /* Generate a conditional jump to next instruction. */
8685 s->condlabel = gen_new_label();
8686 gen_test_cc(op ^ 1, s->condlabel);
8689 /* offset[11:1] = insn[10:0] */
8690 offset = (insn & 0x7ff) << 1;
8691 /* offset[17:12] = insn[21:16]. */
8692 offset |= (insn & 0x003f0000) >> 4;
8693 /* offset[31:20] = insn[26]. */
8694 offset |= ((int32_t)((insn << 5) & 0x80000000)) >> 11;
8695 /* offset[18] = insn[13]. */
8696 offset |= (insn & (1 << 13)) << 5;
8697 /* offset[19] = insn[11]. */
8698 offset |= (insn & (1 << 11)) << 8;
8700 /* jump to the offset */
8701 gen_jmp(s, s->pc + offset);
8704 /* Data processing immediate. */
8705 if (insn & (1 << 25)) {
8706 if (insn & (1 << 24)) {
8707 if (insn & (1 << 20))
8709 /* Bitfield/Saturate. */
8710 op = (insn >> 21) & 7;
8712 shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
8714 tmp = tcg_temp_new_i32();
8715 tcg_gen_movi_i32(tmp, 0);
8717 tmp = load_reg(s, rn);
8720 case 2: /* Signed bitfield extract. */
8722 if (shift + imm > 32)
8725 gen_sbfx(tmp, shift, imm);
8727 case 6: /* Unsigned bitfield extract. */
8729 if (shift + imm > 32)
8732 gen_ubfx(tmp, shift, (1u << imm) - 1);
8734 case 3: /* Bitfield insert/clear. */
8737 imm = imm + 1 - shift;
8739 tmp2 = load_reg(s, rd);
8740 tcg_gen_deposit_i32(tmp, tmp2, tmp, shift, imm);
8741 tcg_temp_free_i32(tmp2);
8746 default: /* Saturate. */
8749 tcg_gen_sari_i32(tmp, tmp, shift);
8751 tcg_gen_shli_i32(tmp, tmp, shift);
8753 tmp2 = tcg_const_i32(imm);
8756 if ((op & 1) && shift == 0)
8757 gen_helper_usat16(tmp, cpu_env, tmp, tmp2);
8759 gen_helper_usat(tmp, cpu_env, tmp, tmp2);
8762 if ((op & 1) && shift == 0)
8763 gen_helper_ssat16(tmp, cpu_env, tmp, tmp2);
8765 gen_helper_ssat(tmp, cpu_env, tmp, tmp2);
8767 tcg_temp_free_i32(tmp2);
8770 store_reg(s, rd, tmp);
/* Gather the split immediate: imm[11] = insn[26], imm[10:8] = insn[14:12],
   imm[7:0] = insn[7:0].  The 16-bit form below additionally sets
   imm[15:12] = insn[19:16]. */
8772 imm = ((insn & 0x04000000) >> 15)
8773 | ((insn & 0x7000) >> 4) | (insn & 0xff);
8774 if (insn & (1 << 22)) {
8775 /* 16-bit immediate. */
8776 imm |= (insn >> 4) & 0xf000;
8777 if (insn & (1 << 23)) {
8779 tmp = load_reg(s, rd);
8780 tcg_gen_ext16u_i32(tmp, tmp);
8781 tcg_gen_ori_i32(tmp, tmp, imm << 16);
8784 tmp = tcg_temp_new_i32();
8785 tcg_gen_movi_i32(tmp, imm);
8788 /* Add/sub 12-bit immediate. */
8790 offset = s->pc & ~(uint32_t)3;
8791 if (insn & (1 << 23))
8795 tmp = tcg_temp_new_i32();
8796 tcg_gen_movi_i32(tmp, offset);
8798 tmp = load_reg(s, rn);
8799 if (insn & (1 << 23))
8800 tcg_gen_subi_i32(tmp, tmp, imm);
8802 tcg_gen_addi_i32(tmp, tmp, imm);
8805 store_reg(s, rd, tmp);
8808 int shifter_out = 0;
8809 /* modified 12-bit immediate. */
/* Here `shift` is the 4-bit selector: shift[3] = insn[26],
   shift[2:0] = insn[14:12]; `imm` starts as the low byte of the instruction. */
8810 shift = ((insn & 0x04000000) >> 23) | ((insn & 0x7000) >> 12);
8811 imm = (insn & 0xff);
8814 /* Nothing to do. */
8816 case 1: /* 00XY00XY */
8819 case 2: /* XY00XY00 */
8823 case 3: /* XYXYXYXY */
8827 default: /* Rotated constant. */
8828 shift = (shift << 1) | (imm >> 7);
8830 imm = imm << (32 - shift);
8834 tmp2 = tcg_temp_new_i32();
8835 tcg_gen_movi_i32(tmp2, imm);
8836 rn = (insn >> 16) & 0xf;
8838 tmp = tcg_temp_new_i32();
8839 tcg_gen_movi_i32(tmp, 0);
8841 tmp = load_reg(s, rn);
8843 op = (insn >> 21) & 0xf;
8844 if (gen_thumb2_data_op(s, op, (insn & (1 << 20)) != 0,
8845 shifter_out, tmp, tmp2))
8847 tcg_temp_free_i32(tmp2);
8848 rd = (insn >> 8) & 0xf;
8850 store_reg(s, rd, tmp);
8852 tcg_temp_free_i32(tmp);
8857 case 12: /* Load/store single data item. */
8862 if ((insn & 0x01100000) == 0x01000000) {
8863 if (disas_neon_ls_insn(env, s, insn))
8867 op = ((insn >> 21) & 3) | ((insn >> 22) & 4);
8869 if (!(insn & (1 << 20))) {
8873 /* Byte or halfword load space with dest == r15 : memory hints.
8874 * Catch them early so we don't emit pointless addressing code.
8875 * This space is a mix of:
8876 * PLD/PLDW/PLI, which we implement as NOPs (note that unlike
8877 * the ARM encodings, PLDW space doesn't UNDEF for non-v7MP
8879 * unallocated hints, which must be treated as NOPs
8880 * UNPREDICTABLE space, which we NOP or UNDEF depending on
8881 * which is easiest for the decoding logic
8882 * Some space which must UNDEF
8884 int op1 = (insn >> 23) & 3;
8885 int op2 = (insn >> 6) & 0x3f;
8890 /* UNPREDICTABLE, unallocated hint or
8891 * PLD/PLDW/PLI (literal)
8896 return 0; /* PLD/PLDW/PLI or unallocated hint */
8898 if ((op2 == 0) || ((op2 & 0x3c) == 0x30)) {
8899 return 0; /* PLD/PLDW/PLI or unallocated hint */
8901 /* UNDEF space, or an UNPREDICTABLE */
8907 addr = tcg_temp_new_i32();
8909 /* s->pc has already been incremented by 4. */
8910 imm = s->pc & 0xfffffffc;
8911 if (insn & (1 << 23))
8912 imm += insn & 0xfff;
8914 imm -= insn & 0xfff;
8915 tcg_gen_movi_i32(addr, imm);
8917 addr = load_reg(s, rn);
8918 if (insn & (1 << 23)) {
8919 /* Positive offset. */
8921 tcg_gen_addi_i32(addr, addr, imm);
8924 switch ((insn >> 8) & 0xf) {
8925 case 0x0: /* Shifted Register. */
8926 shift = (insn >> 4) & 0xf;
8928 tcg_temp_free_i32(addr);
8931 tmp = load_reg(s, rm);
8933 tcg_gen_shli_i32(tmp, tmp, shift);
8934 tcg_gen_add_i32(addr, addr, tmp);
8935 tcg_temp_free_i32(tmp);
8937 case 0xc: /* Negative offset. */
8938 tcg_gen_addi_i32(addr, addr, -imm);
8940 case 0xe: /* User privilege. */
8941 tcg_gen_addi_i32(addr, addr, imm);
8944 case 0x9: /* Post-decrement. */
8947 case 0xb: /* Post-increment. */
8951 case 0xd: /* Pre-decrement. */
8954 case 0xf: /* Pre-increment. */
8955 tcg_gen_addi_i32(addr, addr, imm);
8959 tcg_temp_free_i32(addr);
8964 if (insn & (1 << 20)) {
8967 case 0: tmp = gen_ld8u(addr, user); break;
8968 case 4: tmp = gen_ld8s(addr, user); break;
8969 case 1: tmp = gen_ld16u(addr, user); break;
8970 case 5: tmp = gen_ld16s(addr, user); break;
8971 case 2: tmp = gen_ld32(addr, user); break;
8973 tcg_temp_free_i32(addr);
8979 store_reg(s, rs, tmp);
8983 tmp = load_reg(s, rs);
8985 case 0: gen_st8(tmp, addr, user); break;
8986 case 1: gen_st16(tmp, addr, user); break;
8987 case 2: gen_st32(tmp, addr, user); break;
8989 tcg_temp_free_i32(addr);
8994 tcg_gen_addi_i32(addr, addr, imm);
8996 store_reg(s, rn, addr);
8998 tcg_temp_free_i32(addr);
9010 static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
9012 uint32_t val, insn, op, rm, rn, rd, shift, cond;
9019 if (s->condexec_mask) {
9020 cond = s->condexec_cond;
9021 if (cond != 0x0e) { /* Skip conditional when condition is AL. */
9022 s->condlabel = gen_new_label();
9023 gen_test_cc(cond ^ 1, s->condlabel);
9028 insn = arm_lduw_code(env, s->pc, s->bswap_code);
9031 switch (insn >> 12) {
9035 op = (insn >> 11) & 3;
9038 rn = (insn >> 3) & 7;
9039 tmp = load_reg(s, rn);
9040 if (insn & (1 << 10)) {
9042 tmp2 = tcg_temp_new_i32();
9043 tcg_gen_movi_i32(tmp2, (insn >> 6) & 7);
9046 rm = (insn >> 6) & 7;
9047 tmp2 = load_reg(s, rm);
9049 if (insn & (1 << 9)) {
9050 if (s->condexec_mask)
9051 tcg_gen_sub_i32(tmp, tmp, tmp2);
9053 gen_sub_CC(tmp, tmp, tmp2);
9055 if (s->condexec_mask)
9056 tcg_gen_add_i32(tmp, tmp, tmp2);
9058 gen_add_CC(tmp, tmp, tmp2);
9060 tcg_temp_free_i32(tmp2);
9061 store_reg(s, rd, tmp);
9063 /* shift immediate */
9064 rm = (insn >> 3) & 7;
9065 shift = (insn >> 6) & 0x1f;
9066 tmp = load_reg(s, rm);
9067 gen_arm_shift_im(tmp, op, shift, s->condexec_mask == 0);
9068 if (!s->condexec_mask)
9070 store_reg(s, rd, tmp);
9074 /* arithmetic large immediate */
9075 op = (insn >> 11) & 3;
9076 rd = (insn >> 8) & 0x7;
9077 if (op == 0) { /* mov */
9078 tmp = tcg_temp_new_i32();
9079 tcg_gen_movi_i32(tmp, insn & 0xff);
9080 if (!s->condexec_mask)
9082 store_reg(s, rd, tmp);
9084 tmp = load_reg(s, rd);
9085 tmp2 = tcg_temp_new_i32();
9086 tcg_gen_movi_i32(tmp2, insn & 0xff);
9089 gen_sub_CC(tmp, tmp, tmp2);
9090 tcg_temp_free_i32(tmp);
9091 tcg_temp_free_i32(tmp2);
9094 if (s->condexec_mask)
9095 tcg_gen_add_i32(tmp, tmp, tmp2);
9097 gen_add_CC(tmp, tmp, tmp2);
9098 tcg_temp_free_i32(tmp2);
9099 store_reg(s, rd, tmp);
9102 if (s->condexec_mask)
9103 tcg_gen_sub_i32(tmp, tmp, tmp2);
9105 gen_sub_CC(tmp, tmp, tmp2);
9106 tcg_temp_free_i32(tmp2);
9107 store_reg(s, rd, tmp);
9113 if (insn & (1 << 11)) {
9114 rd = (insn >> 8) & 7;
9115 /* load pc-relative. Bit 1 of PC is ignored. */
9116 val = s->pc + 2 + ((insn & 0xff) * 4);
9117 val &= ~(uint32_t)2;
9118 addr = tcg_temp_new_i32();
9119 tcg_gen_movi_i32(addr, val);
9120 tmp = gen_ld32(addr, IS_USER(s));
9121 tcg_temp_free_i32(addr);
9122 store_reg(s, rd, tmp);
9125 if (insn & (1 << 10)) {
9126 /* data processing extended or blx */
9127 rd = (insn & 7) | ((insn >> 4) & 8);
9128 rm = (insn >> 3) & 0xf;
9129 op = (insn >> 8) & 3;
9132 tmp = load_reg(s, rd);
9133 tmp2 = load_reg(s, rm);
9134 tcg_gen_add_i32(tmp, tmp, tmp2);
9135 tcg_temp_free_i32(tmp2);
9136 store_reg(s, rd, tmp);
9139 tmp = load_reg(s, rd);
9140 tmp2 = load_reg(s, rm);
9141 gen_sub_CC(tmp, tmp, tmp2);
9142 tcg_temp_free_i32(tmp2);
9143 tcg_temp_free_i32(tmp);
9145 case 2: /* mov/cpy */
9146 tmp = load_reg(s, rm);
9147 store_reg(s, rd, tmp);
9149 case 3:/* branch [and link] exchange thumb register */
9150 tmp = load_reg(s, rm);
9151 if (insn & (1 << 7)) {
9153 val = (uint32_t)s->pc | 1;
9154 tmp2 = tcg_temp_new_i32();
9155 tcg_gen_movi_i32(tmp2, val);
9156 store_reg(s, 14, tmp2);
9158 /* already thumb, no need to check */
9165 /* data processing register */
9167 rm = (insn >> 3) & 7;
9168 op = (insn >> 6) & 0xf;
9169 if (op == 2 || op == 3 || op == 4 || op == 7) {
9170 /* the shift/rotate ops want the operands backwards */
9179 if (op == 9) { /* neg */
9180 tmp = tcg_temp_new_i32();
9181 tcg_gen_movi_i32(tmp, 0);
9182 } else if (op != 0xf) { /* mvn doesn't read its first operand */
9183 tmp = load_reg(s, rd);
9188 tmp2 = load_reg(s, rm);
9191 tcg_gen_and_i32(tmp, tmp, tmp2);
9192 if (!s->condexec_mask)
9196 tcg_gen_xor_i32(tmp, tmp, tmp2);
9197 if (!s->condexec_mask)
9201 if (s->condexec_mask) {
9202 gen_shl(tmp2, tmp2, tmp);
9204 gen_helper_shl_cc(tmp2, cpu_env, tmp2, tmp);
9209 if (s->condexec_mask) {
9210 gen_shr(tmp2, tmp2, tmp);
9212 gen_helper_shr_cc(tmp2, cpu_env, tmp2, tmp);
9217 if (s->condexec_mask) {
9218 gen_sar(tmp2, tmp2, tmp);
9220 gen_helper_sar_cc(tmp2, cpu_env, tmp2, tmp);
9225 if (s->condexec_mask)
9228 gen_helper_adc_cc(tmp, cpu_env, tmp, tmp2);
9231 if (s->condexec_mask)
9232 gen_sub_carry(tmp, tmp, tmp2);
9234 gen_helper_sbc_cc(tmp, cpu_env, tmp, tmp2);
9237 if (s->condexec_mask) {
9238 tcg_gen_andi_i32(tmp, tmp, 0x1f);
9239 tcg_gen_rotr_i32(tmp2, tmp2, tmp);
9241 gen_helper_ror_cc(tmp2, cpu_env, tmp2, tmp);
9246 tcg_gen_and_i32(tmp, tmp, tmp2);
9251 if (s->condexec_mask)
9252 tcg_gen_neg_i32(tmp, tmp2);
9254 gen_sub_CC(tmp, tmp, tmp2);
9257 gen_sub_CC(tmp, tmp, tmp2);
9261 gen_add_CC(tmp, tmp, tmp2);
9265 tcg_gen_or_i32(tmp, tmp, tmp2);
9266 if (!s->condexec_mask)
9270 tcg_gen_mul_i32(tmp, tmp, tmp2);
9271 if (!s->condexec_mask)
9275 tcg_gen_andc_i32(tmp, tmp, tmp2);
9276 if (!s->condexec_mask)
9280 tcg_gen_not_i32(tmp2, tmp2);
9281 if (!s->condexec_mask)
9289 store_reg(s, rm, tmp2);
9291 tcg_temp_free_i32(tmp);
9293 store_reg(s, rd, tmp);
9294 tcg_temp_free_i32(tmp2);
9297 tcg_temp_free_i32(tmp);
9298 tcg_temp_free_i32(tmp2);
9303 /* load/store register offset. */
9305 rn = (insn >> 3) & 7;
9306 rm = (insn >> 6) & 7;
9307 op = (insn >> 9) & 7;
9308 addr = load_reg(s, rn);
9309 tmp = load_reg(s, rm);
9310 tcg_gen_add_i32(addr, addr, tmp);
9311 tcg_temp_free_i32(tmp);
9313 if (op < 3) /* store */
9314 tmp = load_reg(s, rd);
9318 gen_st32(tmp, addr, IS_USER(s));
9321 gen_st16(tmp, addr, IS_USER(s));
9324 gen_st8(tmp, addr, IS_USER(s));
9327 tmp = gen_ld8s(addr, IS_USER(s));
9330 tmp = gen_ld32(addr, IS_USER(s));
9333 tmp = gen_ld16u(addr, IS_USER(s));
9336 tmp = gen_ld8u(addr, IS_USER(s));
9339 tmp = gen_ld16s(addr, IS_USER(s));
9342 if (op >= 3) /* load */
9343 store_reg(s, rd, tmp);
9344 tcg_temp_free_i32(addr);
9348 /* load/store word immediate offset */
9350 rn = (insn >> 3) & 7;
9351 addr = load_reg(s, rn);
9352 val = (insn >> 4) & 0x7c;
9353 tcg_gen_addi_i32(addr, addr, val);
9355 if (insn & (1 << 11)) {
9357 tmp = gen_ld32(addr, IS_USER(s));
9358 store_reg(s, rd, tmp);
9361 tmp = load_reg(s, rd);
9362 gen_st32(tmp, addr, IS_USER(s));
9364 tcg_temp_free_i32(addr);
9368 /* load/store byte immediate offset */
9370 rn = (insn >> 3) & 7;
9371 addr = load_reg(s, rn);
9372 val = (insn >> 6) & 0x1f;
9373 tcg_gen_addi_i32(addr, addr, val);
9375 if (insn & (1 << 11)) {
9377 tmp = gen_ld8u(addr, IS_USER(s));
9378 store_reg(s, rd, tmp);
9381 tmp = load_reg(s, rd);
9382 gen_st8(tmp, addr, IS_USER(s));
9384 tcg_temp_free_i32(addr);
9388 /* load/store halfword immediate offset */
9390 rn = (insn >> 3) & 7;
9391 addr = load_reg(s, rn);
9392 val = (insn >> 5) & 0x3e;
9393 tcg_gen_addi_i32(addr, addr, val);
9395 if (insn & (1 << 11)) {
9397 tmp = gen_ld16u(addr, IS_USER(s));
9398 store_reg(s, rd, tmp);
9401 tmp = load_reg(s, rd);
9402 gen_st16(tmp, addr, IS_USER(s));
9404 tcg_temp_free_i32(addr);
9408 /* load/store from stack */
9409 rd = (insn >> 8) & 7;
9410 addr = load_reg(s, 13);
9411 val = (insn & 0xff) * 4;
9412 tcg_gen_addi_i32(addr, addr, val);
9414 if (insn & (1 << 11)) {
9416 tmp = gen_ld32(addr, IS_USER(s));
9417 store_reg(s, rd, tmp);
9420 tmp = load_reg(s, rd);
9421 gen_st32(tmp, addr, IS_USER(s));
9423 tcg_temp_free_i32(addr);
9427 /* add to high reg */
9428 rd = (insn >> 8) & 7;
9429 if (insn & (1 << 11)) {
9431 tmp = load_reg(s, 13);
9433 /* PC. bit 1 is ignored. */
9434 tmp = tcg_temp_new_i32();
9435 tcg_gen_movi_i32(tmp, (s->pc + 2) & ~(uint32_t)2);
9437 val = (insn & 0xff) * 4;
9438 tcg_gen_addi_i32(tmp, tmp, val);
9439 store_reg(s, rd, tmp);
9444 op = (insn >> 8) & 0xf;
9447 /* adjust stack pointer */
9448 tmp = load_reg(s, 13);
9449 val = (insn & 0x7f) * 4;
9450 if (insn & (1 << 7))
9451 val = -(int32_t)val;
9452 tcg_gen_addi_i32(tmp, tmp, val);
9453 store_reg(s, 13, tmp);
9456 case 2: /* sign/zero extend. */
9459 rm = (insn >> 3) & 7;
9460 tmp = load_reg(s, rm);
9461 switch ((insn >> 6) & 3) {
9462 case 0: gen_sxth(tmp); break;
9463 case 1: gen_sxtb(tmp); break;
9464 case 2: gen_uxth(tmp); break;
9465 case 3: gen_uxtb(tmp); break;
9467 store_reg(s, rd, tmp);
9469 case 4: case 5: case 0xc: case 0xd:
9471 addr = load_reg(s, 13);
9472 if (insn & (1 << 8))
9476 for (i = 0; i < 8; i++) {
9477 if (insn & (1 << i))
9480 if ((insn & (1 << 11)) == 0) {
9481 tcg_gen_addi_i32(addr, addr, -offset);
9483 for (i = 0; i < 8; i++) {
9484 if (insn & (1 << i)) {
9485 if (insn & (1 << 11)) {
9487 tmp = gen_ld32(addr, IS_USER(s));
9488 store_reg(s, i, tmp);
9491 tmp = load_reg(s, i);
9492 gen_st32(tmp, addr, IS_USER(s));
9494 /* advance to the next address. */
9495 tcg_gen_addi_i32(addr, addr, 4);
9499 if (insn & (1 << 8)) {
9500 if (insn & (1 << 11)) {
9502 tmp = gen_ld32(addr, IS_USER(s));
9503 /* don't set the pc until the rest of the instruction
9507 tmp = load_reg(s, 14);
9508 gen_st32(tmp, addr, IS_USER(s));
9510 tcg_gen_addi_i32(addr, addr, 4);
9512 if ((insn & (1 << 11)) == 0) {
9513 tcg_gen_addi_i32(addr, addr, -offset);
9515 /* write back the new stack pointer */
9516 store_reg(s, 13, addr);
9517 /* set the new PC value */
9518 if ((insn & 0x0900) == 0x0900) {
9519 store_reg_from_load(env, s, 15, tmp);
9523 case 1: case 3: case 9: case 11: /* czb */
9525 tmp = load_reg(s, rm);
9526 s->condlabel = gen_new_label();
9528 if (insn & (1 << 11))
9529 tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, s->condlabel);
9531 tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, s->condlabel);
9532 tcg_temp_free_i32(tmp);
9533 offset = ((insn & 0xf8) >> 2) | (insn & 0x200) >> 3;
9534 val = (uint32_t)s->pc + 2;
9539 case 15: /* IT, nop-hint. */
9540 if ((insn & 0xf) == 0) {
9541 gen_nop_hint(s, (insn >> 4) & 0xf);
9545 s->condexec_cond = (insn >> 4) & 0xe;
9546 s->condexec_mask = insn & 0x1f;
9547 /* No actual code generated for this insn, just setup state. */
9550 case 0xe: /* bkpt */
9552 gen_exception_insn(s, 2, EXCP_BKPT);
9557 rn = (insn >> 3) & 0x7;
9559 tmp = load_reg(s, rn);
9560 switch ((insn >> 6) & 3) {
9561 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
9562 case 1: gen_rev16(tmp); break;
9563 case 3: gen_revsh(tmp); break;
9564 default: goto illegal_op;
9566 store_reg(s, rd, tmp);
9570 switch ((insn >> 5) & 7) {
9574 if (((insn >> 3) & 1) != s->bswap_code) {
9575 /* Dynamic endianness switching not implemented. */
9586 tmp = tcg_const_i32((insn & (1 << 4)) != 0);
9589 addr = tcg_const_i32(19);
9590 gen_helper_v7m_msr(cpu_env, addr, tmp);
9591 tcg_temp_free_i32(addr);
9595 addr = tcg_const_i32(16);
9596 gen_helper_v7m_msr(cpu_env, addr, tmp);
9597 tcg_temp_free_i32(addr);
9599 tcg_temp_free_i32(tmp);
9602 if (insn & (1 << 4)) {
9603 shift = CPSR_A | CPSR_I | CPSR_F;
9607 gen_set_psr_im(s, ((insn & 7) << 6), 0, shift);
9622 /* load/store multiple */
9624 TCGV_UNUSED(loaded_var);
9625 rn = (insn >> 8) & 0x7;
9626 addr = load_reg(s, rn);
9627 for (i = 0; i < 8; i++) {
9628 if (insn & (1 << i)) {
9629 if (insn & (1 << 11)) {
9631 tmp = gen_ld32(addr, IS_USER(s));
9635 store_reg(s, i, tmp);
9639 tmp = load_reg(s, i);
9640 gen_st32(tmp, addr, IS_USER(s));
9642 /* advance to the next address */
9643 tcg_gen_addi_i32(addr, addr, 4);
9646 if ((insn & (1 << rn)) == 0) {
9647 /* base reg not in list: base register writeback */
9648 store_reg(s, rn, addr);
9650 /* base reg in list: if load, complete it now */
9651 if (insn & (1 << 11)) {
9652 store_reg(s, rn, loaded_var);
9654 tcg_temp_free_i32(addr);
9659 /* conditional branch or swi */
9660 cond = (insn >> 8) & 0xf;
9666 gen_set_pc_im(s->pc);
9667 s->is_jmp = DISAS_SWI;
9670 /* generate a conditional jump to next instruction */
9671 s->condlabel = gen_new_label();
9672 gen_test_cc(cond ^ 1, s->condlabel);
9675 /* jump to the offset */
9676 val = (uint32_t)s->pc + 2;
9677 offset = ((int32_t)insn << 24) >> 24;
9683 if (insn & (1 << 11)) {
9684 if (disas_thumb2_insn(env, s, insn))
9688 /* unconditional branch */
9689 val = (uint32_t)s->pc;
9690 offset = ((int32_t)insn << 21) >> 21;
9691 val += (offset << 1) + 2;
9696 if (disas_thumb2_insn(env, s, insn))
9702 gen_exception_insn(s, 4, EXCP_UDEF);
9706 gen_exception_insn(s, 2, EXCP_UDEF);
/* NOTE(review): every line of this listing carries a leading number and
   several source lines (braces, local declarations, some statements and
   some comment terminators) are absent, so the text does not compile as
   shown.  The comments added below describe only what the visible
   statements do; restore the missing lines from a pristine copy. */
9709 /* generate intermediate code in gen_opc_buf and gen_opparam_buf for
9710 basic block 'tb'. If search_pc is TRUE, also generate PC
9711 information for each intermediate instruction. */
9712 static inline void gen_intermediate_code_internal(CPUARMState *env,
9713 TranslationBlock *tb,
9716 DisasContext dc1, *dc = &dc1;
9718 uint16_t *gen_opc_end;
9720 target_ulong pc_start;
9721 uint32_t next_page_start;
9725 /* generate intermediate code */
/* Upper bound for the op pointer; the main loop stops once
   tcg_ctx.gen_opc_ptr reaches it. */
9730 gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
/* Seed the translation context from the CPU state and the TB flags. */
9732 dc->is_jmp = DISAS_NEXT;
9734 dc->singlestep_enabled = env->singlestep_enabled;
9736 dc->thumb = ARM_TBFLAG_THUMB(tb->flags);
9737 dc->bswap_code = ARM_TBFLAG_BSWAP_CODE(tb->flags);
/* The TB flags hold the condexec value as cond in bits [7:4] and mask in
   bits [3:0]; the context keeps the mask shifted left by one. */
9738 dc->condexec_mask = (ARM_TBFLAG_CONDEXEC(tb->flags) & 0xf) << 1;
9739 dc->condexec_cond = ARM_TBFLAG_CONDEXEC(tb->flags) >> 4;
9740 #if !defined(CONFIG_USER_ONLY)
9741 dc->user = (ARM_TBFLAG_PRIV(tb->flags) == 0);
9743 dc->vfp_enabled = ARM_TBFLAG_VFPEN(tb->flags);
9744 dc->vec_len = ARM_TBFLAG_VECLEN(tb->flags);
9745 dc->vec_stride = ARM_TBFLAG_VECSTRIDE(tb->flags);
/* Allocate fresh 32-bit and 64-bit TCG temporaries for the file-level
   cpu_F0s/cpu_F1s/cpu_F0d/cpu_F1d variables. */
9746 cpu_F0s = tcg_temp_new_i32();
9747 cpu_F1s = tcg_temp_new_i32();
9748 cpu_F0d = tcg_temp_new_i64();
9749 cpu_F1d = tcg_temp_new_i64();
9752 /* FIXME: cpu_M0 can probably be the same as cpu_V0. */
9753 cpu_M0 = tcg_temp_new_i64();
/* First address of the page after the one containing pc_start; the main
   loop stops once dc->pc reaches it. */
9754 next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
/* Instruction budget taken from the count field of tb->cflags.
   NOTE(review): the condition guarding the second assignment is not
   visible in this listing. */
9757 max_insns = tb->cflags & CF_COUNT_MASK;
9759 max_insns = CF_COUNT_MASK;
9763 tcg_clear_temp_count();
9765 /* A note on handling of the condexec (IT) bits:
9767 * We want to avoid the overhead of having to write the updated condexec
9768 * bits back to the CPUARMState for every instruction in an IT block. So:
9769 * (1) if the condexec bits are not already zero then we write
9770 * zero back into the CPUARMState now. This avoids complications trying
9771 * to do it at the end of the block. (For example if we don't do this
9772 * it's hard to identify whether we can safely skip writing condexec
9773 * at the end of the TB, which we definitely want to do for the case
9774 * where a TB doesn't do anything with the IT state at all.)
9775 * (2) if we are going to leave the TB then we call gen_set_condexec()
9776 * which will write the correct value into CPUARMState if zero is wrong.
9777 * This is done both for leaving the TB at the end, and for leaving
9778 * it because of an exception we know will happen, which is done in
9779 * gen_exception_insn(). The latter is necessary because we need to
9780 * leave the TB with the PC/IT state just prior to execution of the
9781 * instruction which caused the exception.
9782 * (3) if we leave the TB unexpectedly (eg a data abort on a load)
9783 * then the CPUARMState will be wrong and we need to reset it.
9784 * This is handled in the same way as restoration of the
9785 * PC in these situations: we will be called again with search_pc=1
9786 * and generate a mapping of the condexec bits for each PC in
9787 * gen_opc_condexec_bits[]. restore_state_to_opc() then uses
9788 * this to restore the condexec bits.
9790 * Note that there are no instructions which can read the condexec
9791 * bits, and none which can write non-static values to them, so
9792 * we don't need to care about whether CPUARMState is correct in the
9796 /* Reset the conditional execution bits immediately. This avoids
9797 complications trying to do it at the end of the block. */
/* Only emitted when the TB starts with nonzero condexec state: store a
   zero into the condexec_bits CPU field. */
9798 if (dc->condexec_mask || dc->condexec_cond)
9800 TCGv tmp = tcg_temp_new_i32();
9801 tcg_gen_movi_i32(tmp, 0);
9802 store_cpu_field(tmp, condexec_bits);
9805 #ifdef CONFIG_USER_ONLY
9806 /* Intercept jump to the magic kernel page. */
9807 if (dc->pc >= 0xffff0000) {
9808 /* We always get here via a jump, so know we are not in a
9809 conditional execution block. */
9810 gen_exception(EXCP_KERNEL_TRAP);
9811 dc->is_jmp = DISAS_UPDATE;
9815 if (dc->pc >= 0xfffffff0 && IS_M(env)) {
9816 /* We always get here via a jump, so know we are not in a
9817 conditional execution block. */
9818 gen_exception(EXCP_EXCEPTION_EXIT);
9819 dc->is_jmp = DISAS_UPDATE;
/* Walk the breakpoint list; a breakpoint at the current PC raises
   EXCP_DEBUG and jumps straight to the end-of-generation code. */
9824 if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) {
9825 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
9826 if (bp->pc == dc->pc) {
9827 gen_exception_insn(dc, 0, EXCP_DEBUG);
9828 /* Advance PC so that clearing the breakpoint will
9829 invalidate this TB. */
9831 goto done_generating;
/* Per-op bookkeeping used for state recovery: j is the index of the next
   op; entries up to it are marked as non-starts, then the PC, packed
   condexec bits and instruction count are recorded for this instruction.
   restore_state_to_opc() reads gen_opc_pc[] and gen_opc_condexec_bits[]. */
9837 j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
9841 tcg_ctx.gen_opc_instr_start[lj++] = 0;
9843 tcg_ctx.gen_opc_pc[lj] = dc->pc;
9844 gen_opc_condexec_bits[lj] = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
9845 tcg_ctx.gen_opc_instr_start[lj] = 1;
9846 tcg_ctx.gen_opc_icount[lj] = num_insns;
9849 if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
9852 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
9853 tcg_gen_debug_insn_start(dc->pc);
/* Thumb: translate one instruction, then advance the IT state: the top
   bit of the 5-bit mask becomes the low bit of the condition, the mask
   shifts left by one, and the condition is cleared once the mask is
   empty. */
9857 disas_thumb_insn(env, dc);
9858 if (dc->condexec_mask) {
9859 dc->condexec_cond = (dc->condexec_cond & 0xe)
9860 | ((dc->condexec_mask >> 4) & 1);
9861 dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
9862 if (dc->condexec_mask == 0) {
9863 dc->condexec_cond = 0;
9867 disas_arm_insn(env, dc);
/* The instruction was conditionally skipped but did not end the TB:
   place the skip label here so execution continues with the next one. */
9870 if (dc->condjmp && !dc->is_jmp) {
9871 gen_set_label(dc->condlabel);
/* Report TCG temporaries that were not freed by the translated insn. */
9875 if (tcg_check_temp_count()) {
9876 fprintf(stderr, "TCG temporary leak before %08x\n", dc->pc);
9879 /* Translation stops when a conditional branch is encountered.
9880 * Otherwise the subsequent code could get translated several times.
9881 * Also stop translation when a page boundary is reached. This
9882 * ensures prefetch aborts occur at the right place. */
9884 } while (!dc->is_jmp && tcg_ctx.gen_opc_ptr < gen_opc_end &&
9885 !env->singlestep_enabled &&
9887 dc->pc < next_page_start &&
9888 num_insns < max_insns);
9890 if (tb->cflags & CF_LAST_IO) {
9892 /* FIXME: This can theoretically happen with self-modifying
9894 cpu_abort(env, "IO on conditional branch instruction");
9899 /* At this stage dc->condjmp will only be set when the skipped
9900 instruction was a conditional branch or trap, and the PC has
9901 already been written. */
9902 if (unlikely(env->singlestep_enabled)) {
9903 /* Make sure the pc is updated, and raise a debug exception. */
9905 gen_set_condexec(dc);
9906 if (dc->is_jmp == DISAS_SWI) {
9907 gen_exception(EXCP_SWI);
9909 gen_exception(EXCP_DEBUG);
9911 gen_set_label(dc->condlabel);
9913 if (dc->condjmp || !dc->is_jmp) {
9914 gen_set_pc_im(dc->pc);
9917 gen_set_condexec(dc);
9918 if (dc->is_jmp == DISAS_SWI && !dc->condjmp) {
9919 gen_exception(EXCP_SWI);
9921 /* FIXME: Single stepping a WFI insn will not halt
9923 gen_exception(EXCP_DEBUG);
9926 /* While branches must always occur at the end of an IT block,
9927 there are a few other things that can cause us to terminate
9928 the TB in the middle of an IT block:
9929 - Exception generating instructions (bkpt, swi, undefined).
9931 - Hardware watchpoints.
9932 Hardware breakpoints have already been handled and skip this code.
9934 gen_set_condexec(dc);
9935 switch(dc->is_jmp) {
9937 gen_goto_tb(dc, 1, dc->pc);
9942 /* indicate that the hash table must be used to find the next TB */
9946 /* nothing more to generate */
9949 gen_helper_wfi(cpu_env);
9952 gen_exception(EXCP_SWI);
9956 gen_set_label(dc->condlabel);
9957 gen_set_condexec(dc);
9958 gen_goto_tb(dc, 1, dc->pc);
/* Close the instruction-count bookkeeping and terminate the op stream. */
9964 gen_icount_end(tb, num_insns);
9965 *tcg_ctx.gen_opc_ptr = INDEX_op_end;
/* When CPU_LOG_TB_IN_ASM logging is on, log the symbol at pc_start and a
   disassembly of the guest code covered by this TB. */
9968 if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
9969 qemu_log("----------------\n");
9970 qemu_log("IN: %s\n", lookup_symbol(pc_start));
9971 log_target_disas(env, pc_start, dc->pc - pc_start,
9972 dc->thumb | (dc->bswap_code << 1));
9977 j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
9980 tcg_ctx.gen_opc_instr_start[lj++] = 0;
/* Record the guest code size in bytes and the number of instructions
   translated into this TB. */
9982 tb->size = dc->pc - pc_start;
9983 tb->icount = num_insns;
9987 void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
9989 gen_intermediate_code_internal(env, tb, 0);
9992 void gen_intermediate_code_pc(CPUARMState *env, TranslationBlock *tb)
9994 gen_intermediate_code_internal(env, tb, 1);
/* Printable names for the sixteen values of the low four PSR bits, indexed
   by (psr & 0xf); "???" marks values that have no name in this table. */
static const char *cpu_mode_names[16] = {
    "usr", "fiq", "irq", "svc", "???", "???", "???", "abt",
    "???", "???", "???", "und", "???", "???", "???", "sys"
};
10002 void cpu_dump_state(CPUARMState *env, FILE *f, fprintf_function cpu_fprintf,
10008 for(i=0;i<16;i++) {
10009 cpu_fprintf(f, "R%02d=%08x", i, env->regs[i]);
10011 cpu_fprintf(f, "\n");
10013 cpu_fprintf(f, " ");
10015 psr = cpsr_read(env);
10016 cpu_fprintf(f, "PSR=%08x %c%c%c%c %c %s%d\n",
10018 psr & (1 << 31) ? 'N' : '-',
10019 psr & (1 << 30) ? 'Z' : '-',
10020 psr & (1 << 29) ? 'C' : '-',
10021 psr & (1 << 28) ? 'V' : '-',
10022 psr & CPSR_T ? 'T' : 'A',
10023 cpu_mode_names[psr & 0xf], (psr & 0x10) ? 32 : 26);
10025 if (flags & CPU_DUMP_FPU) {
10026 int numvfpregs = 0;
10027 if (arm_feature(env, ARM_FEATURE_VFP)) {
10030 if (arm_feature(env, ARM_FEATURE_VFP3)) {
10033 for (i = 0; i < numvfpregs; i++) {
10034 uint64_t v = float64_val(env->vfp.regs[i]);
10035 cpu_fprintf(f, "s%02d=%08x s%02d=%08x d%02d=%016" PRIx64 "\n",
10036 i * 2, (uint32_t)v,
10037 i * 2 + 1, (uint32_t)(v >> 32),
10040 cpu_fprintf(f, "FPSCR: %08x\n", (int)env->vfp.xregs[ARM_VFP_FPSCR]);
10044 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb, int pc_pos)
10046 env->regs[15] = tcg_ctx.gen_opc_pc[pc_pos];
10047 env->condexec_bits = gen_opc_condexec_bits[pc_pos];