4 * Copyright (c) 2003 Fabrice Bellard
5 * Copyright (c) 2005-2007 CodeSourcery
6 * Copyright (c) 2007 OpenedHand, Ltd.
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
/* Architecture feature gates.  Every ENABLE_ARCH_* macro except
   ENABLE_ARCH_5J expands to an arm_feature() test on a variable named
   "env", so such a variable must be in scope wherever one is used. */
36 #define ENABLE_ARCH_4T arm_feature(env, ARM_FEATURE_V4T)
37 #define ENABLE_ARCH_5 arm_feature(env, ARM_FEATURE_V5)
38 /* currently all emulated v5 cores are also v5TE, so don't bother */
39 #define ENABLE_ARCH_5TE arm_feature(env, ARM_FEATURE_V5)
/* Hard-wired to 0: ARCH(5J) therefore always takes the illegal_op path. */
40 #define ENABLE_ARCH_5J 0
41 #define ENABLE_ARCH_6 arm_feature(env, ARM_FEATURE_V6)
42 #define ENABLE_ARCH_6K arm_feature(env, ARM_FEATURE_V6K)
/* Note: the 6T2 gate is keyed on the THUMB2 feature bit. */
43 #define ENABLE_ARCH_6T2 arm_feature(env, ARM_FEATURE_THUMB2)
44 #define ENABLE_ARCH_7 arm_feature(env, ARM_FEATURE_V7)
/* ARCH(x): jump to the label "illegal_op" unless ENABLE_ARCH_x is nonzero.
   The enclosing function must define that label.  The do/while(0) wrapper
   makes the macro usable as a single statement. */
46 #define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
48 /* internal defines */
49 typedef struct DisasContext {
52 /* Nonzero if this instruction has been conditionally skipped. */
54 /* The label that will be jumped to when the instruction is skipped. */
56 /* Thumb-2 conditional execution bits. */
59 struct TranslationBlock *tb;
60 int singlestep_enabled;
62 #if !defined(CONFIG_USER_ONLY)
70 static uint32_t gen_opc_condexec_bits[OPC_BUF_SIZE];
72 #if defined(CONFIG_USER_ONLY)
75 #define IS_USER(s) (s->user)
78 /* These instructions trap after executing, so defer them until after the
79 conditional execution state has been updated. */
83 static TCGv_ptr cpu_env;
84 /* We reuse the same 64-bit temporaries for efficiency. */
85 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
86 static TCGv_i32 cpu_R[16];
87 static TCGv_i32 cpu_exclusive_addr;
88 static TCGv_i32 cpu_exclusive_val;
89 static TCGv_i32 cpu_exclusive_high;
90 #ifdef CONFIG_USER_ONLY
91 static TCGv_i32 cpu_exclusive_test;
92 static TCGv_i32 cpu_exclusive_info;
95 /* FIXME: These should be removed. */
96 static TCGv cpu_F0s, cpu_F1s;
97 static TCGv_i64 cpu_F0d, cpu_F1d;
99 #include "gen-icount.h"
/* Printable names of the sixteen core registers, indexed by register
   number; entry 15 is spelled "pc" rather than "r15".  The table is
   read-only, so both the strings and the pointer array are const. */
static const char * const regnames[] = {
    "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
    "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc"
};
105 /* initialize TCG globals. */
106 void arm_translate_init(void)
110 cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
112 for (i = 0; i < 16; i++) {
113 cpu_R[i] = tcg_global_mem_new_i32(TCG_AREG0,
114 offsetof(CPUState, regs[i]),
117 cpu_exclusive_addr = tcg_global_mem_new_i32(TCG_AREG0,
118 offsetof(CPUState, exclusive_addr), "exclusive_addr");
119 cpu_exclusive_val = tcg_global_mem_new_i32(TCG_AREG0,
120 offsetof(CPUState, exclusive_val), "exclusive_val");
121 cpu_exclusive_high = tcg_global_mem_new_i32(TCG_AREG0,
122 offsetof(CPUState, exclusive_high), "exclusive_high");
123 #ifdef CONFIG_USER_ONLY
124 cpu_exclusive_test = tcg_global_mem_new_i32(TCG_AREG0,
125 offsetof(CPUState, exclusive_test), "exclusive_test");
126 cpu_exclusive_info = tcg_global_mem_new_i32(TCG_AREG0,
127 offsetof(CPUState, exclusive_info), "exclusive_info");
134 static inline TCGv load_cpu_offset(int offset)
136 TCGv tmp = tcg_temp_new_i32();
137 tcg_gen_ld_i32(tmp, cpu_env, offset);
141 #define load_cpu_field(name) load_cpu_offset(offsetof(CPUState, name))
143 static inline void store_cpu_offset(TCGv var, int offset)
145 tcg_gen_st_i32(var, cpu_env, offset);
146 tcg_temp_free_i32(var);
149 #define store_cpu_field(var, name) \
150 store_cpu_offset(var, offsetof(CPUState, name))
152 /* Set a variable to the value of a CPU register. */
153 static void load_reg_var(DisasContext *s, TCGv var, int reg)
157 /* normally, since we updated PC, we need only to add one insn */
159 addr = (long)s->pc + 2;
161 addr = (long)s->pc + 4;
162 tcg_gen_movi_i32(var, addr);
164 tcg_gen_mov_i32(var, cpu_R[reg]);
168 /* Create a new temporary and set it to the value of a CPU register. */
169 static inline TCGv load_reg(DisasContext *s, int reg)
171 TCGv tmp = tcg_temp_new_i32();
172 load_reg_var(s, tmp, reg);
176 /* Set a CPU register. The source must be a temporary and will be marked as dead. */
178 static void store_reg(DisasContext *s, int reg, TCGv var)
181 tcg_gen_andi_i32(var, var, ~1);
182 s->is_jmp = DISAS_JUMP;
184 tcg_gen_mov_i32(cpu_R[reg], var);
185 tcg_temp_free_i32(var);
188 /* Value extensions. */
189 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
190 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
191 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
192 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
194 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
195 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
198 static inline void gen_set_cpsr(TCGv var, uint32_t mask)
200 TCGv tmp_mask = tcg_const_i32(mask);
201 gen_helper_cpsr_write(var, tmp_mask);
202 tcg_temp_free_i32(tmp_mask);
204 /* Set NZCV flags from the high 4 bits of var. */
205 #define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
207 static void gen_exception(int excp)
209 TCGv tmp = tcg_temp_new_i32();
210 tcg_gen_movi_i32(tmp, excp);
211 gen_helper_exception(tmp);
212 tcg_temp_free_i32(tmp);
215 static void gen_smul_dual(TCGv a, TCGv b)
217 TCGv tmp1 = tcg_temp_new_i32();
218 TCGv tmp2 = tcg_temp_new_i32();
219 tcg_gen_ext16s_i32(tmp1, a);
220 tcg_gen_ext16s_i32(tmp2, b);
221 tcg_gen_mul_i32(tmp1, tmp1, tmp2);
222 tcg_temp_free_i32(tmp2);
223 tcg_gen_sari_i32(a, a, 16);
224 tcg_gen_sari_i32(b, b, 16);
225 tcg_gen_mul_i32(b, b, a);
226 tcg_gen_mov_i32(a, tmp1);
227 tcg_temp_free_i32(tmp1);
230 /* Byteswap each halfword. */
231 static void gen_rev16(TCGv var)
233 TCGv tmp = tcg_temp_new_i32();
234 tcg_gen_shri_i32(tmp, var, 8);
235 tcg_gen_andi_i32(tmp, tmp, 0x00ff00ff);
236 tcg_gen_shli_i32(var, var, 8);
237 tcg_gen_andi_i32(var, var, 0xff00ff00);
238 tcg_gen_or_i32(var, var, tmp);
239 tcg_temp_free_i32(tmp);
242 /* Byteswap low halfword and sign extend. */
243 static void gen_revsh(TCGv var)
245 tcg_gen_ext16u_i32(var, var);
246 tcg_gen_bswap16_i32(var, var);
247 tcg_gen_ext16s_i32(var, var);
250 /* Unsigned bitfield extract. */
251 static void gen_ubfx(TCGv var, int shift, uint32_t mask)
254 tcg_gen_shri_i32(var, var, shift);
255 tcg_gen_andi_i32(var, var, mask);
258 /* Signed bitfield extract. */
259 static void gen_sbfx(TCGv var, int shift, int width)
264 tcg_gen_sari_i32(var, var, shift);
265 if (shift + width < 32) {
266 signbit = 1u << (width - 1);
267 tcg_gen_andi_i32(var, var, (1u << width) - 1);
268 tcg_gen_xori_i32(var, var, signbit);
269 tcg_gen_subi_i32(var, var, signbit);
273 /* Bitfield insertion. Insert val into base. Clobbers base and val. */
274 static void gen_bfi(TCGv dest, TCGv base, TCGv val, int shift, uint32_t mask)
276 tcg_gen_andi_i32(val, val, mask);
277 tcg_gen_shli_i32(val, val, shift);
278 tcg_gen_andi_i32(base, base, ~(mask << shift));
279 tcg_gen_or_i32(dest, base, val);
282 /* Return (b << 32) + a. Mark inputs as dead */
283 static TCGv_i64 gen_addq_msw(TCGv_i64 a, TCGv b)
285 TCGv_i64 tmp64 = tcg_temp_new_i64();
287 tcg_gen_extu_i32_i64(tmp64, b);
288 tcg_temp_free_i32(b);
289 tcg_gen_shli_i64(tmp64, tmp64, 32);
290 tcg_gen_add_i64(a, tmp64, a);
292 tcg_temp_free_i64(tmp64);
296 /* Return (b << 32) - a. Mark inputs as dead. */
297 static TCGv_i64 gen_subq_msw(TCGv_i64 a, TCGv b)
299 TCGv_i64 tmp64 = tcg_temp_new_i64();
301 tcg_gen_extu_i32_i64(tmp64, b);
302 tcg_temp_free_i32(b);
303 tcg_gen_shli_i64(tmp64, tmp64, 32);
304 tcg_gen_sub_i64(a, tmp64, a);
306 tcg_temp_free_i64(tmp64);
310 /* FIXME: Most targets have native widening multiplication.
311 It would be good to use that instead of a full wide multiply. */
312 /* 32x32->64 multiply. Marks inputs as dead. */
313 static TCGv_i64 gen_mulu_i64_i32(TCGv a, TCGv b)
315 TCGv_i64 tmp1 = tcg_temp_new_i64();
316 TCGv_i64 tmp2 = tcg_temp_new_i64();
318 tcg_gen_extu_i32_i64(tmp1, a);
319 tcg_temp_free_i32(a);
320 tcg_gen_extu_i32_i64(tmp2, b);
321 tcg_temp_free_i32(b);
322 tcg_gen_mul_i64(tmp1, tmp1, tmp2);
323 tcg_temp_free_i64(tmp2);
327 static TCGv_i64 gen_muls_i64_i32(TCGv a, TCGv b)
329 TCGv_i64 tmp1 = tcg_temp_new_i64();
330 TCGv_i64 tmp2 = tcg_temp_new_i64();
332 tcg_gen_ext_i32_i64(tmp1, a);
333 tcg_temp_free_i32(a);
334 tcg_gen_ext_i32_i64(tmp2, b);
335 tcg_temp_free_i32(b);
336 tcg_gen_mul_i64(tmp1, tmp1, tmp2);
337 tcg_temp_free_i64(tmp2);
341 /* Swap low and high halfwords. */
342 static void gen_swap_half(TCGv var)
344 TCGv tmp = tcg_temp_new_i32();
345 tcg_gen_shri_i32(tmp, var, 16);
346 tcg_gen_shli_i32(var, var, 16);
347 tcg_gen_or_i32(var, var, tmp);
348 tcg_temp_free_i32(tmp);
351 /* Dual 16-bit add. Result placed in t0 and t1 is marked as dead.
352 tmp = (t0 ^ t1) & 0x8000;
353 t0 &= ~0x8000;
354 t1 &= ~0x8000;
355 t0 = (t0 + t1) ^ tmp;
356 */
358 static void gen_add16(TCGv t0, TCGv t1)
360 TCGv tmp = tcg_temp_new_i32();
361 tcg_gen_xor_i32(tmp, t0, t1);
362 tcg_gen_andi_i32(tmp, tmp, 0x8000);
363 tcg_gen_andi_i32(t0, t0, ~0x8000);
364 tcg_gen_andi_i32(t1, t1, ~0x8000);
365 tcg_gen_add_i32(t0, t0, t1);
366 tcg_gen_xor_i32(t0, t0, tmp);
367 tcg_temp_free_i32(tmp);
368 tcg_temp_free_i32(t1);
371 #define gen_set_CF(var) tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, CF))
373 /* Set CF to the top bit of var. */
374 static void gen_set_CF_bit31(TCGv var)
376 TCGv tmp = tcg_temp_new_i32();
377 tcg_gen_shri_i32(tmp, var, 31);
379 tcg_temp_free_i32(tmp);
382 /* Set N and Z flags from var. */
383 static inline void gen_logic_CC(TCGv var)
385 tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, NF));
386 tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, ZF));
390 static void gen_adc(TCGv t0, TCGv t1)
393 tcg_gen_add_i32(t0, t0, t1);
394 tmp = load_cpu_field(CF);
395 tcg_gen_add_i32(t0, t0, tmp);
396 tcg_temp_free_i32(tmp);
399 /* dest = T0 + T1 + CF. */
400 static void gen_add_carry(TCGv dest, TCGv t0, TCGv t1)
403 tcg_gen_add_i32(dest, t0, t1);
404 tmp = load_cpu_field(CF);
405 tcg_gen_add_i32(dest, dest, tmp);
406 tcg_temp_free_i32(tmp);
409 /* dest = T0 - T1 + CF - 1. */
410 static void gen_sub_carry(TCGv dest, TCGv t0, TCGv t1)
413 tcg_gen_sub_i32(dest, t0, t1);
414 tmp = load_cpu_field(CF);
415 tcg_gen_add_i32(dest, dest, tmp);
416 tcg_gen_subi_i32(dest, dest, 1);
417 tcg_temp_free_i32(tmp);
420 /* FIXME: Implement this natively. */
421 #define tcg_gen_abs_i32(t0, t1) gen_helper_abs(t0, t1)
423 static void shifter_out_im(TCGv var, int shift)
425 TCGv tmp = tcg_temp_new_i32();
427 tcg_gen_andi_i32(tmp, var, 1);
429 tcg_gen_shri_i32(tmp, var, shift);
431 tcg_gen_andi_i32(tmp, tmp, 1);
434 tcg_temp_free_i32(tmp);
437 /* Shift by immediate. Includes special handling for shift == 0. */
438 static inline void gen_arm_shift_im(TCGv var, int shiftop, int shift, int flags)
444 shifter_out_im(var, 32 - shift);
445 tcg_gen_shli_i32(var, var, shift);
451 tcg_gen_shri_i32(var, var, 31);
454 tcg_gen_movi_i32(var, 0);
457 shifter_out_im(var, shift - 1);
458 tcg_gen_shri_i32(var, var, shift);
465 shifter_out_im(var, shift - 1);
468 tcg_gen_sari_i32(var, var, shift);
470 case 3: /* ROR/RRX */
473 shifter_out_im(var, shift - 1);
474 tcg_gen_rotri_i32(var, var, shift); break;
476 TCGv tmp = load_cpu_field(CF);
478 shifter_out_im(var, 0);
479 tcg_gen_shri_i32(var, var, 1);
480 tcg_gen_shli_i32(tmp, tmp, 31);
481 tcg_gen_or_i32(var, var, tmp);
482 tcg_temp_free_i32(tmp);
487 static inline void gen_arm_shift_reg(TCGv var, int shiftop,
488 TCGv shift, int flags)
492 case 0: gen_helper_shl_cc(var, var, shift); break;
493 case 1: gen_helper_shr_cc(var, var, shift); break;
494 case 2: gen_helper_sar_cc(var, var, shift); break;
495 case 3: gen_helper_ror_cc(var, var, shift); break;
499 case 0: gen_helper_shl(var, var, shift); break;
500 case 1: gen_helper_shr(var, var, shift); break;
501 case 2: gen_helper_sar(var, var, shift); break;
502 case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
503 tcg_gen_rotr_i32(var, var, shift); break;
506 tcg_temp_free_i32(shift);
509 #define PAS_OP(pfx) \
511 case 0: gen_pas_helper(glue(pfx,add16)); break; \
512 case 1: gen_pas_helper(glue(pfx,addsubx)); break; \
513 case 2: gen_pas_helper(glue(pfx,subaddx)); break; \
514 case 3: gen_pas_helper(glue(pfx,sub16)); break; \
515 case 4: gen_pas_helper(glue(pfx,add8)); break; \
516 case 7: gen_pas_helper(glue(pfx,sub8)); break; \
518 static void gen_arm_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
523 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
525 tmp = tcg_temp_new_ptr();
526 tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
528 tcg_temp_free_ptr(tmp);
531 tmp = tcg_temp_new_ptr();
532 tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
534 tcg_temp_free_ptr(tmp);
536 #undef gen_pas_helper
537 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
550 #undef gen_pas_helper
555 /* For unknown reasons Arm and Thumb-2 use arbitrarily different encodings. */
556 #define PAS_OP(pfx) \
558 case 0: gen_pas_helper(glue(pfx,add8)); break; \
559 case 1: gen_pas_helper(glue(pfx,add16)); break; \
560 case 2: gen_pas_helper(glue(pfx,addsubx)); break; \
561 case 4: gen_pas_helper(glue(pfx,sub8)); break; \
562 case 5: gen_pas_helper(glue(pfx,sub16)); break; \
563 case 6: gen_pas_helper(glue(pfx,subaddx)); break; \
565 static void gen_thumb2_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
570 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
572 tmp = tcg_temp_new_ptr();
573 tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
575 tcg_temp_free_ptr(tmp);
578 tmp = tcg_temp_new_ptr();
579 tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
581 tcg_temp_free_ptr(tmp);
583 #undef gen_pas_helper
584 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
597 #undef gen_pas_helper
602 static void gen_test_cc(int cc, int label)
610 tmp = load_cpu_field(ZF);
611 tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
614 tmp = load_cpu_field(ZF);
615 tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
618 tmp = load_cpu_field(CF);
619 tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
622 tmp = load_cpu_field(CF);
623 tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
626 tmp = load_cpu_field(NF);
627 tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
630 tmp = load_cpu_field(NF);
631 tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
634 tmp = load_cpu_field(VF);
635 tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
638 tmp = load_cpu_field(VF);
639 tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
641 case 8: /* hi: C && !Z */
642 inv = gen_new_label();
643 tmp = load_cpu_field(CF);
644 tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, inv);
645 tcg_temp_free_i32(tmp);
646 tmp = load_cpu_field(ZF);
647 tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
650 case 9: /* ls: !C || Z */
651 tmp = load_cpu_field(CF);
652 tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
653 tcg_temp_free_i32(tmp);
654 tmp = load_cpu_field(ZF);
655 tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
657 case 10: /* ge: N == V -> N ^ V == 0 */
658 tmp = load_cpu_field(VF);
659 tmp2 = load_cpu_field(NF);
660 tcg_gen_xor_i32(tmp, tmp, tmp2);
661 tcg_temp_free_i32(tmp2);
662 tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
664 case 11: /* lt: N != V -> N ^ V != 0 */
665 tmp = load_cpu_field(VF);
666 tmp2 = load_cpu_field(NF);
667 tcg_gen_xor_i32(tmp, tmp, tmp2);
668 tcg_temp_free_i32(tmp2);
669 tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
671 case 12: /* gt: !Z && N == V */
672 inv = gen_new_label();
673 tmp = load_cpu_field(ZF);
674 tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, inv);
675 tcg_temp_free_i32(tmp);
676 tmp = load_cpu_field(VF);
677 tmp2 = load_cpu_field(NF);
678 tcg_gen_xor_i32(tmp, tmp, tmp2);
679 tcg_temp_free_i32(tmp2);
680 tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
683 case 13: /* le: Z || N != V */
684 tmp = load_cpu_field(ZF);
685 tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
686 tcg_temp_free_i32(tmp);
687 tmp = load_cpu_field(VF);
688 tmp2 = load_cpu_field(NF);
689 tcg_gen_xor_i32(tmp, tmp, tmp2);
690 tcg_temp_free_i32(tmp2);
691 tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
694 fprintf(stderr, "Bad condition code 0x%x\n", cc);
697 tcg_temp_free_i32(tmp);
700 static const uint8_t table_logic_cc[16] = {
719 /* Set PC and Thumb state from an immediate address. */
720 static inline void gen_bx_im(DisasContext *s, uint32_t addr)
724 s->is_jmp = DISAS_UPDATE;
725 if (s->thumb != (addr & 1)) {
726 tmp = tcg_temp_new_i32();
727 tcg_gen_movi_i32(tmp, addr & 1);
728 tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, thumb));
729 tcg_temp_free_i32(tmp);
731 tcg_gen_movi_i32(cpu_R[15], addr & ~1);
734 /* Set PC and Thumb state from var. var is marked as dead. */
735 static inline void gen_bx(DisasContext *s, TCGv var)
737 s->is_jmp = DISAS_UPDATE;
738 tcg_gen_andi_i32(cpu_R[15], var, ~1);
739 tcg_gen_andi_i32(var, var, 1);
740 store_cpu_field(var, thumb);
743 /* Variant of store_reg which uses branch&exchange logic when storing
744 to r15 in ARM architecture v7 and above. The source must be a temporary
745 and will be marked as dead. */
746 static inline void store_reg_bx(CPUState *env, DisasContext *s,
749 if (reg == 15 && ENABLE_ARCH_7) {
752 store_reg(s, reg, var);
756 /* Variant of store_reg which uses branch&exchange logic when storing
757 * to r15 in ARM architecture v5T and above. This is used for storing
758 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
759 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
760 static inline void store_reg_from_load(CPUState *env, DisasContext *s,
763 if (reg == 15 && ENABLE_ARCH_5) {
766 store_reg(s, reg, var);
770 static inline TCGv gen_ld8s(TCGv addr, int index)
772 TCGv tmp = tcg_temp_new_i32();
773 tcg_gen_qemu_ld8s(tmp, addr, index);
776 static inline TCGv gen_ld8u(TCGv addr, int index)
778 TCGv tmp = tcg_temp_new_i32();
779 tcg_gen_qemu_ld8u(tmp, addr, index);
782 static inline TCGv gen_ld16s(TCGv addr, int index)
784 TCGv tmp = tcg_temp_new_i32();
785 tcg_gen_qemu_ld16s(tmp, addr, index);
788 static inline TCGv gen_ld16u(TCGv addr, int index)
790 TCGv tmp = tcg_temp_new_i32();
791 tcg_gen_qemu_ld16u(tmp, addr, index);
794 static inline TCGv gen_ld32(TCGv addr, int index)
796 TCGv tmp = tcg_temp_new_i32();
797 tcg_gen_qemu_ld32u(tmp, addr, index);
800 static inline TCGv_i64 gen_ld64(TCGv addr, int index)
802 TCGv_i64 tmp = tcg_temp_new_i64();
803 tcg_gen_qemu_ld64(tmp, addr, index);
806 static inline void gen_st8(TCGv val, TCGv addr, int index)
808 tcg_gen_qemu_st8(val, addr, index);
809 tcg_temp_free_i32(val);
811 static inline void gen_st16(TCGv val, TCGv addr, int index)
813 tcg_gen_qemu_st16(val, addr, index);
814 tcg_temp_free_i32(val);
816 static inline void gen_st32(TCGv val, TCGv addr, int index)
818 tcg_gen_qemu_st32(val, addr, index);
819 tcg_temp_free_i32(val);
821 static inline void gen_st64(TCGv_i64 val, TCGv addr, int index)
823 tcg_gen_qemu_st64(val, addr, index);
824 tcg_temp_free_i64(val);
827 static inline void gen_set_pc_im(uint32_t val)
829 tcg_gen_movi_i32(cpu_R[15], val);
832 /* Force a TB lookup after an instruction that changes the CPU state. */
833 static inline void gen_lookup_tb(DisasContext *s)
835 tcg_gen_movi_i32(cpu_R[15], s->pc & ~1);
836 s->is_jmp = DISAS_UPDATE;
839 static inline void gen_add_data_offset(DisasContext *s, unsigned int insn,
842 int val, rm, shift, shiftop;
845 if (!(insn & (1 << 25))) {
848 if (!(insn & (1 << 23)))
851 tcg_gen_addi_i32(var, var, val);
855 shift = (insn >> 7) & 0x1f;
856 shiftop = (insn >> 5) & 3;
857 offset = load_reg(s, rm);
858 gen_arm_shift_im(offset, shiftop, shift, 0);
859 if (!(insn & (1 << 23)))
860 tcg_gen_sub_i32(var, var, offset);
862 tcg_gen_add_i32(var, var, offset);
863 tcg_temp_free_i32(offset);
867 static inline void gen_add_datah_offset(DisasContext *s, unsigned int insn,
873 if (insn & (1 << 22)) {
875 val = (insn & 0xf) | ((insn >> 4) & 0xf0);
876 if (!(insn & (1 << 23)))
880 tcg_gen_addi_i32(var, var, val);
884 tcg_gen_addi_i32(var, var, extra);
886 offset = load_reg(s, rm);
887 if (!(insn & (1 << 23)))
888 tcg_gen_sub_i32(var, var, offset);
890 tcg_gen_add_i32(var, var, offset);
891 tcg_temp_free_i32(offset);
895 static TCGv_ptr get_fpstatus_ptr(int neon)
897 TCGv_ptr statusptr = tcg_temp_new_ptr();
900 offset = offsetof(CPUState, vfp.standard_fp_status);
902 offset = offsetof(CPUState, vfp.fp_status);
904 tcg_gen_addi_ptr(statusptr, cpu_env, offset);
908 #define VFP_OP2(name) \
909 static inline void gen_vfp_##name(int dp) \
911 TCGv_ptr fpst = get_fpstatus_ptr(0); \
913 gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, cpu_F1d, fpst); \
915 gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, cpu_F1s, fpst); \
917 tcg_temp_free_ptr(fpst); \
927 static inline void gen_vfp_F1_mul(int dp)
929 /* Like gen_vfp_mul() but put result in F1 */
930 TCGv_ptr fpst = get_fpstatus_ptr(0);
932 gen_helper_vfp_muld(cpu_F1d, cpu_F0d, cpu_F1d, fpst);
934 gen_helper_vfp_muls(cpu_F1s, cpu_F0s, cpu_F1s, fpst);
936 tcg_temp_free_ptr(fpst);
939 static inline void gen_vfp_F1_neg(int dp)
941 /* Like gen_vfp_neg() but put result in F1 */
943 gen_helper_vfp_negd(cpu_F1d, cpu_F0d);
945 gen_helper_vfp_negs(cpu_F1s, cpu_F0s);
949 static inline void gen_vfp_abs(int dp)
952 gen_helper_vfp_absd(cpu_F0d, cpu_F0d);
954 gen_helper_vfp_abss(cpu_F0s, cpu_F0s);
957 static inline void gen_vfp_neg(int dp)
960 gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
962 gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
965 static inline void gen_vfp_sqrt(int dp)
968 gen_helper_vfp_sqrtd(cpu_F0d, cpu_F0d, cpu_env);
970 gen_helper_vfp_sqrts(cpu_F0s, cpu_F0s, cpu_env);
973 static inline void gen_vfp_cmp(int dp)
976 gen_helper_vfp_cmpd(cpu_F0d, cpu_F1d, cpu_env);
978 gen_helper_vfp_cmps(cpu_F0s, cpu_F1s, cpu_env);
981 static inline void gen_vfp_cmpe(int dp)
984 gen_helper_vfp_cmped(cpu_F0d, cpu_F1d, cpu_env);
986 gen_helper_vfp_cmpes(cpu_F0s, cpu_F1s, cpu_env);
989 static inline void gen_vfp_F1_ld0(int dp)
992 tcg_gen_movi_i64(cpu_F1d, 0);
994 tcg_gen_movi_i32(cpu_F1s, 0);
997 #define VFP_GEN_ITOF(name) \
998 static inline void gen_vfp_##name(int dp, int neon) \
1000 TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
1002 gen_helper_vfp_##name##d(cpu_F0d, cpu_F0s, statusptr); \
1004 gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
1006 tcg_temp_free_ptr(statusptr); \
1013 #define VFP_GEN_FTOI(name) \
1014 static inline void gen_vfp_##name(int dp, int neon) \
1016 TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
1018 gen_helper_vfp_##name##d(cpu_F0s, cpu_F0d, statusptr); \
1020 gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
1022 tcg_temp_free_ptr(statusptr); \
1031 #define VFP_GEN_FIX(name) \
1032 static inline void gen_vfp_##name(int dp, int shift, int neon) \
1034 TCGv tmp_shift = tcg_const_i32(shift); \
1035 TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
1037 gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, tmp_shift, statusptr); \
1039 gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, tmp_shift, statusptr); \
1041 tcg_temp_free_i32(tmp_shift); \
1042 tcg_temp_free_ptr(statusptr); \
1054 static inline void gen_vfp_ld(DisasContext *s, int dp, TCGv addr)
1057 tcg_gen_qemu_ld64(cpu_F0d, addr, IS_USER(s));
1059 tcg_gen_qemu_ld32u(cpu_F0s, addr, IS_USER(s));
1062 static inline void gen_vfp_st(DisasContext *s, int dp, TCGv addr)
1065 tcg_gen_qemu_st64(cpu_F0d, addr, IS_USER(s));
1067 tcg_gen_qemu_st32(cpu_F0s, addr, IS_USER(s));
1071 vfp_reg_offset (int dp, int reg)
1074 return offsetof(CPUARMState, vfp.regs[reg]);
1076 return offsetof(CPUARMState, vfp.regs[reg >> 1])
1077 + offsetof(CPU_DoubleU, l.upper);
1079 return offsetof(CPUARMState, vfp.regs[reg >> 1])
1080 + offsetof(CPU_DoubleU, l.lower);
1084 /* Return the offset of a 32-bit piece of a NEON register.
1085 zero is the least significant end of the register. */
1087 neon_reg_offset (int reg, int n)
1091 return vfp_reg_offset(0, sreg);
1094 static TCGv neon_load_reg(int reg, int pass)
1096 TCGv tmp = tcg_temp_new_i32();
1097 tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1101 static void neon_store_reg(int reg, int pass, TCGv var)
1103 tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1104 tcg_temp_free_i32(var);
1107 static inline void neon_load_reg64(TCGv_i64 var, int reg)
1109 tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
1112 static inline void neon_store_reg64(TCGv_i64 var, int reg)
1114 tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
1117 #define tcg_gen_ld_f32 tcg_gen_ld_i32
1118 #define tcg_gen_ld_f64 tcg_gen_ld_i64
1119 #define tcg_gen_st_f32 tcg_gen_st_i32
1120 #define tcg_gen_st_f64 tcg_gen_st_i64
1122 static inline void gen_mov_F0_vreg(int dp, int reg)
1125 tcg_gen_ld_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
1127 tcg_gen_ld_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
1130 static inline void gen_mov_F1_vreg(int dp, int reg)
1133 tcg_gen_ld_f64(cpu_F1d, cpu_env, vfp_reg_offset(dp, reg));
1135 tcg_gen_ld_f32(cpu_F1s, cpu_env, vfp_reg_offset(dp, reg));
1138 static inline void gen_mov_vreg_F0(int dp, int reg)
1141 tcg_gen_st_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
1143 tcg_gen_st_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
1146 #define ARM_CP_RW_BIT (1 << 20)
1148 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1150 tcg_gen_ld_i64(var, cpu_env, offsetof(CPUState, iwmmxt.regs[reg]));
1153 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1155 tcg_gen_st_i64(var, cpu_env, offsetof(CPUState, iwmmxt.regs[reg]));
1158 static inline TCGv iwmmxt_load_creg(int reg)
1160 TCGv var = tcg_temp_new_i32();
1161 tcg_gen_ld_i32(var, cpu_env, offsetof(CPUState, iwmmxt.cregs[reg]));
1165 static inline void iwmmxt_store_creg(int reg, TCGv var)
1167 tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, iwmmxt.cregs[reg]));
1168 tcg_temp_free_i32(var);
1171 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1173 iwmmxt_store_reg(cpu_M0, rn);
1176 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1178 iwmmxt_load_reg(cpu_M0, rn);
1181 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1183 iwmmxt_load_reg(cpu_V1, rn);
1184 tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1187 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1189 iwmmxt_load_reg(cpu_V1, rn);
1190 tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1193 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1195 iwmmxt_load_reg(cpu_V1, rn);
1196 tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1199 #define IWMMXT_OP(name) \
1200 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1202 iwmmxt_load_reg(cpu_V1, rn); \
1203 gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1206 #define IWMMXT_OP_ENV(name) \
1207 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1209 iwmmxt_load_reg(cpu_V1, rn); \
1210 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1213 #define IWMMXT_OP_ENV_SIZE(name) \
1214 IWMMXT_OP_ENV(name##b) \
1215 IWMMXT_OP_ENV(name##w) \
1216 IWMMXT_OP_ENV(name##l)
1218 #define IWMMXT_OP_ENV1(name) \
1219 static inline void gen_op_iwmmxt_##name##_M0(void) \
1221 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1235 IWMMXT_OP_ENV_SIZE(unpackl)
1236 IWMMXT_OP_ENV_SIZE(unpackh)
1238 IWMMXT_OP_ENV1(unpacklub)
1239 IWMMXT_OP_ENV1(unpackluw)
1240 IWMMXT_OP_ENV1(unpacklul)
1241 IWMMXT_OP_ENV1(unpackhub)
1242 IWMMXT_OP_ENV1(unpackhuw)
1243 IWMMXT_OP_ENV1(unpackhul)
1244 IWMMXT_OP_ENV1(unpacklsb)
1245 IWMMXT_OP_ENV1(unpacklsw)
1246 IWMMXT_OP_ENV1(unpacklsl)
1247 IWMMXT_OP_ENV1(unpackhsb)
1248 IWMMXT_OP_ENV1(unpackhsw)
1249 IWMMXT_OP_ENV1(unpackhsl)
1251 IWMMXT_OP_ENV_SIZE(cmpeq)
1252 IWMMXT_OP_ENV_SIZE(cmpgtu)
1253 IWMMXT_OP_ENV_SIZE(cmpgts)
1255 IWMMXT_OP_ENV_SIZE(mins)
1256 IWMMXT_OP_ENV_SIZE(minu)
1257 IWMMXT_OP_ENV_SIZE(maxs)
1258 IWMMXT_OP_ENV_SIZE(maxu)
1260 IWMMXT_OP_ENV_SIZE(subn)
1261 IWMMXT_OP_ENV_SIZE(addn)
1262 IWMMXT_OP_ENV_SIZE(subu)
1263 IWMMXT_OP_ENV_SIZE(addu)
1264 IWMMXT_OP_ENV_SIZE(subs)
1265 IWMMXT_OP_ENV_SIZE(adds)
1267 IWMMXT_OP_ENV(avgb0)
1268 IWMMXT_OP_ENV(avgb1)
1269 IWMMXT_OP_ENV(avgw0)
1270 IWMMXT_OP_ENV(avgw1)
1274 IWMMXT_OP_ENV(packuw)
1275 IWMMXT_OP_ENV(packul)
1276 IWMMXT_OP_ENV(packuq)
1277 IWMMXT_OP_ENV(packsw)
1278 IWMMXT_OP_ENV(packsl)
1279 IWMMXT_OP_ENV(packsq)
1281 static void gen_op_iwmmxt_set_mup(void)
1284 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1285 tcg_gen_ori_i32(tmp, tmp, 2);
1286 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1289 static void gen_op_iwmmxt_set_cup(void)
1292 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1293 tcg_gen_ori_i32(tmp, tmp, 1);
1294 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1297 static void gen_op_iwmmxt_setpsr_nz(void)
1299 TCGv tmp = tcg_temp_new_i32();
1300 gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1301 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1304 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1306 iwmmxt_load_reg(cpu_V1, rn);
1307 tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1308 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1311 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn, TCGv dest)
1317 rd = (insn >> 16) & 0xf;
1318 tmp = load_reg(s, rd);
1320 offset = (insn & 0xff) << ((insn >> 7) & 2);
1321 if (insn & (1 << 24)) {
1323 if (insn & (1 << 23))
1324 tcg_gen_addi_i32(tmp, tmp, offset);
1326 tcg_gen_addi_i32(tmp, tmp, -offset);
1327 tcg_gen_mov_i32(dest, tmp);
1328 if (insn & (1 << 21))
1329 store_reg(s, rd, tmp);
1331 tcg_temp_free_i32(tmp);
1332 } else if (insn & (1 << 21)) {
1334 tcg_gen_mov_i32(dest, tmp);
1335 if (insn & (1 << 23))
1336 tcg_gen_addi_i32(tmp, tmp, offset);
1338 tcg_gen_addi_i32(tmp, tmp, -offset);
1339 store_reg(s, rd, tmp);
1340 } else if (!(insn & (1 << 23)))
1345 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv dest)
1347 int rd = (insn >> 0) & 0xf;
1350 if (insn & (1 << 8)) {
1351 if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1354 tmp = iwmmxt_load_creg(rd);
1357 tmp = tcg_temp_new_i32();
1358 iwmmxt_load_reg(cpu_V0, rd);
1359 tcg_gen_trunc_i64_i32(tmp, cpu_V0);
1361 tcg_gen_andi_i32(tmp, tmp, mask);
1362 tcg_gen_mov_i32(dest, tmp);
1363 tcg_temp_free_i32(tmp);
1367 /* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred
1368 (i.e. an undefined instruction). */
/* Decoder layout, as far as the visible code shows:
 *  - encodings with (insn & 0x0e000e00) == 0x0c000000 cover the register-pair
 *    transfers (TMRRC/TMCRR) and the WLDR/WSTR load/store forms;
 *  - the remaining encodings are tested for bits [27:24] == 0xe and are then
 *    dispatched on a 12-bit key made of insn bits [23:20] (key bits [11:8])
 *    and insn bits [11:4] (key bits [7:0]).
 * Most arms follow one pattern: load the first source into cpu_M0, apply one
 * operation against a second wR register, write cpu_M0 back to wRd and call
 * gen_op_iwmmxt_set_mup() (and, for many arms, gen_op_iwmmxt_set_cup()).
 */
1369 static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
1372 int rdhi, rdlo, rd0, rd1, i;
1374 TCGv tmp, tmp2, tmp3;
1376 if ((insn & 0x0e000e00) == 0x0c000000) {
1377 if ((insn & 0x0fe00ff0) == 0x0c400000) {
1379 rdlo = (insn >> 12) & 0xf;
1380 rdhi = (insn >> 16) & 0xf;
/* TMRRC splits the 64-bit wR register into rdlo (low word) and rdhi (high
 * word); TMCRR rebuilds the 64-bit value from the same register pair. */
1381 if (insn & ARM_CP_RW_BIT) { /* TMRRC */
1382 iwmmxt_load_reg(cpu_V0, wrd);
1383 tcg_gen_trunc_i64_i32(cpu_R[rdlo], cpu_V0);
1384 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
1385 tcg_gen_trunc_i64_i32(cpu_R[rdhi], cpu_V0);
1386 } else { /* TMCRR */
1387 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1388 iwmmxt_store_reg(cpu_V0, wrd);
1389 gen_op_iwmmxt_set_mup();
/* Load/store forms. gen_iwmmxt_address() fills in addr; when it returns
 * nonzero the temporary is released again. */
1394 wrd = (insn >> 12) & 0xf;
1395 addr = tcg_temp_new_i32();
1396 if (gen_iwmmxt_address(s, insn, addr)) {
1397 tcg_temp_free_i32(addr);
/* Loads: a condition field of 0xf selects the control register (wCx) form;
 * otherwise bits 8 and 22 pick the access width (D, W, H or B) and the
 * narrower loads are zero-extended into cpu_M0. */
1400 if (insn & ARM_CP_RW_BIT) {
1401 if ((insn >> 28) == 0xf) { /* WLDRW wCx */
1402 tmp = tcg_temp_new_i32();
1403 tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s));
1404 iwmmxt_store_creg(wrd, tmp);
1407 if (insn & (1 << 8)) {
1408 if (insn & (1 << 22)) { /* WLDRD */
1409 tcg_gen_qemu_ld64(cpu_M0, addr, IS_USER(s));
1411 } else { /* WLDRW wRd */
1412 tmp = gen_ld32(addr, IS_USER(s));
1415 if (insn & (1 << 22)) { /* WLDRH */
1416 tmp = gen_ld16u(addr, IS_USER(s));
1417 } else { /* WLDRB */
1418 tmp = gen_ld8u(addr, IS_USER(s));
1422 tcg_gen_extu_i32_i64(cpu_M0, tmp);
1423 tcg_temp_free_i32(tmp);
1425 gen_op_iwmmxt_movq_wRn_M0(wrd);
/* Stores mirror the load decoding; the 64-bit form stores cpu_M0 directly,
 * so the 32-bit temporary is released before it. */
1428 if ((insn >> 28) == 0xf) { /* WSTRW wCx */
1429 tmp = iwmmxt_load_creg(wrd);
1430 gen_st32(tmp, addr, IS_USER(s));
1432 gen_op_iwmmxt_movq_M0_wRn(wrd);
1433 tmp = tcg_temp_new_i32();
1434 if (insn & (1 << 8)) {
1435 if (insn & (1 << 22)) { /* WSTRD */
1436 tcg_temp_free_i32(tmp);
1437 tcg_gen_qemu_st64(cpu_M0, addr, IS_USER(s));
1438 } else { /* WSTRW wRd */
1439 tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1440 gen_st32(tmp, addr, IS_USER(s));
1443 if (insn & (1 << 22)) { /* WSTRH */
1444 tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1445 gen_st16(tmp, addr, IS_USER(s));
1446 } else { /* WSTRB */
1447 tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1448 gen_st8(tmp, addr, IS_USER(s));
1453 tcg_temp_free_i32(addr);
/* Data-processing and register-transfer forms: bits [27:24] are tested
 * against 0xe, then the instruction is dispatched on the key
 * (insn[23:20] << 8) | insn[11:4]. */
1457 if ((insn & 0x0f000000) != 0x0e000000)
1460 switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1461 case 0x000: /* WOR */
1462 wrd = (insn >> 12) & 0xf;
1463 rd0 = (insn >> 0) & 0xf;
1464 rd1 = (insn >> 16) & 0xf;
1465 gen_op_iwmmxt_movq_M0_wRn(rd0);
1466 gen_op_iwmmxt_orq_M0_wRn(rd1);
1467 gen_op_iwmmxt_setpsr_nz();
1468 gen_op_iwmmxt_movq_wRn_M0(wrd);
1469 gen_op_iwmmxt_set_mup();
1470 gen_op_iwmmxt_set_cup();
/* TMCR: write a core register to an iwMMXt control register. wCSSF is
 * updated with and-complement (bits set in the core register are cleared
 * from the old value); wCGR0-wCGR3 are overwritten with the core register. */
1472 case 0x011: /* TMCR */
1475 rd = (insn >> 12) & 0xf;
1476 wrd = (insn >> 16) & 0xf;
1478 case ARM_IWMMXT_wCID:
1479 case ARM_IWMMXT_wCASF:
1481 case ARM_IWMMXT_wCon:
1482 gen_op_iwmmxt_set_cup();
1484 case ARM_IWMMXT_wCSSF:
1485 tmp = iwmmxt_load_creg(wrd);
1486 tmp2 = load_reg(s, rd);
1487 tcg_gen_andc_i32(tmp, tmp, tmp2);
1488 tcg_temp_free_i32(tmp2);
1489 iwmmxt_store_creg(wrd, tmp);
1491 case ARM_IWMMXT_wCGR0:
1492 case ARM_IWMMXT_wCGR1:
1493 case ARM_IWMMXT_wCGR2:
1494 case ARM_IWMMXT_wCGR3:
1495 gen_op_iwmmxt_set_cup();
1496 tmp = load_reg(s, rd);
1497 iwmmxt_store_creg(wrd, tmp);
1503 case 0x100: /* WXOR */
1504 wrd = (insn >> 12) & 0xf;
1505 rd0 = (insn >> 0) & 0xf;
1506 rd1 = (insn >> 16) & 0xf;
1507 gen_op_iwmmxt_movq_M0_wRn(rd0);
1508 gen_op_iwmmxt_xorq_M0_wRn(rd1);
1509 gen_op_iwmmxt_setpsr_nz();
1510 gen_op_iwmmxt_movq_wRn_M0(wrd);
1511 gen_op_iwmmxt_set_mup();
1512 gen_op_iwmmxt_set_cup();
/* TMRC: read an iwMMXt control register into a core register. */
1514 case 0x111: /* TMRC */
1517 rd = (insn >> 12) & 0xf;
1518 wrd = (insn >> 16) & 0xf;
1519 tmp = iwmmxt_load_creg(wrd);
1520 store_reg(s, rd, tmp);
/* NOTE(review): the operand loaded from rd0 is transformed with
 * tcg_gen_neg_i64 (arithmetic negate) before the AND. An "and-not"
 * operation would normally need a bitwise NOT here - confirm against the
 * iwMMXt definition of WANDN. */
1522 case 0x300: /* WANDN */
1523 wrd = (insn >> 12) & 0xf;
1524 rd0 = (insn >> 0) & 0xf;
1525 rd1 = (insn >> 16) & 0xf;
1526 gen_op_iwmmxt_movq_M0_wRn(rd0);
1527 tcg_gen_neg_i64(cpu_M0, cpu_M0);
1528 gen_op_iwmmxt_andq_M0_wRn(rd1);
1529 gen_op_iwmmxt_setpsr_nz();
1530 gen_op_iwmmxt_movq_wRn_M0(wrd);
1531 gen_op_iwmmxt_set_mup();
1532 gen_op_iwmmxt_set_cup();
1534 case 0x200: /* WAND */
1535 wrd = (insn >> 12) & 0xf;
1536 rd0 = (insn >> 0) & 0xf;
1537 rd1 = (insn >> 16) & 0xf;
1538 gen_op_iwmmxt_movq_M0_wRn(rd0);
1539 gen_op_iwmmxt_andq_M0_wRn(rd1);
1540 gen_op_iwmmxt_setpsr_nz();
1541 gen_op_iwmmxt_movq_wRn_M0(wrd);
1542 gen_op_iwmmxt_set_mup();
1543 gen_op_iwmmxt_set_cup();
/* WMADD: bit 21 selects the signed helper (maddsq) over the unsigned one. */
1545 case 0x810: case 0xa10: /* WMADD */
1546 wrd = (insn >> 12) & 0xf;
1547 rd0 = (insn >> 0) & 0xf;
1548 rd1 = (insn >> 16) & 0xf;
1549 gen_op_iwmmxt_movq_M0_wRn(rd0);
1550 if (insn & (1 << 21))
1551 gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1553 gen_op_iwmmxt_madduq_M0_wRn(rd1);
1554 gen_op_iwmmxt_movq_wRn_M0(wrd);
1555 gen_op_iwmmxt_set_mup();
/* From here on many arms select the element size (byte, word, long) from
 * insn bits [23:22]. */
1557 case 0x10e: case 0x50e: case 0x90e: case 0xd0e: /* WUNPCKIL */
1558 wrd = (insn >> 12) & 0xf;
1559 rd0 = (insn >> 16) & 0xf;
1560 rd1 = (insn >> 0) & 0xf;
1561 gen_op_iwmmxt_movq_M0_wRn(rd0);
1562 switch ((insn >> 22) & 3) {
1564 gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1567 gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1570 gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1575 gen_op_iwmmxt_movq_wRn_M0(wrd);
1576 gen_op_iwmmxt_set_mup();
1577 gen_op_iwmmxt_set_cup();
1579 case 0x10c: case 0x50c: case 0x90c: case 0xd0c: /* WUNPCKIH */
1580 wrd = (insn >> 12) & 0xf;
1581 rd0 = (insn >> 16) & 0xf;
1582 rd1 = (insn >> 0) & 0xf;
1583 gen_op_iwmmxt_movq_M0_wRn(rd0);
1584 switch ((insn >> 22) & 3) {
1586 gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1589 gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1592 gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1597 gen_op_iwmmxt_movq_wRn_M0(wrd);
1598 gen_op_iwmmxt_set_mup();
1599 gen_op_iwmmxt_set_cup();
/* WSAD: bit 22 selects the word variant over the byte variant; when bit 20
 * is clear the previous wRd value is folded in through addl. */
1601 case 0x012: case 0x112: case 0x412: case 0x512: /* WSAD */
1602 wrd = (insn >> 12) & 0xf;
1603 rd0 = (insn >> 16) & 0xf;
1604 rd1 = (insn >> 0) & 0xf;
1605 gen_op_iwmmxt_movq_M0_wRn(rd0);
1606 if (insn & (1 << 22))
1607 gen_op_iwmmxt_sadw_M0_wRn(rd1);
1609 gen_op_iwmmxt_sadb_M0_wRn(rd1);
1610 if (!(insn & (1 << 20)))
1611 gen_op_iwmmxt_addl_M0_wRn(wrd);
1612 gen_op_iwmmxt_movq_wRn_M0(wrd);
1613 gen_op_iwmmxt_set_mup();
/* WMUL: bit 21 selects signed vs. unsigned, bit 20 the "h" vs. "l" helper. */
1615 case 0x010: case 0x110: case 0x210: case 0x310: /* WMUL */
1616 wrd = (insn >> 12) & 0xf;
1617 rd0 = (insn >> 16) & 0xf;
1618 rd1 = (insn >> 0) & 0xf;
1619 gen_op_iwmmxt_movq_M0_wRn(rd0);
1620 if (insn & (1 << 21)) {
1621 if (insn & (1 << 20))
1622 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1624 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1626 if (insn & (1 << 20))
1627 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1629 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1631 gen_op_iwmmxt_movq_wRn_M0(wrd);
1632 gen_op_iwmmxt_set_mup();
/* WMAC: bit 21 selects the signed helper; when bit 20 is clear the result
 * is added to the existing 64-bit contents of wRd. */
1634 case 0x410: case 0x510: case 0x610: case 0x710: /* WMAC */
1635 wrd = (insn >> 12) & 0xf;
1636 rd0 = (insn >> 16) & 0xf;
1637 rd1 = (insn >> 0) & 0xf;
1638 gen_op_iwmmxt_movq_M0_wRn(rd0);
1639 if (insn & (1 << 21))
1640 gen_op_iwmmxt_macsw_M0_wRn(rd1);
1642 gen_op_iwmmxt_macuw_M0_wRn(rd1);
1643 if (!(insn & (1 << 20))) {
1644 iwmmxt_load_reg(cpu_V1, wrd);
1645 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1647 gen_op_iwmmxt_movq_wRn_M0(wrd);
1648 gen_op_iwmmxt_set_mup();
1650 case 0x006: case 0x406: case 0x806: case 0xc06: /* WCMPEQ */
1651 wrd = (insn >> 12) & 0xf;
1652 rd0 = (insn >> 16) & 0xf;
1653 rd1 = (insn >> 0) & 0xf;
1654 gen_op_iwmmxt_movq_M0_wRn(rd0);
1655 switch ((insn >> 22) & 3) {
1657 gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1660 gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1663 gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1668 gen_op_iwmmxt_movq_wRn_M0(wrd);
1669 gen_op_iwmmxt_set_mup();
1670 gen_op_iwmmxt_set_cup();
/* WAVG2: bit 22 selects word vs. byte elements, bit 20 the "1" vs. "0"
 * helper variant. */
1672 case 0x800: case 0x900: case 0xc00: case 0xd00: /* WAVG2 */
1673 wrd = (insn >> 12) & 0xf;
1674 rd0 = (insn >> 16) & 0xf;
1675 rd1 = (insn >> 0) & 0xf;
1676 gen_op_iwmmxt_movq_M0_wRn(rd0);
1677 if (insn & (1 << 22)) {
1678 if (insn & (1 << 20))
1679 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1681 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1683 if (insn & (1 << 20))
1684 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1686 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1688 gen_op_iwmmxt_movq_wRn_M0(wrd);
1689 gen_op_iwmmxt_set_mup();
1690 gen_op_iwmmxt_set_cup();
/* WALIGNR: the alignment amount is the low 3 bits of one of wCGR0..wCGR3,
 * selected by insn bits [21:20]. */
1692 case 0x802: case 0x902: case 0xa02: case 0xb02: /* WALIGNR */
1693 wrd = (insn >> 12) & 0xf;
1694 rd0 = (insn >> 16) & 0xf;
1695 rd1 = (insn >> 0) & 0xf;
1696 gen_op_iwmmxt_movq_M0_wRn(rd0);
1697 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1698 tcg_gen_andi_i32(tmp, tmp, 7);
1699 iwmmxt_load_reg(cpu_V1, rd1);
1700 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1701 tcg_temp_free_i32(tmp);
1702 gen_op_iwmmxt_movq_wRn_M0(wrd);
1703 gen_op_iwmmxt_set_mup();
/* TINSR: insert a core register into one element of wRd. The element size
 * comes from insn bits [7:6]; tmp2 is the element mask and tmp3 the bit
 * offset derived from the lane index in the low bits of insn. */
1705 case 0x601: case 0x605: case 0x609: case 0x60d: /* TINSR */
1706 if (((insn >> 6) & 3) == 3)
1708 rd = (insn >> 12) & 0xf;
1709 wrd = (insn >> 16) & 0xf;
1710 tmp = load_reg(s, rd);
1711 gen_op_iwmmxt_movq_M0_wRn(wrd);
1712 switch ((insn >> 6) & 3) {
1714 tmp2 = tcg_const_i32(0xff);
1715 tmp3 = tcg_const_i32((insn & 7) << 3);
1718 tmp2 = tcg_const_i32(0xffff);
1719 tmp3 = tcg_const_i32((insn & 3) << 4);
1722 tmp2 = tcg_const_i32(0xffffffff);
1723 tmp3 = tcg_const_i32((insn & 1) << 5);
1729 gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1730 tcg_temp_free(tmp3);
1731 tcg_temp_free(tmp2);
1732 tcg_temp_free_i32(tmp);
1733 gen_op_iwmmxt_movq_wRn_M0(wrd);
1734 gen_op_iwmmxt_set_mup();
/* TEXTRM: extract one element of wRn into a core register by shifting the
 * selected lane down to bit 0 and then sign- or zero-extending it. */
1736 case 0x107: case 0x507: case 0x907: case 0xd07: /* TEXTRM */
1737 rd = (insn >> 12) & 0xf;
1738 wrd = (insn >> 16) & 0xf;
1739 if (rd == 15 || ((insn >> 22) & 3) == 3)
1741 gen_op_iwmmxt_movq_M0_wRn(wrd);
1742 tmp = tcg_temp_new_i32();
1743 switch ((insn >> 22) & 3) {
1745 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1746 tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1748 tcg_gen_ext8s_i32(tmp, tmp);
1750 tcg_gen_andi_i32(tmp, tmp, 0xff);
1754 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1755 tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1757 tcg_gen_ext16s_i32(tmp, tmp);
1759 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1763 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1764 tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1767 store_reg(s, rd, tmp);
/* TEXTRC: shift the selected element's flag bits of wCASF down, then move
 * the low nibble up to bits [31:28]. */
1769 case 0x117: case 0x517: case 0x917: case 0xd17: /* TEXTRC */
1770 if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1772 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1773 switch ((insn >> 22) & 3) {
1775 tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1778 tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1781 tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1784 tcg_gen_shli_i32(tmp, tmp, 28);
1786 tcg_temp_free_i32(tmp);
/* TBCST: broadcast a core register into every element of wRd; the element
 * size comes from insn bits [7:6]. */
1788 case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */
1789 if (((insn >> 6) & 3) == 3)
1791 rd = (insn >> 12) & 0xf;
1792 wrd = (insn >> 16) & 0xf;
1793 tmp = load_reg(s, rd);
1794 switch ((insn >> 6) & 3) {
1796 gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1799 gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1802 gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1805 tcg_temp_free_i32(tmp);
1806 gen_op_iwmmxt_movq_wRn_M0(wrd);
1807 gen_op_iwmmxt_set_mup();
/* TANDC: AND together the per-element flag groups of wCASF by repeatedly
 * shifting a copy left by one group (4, 8 or 16 bits depending on the
 * element size) and ANDing it into the running value. */
1809 case 0x113: case 0x513: case 0x913: case 0xd13: /* TANDC */
1810 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1812 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1813 tmp2 = tcg_temp_new_i32();
1814 tcg_gen_mov_i32(tmp2, tmp);
1815 switch ((insn >> 22) & 3) {
1817 for (i = 0; i < 7; i ++) {
1818 tcg_gen_shli_i32(tmp2, tmp2, 4);
1819 tcg_gen_and_i32(tmp, tmp, tmp2);
1823 for (i = 0; i < 3; i ++) {
1824 tcg_gen_shli_i32(tmp2, tmp2, 8);
1825 tcg_gen_and_i32(tmp, tmp, tmp2);
1829 tcg_gen_shli_i32(tmp2, tmp2, 16);
1830 tcg_gen_and_i32(tmp, tmp, tmp2);
1834 tcg_temp_free_i32(tmp2);
1835 tcg_temp_free_i32(tmp);
1837 case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */
1838 wrd = (insn >> 12) & 0xf;
1839 rd0 = (insn >> 16) & 0xf;
1840 gen_op_iwmmxt_movq_M0_wRn(rd0);
1841 switch ((insn >> 22) & 3) {
1843 gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
1846 gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
1849 gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
1854 gen_op_iwmmxt_movq_wRn_M0(wrd);
1855 gen_op_iwmmxt_set_mup();
/* TORC: same folding scheme as TANDC, but combining the flag groups of
 * wCASF with OR. */
1857 case 0x115: case 0x515: case 0x915: case 0xd15: /* TORC */
1858 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1860 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1861 tmp2 = tcg_temp_new_i32();
1862 tcg_gen_mov_i32(tmp2, tmp);
1863 switch ((insn >> 22) & 3) {
1865 for (i = 0; i < 7; i ++) {
1866 tcg_gen_shli_i32(tmp2, tmp2, 4);
1867 tcg_gen_or_i32(tmp, tmp, tmp2);
1871 for (i = 0; i < 3; i ++) {
1872 tcg_gen_shli_i32(tmp2, tmp2, 8);
1873 tcg_gen_or_i32(tmp, tmp, tmp2);
1877 tcg_gen_shli_i32(tmp2, tmp2, 16);
1878 tcg_gen_or_i32(tmp, tmp, tmp2);
1882 tcg_temp_free_i32(tmp2);
1883 tcg_temp_free_i32(tmp);
/* TMOVMSK: the msbb/msbw/msbl helpers reduce wRn to a 32-bit value that is
 * written to core register rd. */
1885 case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */
1886 rd = (insn >> 12) & 0xf;
1887 rd0 = (insn >> 16) & 0xf;
1888 if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
1890 gen_op_iwmmxt_movq_M0_wRn(rd0);
1891 tmp = tcg_temp_new_i32();
1892 switch ((insn >> 22) & 3) {
1894 gen_helper_iwmmxt_msbb(tmp, cpu_M0);
1897 gen_helper_iwmmxt_msbw(tmp, cpu_M0);
1900 gen_helper_iwmmxt_msbl(tmp, cpu_M0);
1903 store_reg(s, rd, tmp);
/* In the following arms bit 21 selects the signed helper over the unsigned
 * one within each element size. */
1905 case 0x106: case 0x306: case 0x506: case 0x706: /* WCMPGT */
1906 case 0x906: case 0xb06: case 0xd06: case 0xf06:
1907 wrd = (insn >> 12) & 0xf;
1908 rd0 = (insn >> 16) & 0xf;
1909 rd1 = (insn >> 0) & 0xf;
1910 gen_op_iwmmxt_movq_M0_wRn(rd0);
1911 switch ((insn >> 22) & 3) {
1913 if (insn & (1 << 21))
1914 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
1916 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
1919 if (insn & (1 << 21))
1920 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
1922 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
1925 if (insn & (1 << 21))
1926 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
1928 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
1933 gen_op_iwmmxt_movq_wRn_M0(wrd);
1934 gen_op_iwmmxt_set_mup();
1935 gen_op_iwmmxt_set_cup();
1937 case 0x00e: case 0x20e: case 0x40e: case 0x60e: /* WUNPCKEL */
1938 case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
1939 wrd = (insn >> 12) & 0xf;
1940 rd0 = (insn >> 16) & 0xf;
1941 gen_op_iwmmxt_movq_M0_wRn(rd0);
1942 switch ((insn >> 22) & 3) {
1944 if (insn & (1 << 21))
1945 gen_op_iwmmxt_unpacklsb_M0();
1947 gen_op_iwmmxt_unpacklub_M0();
1950 if (insn & (1 << 21))
1951 gen_op_iwmmxt_unpacklsw_M0();
1953 gen_op_iwmmxt_unpackluw_M0();
1956 if (insn & (1 << 21))
1957 gen_op_iwmmxt_unpacklsl_M0();
1959 gen_op_iwmmxt_unpacklul_M0();
1964 gen_op_iwmmxt_movq_wRn_M0(wrd);
1965 gen_op_iwmmxt_set_mup();
1966 gen_op_iwmmxt_set_cup();
1968 case 0x00c: case 0x20c: case 0x40c: case 0x60c: /* WUNPCKEH */
1969 case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
1970 wrd = (insn >> 12) & 0xf;
1971 rd0 = (insn >> 16) & 0xf;
1972 gen_op_iwmmxt_movq_M0_wRn(rd0);
1973 switch ((insn >> 22) & 3) {
1975 if (insn & (1 << 21))
1976 gen_op_iwmmxt_unpackhsb_M0();
1978 gen_op_iwmmxt_unpackhub_M0();
1981 if (insn & (1 << 21))
1982 gen_op_iwmmxt_unpackhsw_M0();
1984 gen_op_iwmmxt_unpackhuw_M0();
1987 if (insn & (1 << 21))
1988 gen_op_iwmmxt_unpackhsl_M0();
1990 gen_op_iwmmxt_unpackhul_M0();
1995 gen_op_iwmmxt_movq_wRn_M0(wrd);
1996 gen_op_iwmmxt_set_mup();
1997 gen_op_iwmmxt_set_cup();
/* Shift/rotate group (WSRL, WSRA, WSLL, WROR). Each arm first tests for
 * element size 0, then calls gen_iwmmxt_shift() to obtain the shift amount
 * in tmp; a nonzero return from it releases tmp. The second argument is
 * the mask applied to the amount (0xff for the shifts; 0xf, 0x1f or 0x3f
 * for the rotates, depending on element size). */
1999 case 0x204: case 0x604: case 0xa04: case 0xe04: /* WSRL */
2000 case 0x214: case 0x614: case 0xa14: case 0xe14:
2001 if (((insn >> 22) & 3) == 0)
2003 wrd = (insn >> 12) & 0xf;
2004 rd0 = (insn >> 16) & 0xf;
2005 gen_op_iwmmxt_movq_M0_wRn(rd0);
2006 tmp = tcg_temp_new_i32();
2007 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2008 tcg_temp_free_i32(tmp);
2011 switch ((insn >> 22) & 3) {
2013 gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2016 gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2019 gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2022 tcg_temp_free_i32(tmp);
2023 gen_op_iwmmxt_movq_wRn_M0(wrd);
2024 gen_op_iwmmxt_set_mup();
2025 gen_op_iwmmxt_set_cup();
2027 case 0x004: case 0x404: case 0x804: case 0xc04: /* WSRA */
2028 case 0x014: case 0x414: case 0x814: case 0xc14:
2029 if (((insn >> 22) & 3) == 0)
2031 wrd = (insn >> 12) & 0xf;
2032 rd0 = (insn >> 16) & 0xf;
2033 gen_op_iwmmxt_movq_M0_wRn(rd0);
2034 tmp = tcg_temp_new_i32();
2035 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2036 tcg_temp_free_i32(tmp);
2039 switch ((insn >> 22) & 3) {
2041 gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2044 gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2047 gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2050 tcg_temp_free_i32(tmp);
2051 gen_op_iwmmxt_movq_wRn_M0(wrd);
2052 gen_op_iwmmxt_set_mup();
2053 gen_op_iwmmxt_set_cup();
2055 case 0x104: case 0x504: case 0x904: case 0xd04: /* WSLL */
2056 case 0x114: case 0x514: case 0x914: case 0xd14:
2057 if (((insn >> 22) & 3) == 0)
2059 wrd = (insn >> 12) & 0xf;
2060 rd0 = (insn >> 16) & 0xf;
2061 gen_op_iwmmxt_movq_M0_wRn(rd0);
2062 tmp = tcg_temp_new_i32();
2063 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2064 tcg_temp_free_i32(tmp);
2067 switch ((insn >> 22) & 3) {
2069 gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2072 gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2075 gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2078 tcg_temp_free_i32(tmp);
2079 gen_op_iwmmxt_movq_wRn_M0(wrd);
2080 gen_op_iwmmxt_set_mup();
2081 gen_op_iwmmxt_set_cup();
2083 case 0x304: case 0x704: case 0xb04: case 0xf04: /* WROR */
2084 case 0x314: case 0x714: case 0xb14: case 0xf14:
2085 if (((insn >> 22) & 3) == 0)
2087 wrd = (insn >> 12) & 0xf;
2088 rd0 = (insn >> 16) & 0xf;
2089 gen_op_iwmmxt_movq_M0_wRn(rd0);
2090 tmp = tcg_temp_new_i32();
2091 switch ((insn >> 22) & 3) {
2093 if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2094 tcg_temp_free_i32(tmp);
2097 gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2100 if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2101 tcg_temp_free_i32(tmp);
2104 gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2107 if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2108 tcg_temp_free_i32(tmp);
2111 gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2114 tcg_temp_free_i32(tmp);
2115 gen_op_iwmmxt_movq_wRn_M0(wrd);
2116 gen_op_iwmmxt_set_mup();
2117 gen_op_iwmmxt_set_cup();
2119 case 0x116: case 0x316: case 0x516: case 0x716: /* WMIN */
2120 case 0x916: case 0xb16: case 0xd16: case 0xf16:
2121 wrd = (insn >> 12) & 0xf;
2122 rd0 = (insn >> 16) & 0xf;
2123 rd1 = (insn >> 0) & 0xf;
2124 gen_op_iwmmxt_movq_M0_wRn(rd0);
2125 switch ((insn >> 22) & 3) {
2127 if (insn & (1 << 21))
2128 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2130 gen_op_iwmmxt_minub_M0_wRn(rd1);
2133 if (insn & (1 << 21))
2134 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2136 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2139 if (insn & (1 << 21))
2140 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2142 gen_op_iwmmxt_minul_M0_wRn(rd1);
2147 gen_op_iwmmxt_movq_wRn_M0(wrd);
2148 gen_op_iwmmxt_set_mup();
2150 case 0x016: case 0x216: case 0x416: case 0x616: /* WMAX */
2151 case 0x816: case 0xa16: case 0xc16: case 0xe16:
2152 wrd = (insn >> 12) & 0xf;
2153 rd0 = (insn >> 16) & 0xf;
2154 rd1 = (insn >> 0) & 0xf;
2155 gen_op_iwmmxt_movq_M0_wRn(rd0);
2156 switch ((insn >> 22) & 3) {
2158 if (insn & (1 << 21))
2159 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2161 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2164 if (insn & (1 << 21))
2165 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2167 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2170 if (insn & (1 << 21))
2171 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2173 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2178 gen_op_iwmmxt_movq_wRn_M0(wrd);
2179 gen_op_iwmmxt_set_mup();
/* WALIGNI: like WALIGNR, but the alignment amount is an immediate taken
 * from insn bits [21:20]. */
2181 case 0x002: case 0x102: case 0x202: case 0x302: /* WALIGNI */
2182 case 0x402: case 0x502: case 0x602: case 0x702:
2183 wrd = (insn >> 12) & 0xf;
2184 rd0 = (insn >> 16) & 0xf;
2185 rd1 = (insn >> 0) & 0xf;
2186 gen_op_iwmmxt_movq_M0_wRn(rd0);
2187 tmp = tcg_const_i32((insn >> 20) & 3);
2188 iwmmxt_load_reg(cpu_V1, rd1);
2189 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2191 gen_op_iwmmxt_movq_wRn_M0(wrd);
2192 gen_op_iwmmxt_set_mup();
/* WSUB: the variant (helper suffix n/u/s and element size b/w/l) is chosen
 * from the whole of insn bits [23:20]. */
2194 case 0x01a: case 0x11a: case 0x21a: case 0x31a: /* WSUB */
2195 case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2196 case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2197 case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2198 wrd = (insn >> 12) & 0xf;
2199 rd0 = (insn >> 16) & 0xf;
2200 rd1 = (insn >> 0) & 0xf;
2201 gen_op_iwmmxt_movq_M0_wRn(rd0);
2202 switch ((insn >> 20) & 0xf) {
2204 gen_op_iwmmxt_subnb_M0_wRn(rd1);
2207 gen_op_iwmmxt_subub_M0_wRn(rd1);
2210 gen_op_iwmmxt_subsb_M0_wRn(rd1);
2213 gen_op_iwmmxt_subnw_M0_wRn(rd1);
2216 gen_op_iwmmxt_subuw_M0_wRn(rd1);
2219 gen_op_iwmmxt_subsw_M0_wRn(rd1);
2222 gen_op_iwmmxt_subnl_M0_wRn(rd1);
2225 gen_op_iwmmxt_subul_M0_wRn(rd1);
2228 gen_op_iwmmxt_subsl_M0_wRn(rd1);
2233 gen_op_iwmmxt_movq_wRn_M0(wrd);
2234 gen_op_iwmmxt_set_mup();
2235 gen_op_iwmmxt_set_cup();
/* WSHUFH: the 8-bit immediate passed to the helper is assembled from insn
 * bits [23:20] (high nibble) and insn bits [3:0] (low nibble). */
2237 case 0x01e: case 0x11e: case 0x21e: case 0x31e: /* WSHUFH */
2238 case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2239 case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2240 case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2241 wrd = (insn >> 12) & 0xf;
2242 rd0 = (insn >> 16) & 0xf;
2243 gen_op_iwmmxt_movq_M0_wRn(rd0);
2244 tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2245 gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2247 gen_op_iwmmxt_movq_wRn_M0(wrd);
2248 gen_op_iwmmxt_set_mup();
2249 gen_op_iwmmxt_set_cup();
/* WADD: decoded the same way as WSUB, from insn bits [23:20]. */
2251 case 0x018: case 0x118: case 0x218: case 0x318: /* WADD */
2252 case 0x418: case 0x518: case 0x618: case 0x718:
2253 case 0x818: case 0x918: case 0xa18: case 0xb18:
2254 case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2255 wrd = (insn >> 12) & 0xf;
2256 rd0 = (insn >> 16) & 0xf;
2257 rd1 = (insn >> 0) & 0xf;
2258 gen_op_iwmmxt_movq_M0_wRn(rd0);
2259 switch ((insn >> 20) & 0xf) {
2261 gen_op_iwmmxt_addnb_M0_wRn(rd1);
2264 gen_op_iwmmxt_addub_M0_wRn(rd1);
2267 gen_op_iwmmxt_addsb_M0_wRn(rd1);
2270 gen_op_iwmmxt_addnw_M0_wRn(rd1);
2273 gen_op_iwmmxt_adduw_M0_wRn(rd1);
2276 gen_op_iwmmxt_addsw_M0_wRn(rd1);
2279 gen_op_iwmmxt_addnl_M0_wRn(rd1);
2282 gen_op_iwmmxt_addul_M0_wRn(rd1);
2285 gen_op_iwmmxt_addsl_M0_wRn(rd1);
2290 gen_op_iwmmxt_movq_wRn_M0(wrd);
2291 gen_op_iwmmxt_set_mup();
2292 gen_op_iwmmxt_set_cup();
/* WPACK: the guard tests that bit 20 is set and that the element size is
 * non-zero; bit 21 then selects the signed helper. */
2294 case 0x008: case 0x108: case 0x208: case 0x308: /* WPACK */
2295 case 0x408: case 0x508: case 0x608: case 0x708:
2296 case 0x808: case 0x908: case 0xa08: case 0xb08:
2297 case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2298 if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2300 wrd = (insn >> 12) & 0xf;
2301 rd0 = (insn >> 16) & 0xf;
2302 rd1 = (insn >> 0) & 0xf;
2303 gen_op_iwmmxt_movq_M0_wRn(rd0);
2304 switch ((insn >> 22) & 3) {
2306 if (insn & (1 << 21))
2307 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2309 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2312 if (insn & (1 << 21))
2313 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2315 gen_op_iwmmxt_packul_M0_wRn(rd1);
2318 if (insn & (1 << 21))
2319 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2321 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2324 gen_op_iwmmxt_movq_wRn_M0(wrd);
2325 gen_op_iwmmxt_set_mup();
2326 gen_op_iwmmxt_set_cup();
/* TMIA / TMIAPH / TMIAxy: two core registers (rd0, rd1) are passed to a
 * muladd helper together with wRd, which here is decoded from insn bits
 * [8:5]. For TMIAxy, bits 16 and 17 select the top halfword of the first
 * and second operand respectively by shifting it down 16 bits. */
2328 case 0x201: case 0x203: case 0x205: case 0x207:
2329 case 0x209: case 0x20b: case 0x20d: case 0x20f:
2330 case 0x211: case 0x213: case 0x215: case 0x217:
2331 case 0x219: case 0x21b: case 0x21d: case 0x21f:
2332 wrd = (insn >> 5) & 0xf;
2333 rd0 = (insn >> 12) & 0xf;
2334 rd1 = (insn >> 0) & 0xf;
2335 if (rd0 == 0xf || rd1 == 0xf)
2337 gen_op_iwmmxt_movq_M0_wRn(wrd);
2338 tmp = load_reg(s, rd0);
2339 tmp2 = load_reg(s, rd1);
2340 switch ((insn >> 16) & 0xf) {
2341 case 0x0: /* TMIA */
2342 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2344 case 0x8: /* TMIAPH */
2345 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2347 case 0xc: case 0xd: case 0xe: case 0xf: /* TMIAxy */
2348 if (insn & (1 << 16))
2349 tcg_gen_shri_i32(tmp, tmp, 16);
2350 if (insn & (1 << 17))
2351 tcg_gen_shri_i32(tmp2, tmp2, 16);
2352 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2355 tcg_temp_free_i32(tmp2);
2356 tcg_temp_free_i32(tmp);
2359 tcg_temp_free_i32(tmp2);
2360 tcg_temp_free_i32(tmp);
2361 gen_op_iwmmxt_movq_wRn_M0(wrd);
2362 gen_op_iwmmxt_set_mup();
2371 /* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred
2372 (i.e. an undefined instruction). */
/* Two encodings are recognised in the visible code:
 *  - (insn & 0x0ff00f10) == 0x0e200010: multiply with internal accumulate
 *    (MIA, MIAPH, MIAxy), reusing the iwMMXt muladd helpers;
 *  - (insn & 0x0fe00ff8) == 0x0c400000: accumulator access, moving the
 *    accumulator to or from a pair of core registers.
 * The accumulator is accessed through the same iwmmxt_load_reg /
 * iwmmxt_store_reg / movq_wRn_M0 accessors used for the wR registers.
 */
2373 static int disas_dsp_insn(CPUState *env, DisasContext *s, uint32_t insn)
2375 int acc, rd0, rd1, rdhi, rdlo;
2378 if ((insn & 0x0ff00f10) == 0x0e200010) {
2379 /* Multiply with Internal Accumulate Format */
2380 rd0 = (insn >> 12) & 0xf;
/* Accumulator index: insn bits [7:5]. */
2382 acc = (insn >> 5) & 7;
2387 tmp = load_reg(s, rd0);
2388 tmp2 = load_reg(s, rd1);
2389 switch ((insn >> 16) & 0xf) {
2391 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2393 case 0x8: /* MIAPH */
2394 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
/* MIAxy: bits 16 and 17 select the top halfword of the first and second
 * operand respectively by shifting it down 16 bits. */
2396 case 0xc: /* MIABB */
2397 case 0xd: /* MIABT */
2398 case 0xe: /* MIATB */
2399 case 0xf: /* MIATT */
2400 if (insn & (1 << 16))
2401 tcg_gen_shri_i32(tmp, tmp, 16);
2402 if (insn & (1 << 17))
2403 tcg_gen_shri_i32(tmp2, tmp2, 16);
2404 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2409 tcg_temp_free_i32(tmp2);
2410 tcg_temp_free_i32(tmp);
2412 gen_op_iwmmxt_movq_wRn_M0(acc);
2416 if ((insn & 0x0fe00ff8) == 0x0c400000) {
2417 /* Internal Accumulator Access Format */
2418 rdhi = (insn >> 16) & 0xf;
2419 rdlo = (insn >> 12) & 0xf;
/* MRA: the low word goes to rdlo and the high word to rdhi, where it is
 * masked with (1 << (40 - 32)) - 1, i.e. only its low 8 bits are kept.
 * The other direction concatenates rdlo/rdhi and stores the result. */
2425 if (insn & ARM_CP_RW_BIT) { /* MRA */
2426 iwmmxt_load_reg(cpu_V0, acc);
2427 tcg_gen_trunc_i64_i32(cpu_R[rdlo], cpu_V0);
2428 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
2429 tcg_gen_trunc_i64_i32(cpu_R[rdhi], cpu_V0);
2430 tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2432 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2433 iwmmxt_store_reg(cpu_V0, acc);
2441 /* Disassemble system coprocessor instruction. Return nonzero if
2442 instruction is not defined. */
/* The coprocessor number is insn bits [11:8] and the core register is insn
 * bits [15:12]. The access is routed through the per-coprocessor hooks in
 * env->cp[cp]: a read needs cp_read, a write needs cp_write. The PC is
 * synchronised with gen_set_pc_im() before the helper call, and the raw
 * instruction word is passed to the helper as a constant.
 */
2443 static int disas_cp_insn(CPUState *env, DisasContext *s, uint32_t insn)
2446 uint32_t rd = (insn >> 12) & 0xf;
2447 uint32_t cp = (insn >> 8) & 0xf;
/* Read direction: the helper result is written to core register rd. */
2452 if (insn & ARM_CP_RW_BIT) {
2453 if (!env->cp[cp].cp_read)
2455 gen_set_pc_im(s->pc);
2456 tmp = tcg_temp_new_i32();
2457 tmp2 = tcg_const_i32(insn);
2458 gen_helper_get_cp(tmp, cpu_env, tmp2);
2459 tcg_temp_free(tmp2);
2460 store_reg(s, rd, tmp);
/* Write direction: core register rd is handed to the helper. */
2462 if (!env->cp[cp].cp_write)
2464 gen_set_pc_im(s->pc);
2465 tmp = load_reg(s, rd);
2466 tmp2 = tcg_const_i32(insn);
2467 gen_helper_set_cp(cpu_env, tmp2, tmp);
2468 tcg_temp_free(tmp2);
2469 tcg_temp_free_i32(tmp);
/* User-mode permission check for a cp15 access; disas_cp15_insn consults it
 * when IS_USER(s) is set. The instruction is decoded into:
 *   cpn - insn bits [19:16]
 *   cpm - insn bits [3:0]
 *   op  - insn bits [7:5] in the low three bits, combined with insn bits
 *         [23:21] in bits [5:3]
 * For the performance monitor registers gated by PMUSERENR the function
 * returns env->cp15.c9_pmuserenr; the other return statements are not among
 * the visible lines.
 */
2474 static int cp15_user_ok(CPUState *env, uint32_t insn)
2476 int cpn = (insn >> 16) & 0xf;
2477 int cpm = insn & 0xf;
2478 int op = ((insn >> 5) & 7) | ((insn >> 18) & 0x38);
2480 if (arm_feature(env, ARM_FEATURE_V7) && cpn == 9) {
2481 /* Performance monitor registers fall into three categories:
2482 * (a) always UNDEF in usermode
2483 * (b) UNDEF only if PMUSERENR.EN is 0
2484 * (c) always read OK and UNDEF on write (PMUSERENR only)
2486 if ((cpm == 12 && (op < 6)) ||
2487 (cpm == 13 && (op < 3))) {
2488 return env->cp15.c9_pmuserenr;
2489 } else if (cpm == 14 && op == 0 && (insn & ARM_CP_RW_BIT)) {
2490 /* PMUSERENR, read only */
/* c13, c0: op 2 in either direction, op 3 only with the RW bit set. */
2496 if (cpn == 13 && cpm == 0) {
2498 if (op == 2 || (op == 3 && (insn & ARM_CP_RW_BIT)))
2502 /* ISB, DSB, DMB. */
2503 if ((cpm == 5 && op == 4)
2504 || (cpm == 10 && (op == 4 || op == 5)))
/* Handle accesses to the cp15 c13 TLS registers inline instead of through
 * the generic cp15 helpers. Only applies when the core has ARM_FEATURE_V6K
 * and the access targets cpn == 13, cpm == 0. With the RW bit set one of
 * cp15.c13_tls1/2/3 is loaded and written to core register rd; otherwise
 * core register rd is stored into one of those fields. The selection among
 * the three fields (presumably by op) and the return values are not among
 * the visible lines; disas_cp15_insn treats a nonzero result as "handled".
 */
2510 static int cp15_tls_load_store(CPUState *env, DisasContext *s, uint32_t insn, uint32_t rd)
2513 int cpn = (insn >> 16) & 0xf;
2514 int cpm = insn & 0xf;
2515 int op = ((insn >> 5) & 7) | ((insn >> 18) & 0x38);
2517 if (!arm_feature(env, ARM_FEATURE_V6K))
2520 if (!(cpn == 13 && cpm == 0))
2523 if (insn & ARM_CP_RW_BIT) {
2526 tmp = load_cpu_field(cp15.c13_tls1);
2529 tmp = load_cpu_field(cp15.c13_tls2);
2532 tmp = load_cpu_field(cp15.c13_tls3);
2537 store_reg(s, rd, tmp);
2540 tmp = load_reg(s, rd);
2543 store_cpu_field(tmp, cp15.c13_tls1);
2546 store_cpu_field(tmp, cp15.c13_tls2);
2549 store_cpu_field(tmp, cp15.c13_tls3);
2552 tcg_temp_free_i32(tmp);
2559 /* Disassemble system coprocessor (cp15) instruction. Return nonzero if
2560 instruction is not defined. */
/* Order of checks in the visible code: M-profile test, two-register
 * transfer forms (bit 25 clear), bit 4 test, user-mode permission check via
 * cp15_user_ok(), the two legacy wait-for-interrupt encodings, the inline
 * TLS register path (cp15_tls_load_store), and finally the generic
 * get_cp15/set_cp15 helpers with the raw instruction word as a constant.
 */
2561 static int disas_cp15_insn(CPUState *env, DisasContext *s, uint32_t insn)
2566 /* M profile cores use memory mapped registers instead of cp15. */
2567 if (arm_feature(env, ARM_FEATURE_M))
2570 if ((insn & (1 << 25)) == 0) {
2571 if (insn & (1 << 20)) {
2575 /* mcrr. Used for block cache operations, so implement as no-op. */
2578 if ((insn & (1 << 4)) == 0) {
2582 if (IS_USER(s) && !cp15_user_ok(env, insn)) {
2586 /* Pre-v7 versions of the architecture implemented WFI via coprocessor
2587 * instructions rather than a separate instruction.
2589 if ((insn & 0x0fff0fff) == 0x0e070f90) {
2590 /* 0,c7,c0,4: Standard v6 WFI (also used in some pre-v6 cores).
2591 * In v7, this must NOP.
2593 if (!arm_feature(env, ARM_FEATURE_V7)) {
2594 /* Wait for interrupt. */
2595 gen_set_pc_im(s->pc);
2596 s->is_jmp = DISAS_WFI;
2601 if ((insn & 0x0fff0fff) == 0x0e070f58) {
2602 /* 0,c7,c8,2: Not all pre-v6 cores implemented this WFI,
2603 * so this is slightly over-broad.
2605 if (!arm_feature(env, ARM_FEATURE_V6)) {
2606 /* Wait for interrupt. */
2607 gen_set_pc_im(s->pc);
2608 s->is_jmp = DISAS_WFI;
2611 /* Otherwise fall through to handle via helper function.
2612 * In particular, on v7 and some v6 cores this is one of
2613 * the VA-PA registers.
2617 rd = (insn >> 12) & 0xf;
/* TLS register accesses are handled inline when the helper accepts them. */
2619 if (cp15_tls_load_store(env, s, insn, rd))
/* Generic path: tmp2 carries the instruction word for the helper. */
2622 tmp2 = tcg_const_i32(insn);
2623 if (insn & ARM_CP_RW_BIT) {
2624 tmp = tcg_temp_new_i32();
2625 gen_helper_get_cp15(tmp, cpu_env, tmp2);
2626 /* If the destination register is r15 then sets condition codes. */
2628 store_reg(s, rd, tmp);
2630 tcg_temp_free_i32(tmp);
2632 tmp = load_reg(s, rd);
2633 gen_helper_set_cp15(cpu_env, tmp2, tmp);
2634 tcg_temp_free_i32(tmp);
2635 /* Normally we would always end the TB here, but Linux
2636 * arch/arm/mach-pxa/sleep.S expects two instructions following
2637 * an MMU enable to execute from cache. Imitate this behaviour. */
2638 if (!arm_feature(env, ARM_FEATURE_XSCALE) ||
2639 (insn & 0x0fff0fff) != 0x0e010f10)
2642 tcg_temp_free_i32(tmp2);
/* VFP register-number extraction helpers.
 *
 * VFP_REG_SHR(x, n): shift x right by n when n is positive, otherwise shift
 * it left by -n, so callers can pass a computed (possibly negative) amount.
 *
 * VFP_SREG(insn, bigbit, smallbit): single-precision register number. The
 * four bits starting at insn bit `bigbit` become bits [4:1] of the result
 * and insn bit `smallbit` becomes bit 0.
 */
2646 #define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
2647 #define VFP_SREG(insn, bigbit, smallbit) \
2648 ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1))
/* VFP_DREG(reg, insn, bigbit, smallbit): double-precision register number,
 * assigned to `reg`. With ARM_FEATURE_VFP3 the four bits at `bigbit` form
 * bits [3:0] and insn bit `smallbit` becomes bit 4; without VFP3 only the
 * four bits at `bigbit` are used and a set `smallbit` is tested separately.
 * The macro expands a reference to `env`, so it must be in scope at the use
 * site. The _D/_N/_M wrappers that follow fix the bit positions to
 * (12, 22), (16, 7) and (0, 5) respectively.
 */
2649 #define VFP_DREG(reg, insn, bigbit, smallbit) do { \
2650 if (arm_feature(env, ARM_FEATURE_VFP3)) { \
2651 reg = (((insn) >> (bigbit)) & 0x0f) \
2652 | (((insn) >> ((smallbit) - 4)) & 0x10); \
2654 if (insn & (1 << (smallbit))) \
2656 reg = ((insn) >> (bigbit)) & 0x0f; \
2659 #define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22)
2660 #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
2661 #define VFP_SREG_N(insn) VFP_SREG(insn, 16, 7)
2662 #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
2663 #define VFP_SREG_M(insn) VFP_SREG(insn, 0, 5)
2664 #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
2666 /* Move between integer and VFP cores. */
/* gen_vfp_mrs: copy cpu_F0s into a freshly allocated 32-bit temporary.
 * The function is declared to return a TCGv, presumably that temporary
 * (the return statement is not among the visible lines); callers in this
 * file pass the result on to store_reg() or free it themselves. */
2667 static TCGv gen_vfp_mrs(void)
2669 TCGv tmp = tcg_temp_new_i32();
2670 tcg_gen_mov_i32(tmp, cpu_F0s);
/* gen_vfp_msr: copy the 32-bit value tmp into cpu_F0s.
 * The temporary is released here, so the caller must not use or free tmp
 * afterwards. */
2674 static void gen_vfp_msr(TCGv tmp)
2676 tcg_gen_mov_i32(cpu_F0s, tmp);
2677 tcg_temp_free_i32(tmp);
/* Replicate one byte of var across all four byte lanes of var, in place.
 * The byte starting at bit `shift` is moved down to bits [7:0] and
 * zero-extended, then doubled to 16 bits (var |= var << 8) and to 32 bits
 * (var |= var << 16). A scratch temporary is allocated and freed locally. */
2680 static void gen_neon_dup_u8(TCGv var, int shift)
2682 TCGv tmp = tcg_temp_new_i32();
2684 tcg_gen_shri_i32(var, var, shift);
2685 tcg_gen_ext8u_i32(var, var);
2686 tcg_gen_shli_i32(tmp, var, 8);
2687 tcg_gen_or_i32(var, var, tmp);
2688 tcg_gen_shli_i32(tmp, var, 16);
2689 tcg_gen_or_i32(var, var, tmp);
2690 tcg_temp_free_i32(tmp);
/* Replicate the low halfword of var into both halves of var, in place:
 * var is zero-extended from 16 bits and then ORed with itself shifted left
 * by 16. A scratch temporary is allocated and freed locally. */
2693 static void gen_neon_dup_low16(TCGv var)
2695 TCGv tmp = tcg_temp_new_i32();
2696 tcg_gen_ext16u_i32(var, var);
2697 tcg_gen_shli_i32(tmp, var, 16);
2698 tcg_gen_or_i32(var, var, tmp);
2699 tcg_temp_free_i32(tmp);
/* Replicate the high halfword of var into both halves of var, in place:
 * the low halfword is cleared with the 0xffff0000 mask and the result is
 * ORed with itself shifted right by 16. A scratch temporary is allocated
 * and freed locally. */
2702 static void gen_neon_dup_high16(TCGv var)
2704 TCGv tmp = tcg_temp_new_i32();
2705 tcg_gen_andi_i32(var, var, 0xffff0000);
2706 tcg_gen_shri_i32(tmp, var, 16);
2707 tcg_gen_or_i32(var, var, tmp);
2708 tcg_temp_free_i32(tmp);
/* Three element widths are handled in the visible code: an 8-bit load
 * followed by gen_neon_dup_u8(tmp, 0), a 16-bit load followed by
 * gen_neon_dup_low16(tmp), and a plain 32-bit load that needs no
 * replication. The dispatch on `size` and the return statement are not
 * among the visible lines; the function is declared to return a TCGv,
 * presumably the loaded temporary. */
2711 static TCGv gen_load_and_replicate(DisasContext *s, TCGv addr, int size)
2713 /* Load a single Neon element and replicate into a 32 bit TCG reg */
2717 tmp = gen_ld8u(addr, IS_USER(s));
2718 gen_neon_dup_u8(tmp, 0);
2721 tmp = gen_ld16u(addr, IS_USER(s));
2722 gen_neon_dup_low16(tmp);
2725 tmp = gen_ld32(addr, IS_USER(s));
2727 default: /* Avoid compiler warnings. */
2733 /* Disassemble a VFP instruction. Returns nonzero if an error occurred
2734 (i.e. an undefined instruction). */
/* Overview of the decode visible in this function:
 *  - the CPU must have ARM_FEATURE_VFP;
 *  - while s->vfp_enabled is clear, only the fmxr/fmrx encoding
 *    ((insn & 0x0fe00fff) == 0x0ee00a10) naming FPSID, FPEXC, MVFR0 or
 *    MVFR1 passes the early checks;
 *  - the main switch is on insn bits [27:24] and covers single register
 *    transfers, data processing, two-register transfers, single
 *    load/store and load/store multiple.
 * env supplies the feature bits, s is the translation context and insn is
 * the 32-bit instruction word.
 */
2735 static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
2737 uint32_t rd, rn, rm, op, i, n, offset, delta_d, delta_m, bank_mask;
2743 if (!arm_feature(env, ARM_FEATURE_VFP))
2746 if (!s->vfp_enabled) {
2747 /* VFP disabled. Only allow fmxr/fmrx to/from some control regs. */
2748 if ((insn & 0x0fe00fff) != 0x0ee00a10)
2750 rn = (insn >> 16) & 0xf;
2751 if (rn != ARM_VFP_FPSID && rn != ARM_VFP_FPEXC
2752 && rn != ARM_VFP_MVFR1 && rn != ARM_VFP_MVFR0)
/* dp is set when insn bits [11:8] equal 0xb. */
2755 dp = ((insn & 0xf00) == 0xb00);
2756 switch ((insn >> 24) & 0xf) {
2758 if (insn & (1 << 4)) {
2759 /* single register transfer */
2760 rd = (insn >> 12) & 0xf;
2765 VFP_DREG_N(rn, insn);
2768 if (insn & 0x00c00060
2769 && !arm_feature(env, ARM_FEATURE_NEON))
2772 pass = (insn >> 21) & 1;
2773 if (insn & (1 << 22)) {
2775 offset = ((insn >> 5) & 3) * 8;
2776 } else if (insn & (1 << 5)) {
2778 offset = (insn & (1 << 6)) ? 16 : 0;
2783 if (insn & ARM_CP_RW_BIT) {
2785 tmp = neon_load_reg(rn, pass);
2789 tcg_gen_shri_i32(tmp, tmp, offset);
2790 if (insn & (1 << 23))
2796 if (insn & (1 << 23)) {
2798 tcg_gen_shri_i32(tmp, tmp, 16);
2804 tcg_gen_sari_i32(tmp, tmp, 16);
2813 store_reg(s, rd, tmp);
2816 tmp = load_reg(s, rd);
2817 if (insn & (1 << 23)) {
2820 gen_neon_dup_u8(tmp, 0);
2821 } else if (size == 1) {
2822 gen_neon_dup_low16(tmp);
2824 for (n = 0; n <= pass * 2; n++) {
2825 tmp2 = tcg_temp_new_i32();
2826 tcg_gen_mov_i32(tmp2, tmp);
2827 neon_store_reg(rn, n, tmp2);
2829 neon_store_reg(rn, n, tmp);
2834 tmp2 = neon_load_reg(rn, pass);
2835 gen_bfi(tmp, tmp2, tmp, offset, 0xff);
2836 tcg_temp_free_i32(tmp2);
2839 tmp2 = neon_load_reg(rn, pass);
2840 gen_bfi(tmp, tmp2, tmp, offset, 0xffff);
2841 tcg_temp_free_i32(tmp2);
2846 neon_store_reg(rn, pass, tmp);
2850 if ((insn & 0x6f) != 0x00)
2852 rn = VFP_SREG_N(insn);
2853 if (insn & ARM_CP_RW_BIT) {
2855 if (insn & (1 << 21)) {
2856 /* system register */
2861 /* VFP2 allows access to FPSID from userspace.
2862 VFP3 restricts all id registers to privileged
2865 && arm_feature(env, ARM_FEATURE_VFP3))
2867 tmp = load_cpu_field(vfp.xregs[rn]);
2872 tmp = load_cpu_field(vfp.xregs[rn]);
2874 case ARM_VFP_FPINST:
2875 case ARM_VFP_FPINST2:
2876 /* Not present in VFP3. */
2878 || arm_feature(env, ARM_FEATURE_VFP3))
2880 tmp = load_cpu_field(vfp.xregs[rn]);
2884 tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
2885 tcg_gen_andi_i32(tmp, tmp, 0xf0000000);
2887 tmp = tcg_temp_new_i32();
2888 gen_helper_vfp_get_fpscr(tmp, cpu_env);
2894 || !arm_feature(env, ARM_FEATURE_VFP3))
2896 tmp = load_cpu_field(vfp.xregs[rn]);
2902 gen_mov_F0_vreg(0, rn);
2903 tmp = gen_vfp_mrs();
2906 /* Set the 4 flag bits in the CPSR. */
2908 tcg_temp_free_i32(tmp);
2910 store_reg(s, rd, tmp);
2914 tmp = load_reg(s, rd);
2915 if (insn & (1 << 21)) {
2917 /* system register */
2922 /* Writes are ignored. */
2925 gen_helper_vfp_set_fpscr(cpu_env, tmp);
2926 tcg_temp_free_i32(tmp);
2932 /* TODO: VFP subarchitecture support.
2933 * For now, keep the EN bit only */
2934 tcg_gen_andi_i32(tmp, tmp, 1 << 30);
2935 store_cpu_field(tmp, vfp.xregs[rn]);
2938 case ARM_VFP_FPINST:
2939 case ARM_VFP_FPINST2:
2940 store_cpu_field(tmp, vfp.xregs[rn]);
2947 gen_mov_vreg_F0(0, rn);
2952 /* data processing */
2953 /* The opcode is in bits 23, 21, 20 and 6. */
2954 op = ((insn >> 20) & 8) | ((insn >> 19) & 6) | ((insn >> 6) & 1);
2958 rn = ((insn >> 15) & 0x1e) | ((insn >> 7) & 1);
2960 /* rn is register number */
2961 VFP_DREG_N(rn, insn);
2964 if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18))) {
2965 /* Integer or single precision destination. */
2966 rd = VFP_SREG_D(insn);
2968 VFP_DREG_D(rd, insn);
2971 (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14))) {
2972 /* VCVT from int is always from S reg regardless of dp bit.
2973 * VCVT with immediate frac_bits has same format as SREG_M
2975 rm = VFP_SREG_M(insn);
2977 VFP_DREG_M(rm, insn);
2980 rn = VFP_SREG_N(insn);
2981 if (op == 15 && rn == 15) {
2982 /* Double precision destination. */
2983 VFP_DREG_D(rd, insn);
2985 rd = VFP_SREG_D(insn);
2987 /* NB that we implicitly rely on the encoding for the frac_bits
2988 * in VCVT of fixed to float being the same as that of an SREG_M
2990 rm = VFP_SREG_M(insn);
2993 veclen = s->vec_len;
2994 if (op == 15 && rn > 3)
2997 /* Shut up compiler warnings. */
3008 /* Figure out what type of vector operation this is. */
3009 if ((rd & bank_mask) == 0) {
3014 delta_d = (s->vec_stride >> 1) + 1;
3016 delta_d = s->vec_stride + 1;
3018 if ((rm & bank_mask) == 0) {
3019 /* mixed scalar/vector */
3028 /* Load the initial operands. */
3033 /* Integer source */
3034 gen_mov_F0_vreg(0, rm);
3039 gen_mov_F0_vreg(dp, rd);
3040 gen_mov_F1_vreg(dp, rm);
3044 /* Compare with zero */
3045 gen_mov_F0_vreg(dp, rd);
3056 /* Source and destination the same. */
3057 gen_mov_F0_vreg(dp, rd);
3060 /* One source operand. */
3061 gen_mov_F0_vreg(dp, rm);
3065 /* Two source operands. */
3066 gen_mov_F0_vreg(dp, rn);
3067 gen_mov_F1_vreg(dp, rm);
3071 /* Perform the calculation. */
3073 case 0: /* VMLA: fd + (fn * fm) */
3074 /* Note that order of inputs to the add matters for NaNs */
3076 gen_mov_F0_vreg(dp, rd);
3079 case 1: /* VMLS: fd + -(fn * fm) */
3082 gen_mov_F0_vreg(dp, rd);
3085 case 2: /* VNMLS: -fd + (fn * fm) */
3086 /* Note that it isn't valid to replace (-A + B) with (B - A)
3087 * or similar plausible looking simplifications
3088 * because this will give wrong results for NaNs.
3091 gen_mov_F0_vreg(dp, rd);
3095 case 3: /* VNMLA: -fd + -(fn * fm) */
3098 gen_mov_F0_vreg(dp, rd);
3102 case 4: /* mul: fn * fm */
3105 case 5: /* nmul: -(fn * fm) */
3109 case 6: /* add: fn + fm */
3112 case 7: /* sub: fn - fm */
3115 case 8: /* div: fn / fm */
3118 case 14: /* fconst */
3119 if (!arm_feature(env, ARM_FEATURE_VFP3))
/* n takes insn bit 19 (moved up to bit 31); i gathers insn bits
 * [18:16] and [3:0]. */
3122 n = (insn << 12) & 0x80000000;
3123 i = ((insn >> 12) & 0x70) | (insn & 0xf);
3130 tcg_gen_movi_i64(cpu_F0d, ((uint64_t)n) << 32);
3137 tcg_gen_movi_i32(cpu_F0s, n);
3140 case 15: /* extension space */
3154 case 4: /* vcvtb.f32.f16 */
3155 if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
3157 tmp = gen_vfp_mrs();
3158 tcg_gen_ext16u_i32(tmp, tmp);
3159 gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env);
3160 tcg_temp_free_i32(tmp);
3162 case 5: /* vcvtt.f32.f16 */
3163 if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
3165 tmp = gen_vfp_mrs();
3166 tcg_gen_shri_i32(tmp, tmp, 16);
3167 gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env);
3168 tcg_temp_free_i32(tmp);
3170 case 6: /* vcvtb.f16.f32 */
3171 if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
3173 tmp = tcg_temp_new_i32();
3174 gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
3175 gen_mov_F0_vreg(0, rd);
3176 tmp2 = gen_vfp_mrs();
3177 tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
3178 tcg_gen_or_i32(tmp, tmp, tmp2);
3179 tcg_temp_free_i32(tmp2);
3182 case 7: /* vcvtt.f16.f32 */
3183 if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
3185 tmp = tcg_temp_new_i32();
3186 gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
3187 tcg_gen_shli_i32(tmp, tmp, 16);
3188 gen_mov_F0_vreg(0, rd);
3189 tmp2 = gen_vfp_mrs();
3190 tcg_gen_ext16u_i32(tmp2, tmp2);
3191 tcg_gen_or_i32(tmp, tmp, tmp2);
3192 tcg_temp_free_i32(tmp2);
3204 case 11: /* cmpez */
3208 case 15: /* single<->double conversion */
3210 gen_helper_vfp_fcvtsd(cpu_F0s, cpu_F0d, cpu_env);
3212 gen_helper_vfp_fcvtds(cpu_F0d, cpu_F0s, cpu_env);
3214 case 16: /* fuito */
3215 gen_vfp_uito(dp, 0);
3217 case 17: /* fsito */
3218 gen_vfp_sito(dp, 0);
3220 case 20: /* fshto */
3221 if (!arm_feature(env, ARM_FEATURE_VFP3))
3223 gen_vfp_shto(dp, 16 - rm, 0);
3225 case 21: /* fslto */
3226 if (!arm_feature(env, ARM_FEATURE_VFP3))
3228 gen_vfp_slto(dp, 32 - rm, 0);
3230 case 22: /* fuhto */
3231 if (!arm_feature(env, ARM_FEATURE_VFP3))
3233 gen_vfp_uhto(dp, 16 - rm, 0);
3235 case 23: /* fulto */
3236 if (!arm_feature(env, ARM_FEATURE_VFP3))
3238 gen_vfp_ulto(dp, 32 - rm, 0);
3240 case 24: /* ftoui */
3241 gen_vfp_toui(dp, 0);
3243 case 25: /* ftouiz */
3244 gen_vfp_touiz(dp, 0);
3246 case 26: /* ftosi */
3247 gen_vfp_tosi(dp, 0);
3249 case 27: /* ftosiz */
3250 gen_vfp_tosiz(dp, 0);
3252 case 28: /* ftosh */
3253 if (!arm_feature(env, ARM_FEATURE_VFP3))
3255 gen_vfp_tosh(dp, 16 - rm, 0);
3257 case 29: /* ftosl */
3258 if (!arm_feature(env, ARM_FEATURE_VFP3))
3260 gen_vfp_tosl(dp, 32 - rm, 0);
3262 case 30: /* ftouh */
3263 if (!arm_feature(env, ARM_FEATURE_VFP3))
3265 gen_vfp_touh(dp, 16 - rm, 0);
3267 case 31: /* ftoul */
3268 if (!arm_feature(env, ARM_FEATURE_VFP3))
3270 gen_vfp_toul(dp, 32 - rm, 0);
3272 default: /* undefined */
/* NOTE(review): translation-time debug output on stdout; rn is declared
 * uint32_t above but is printed with a signed int conversion. Consider
 * removing the print or using a matching specifier. */
3273 printf ("rn:%d\n", rn);
3277 default: /* undefined */
/* NOTE(review): same remark as for the rn print above; op is uint32_t. */
3278 printf ("op:%d\n", op);
3282 /* Write back the result. */
3283 if (op == 15 && (rn >= 8 && rn <= 11))
3284 ; /* Comparison, do nothing. */
3285 else if (op == 15 && dp && ((rn & 0x1c) == 0x18))
3286 /* VCVT double to int: always integer result. */
3287 gen_mov_vreg_F0(0, rd);
3288 else if (op == 15 && rn == 15)
3290 gen_mov_vreg_F0(!dp, rd);
3292 gen_mov_vreg_F0(dp, rd);
3294 /* break out of the loop if we have finished */
3298 if (op == 15 && delta_m == 0) {
3299 /* single source one-many */
3301 rd = ((rd + delta_d) & (bank_mask - 1))
3303 gen_mov_vreg_F0(dp, rd);
3307 /* Setup the next operands. */
3309 rd = ((rd + delta_d) & (bank_mask - 1))
3313 /* One source operand. */
3314 rm = ((rm + delta_m) & (bank_mask - 1))
3316 gen_mov_F0_vreg(dp, rm);
3318 /* Two source operands. */
3319 rn = ((rn + delta_d) & (bank_mask - 1))
3321 gen_mov_F0_vreg(dp, rn);
3323 rm = ((rm + delta_m) & (bank_mask - 1))
3325 gen_mov_F1_vreg(dp, rm);
3333 if ((insn & 0x03e00000) == 0x00400000) {
3334 /* two-register transfer */
3335 rn = (insn >> 16) & 0xf;
3336 rd = (insn >> 12) & 0xf;
3338 VFP_DREG_M(rm, insn);
3340 rm = VFP_SREG_M(insn);
3343 if (insn & ARM_CP_RW_BIT) {
3346 gen_mov_F0_vreg(0, rm * 2);
3347 tmp = gen_vfp_mrs();
3348 store_reg(s, rd, tmp);
3349 gen_mov_F0_vreg(0, rm * 2 + 1);
3350 tmp = gen_vfp_mrs();
3351 store_reg(s, rn, tmp);
3353 gen_mov_F0_vreg(0, rm);
3354 tmp = gen_vfp_mrs();
3355 store_reg(s, rd, tmp);
3356 gen_mov_F0_vreg(0, rm + 1);
3357 tmp = gen_vfp_mrs();
3358 store_reg(s, rn, tmp);
3363 tmp = load_reg(s, rd);
3365 gen_mov_vreg_F0(0, rm * 2);
3366 tmp = load_reg(s, rn);
3368 gen_mov_vreg_F0(0, rm * 2 + 1);
3370 tmp = load_reg(s, rd);
3372 gen_mov_vreg_F0(0, rm);
3373 tmp = load_reg(s, rn);
3375 gen_mov_vreg_F0(0, rm + 1);
3380 rn = (insn >> 16) & 0xf;
3382 VFP_DREG_D(rd, insn);
3384 rd = VFP_SREG_D(insn);
3385 if (s->thumb && rn == 15) {
/* Thumb with rn == 15: use s->pc with bit 1 cleared as the base address. */
3386 addr = tcg_temp_new_i32();
3387 tcg_gen_movi_i32(addr, s->pc & ~2);
3389 addr = load_reg(s, rn);
3391 if ((insn & 0x01200000) == 0x01000000) {
3392 /* Single load/store */
/* Byte offset: low 8 bits of the insn times 4. */
3393 offset = (insn & 0xff) << 2;
3394 if ((insn & (1 << 23)) == 0)
3396 tcg_gen_addi_i32(addr, addr, offset);
3397 if (insn & (1 << 20)) {
3398 gen_vfp_ld(s, dp, addr);
3399 gen_mov_vreg_F0(dp, rd);
3401 gen_mov_F0_vreg(dp, rd);
3402 gen_vfp_st(s, dp, addr);
3404 tcg_temp_free_i32(addr);
3406 /* load/store multiple */
3408 n = (insn >> 1) & 0x7f;
3412 if (insn & (1 << 24)) /* pre-decrement */
3413 tcg_gen_addi_i32(addr, addr, -((insn & 0xff) << 2));
3419 for (i = 0; i < n; i++) {
3420 if (insn & ARM_CP_RW_BIT) {
3422 gen_vfp_ld(s, dp, addr);
3423 gen_mov_vreg_F0(dp, rd + i);
3426 gen_mov_F0_vreg(dp, rd + i);
3427 gen_vfp_st(s, dp, addr);
3429 tcg_gen_addi_i32(addr, addr, offset);
3431 if (insn & (1 << 21)) {
3433 if (insn & (1 << 24))
3434 offset = -offset * n;
3435 else if (dp && (insn & 1))
3441 tcg_gen_addi_i32(addr, addr, offset);
3442 store_reg(s, rn, addr);
3444 tcg_temp_free_i32(addr);
3450 /* Should never happen. */
/* Emit a jump to guest address dest from exit slot n of the current TB.
 * When dest lies in the same guest page as the TB start (tb->pc), the PC is
 * set and the TB is exited with the value (tb + n); otherwise the visible
 * code only sets the PC (the remainder of that path is not visible in this
 * excerpt). */
3456 static inline void gen_goto_tb(DisasContext *s, int n, uint32_t dest)
3458 TranslationBlock *tb;
3461 if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)) {
3463 gen_set_pc_im(dest);
3464 tcg_gen_exit_tb((tcg_target_long)tb + n);
3466 gen_set_pc_im(dest);
/* Emit an unconditional jump to dest.  With single-stepping enabled an
 * indirect jump is used instead (see the comment below); otherwise the jump
 * goes through gen_goto_tb() slot 0 and the block is marked DISAS_TB_JUMP. */
3471 static inline void gen_jmp (DisasContext *s, uint32_t dest)
3473 if (unlikely(s->singlestep_enabled)) {
3474 /* An indirect jump so that we still trigger the debug exception. */
3479 gen_goto_tb(s, 0, dest);
3480 s->is_jmp = DISAS_TB_JUMP;
/* Emit a 32-bit multiply of t0 by t1, leaving the product in t0.
 * Before the multiply each operand may be arithmetically shifted right by
 * 16 bits.  NOTE(review): presumably x and y choose that treatment for t0
 * and t1 respectively; the selecting conditions are not visible in this
 * excerpt. */
3484 static inline void gen_mulxy(TCGv t0, TCGv t1, int x, int y)
3487 tcg_gen_sari_i32(t0, t0, 16);
3491 tcg_gen_sari_i32(t1, t1, 16);
3494 tcg_gen_mul_i32(t0, t0, t1);
3497 /* Return the mask of PSR bits set by a MSR instruction.
 * Bits 0..3 of flags are each tested to build the initial mask (the
 * individual contributions are not visible in this excerpt).  The mask is
 * then reduced: reserved bits are cleared, and bits are cleared for
 * architecture features that env does not report (CPSR_Q without V5,
 * CPSR_E and CPSR_GE without V6).
 */
3498 static uint32_t msr_mask(CPUState *env, DisasContext *s, int flags, int spsr) {
3502 if (flags & (1 << 0))
3504 if (flags & (1 << 1))
3506 if (flags & (1 << 2))
3508 if (flags & (1 << 3))
3511 /* Mask out undefined bits. */
3512 mask &= ~CPSR_RESERVED;
3513 if (!arm_feature(env, ARM_FEATURE_V4T))
3515 if (!arm_feature(env, ARM_FEATURE_V5))
3516 mask &= ~CPSR_Q; /* V5TE in reality */
3517 if (!arm_feature(env, ARM_FEATURE_V6))
3518 mask &= ~(CPSR_E | CPSR_GE);
3519 if (!arm_feature(env, ARM_FEATURE_THUMB2))
3521 /* Mask out execution state bits. */
3524 /* Mask out privileged bits. */
3530 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead.
 * Two update paths are visible: the SPSR path computes
 * (old_spsr & ~mask) | (t0 & mask) and stores it back to the spsr field;
 * the other path hands t0 and mask to gen_set_cpsr().  t0 is freed here, so
 * the caller must not use it afterwards.
 */
3531 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv t0)
3535 /* ??? This is also undefined in system mode. */
3539 tmp = load_cpu_field(spsr);
3540 tcg_gen_andi_i32(tmp, tmp, ~mask);
3541 tcg_gen_andi_i32(t0, t0, mask);
3542 tcg_gen_or_i32(tmp, tmp, t0);
3543 store_cpu_field(tmp, spsr);
3545 gen_set_cpsr(t0, mask);
3547 tcg_temp_free_i32(t0);
3552 /* Returns nonzero if access to the PSR is not permitted.
 * Immediate form of gen_set_psr(): val is materialised in a fresh 32-bit
 * temporary which is passed to gen_set_psr() together with mask and spsr;
 * gen_set_psr() takes over (and frees) the temporary.
 */
3553 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
3556 tmp = tcg_temp_new_i32();
3557 tcg_gen_movi_i32(tmp, val);
3558 return gen_set_psr(s, mask, spsr, tmp);
3561 /* Generate an old-style exception return. Marks pc as dead.
 * pc is stored to r15, then the spsr field is loaded and written to the
 * CPSR with an all-ones mask.  The block is marked DISAS_UPDATE.
 */
3562 static void gen_exception_return(DisasContext *s, TCGv pc)
3565 store_reg(s, 15, pc);
3566 tmp = load_cpu_field(spsr);
3567 gen_set_cpsr(tmp, 0xffffffff);
3568 tcg_temp_free_i32(tmp);
3569 s->is_jmp = DISAS_UPDATE;
3572 /* Generate a v6 exception return. Marks both values as dead.
 * cpsr is written to the CPSR with an all-ones mask and freed, then pc is
 * stored to r15.  The block is marked DISAS_UPDATE.
 */
3573 static void gen_rfe(DisasContext *s, TCGv pc, TCGv cpsr)
3575 gen_set_cpsr(cpsr, 0xffffffff);
3576 tcg_temp_free_i32(cpsr);
3577 store_reg(s, 15, pc);
3578 s->is_jmp = DISAS_UPDATE;
3582 gen_set_condexec (DisasContext *s)
/* When s->condexec_mask is nonzero, write the packed value
 * (condexec_cond << 4) | (condexec_mask >> 1) to the condexec_bits CPU
 * field; nothing is emitted when the mask is zero. */
3584 if (s->condexec_mask) {
3585 uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
3586 TCGv tmp = tcg_temp_new_i32();
3587 tcg_gen_movi_i32(tmp, val);
3588 store_cpu_field(tmp, condexec_bits);
/* Raise exception excp for the current instruction: sync the conditional
 * execution state, set the PC to s->pc - offset, emit the exception and
 * mark the block DISAS_JUMP. */
3592 static void gen_exception_insn(DisasContext *s, int offset, int excp)
3594 gen_set_condexec(s);
3595 gen_set_pc_im(s->pc - offset);
3596 gen_exception(excp);
3597 s->is_jmp = DISAS_JUMP;
/* Handle a hint instruction identified by val.
 * The only handling visible here sets the PC to s->pc and marks the block
 * DISAS_WFI; which val selects it is not visible in this excerpt.  SEV and
 * WFE are not implemented (see the TODO below). */
3600 static void gen_nop_hint(DisasContext *s, int val)
3604 gen_set_pc_im(s->pc);
3605 s->is_jmp = DISAS_WFI;
3609 /* TODO: Implement SEV and WFE. May help SMP performance. */
/* Shorthand for the three-argument list (cpu_V0, cpu_V0, cpu_V1); used as
 * the complete argument list of the 64-bit add/subtract emitters below. */
3615 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1
/* Emit t0 = t0 + t1 for the given element size: the u8 and u16 add helpers
 * for size 0 and 1, a plain 32-bit add for size 2.  Handling of other size
 * values is not visible in this excerpt. */
3617 static inline void gen_neon_add(int size, TCGv t0, TCGv t1)
3620 case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
3621 case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
3622 case 2: tcg_gen_add_i32(t0, t0, t1); break;
/* Emit a reverse subtract, t0 = t1 - t0, for the given element size: the u8
 * and u16 subtract helpers for size 0 and 1, a plain 32-bit subtract for
 * size 2.  Note the swapped operand order passed to each emitter. */
3627 static inline void gen_neon_rsb(int size, TCGv t0, TCGv t1)
3630 case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
3631 case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
3632 case 2: tcg_gen_sub_i32(t0, t1, t0); break;
3637 /* 32-bit pairwise ops end up the same as the elementwise versions, so the
 * pairwise 32-bit helper names are aliased to the elementwise max/min
 * helpers. */
3638 #define gen_helper_neon_pmax_s32 gen_helper_neon_max_s32
3639 #define gen_helper_neon_pmax_u32 gen_helper_neon_max_u32
3640 #define gen_helper_neon_pmin_s32 gen_helper_neon_min_s32
3641 #define gen_helper_neon_pmin_u32 gen_helper_neon_min_u32
/* Emit gen_helper_neon_<name>_{s8,u8,s16,u16,s32,u32}(tmp, cpu_env, tmp, tmp2),
 * choosing the variant from (size << 1) | u.  The macro relies on size, u,
 * tmp and tmp2 being in scope at the expansion site, and its default case
 * executes "return 1" in the enclosing function.  The case labels are not
 * visible in this excerpt; presumably they run 0..5 in the order listed. */
3643 #define GEN_NEON_INTEGER_OP_ENV(name) do { \
3644 switch ((size << 1) | u) { \
3646 gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
3649 gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
3652 gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
3655 gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
3658 gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
3661 gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
3663 default: return 1; \
/* Same dispatch as GEN_NEON_INTEGER_OP_ENV but for helpers that do not take
 * cpu_env: emits gen_helper_neon_<name>_{s8,u8,s16,u16,s32,u32}(tmp, tmp, tmp2)
 * chosen from (size << 1) | u.  Relies on size, u, tmp and tmp2 being in
 * scope; the default case executes "return 1" in the enclosing function. */
3666 #define GEN_NEON_INTEGER_OP(name) do { \
3667 switch ((size << 1) | u) { \
3669 gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
3672 gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
3675 gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
3678 gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
3681 gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
3684 gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
3686 default: return 1; \
/* Load the 32-bit scratch slot vfp.scratch[scratch] from the CPU state into
 * a newly allocated temporary (presumably the value returned; the return
 * statement is not visible in this excerpt). */
3689 static TCGv neon_load_scratch(int scratch)
3691 TCGv tmp = tcg_temp_new_i32();
3692 tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
/* Store var into the scratch slot vfp.scratch[scratch] of the CPU state and
 * free var; the caller must not use var afterwards. */
3696 static void neon_store_scratch(int scratch, TCGv var)
3698 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3699 tcg_temp_free_i32(var);
/* Fetch a Neon scalar operand into a temporary.
 * Two forms are visible: one loads register (reg & 7), pass (reg >> 4) and
 * then duplicates either its high or its low 16 bits across the word; the
 * other loads register (reg & 15), pass (reg >> 4) unchanged.
 * NOTE(review): presumably size selects between the two forms and a bit of
 * reg selects high versus low; those conditions are not visible here. */
3702 static inline TCGv neon_get_scalar(int size, int reg)
3706 tmp = neon_load_reg(reg & 7, reg >> 4);
3708 gen_neon_dup_high16(tmp);
3710 gen_neon_dup_low16(tmp);
3713 tmp = neon_load_reg(reg & 15, reg >> 4);
/* Emit an unzip of registers rd and rm through a helper call.
 * The register numbers are passed to the helper as constants.  Helpers
 * exist for 8/16/32-bit elements in the q form and for 8/16-bit elements in
 * the non-q form; the combination !q && size == 2 is singled out first (its
 * handling is not visible in this excerpt).  Both constant temporaries are
 * freed afterwards. */
3718 static int gen_neon_unzip(int rd, int rm, int size, int q)
3721 if (!q && size == 2) {
3724 tmp = tcg_const_i32(rd);
3725 tmp2 = tcg_const_i32(rm);
3729 gen_helper_neon_qunzip8(cpu_env, tmp, tmp2);
3732 gen_helper_neon_qunzip16(cpu_env, tmp, tmp2);
3735 gen_helper_neon_qunzip32(cpu_env, tmp, tmp2);
3743 gen_helper_neon_unzip8(cpu_env, tmp, tmp2);
3746 gen_helper_neon_unzip16(cpu_env, tmp, tmp2);
3752 tcg_temp_free_i32(tmp);
3753 tcg_temp_free_i32(tmp2);
/* Emit a zip of registers rd and rm through a helper call.
 * The register numbers are passed to the helper as constants.  Helpers
 * exist for 8/16/32-bit elements in the q form and for 8/16-bit elements in
 * the non-q form; the combination !q && size == 2 is singled out first (its
 * handling is not visible in this excerpt).  Both constant temporaries are
 * freed afterwards. */
3757 static int gen_neon_zip(int rd, int rm, int size, int q)
3760 if (!q && size == 2) {
3763 tmp = tcg_const_i32(rd);
3764 tmp2 = tcg_const_i32(rm);
3768 gen_helper_neon_qzip8(cpu_env, tmp, tmp2);
3771 gen_helper_neon_qzip16(cpu_env, tmp, tmp2);
3774 gen_helper_neon_qzip32(cpu_env, tmp, tmp2);
3782 gen_helper_neon_zip8(cpu_env, tmp, tmp2);
3785 gen_helper_neon_zip16(cpu_env, tmp, tmp2);
3791 tcg_temp_free_i32(tmp);
3792 tcg_temp_free_i32(tmp2);
/* Byte-wise transpose step on the word pair (t0, t1), updating both in
 * place:
 *   new t0 = ((t0 << 8) & 0xff00ff00) | (t1 & 0x00ff00ff)
 *   new t1 = ((t1 >> 8) & 0x00ff00ff) | (t0 & 0xff00ff00)
 * The new t0 is built in a scratch temporary so that the old t0 is still
 * available while t1 is computed. */
3796 static void gen_neon_trn_u8(TCGv t0, TCGv t1)
3800 rd = tcg_temp_new_i32();
3801 tmp = tcg_temp_new_i32();
3803 tcg_gen_shli_i32(rd, t0, 8);
3804 tcg_gen_andi_i32(rd, rd, 0xff00ff00);
3805 tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
3806 tcg_gen_or_i32(rd, rd, tmp);
3808 tcg_gen_shri_i32(t1, t1, 8);
3809 tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
3810 tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
3811 tcg_gen_or_i32(t1, t1, tmp);
3812 tcg_gen_mov_i32(t0, rd);
3814 tcg_temp_free_i32(tmp);
3815 tcg_temp_free_i32(rd);
/* Halfword-wise transpose step on the word pair (t0, t1), updating both in
 * place:
 *   new t0 = (t0 << 16) | (t1 & 0xffff)
 *   new t1 = (t1 >> 16) | (t0 & 0xffff0000)
 * The new t0 is built in a scratch temporary so that the old t0 is still
 * available while t1 is computed. */
3818 static void gen_neon_trn_u16(TCGv t0, TCGv t1)
3822 rd = tcg_temp_new_i32();
3823 tmp = tcg_temp_new_i32();
3825 tcg_gen_shli_i32(rd, t0, 16);
3826 tcg_gen_andi_i32(tmp, t1, 0xffff);
3827 tcg_gen_or_i32(rd, rd, tmp);
3828 tcg_gen_shri_i32(t1, t1, 16);
3829 tcg_gen_andi_i32(tmp, t0, 0xffff0000);
3830 tcg_gen_or_i32(t1, t1, tmp);
3831 tcg_gen_mov_i32(t0, rd);
3833 tcg_temp_free_i32(tmp);
3834 tcg_temp_free_i32(rd);
3842 } neon_ls_element_type[11] = {
/* 11 entries; disas_neon_ls_insn() indexes this table with the op field to
 * obtain nregs, interleave and spacing. */
3856 /* Translate a NEON load/store element instruction. Return nonzero if the
3857 instruction is invalid. */
/* Three forms are decoded below: "load store all elements" (insn bit 23
 * clear), "load single element to all lanes" and "single element".  The
 * base register rn is written back at the end, advanced either by the
 * accumulated stride or by the value of register rm. */
3858 static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
3877 if (!s->vfp_enabled)
3879 VFP_DREG_D(rd, insn);
3880 rn = (insn >> 16) & 0xf;
3882 load = (insn & (1 << 21)) != 0;
3883 if ((insn & (1 << 23)) == 0) {
3884 /* Load store all elements. */
3885 op = (insn >> 8) & 0xf;
3886 size = (insn >> 6) & 3;
3889 /* Catch UNDEF cases for bad values of align field */
3892 if (((insn >> 5) & 1) == 1) {
3897 if (((insn >> 4) & 3) == 3) {
3904 nregs = neon_ls_element_type[op].nregs;
3905 interleave = neon_ls_element_type[op].interleave;
3906 spacing = neon_ls_element_type[op].spacing;
3907 if (size == 3 && (interleave | spacing) != 1)
3909 addr = tcg_temp_new_i32();
3910 load_reg_var(s, addr, rn);
/* Address step applied after each element access: the element size in
 * bytes times the interleave factor. */
3911 stride = (1 << size) * interleave;
3912 for (reg = 0; reg < nregs; reg++) {
3913 if (interleave > 2 || (interleave == 2 && nregs == 2)) {
3914 load_reg_var(s, addr, rn);
3915 tcg_gen_addi_i32(addr, addr, (1 << size) * reg);
3916 } else if (interleave == 2 && nregs == 4 && reg == 2) {
3917 load_reg_var(s, addr, rn);
3918 tcg_gen_addi_i32(addr, addr, 1 << size);
3922 tmp64 = gen_ld64(addr, IS_USER(s));
3923 neon_store_reg64(tmp64, rd);
3924 tcg_temp_free_i64(tmp64);
3926 tmp64 = tcg_temp_new_i64();
3927 neon_load_reg64(tmp64, rd);
3928 gen_st64(tmp64, addr, IS_USER(s));
3930 tcg_gen_addi_i32(addr, addr, stride);
3932 for (pass = 0; pass < 2; pass++) {
3935 tmp = gen_ld32(addr, IS_USER(s));
3936 neon_store_reg(rd, pass, tmp);
3938 tmp = neon_load_reg(rd, pass);
3939 gen_st32(tmp, addr, IS_USER(s));
3941 tcg_gen_addi_i32(addr, addr, stride);
3942 } else if (size == 1) {
3944 tmp = gen_ld16u(addr, IS_USER(s));
3945 tcg_gen_addi_i32(addr, addr, stride);
3946 tmp2 = gen_ld16u(addr, IS_USER(s));
3947 tcg_gen_addi_i32(addr, addr, stride);
3948 tcg_gen_shli_i32(tmp2, tmp2, 16);
3949 tcg_gen_or_i32(tmp, tmp, tmp2);
3950 tcg_temp_free_i32(tmp2);
3951 neon_store_reg(rd, pass, tmp);
3953 tmp = neon_load_reg(rd, pass);
3954 tmp2 = tcg_temp_new_i32();
3955 tcg_gen_shri_i32(tmp2, tmp, 16);
3956 gen_st16(tmp, addr, IS_USER(s));
3957 tcg_gen_addi_i32(addr, addr, stride);
3958 gen_st16(tmp2, addr, IS_USER(s));
3959 tcg_gen_addi_i32(addr, addr, stride);
3961 } else /* size == 0 */ {
3964 for (n = 0; n < 4; n++) {
3965 tmp = gen_ld8u(addr, IS_USER(s));
3966 tcg_gen_addi_i32(addr, addr, stride);
3970 tcg_gen_shli_i32(tmp, tmp, n * 8);
3971 tcg_gen_or_i32(tmp2, tmp2, tmp);
3972 tcg_temp_free_i32(tmp);
3975 neon_store_reg(rd, pass, tmp2);
3977 tmp2 = neon_load_reg(rd, pass);
3978 for (n = 0; n < 4; n++) {
3979 tmp = tcg_temp_new_i32();
3981 tcg_gen_mov_i32(tmp, tmp2);
3983 tcg_gen_shri_i32(tmp, tmp2, n * 8);
3985 gen_st8(tmp, addr, IS_USER(s));
3986 tcg_gen_addi_i32(addr, addr, stride);
3988 tcg_temp_free_i32(tmp2);
3995 tcg_temp_free_i32(addr);
3998 size = (insn >> 10) & 3;
4000 /* Load single element to all lanes. */
4001 int a = (insn >> 4) & 1;
4005 size = (insn >> 6) & 3;
4006 nregs = ((insn >> 8) & 3) + 1;
4009 if (nregs != 4 || a == 0) {
4012 /* For VLD4 size==3 a == 1 means 32 bits at 16 byte alignment */
4015 if (nregs == 1 && a == 1 && size == 0) {
4018 if (nregs == 3 && a == 1) {
4021 addr = tcg_temp_new_i32();
4022 load_reg_var(s, addr, rn);
4024 /* VLD1 to all lanes: bit 5 indicates how many Dregs to write */
4025 tmp = gen_load_and_replicate(s, addr, size);
4026 tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0));
4027 tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1));
4028 if (insn & (1 << 5)) {
4029 tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 0));
4030 tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 1));
4032 tcg_temp_free_i32(tmp);
4034 /* VLD2/3/4 to all lanes: bit 5 indicates register stride */
4035 stride = (insn & (1 << 5)) ? 2 : 1;
4036 for (reg = 0; reg < nregs; reg++) {
4037 tmp = gen_load_and_replicate(s, addr, size);
4038 tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0));
4039 tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1));
4040 tcg_temp_free_i32(tmp);
4041 tcg_gen_addi_i32(addr, addr, 1 << size);
4045 tcg_temp_free_i32(addr);
/* From here on stride holds a byte count (element size times nregs); it is
 * added to the base register in the writeback code at the end. */
4046 stride = (1 << size) * nregs;
4048 /* Single element. */
4049 int idx = (insn >> 4) & 0xf;
4050 pass = (insn >> 7) & 1;
4053 shift = ((insn >> 5) & 3) * 8;
4057 shift = ((insn >> 6) & 1) * 16;
4058 stride = (insn & (1 << 5)) ? 2 : 1;
4062 stride = (insn & (1 << 6)) ? 2 : 1;
4067 nregs = ((insn >> 8) & 3) + 1;
4068 /* Catch the UNDEF cases. This is unavoidably a bit messy. */
4071 if (((idx & (1 << size)) != 0) ||
4072 (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) {
4077 if ((idx & 1) != 0) {
4082 if (size == 2 && (idx & 2) != 0) {
4087 if ((size == 2) && ((idx & 3) == 3)) {
4094 if ((rd + stride * (nregs - 1)) > 31) {
4095 /* Attempts to write off the end of the register file
4096 * are UNPREDICTABLE; we choose to UNDEF because otherwise
4097 * the neon_load_reg() would write off the end of the array.
4101 addr = tcg_temp_new_i32();
4102 load_reg_var(s, addr, rn);
4103 for (reg = 0; reg < nregs; reg++) {
4107 tmp = gen_ld8u(addr, IS_USER(s));
4110 tmp = gen_ld16u(addr, IS_USER(s));
4113 tmp = gen_ld32(addr, IS_USER(s));
4115 default: /* Avoid compiler warnings. */
4119 tmp2 = neon_load_reg(rd, pass);
4120 gen_bfi(tmp, tmp2, tmp, shift, size ? 0xffff : 0xff);
4121 tcg_temp_free_i32(tmp2);
4123 neon_store_reg(rd, pass, tmp);
4124 } else { /* Store */
4125 tmp = neon_load_reg(rd, pass);
4127 tcg_gen_shri_i32(tmp, tmp, shift);
4130 gen_st8(tmp, addr, IS_USER(s));
4133 gen_st16(tmp, addr, IS_USER(s));
4136 gen_st32(tmp, addr, IS_USER(s));
4141 tcg_gen_addi_i32(addr, addr, 1 << size);
4143 tcg_temp_free_i32(addr);
4144 stride = nregs * (1 << size);
/* Base register writeback: rn is reloaded, advanced either by stride or by
 * the value of register rm, and stored back.  The conditions choosing
 * between the two are not visible in this excerpt. */
4150 base = load_reg(s, rn);
4152 tcg_gen_addi_i32(base, base, stride);
4155 index = load_reg(s, rm);
4156 tcg_gen_add_i32(base, base, index);
4157 tcg_temp_free_i32(index);
4159 store_reg(s, rn, base);
4164 /* Bitwise select. dest = c ? t : f. Clobbers T and F.
 * Computed per bit as (t & c) | (f & ~c); the two intermediate results are
 * written back into t and f, which is why both are clobbered.
 */
4165 static void gen_neon_bsl(TCGv dest, TCGv t, TCGv f, TCGv c)
4167 tcg_gen_and_i32(t, t, c);
4168 tcg_gen_andc_i32(f, f, c);
4169 tcg_gen_or_i32(dest, t, f);
/* Narrow the 64-bit value src into the 32-bit dest: the narrow_u8 and
 * narrow_u16 helpers for size 0 and 1, a plain 64-to-32-bit truncation for
 * size 2.  Handling of other size values is not visible in this excerpt. */
4172 static inline void gen_neon_narrow(int size, TCGv dest, TCGv_i64 src)
4175 case 0: gen_helper_neon_narrow_u8(dest, src); break;
4176 case 1: gen_helper_neon_narrow_u16(dest, src); break;
4177 case 2: tcg_gen_trunc_i64_i32(dest, src); break;
/* Narrow src into dest through the narrow_sat_s8/s16/s32 helper selected by
 * size (0, 1, 2).  The helpers also receive cpu_env. */
4182 static inline void gen_neon_narrow_sats(int size, TCGv dest, TCGv_i64 src)
4185 case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
4186 case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
4187 case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
/* Narrow src into dest through the narrow_sat_u8/u16/u32 helper selected by
 * size (0, 1, 2).  The helpers also receive cpu_env. */
4192 static inline void gen_neon_narrow_satu(int size, TCGv dest, TCGv_i64 src)
4195 case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
4196 case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
4197 case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
/* Narrow src into dest through the unarrow_sat8/16/32 helper selected by
 * size (0, 1, 2).  The helpers also receive cpu_env. */
4202 static inline void gen_neon_unarrow_sats(int size, TCGv dest, TCGv_i64 src)
4205 case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
4206 case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
4207 case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
/* Shift var by shift in place, using one of four helper families (rshl
 * unsigned, rshl signed, shl unsigned, shl signed) for size 1 or 2.
 * NOTE(review): the rest of the parameter list and the conditions that pick
 * the family are not visible in this excerpt. */
4212 static inline void gen_neon_shift_narrow(int size, TCGv var, TCGv shift,
4218 case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
4219 case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
4224 case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
4225 case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
4232 case 1: gen_helper_neon_shl_u16(var, var, shift); break;
4233 case 2: gen_helper_neon_shl_u32(var, var, shift); break;
4238 case 1: gen_helper_neon_shl_s16(var, var, shift); break;
4239 case 2: gen_helper_neon_shl_s32(var, var, shift); break;
/* Widen the 32-bit src into the 64-bit dest.  An unsigned group (widen_u8,
 * widen_u16, zero-extend) and a signed group (widen_s8, widen_s16,
 * sign-extend) are provided for size 0, 1, 2; presumably u selects the
 * group (the test itself is not visible in this excerpt).  src is freed
 * here, so the caller must not use it afterwards. */
4246 static inline void gen_neon_widen(TCGv_i64 dest, TCGv src, int size, int u)
4250 case 0: gen_helper_neon_widen_u8(dest, src); break;
4251 case 1: gen_helper_neon_widen_u16(dest, src); break;
4252 case 2: tcg_gen_extu_i32_i64(dest, src); break;
4257 case 0: gen_helper_neon_widen_s8(dest, src); break;
4258 case 1: gen_helper_neon_widen_s16(dest, src); break;
4259 case 2: tcg_gen_ext_i32_i64(dest, src); break;
4263 tcg_temp_free_i32(src);
/* Emit cpu_V0 = cpu_V0 + cpu_V1 on 64-bit values: the addl_u16 and addl_u32
 * helpers for size 0 and 1, a plain 64-bit add for size 2. */
4266 static inline void gen_neon_addl(int size)
4269 case 0: gen_helper_neon_addl_u16(CPU_V001); break;
4270 case 1: gen_helper_neon_addl_u32(CPU_V001); break;
4271 case 2: tcg_gen_add_i64(CPU_V001); break;
/* Emit cpu_V0 = cpu_V0 - cpu_V1 on 64-bit values: the subl_u16 and subl_u32
 * helpers for size 0 and 1, a plain 64-bit subtract for size 2. */
4276 static inline void gen_neon_subl(int size)
4279 case 0: gen_helper_neon_subl_u16(CPU_V001); break;
4280 case 1: gen_helper_neon_subl_u32(CPU_V001); break;
4281 case 2: tcg_gen_sub_i64(CPU_V001); break;
/* Negate the 64-bit value var in place through the negl_u16/u32/u64 helper
 * selected by size (0, 1, 2). */
4286 static inline void gen_neon_negl(TCGv_i64 var, int size)
4289 case 0: gen_helper_neon_negl_u16(var, var); break;
4290 case 1: gen_helper_neon_negl_u32(var, var); break;
4291 case 2: gen_helper_neon_negl_u64(var, var); break;
/* Emit op0 = saturating_add(op0, op1) on 64-bit values through the
 * addl_saturate_s32 (size 1) or addl_saturate_s64 (size 2) helper.  No case
 * for size 0 is visible in this excerpt. */
4296 static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
4299 case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
4300 case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
/* Emit the widening multiply dest = a * b, selected by (size << 1) | u:
 * cases 0..3 call the mull_s8/u8/s16/u16 helpers; the remaining two visible
 * paths use gen_muls_i64_i32() and gen_mulu_i64_i32() and copy their 64-bit
 * result into dest.  a and b are freed here for the helper-based cases, as
 * the comment near the end explains (the guarding condition is not visible
 * in this excerpt). */
4305 static inline void gen_neon_mull(TCGv_i64 dest, TCGv a, TCGv b, int size, int u)
4309 switch ((size << 1) | u) {
4310 case 0: gen_helper_neon_mull_s8(dest, a, b); break;
4311 case 1: gen_helper_neon_mull_u8(dest, a, b); break;
4312 case 2: gen_helper_neon_mull_s16(dest, a, b); break;
4313 case 3: gen_helper_neon_mull_u16(dest, a, b); break;
4315 tmp = gen_muls_i64_i32(a, b);
4316 tcg_gen_mov_i64(dest, tmp);
4317 tcg_temp_free_i64(tmp);
4320 tmp = gen_mulu_i64_i32(a, b);
4321 tcg_gen_mov_i64(dest, tmp);
4322 tcg_temp_free_i64(tmp);
4327 /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
4328 Don't forget to clean them now. */
4330 tcg_temp_free_i32(a);
4331 tcg_temp_free_i32(b);
/* Narrow src into dest using one of four narrowing emitters:
 * gen_neon_unarrow_sats, gen_neon_narrow, gen_neon_narrow_satu or
 * gen_neon_narrow_sats.  NOTE(review): presumably op and u choose among
 * them; the selecting conditions are not visible in this excerpt. */
4335 static void gen_neon_narrow_op(int op, int u, int size, TCGv dest, TCGv_i64 src)
4339 gen_neon_unarrow_sats(size, dest, src);
4341 gen_neon_narrow(size, dest, src);
4345 gen_neon_narrow_satu(size, dest, src);
4347 gen_neon_narrow_sats(size, dest, src);
4352 /* Symbolic constants for op fields for Neon 3-register same-length.
 * Each NEON_3R_* value also serves as an index into neon_3r_sizes[].
4353 * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
4356 #define NEON_3R_VHADD 0
4357 #define NEON_3R_VQADD 1
4358 #define NEON_3R_VRHADD 2
4359 #define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
4360 #define NEON_3R_VHSUB 4
4361 #define NEON_3R_VQSUB 5
4362 #define NEON_3R_VCGT 6
4363 #define NEON_3R_VCGE 7
4364 #define NEON_3R_VSHL 8
4365 #define NEON_3R_VQSHL 9
4366 #define NEON_3R_VRSHL 10
4367 #define NEON_3R_VQRSHL 11
4368 #define NEON_3R_VMAX 12
4369 #define NEON_3R_VMIN 13
4370 #define NEON_3R_VABD 14
4371 #define NEON_3R_VABA 15
4372 #define NEON_3R_VADD_VSUB 16
4373 #define NEON_3R_VTST_VCEQ 17
4374 #define NEON_3R_VML 18 /* VMLA, VMLAL, VMLS, VMLSL */
4375 #define NEON_3R_VMUL 19
4376 #define NEON_3R_VPMAX 20
4377 #define NEON_3R_VPMIN 21
4378 #define NEON_3R_VQDMULH_VQRDMULH 22
4379 #define NEON_3R_VPADD 23
4380 #define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
4381 #define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
4382 #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
4383 #define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
4384 #define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
4385 #define NEON_3R_VRECPS_VRSQRTS 31 /* float VRECPS, VRSQRTS */
/* Per-op size mask for the 3-register same-length group, indexed by the
 * NEON_3R_* op value.  Op values without an initializer are implicitly zero.
 * NOTE(review): presumably bit n set means size value n is accepted, the
 * convention documented for neon_2rm_sizes; confirm at the use site. */
4387 static const uint8_t neon_3r_sizes[] = {
4388 [NEON_3R_VHADD] = 0x7,
4389 [NEON_3R_VQADD] = 0xf,
4390 [NEON_3R_VRHADD] = 0x7,
4391 [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
4392 [NEON_3R_VHSUB] = 0x7,
4393 [NEON_3R_VQSUB] = 0xf,
4394 [NEON_3R_VCGT] = 0x7,
4395 [NEON_3R_VCGE] = 0x7,
4396 [NEON_3R_VSHL] = 0xf,
4397 [NEON_3R_VQSHL] = 0xf,
4398 [NEON_3R_VRSHL] = 0xf,
4399 [NEON_3R_VQRSHL] = 0xf,
4400 [NEON_3R_VMAX] = 0x7,
4401 [NEON_3R_VMIN] = 0x7,
4402 [NEON_3R_VABD] = 0x7,
4403 [NEON_3R_VABA] = 0x7,
4404 [NEON_3R_VADD_VSUB] = 0xf,
4405 [NEON_3R_VTST_VCEQ] = 0x7,
4406 [NEON_3R_VML] = 0x7,
4407 [NEON_3R_VMUL] = 0x7,
4408 [NEON_3R_VPMAX] = 0x7,
4409 [NEON_3R_VPMIN] = 0x7,
4410 [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
4411 [NEON_3R_VPADD] = 0x7,
4412 [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
4413 [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
4414 [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
4415 [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
4416 [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
4417 [NEON_3R_VRECPS_VRSQRTS] = 0x5, /* size bit 1 encodes op */
4420 /* Symbolic constants for op fields for Neon 2-register miscellaneous.
 * Each NEON_2RM_* value also serves as an index into neon_2rm_sizes[].
4421 * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
4424 #define NEON_2RM_VREV64 0
4425 #define NEON_2RM_VREV32 1
4426 #define NEON_2RM_VREV16 2
4427 #define NEON_2RM_VPADDL 4
4428 #define NEON_2RM_VPADDL_U 5
4429 #define NEON_2RM_VCLS 8
4430 #define NEON_2RM_VCLZ 9
4431 #define NEON_2RM_VCNT 10
4432 #define NEON_2RM_VMVN 11
4433 #define NEON_2RM_VPADAL 12
4434 #define NEON_2RM_VPADAL_U 13
4435 #define NEON_2RM_VQABS 14
4436 #define NEON_2RM_VQNEG 15
4437 #define NEON_2RM_VCGT0 16
4438 #define NEON_2RM_VCGE0 17
4439 #define NEON_2RM_VCEQ0 18
4440 #define NEON_2RM_VCLE0 19
4441 #define NEON_2RM_VCLT0 20
4442 #define NEON_2RM_VABS 22
4443 #define NEON_2RM_VNEG 23
4444 #define NEON_2RM_VCGT0_F 24
4445 #define NEON_2RM_VCGE0_F 25
4446 #define NEON_2RM_VCEQ0_F 26
4447 #define NEON_2RM_VCLE0_F 27
4448 #define NEON_2RM_VCLT0_F 28
4449 #define NEON_2RM_VABS_F 30
4450 #define NEON_2RM_VNEG_F 31
4451 #define NEON_2RM_VSWP 32
4452 #define NEON_2RM_VTRN 33
4453 #define NEON_2RM_VUZP 34
4454 #define NEON_2RM_VZIP 35
4455 #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
4456 #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
4457 #define NEON_2RM_VSHLL 38
4458 #define NEON_2RM_VCVT_F16_F32 44
4459 #define NEON_2RM_VCVT_F32_F16 46
4460 #define NEON_2RM_VRECPE 56
4461 #define NEON_2RM_VRSQRTE 57
4462 #define NEON_2RM_VRECPE_F 58
4463 #define NEON_2RM_VRSQRTE_F 59
4464 #define NEON_2RM_VCVT_FS 60
4465 #define NEON_2RM_VCVT_FU 61
4466 #define NEON_2RM_VCVT_SF 62
4467 #define NEON_2RM_VCVT_UF 63
/* Classify a Neon 2-register-misc op value.  The result is nonzero for
 * NEON_2RM_VABS_F (30), NEON_2RM_VNEG_F (31) and every op value at or above
 * NEON_2RM_VRECPE_F (58); with the constants defined in this file that
 * upper range covers VRECPE_F, VRSQRTE_F and the four VCVT_* ops. */
4469 static int neon_2rm_is_float_op(int op)
4471 /* Return true if this neon 2reg-misc op is float-to-float */
4472 return (op == NEON_2RM_VABS_F || op == NEON_2RM_VNEG_F ||
4473 op >= NEON_2RM_VRECPE_F);
4476 /* Each entry in this array has bit n set if the insn allows
4477 * size value n (otherwise it will UNDEF). Since unallocated
4478 * op values will have no bits set they always UNDEF.
 * The array is indexed by the NEON_2RM_* op values.
4480 static const uint8_t neon_2rm_sizes[] = {
4481 [NEON_2RM_VREV64] = 0x7,
4482 [NEON_2RM_VREV32] = 0x3,
4483 [NEON_2RM_VREV16] = 0x1,
4484 [NEON_2RM_VPADDL] = 0x7,
4485 [NEON_2RM_VPADDL_U] = 0x7,
4486 [NEON_2RM_VCLS] = 0x7,
4487 [NEON_2RM_VCLZ] = 0x7,
4488 [NEON_2RM_VCNT] = 0x1,
4489 [NEON_2RM_VMVN] = 0x1,
4490 [NEON_2RM_VPADAL] = 0x7,
4491 [NEON_2RM_VPADAL_U] = 0x7,
4492 [NEON_2RM_VQABS] = 0x7,
4493 [NEON_2RM_VQNEG] = 0x7,
4494 [NEON_2RM_VCGT0] = 0x7,
4495 [NEON_2RM_VCGE0] = 0x7,
4496 [NEON_2RM_VCEQ0] = 0x7,
4497 [NEON_2RM_VCLE0] = 0x7,
4498 [NEON_2RM_VCLT0] = 0x7,
4499 [NEON_2RM_VABS] = 0x7,
4500 [NEON_2RM_VNEG] = 0x7,
4501 [NEON_2RM_VCGT0_F] = 0x4,
4502 [NEON_2RM_VCGE0_F] = 0x4,
4503 [NEON_2RM_VCEQ0_F] = 0x4,
4504 [NEON_2RM_VCLE0_F] = 0x4,
4505 [NEON_2RM_VCLT0_F] = 0x4,
4506 [NEON_2RM_VABS_F] = 0x4,
4507 [NEON_2RM_VNEG_F] = 0x4,
4508 [NEON_2RM_VSWP] = 0x1,
4509 [NEON_2RM_VTRN] = 0x7,
4510 [NEON_2RM_VUZP] = 0x7,
4511 [NEON_2RM_VZIP] = 0x7,
4512 [NEON_2RM_VMOVN] = 0x7,
4513 [NEON_2RM_VQMOVN] = 0x7,
4514 [NEON_2RM_VSHLL] = 0x7,
4515 [NEON_2RM_VCVT_F16_F32] = 0x2,
4516 [NEON_2RM_VCVT_F32_F16] = 0x2,
4517 [NEON_2RM_VRECPE] = 0x4,
4518 [NEON_2RM_VRSQRTE] = 0x4,
4519 [NEON_2RM_VRECPE_F] = 0x4,
4520 [NEON_2RM_VRSQRTE_F] = 0x4,
4521 [NEON_2RM_VCVT_FS] = 0x4,
4522 [NEON_2RM_VCVT_FU] = 0x4,
4523 [NEON_2RM_VCVT_SF] = 0x4,
4524 [NEON_2RM_VCVT_UF] = 0x4,
4527 /* Translate a NEON data processing instruction. Return nonzero if the
4528 instruction is invalid.
4529 We process data in a mixture of 32-bit and 64-bit chunks.
4530 Mostly we use 32-bit chunks so we can use normal scalar instructions. */
4532 static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4544 TCGv tmp, tmp2, tmp3, tmp4, tmp5;
4547 if (!s->vfp_enabled)
4549 q = (insn & (1 << 6)) != 0;
4550 u = (insn >> 24) & 1;
4551 VFP_DREG_D(rd, insn);
4552 VFP_DREG_N(rn, insn);
4553 VFP_DREG_M(rm, insn);
4554 size = (insn >> 20) & 3;
4555 if ((insn & (1 << 23)) == 0) {
4556 /* Three register same length. */
4557 op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
4558 /* Catch invalid op and bad size combinations: UNDEF */
4559 if ((neon_3r_sizes[op] & (1 << size)) == 0) {
4562 /* All insns of this form UNDEF for either this condition or the
4563 * superset of cases "Q==1"; we catch the latter later.
4565 if (q && ((rd | rn | rm) & 1)) {
4568 if (size == 3 && op != NEON_3R_LOGIC) {
4569 /* 64-bit element instructions. */
4570 for (pass = 0; pass < (q ? 2 : 1); pass++) {
4571 neon_load_reg64(cpu_V0, rn + pass);
4572 neon_load_reg64(cpu_V1, rm + pass);
4576 gen_helper_neon_qadd_u64(cpu_V0, cpu_env,
4579 gen_helper_neon_qadd_s64(cpu_V0, cpu_env,
4585 gen_helper_neon_qsub_u64(cpu_V0, cpu_env,
4588 gen_helper_neon_qsub_s64(cpu_V0, cpu_env,
4594 gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
4596 gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0);
4601 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
4604 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
4610 gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
4612 gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
4615 case NEON_3R_VQRSHL:
4617 gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
4620 gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
4624 case NEON_3R_VADD_VSUB:
4626 tcg_gen_sub_i64(CPU_V001);
4628 tcg_gen_add_i64(CPU_V001);
4634 neon_store_reg64(cpu_V0, rd + pass);
4643 case NEON_3R_VQRSHL:
4646 /* Shift instruction operands are reversed. */
4661 case NEON_3R_FLOAT_ARITH:
4662 pairwise = (u && size < 2); /* if VPADD (float) */
4664 case NEON_3R_FLOAT_MINMAX:
4665 pairwise = u; /* if VPMIN/VPMAX (float) */
4667 case NEON_3R_FLOAT_CMP:
4669 /* no encoding for U=0 C=1x */
4673 case NEON_3R_FLOAT_ACMP:
4678 case NEON_3R_VRECPS_VRSQRTS:
4684 if (u && (size != 0)) {
4685 /* UNDEF on invalid size for polynomial subcase */
4693 if (pairwise && q) {
4694 /* All the pairwise insns UNDEF if Q is set */
4698 for (pass = 0; pass < (q ? 4 : 2); pass++) {
4703 tmp = neon_load_reg(rn, 0);
4704 tmp2 = neon_load_reg(rn, 1);
4706 tmp = neon_load_reg(rm, 0);
4707 tmp2 = neon_load_reg(rm, 1);
4711 tmp = neon_load_reg(rn, pass);
4712 tmp2 = neon_load_reg(rm, pass);
4716 GEN_NEON_INTEGER_OP(hadd);
4719 GEN_NEON_INTEGER_OP_ENV(qadd);
4721 case NEON_3R_VRHADD:
4722 GEN_NEON_INTEGER_OP(rhadd);
4724 case NEON_3R_LOGIC: /* Logic ops. */
4725 switch ((u << 2) | size) {
4727 tcg_gen_and_i32(tmp, tmp, tmp2);
4730 tcg_gen_andc_i32(tmp, tmp, tmp2);
4733 tcg_gen_or_i32(tmp, tmp, tmp2);
4736 tcg_gen_orc_i32(tmp, tmp, tmp2);
4739 tcg_gen_xor_i32(tmp, tmp, tmp2);
4742 tmp3 = neon_load_reg(rd, pass);
4743 gen_neon_bsl(tmp, tmp, tmp2, tmp3);
4744 tcg_temp_free_i32(tmp3);
4747 tmp3 = neon_load_reg(rd, pass);
4748 gen_neon_bsl(tmp, tmp, tmp3, tmp2);
4749 tcg_temp_free_i32(tmp3);
4752 tmp3 = neon_load_reg(rd, pass);
4753 gen_neon_bsl(tmp, tmp3, tmp, tmp2);
4754 tcg_temp_free_i32(tmp3);
4759 GEN_NEON_INTEGER_OP(hsub);
4762 GEN_NEON_INTEGER_OP_ENV(qsub);
4765 GEN_NEON_INTEGER_OP(cgt);
4768 GEN_NEON_INTEGER_OP(cge);
4771 GEN_NEON_INTEGER_OP(shl);
4774 GEN_NEON_INTEGER_OP_ENV(qshl);
4777 GEN_NEON_INTEGER_OP(rshl);
4779 case NEON_3R_VQRSHL:
4780 GEN_NEON_INTEGER_OP_ENV(qrshl);
4783 GEN_NEON_INTEGER_OP(max);
4786 GEN_NEON_INTEGER_OP(min);
4789 GEN_NEON_INTEGER_OP(abd);
4792 GEN_NEON_INTEGER_OP(abd);
4793 tcg_temp_free_i32(tmp2);
4794 tmp2 = neon_load_reg(rd, pass);
4795 gen_neon_add(size, tmp, tmp2);
4797 case NEON_3R_VADD_VSUB:
4798 if (!u) { /* VADD */
4799 gen_neon_add(size, tmp, tmp2);
4802 case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break;
4803 case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break;
4804 case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break;
4809 case NEON_3R_VTST_VCEQ:
4810 if (!u) { /* VTST */
4812 case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break;
4813 case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break;
4814 case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break;
4819 case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
4820 case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
4821 case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
4826 case NEON_3R_VML: /* VMLA, VMLAL, VMLS,VMLSL */
4828 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
4829 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
4830 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
4833 tcg_temp_free_i32(tmp2);
4834 tmp2 = neon_load_reg(rd, pass);
4836 gen_neon_rsb(size, tmp, tmp2);
4838 gen_neon_add(size, tmp, tmp2);
4842 if (u) { /* polynomial */
4843 gen_helper_neon_mul_p8(tmp, tmp, tmp2);
4844 } else { /* Integer */
4846 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
4847 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
4848 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
4854 GEN_NEON_INTEGER_OP(pmax);
4857 GEN_NEON_INTEGER_OP(pmin);
4859 case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high. */
4860 if (!u) { /* VQDMULH */
4863 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
4866 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
4870 } else { /* VQRDMULH */
4873 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
4876 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
4884 case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
4885 case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
4886 case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
4890 case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
4892 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
4893 switch ((u << 2) | size) {
4896 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
4899 gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus);
4902 gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus);
4907 tcg_temp_free_ptr(fpstatus);
4910 case NEON_3R_FLOAT_MULTIPLY:
4912 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
4913 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
4915 tcg_temp_free_i32(tmp2);
4916 tmp2 = neon_load_reg(rd, pass);
4918 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
4920 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
4923 tcg_temp_free_ptr(fpstatus);
4926 case NEON_3R_FLOAT_CMP:
4928 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
4930 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
4933 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
4935 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
4938 tcg_temp_free_ptr(fpstatus);
4941 case NEON_3R_FLOAT_ACMP:
4943 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
4945 gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus);
4947 gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus);
4949 tcg_temp_free_ptr(fpstatus);
4952 case NEON_3R_FLOAT_MINMAX:
4954 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
4956 gen_helper_neon_max_f32(tmp, tmp, tmp2, fpstatus);
4958 gen_helper_neon_min_f32(tmp, tmp, tmp2, fpstatus);
4960 tcg_temp_free_ptr(fpstatus);
4963 case NEON_3R_VRECPS_VRSQRTS:
4965 gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
4967 gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
4972 tcg_temp_free_i32(tmp2);
4974 /* Save the result. For elementwise operations we can put it
4975 straight into the destination register. For pairwise operations
4976 we have to be careful to avoid clobbering the source operands. */
4977 if (pairwise && rd == rm) {
4978 neon_store_scratch(pass, tmp);
4980 neon_store_reg(rd, pass, tmp);
4984 if (pairwise && rd == rm) {
4985 for (pass = 0; pass < (q ? 4 : 2); pass++) {
4986 tmp = neon_load_scratch(pass);
4987 neon_store_reg(rd, pass, tmp);
4990 /* End of 3 register same size operations. */
4991 } else if (insn & (1 << 4)) {
4992 if ((insn & 0x00380080) != 0) {
4993 /* Two registers and shift. */
4994 op = (insn >> 8) & 0xf;
4995 if (insn & (1 << 7)) {
5003 while ((insn & (1 << (size + 19))) == 0)
5006 shift = (insn >> 16) & ((1 << (3 + size)) - 1);
5007 /* To avoid excessive duplication of ops we implement shift
5008 by immediate using the variable shift operations. */
5010 /* Shift by immediate:
5011 VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */
5012 if (q && ((rd | rm) & 1)) {
5015 if (!u && (op == 4 || op == 6)) {
5018 /* Right shifts are encoded as N - shift, where N is the
5019 element size in bits. */
5021 shift = shift - (1 << (size + 3));
5029 imm = (uint8_t) shift;
5034 imm = (uint16_t) shift;
5045 for (pass = 0; pass < count; pass++) {
5047 neon_load_reg64(cpu_V0, rm + pass);
5048 tcg_gen_movi_i64(cpu_V1, imm);
5053 gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
5055 gen_helper_neon_shl_s64(cpu_V0, cpu_V0, cpu_V1);
5060 gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1);
5062 gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
5065 case 5: /* VSHL, VSLI */
5066 gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
5068 case 6: /* VQSHLU */
5069 gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
5074 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5077 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5082 if (op == 1 || op == 3) {
5084 neon_load_reg64(cpu_V1, rd + pass);
5085 tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
5086 } else if (op == 4 || (op == 5 && u)) {
5088 neon_load_reg64(cpu_V1, rd + pass);
5090 if (shift < -63 || shift > 63) {
5094 mask = 0xffffffffffffffffull >> -shift;
5096 mask = 0xffffffffffffffffull << shift;
5099 tcg_gen_andi_i64(cpu_V1, cpu_V1, ~mask);
5100 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
5102 neon_store_reg64(cpu_V0, rd + pass);
5103 } else { /* size < 3 */
5104 /* Operands in T0 and T1. */
5105 tmp = neon_load_reg(rm, pass);
5106 tmp2 = tcg_temp_new_i32();
5107 tcg_gen_movi_i32(tmp2, imm);
5111 GEN_NEON_INTEGER_OP(shl);
5115 GEN_NEON_INTEGER_OP(rshl);
5118 case 5: /* VSHL, VSLI */
5120 case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break;
5121 case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break;
5122 case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break;
5126 case 6: /* VQSHLU */
5129 gen_helper_neon_qshlu_s8(tmp, cpu_env,
5133 gen_helper_neon_qshlu_s16(tmp, cpu_env,
5137 gen_helper_neon_qshlu_s32(tmp, cpu_env,
5145 GEN_NEON_INTEGER_OP_ENV(qshl);
5148 tcg_temp_free_i32(tmp2);
5150 if (op == 1 || op == 3) {
5152 tmp2 = neon_load_reg(rd, pass);
5153 gen_neon_add(size, tmp, tmp2);
5154 tcg_temp_free_i32(tmp2);
5155 } else if (op == 4 || (op == 5 && u)) {
5160 mask = 0xff >> -shift;
5162 mask = (uint8_t)(0xff << shift);
5168 mask = 0xffff >> -shift;
5170 mask = (uint16_t)(0xffff << shift);
5174 if (shift < -31 || shift > 31) {
5178 mask = 0xffffffffu >> -shift;
5180 mask = 0xffffffffu << shift;
5186 tmp2 = neon_load_reg(rd, pass);
5187 tcg_gen_andi_i32(tmp, tmp, mask);
5188 tcg_gen_andi_i32(tmp2, tmp2, ~mask);
5189 tcg_gen_or_i32(tmp, tmp, tmp2);
5190 tcg_temp_free_i32(tmp2);
5192 neon_store_reg(rd, pass, tmp);
5195 } else if (op < 10) {
5196 /* Shift by immediate and narrow:
5197 VSHRN, VRSHRN, VQSHRN, VQRSHRN. */
5198 int input_unsigned = (op == 8) ? !u : u;
5202 shift = shift - (1 << (size + 3));
5205 tmp64 = tcg_const_i64(shift);
5206 neon_load_reg64(cpu_V0, rm);
5207 neon_load_reg64(cpu_V1, rm + 1);
5208 for (pass = 0; pass < 2; pass++) {
5216 if (input_unsigned) {
5217 gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
5219 gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
5222 if (input_unsigned) {
5223 gen_helper_neon_shl_u64(cpu_V0, in, tmp64);
5225 gen_helper_neon_shl_s64(cpu_V0, in, tmp64);
5228 tmp = tcg_temp_new_i32();
5229 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5230 neon_store_reg(rd, pass, tmp);
5232 tcg_temp_free_i64(tmp64);
5235 imm = (uint16_t)shift;
5239 imm = (uint32_t)shift;
5241 tmp2 = tcg_const_i32(imm);
5242 tmp4 = neon_load_reg(rm + 1, 0);
5243 tmp5 = neon_load_reg(rm + 1, 1);
5244 for (pass = 0; pass < 2; pass++) {
5246 tmp = neon_load_reg(rm, 0);
5250 gen_neon_shift_narrow(size, tmp, tmp2, q,
5253 tmp3 = neon_load_reg(rm, 1);
5257 gen_neon_shift_narrow(size, tmp3, tmp2, q,
5259 tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
5260 tcg_temp_free_i32(tmp);
5261 tcg_temp_free_i32(tmp3);
5262 tmp = tcg_temp_new_i32();
5263 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5264 neon_store_reg(rd, pass, tmp);
5266 tcg_temp_free_i32(tmp2);
5268 } else if (op == 10) {
5270 if (q || (rd & 1)) {
5273 tmp = neon_load_reg(rm, 0);
5274 tmp2 = neon_load_reg(rm, 1);
5275 for (pass = 0; pass < 2; pass++) {
5279 gen_neon_widen(cpu_V0, tmp, size, u);
5282 /* The shift is less than the width of the source
5283 type, so we can just shift the whole register. */
5284 tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
5285 /* Widen the result of shift: we need to clear
5286 * the potential overflow bits resulting from
5287 * left bits of the narrow input appearing as
5288 * right bits of the left neighbour narrow
5290 if (size < 2 || !u) {
5293 imm = (0xffu >> (8 - shift));
5295 } else if (size == 1) {
5296 imm = 0xffff >> (16 - shift);
5299 imm = 0xffffffff >> (32 - shift);
5302 imm64 = imm | (((uint64_t)imm) << 32);
5306 tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
5309 neon_store_reg64(cpu_V0, rd + pass);
5311 } else if (op >= 14) {
5312 /* VCVT fixed-point. */
5313 if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
5316 /* We have already masked out the must-be-1 top bit of imm6,
5317 * hence this 32-shift where the ARM ARM has 64-imm6.
5320 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5321 tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass));
5324 gen_vfp_ulto(0, shift, 1);
5326 gen_vfp_slto(0, shift, 1);
5329 gen_vfp_toul(0, shift, 1);
5331 gen_vfp_tosl(0, shift, 1);
5333 tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass));
5338 } else { /* (insn & 0x00380080) == 0 */
5340 if (q && (rd & 1)) {
5344 op = (insn >> 8) & 0xf;
5345 /* One register and immediate. */
5346 imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
5347 invert = (insn & (1 << 5)) != 0;
5348 /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
5349 * We choose to not special-case this and will behave as if a
5350 * valid constant encoding of 0 had been given.
5369 imm = (imm << 8) | (imm << 24);
5372 imm = (imm << 8) | 0xff;
5375 imm = (imm << 16) | 0xffff;
5378 imm |= (imm << 8) | (imm << 16) | (imm << 24);
5386 imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
5387 | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
5393 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5394 if (op & 1 && op < 12) {
5395 tmp = neon_load_reg(rd, pass);
5397 /* The immediate value has already been inverted, so
5399 tcg_gen_andi_i32(tmp, tmp, imm);
5401 tcg_gen_ori_i32(tmp, tmp, imm);
5405 tmp = tcg_temp_new_i32();
5406 if (op == 14 && invert) {
5410 for (n = 0; n < 4; n++) {
5411 if (imm & (1 << (n + (pass & 1) * 4)))
5412 val |= 0xff << (n * 8);
5414 tcg_gen_movi_i32(tmp, val);
5416 tcg_gen_movi_i32(tmp, imm);
5419 neon_store_reg(rd, pass, tmp);
5422 } else { /* (insn & 0x00800010 == 0x00800000) */
5424 op = (insn >> 8) & 0xf;
5425 if ((insn & (1 << 6)) == 0) {
5426 /* Three registers of different lengths. */
5430 /* undefreq: bit 0 : UNDEF if size != 0
5431 * bit 1 : UNDEF if size == 0
5432 * bit 2 : UNDEF if U == 1
5433 * Note that [1:0] set implies 'always UNDEF'
5436 /* prewiden, src1_wide, src2_wide, undefreq */
5437 static const int neon_3reg_wide[16][4] = {
5438 {1, 0, 0, 0}, /* VADDL */
5439 {1, 1, 0, 0}, /* VADDW */
5440 {1, 0, 0, 0}, /* VSUBL */
5441 {1, 1, 0, 0}, /* VSUBW */
5442 {0, 1, 1, 0}, /* VADDHN */
5443 {0, 0, 0, 0}, /* VABAL */
5444 {0, 1, 1, 0}, /* VSUBHN */
5445 {0, 0, 0, 0}, /* VABDL */
5446 {0, 0, 0, 0}, /* VMLAL */
5447 {0, 0, 0, 6}, /* VQDMLAL */
5448 {0, 0, 0, 0}, /* VMLSL */
5449 {0, 0, 0, 6}, /* VQDMLSL */
5450 {0, 0, 0, 0}, /* Integer VMULL */
5451 {0, 0, 0, 2}, /* VQDMULL */
5452 {0, 0, 0, 5}, /* Polynomial VMULL */
5453 {0, 0, 0, 3}, /* Reserved: always UNDEF */
5456 prewiden = neon_3reg_wide[op][0];
5457 src1_wide = neon_3reg_wide[op][1];
5458 src2_wide = neon_3reg_wide[op][2];
5459 undefreq = neon_3reg_wide[op][3];
5461 if (((undefreq & 1) && (size != 0)) ||
5462 ((undefreq & 2) && (size == 0)) ||
5463 ((undefreq & 4) && u)) {
5466 if ((src1_wide && (rn & 1)) ||
5467 (src2_wide && (rm & 1)) ||
5468 (!src2_wide && (rd & 1))) {
5472 /* Avoid overlapping operands. Wide source operands are
5473 always aligned so will never overlap with wide
5474 destinations in problematic ways. */
5475 if (rd == rm && !src2_wide) {
5476 tmp = neon_load_reg(rm, 1);
5477 neon_store_scratch(2, tmp);
5478 } else if (rd == rn && !src1_wide) {
5479 tmp = neon_load_reg(rn, 1);
5480 neon_store_scratch(2, tmp);
5483 for (pass = 0; pass < 2; pass++) {
5485 neon_load_reg64(cpu_V0, rn + pass);
5488 if (pass == 1 && rd == rn) {
5489 tmp = neon_load_scratch(2);
5491 tmp = neon_load_reg(rn, pass);
5494 gen_neon_widen(cpu_V0, tmp, size, u);
5498 neon_load_reg64(cpu_V1, rm + pass);
5501 if (pass == 1 && rd == rm) {
5502 tmp2 = neon_load_scratch(2);
5504 tmp2 = neon_load_reg(rm, pass);
5507 gen_neon_widen(cpu_V1, tmp2, size, u);
5511 case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
5512 gen_neon_addl(size);
5514 case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
5515 gen_neon_subl(size);
5517 case 5: case 7: /* VABAL, VABDL */
5518 switch ((size << 1) | u) {
5520 gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
5523 gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
5526 gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
5529 gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
5532 gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
5535 gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
5539 tcg_temp_free_i32(tmp2);
5540 tcg_temp_free_i32(tmp);
5542 case 8: case 9: case 10: case 11: case 12: case 13:
5543 /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
5544 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
5546 case 14: /* Polynomial VMULL */
5547 gen_helper_neon_mull_p8(cpu_V0, tmp, tmp2);
5548 tcg_temp_free_i32(tmp2);
5549 tcg_temp_free_i32(tmp);
5551 default: /* 15 is RESERVED: caught earlier */
5556 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5557 neon_store_reg64(cpu_V0, rd + pass);
5558 } else if (op == 5 || (op >= 8 && op <= 11)) {
5560 neon_load_reg64(cpu_V1, rd + pass);
5562 case 10: /* VMLSL */
5563 gen_neon_negl(cpu_V0, size);
5565 case 5: case 8: /* VABAL, VMLAL */
5566 gen_neon_addl(size);
5568 case 9: case 11: /* VQDMLAL, VQDMLSL */
5569 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5571 gen_neon_negl(cpu_V0, size);
5573 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
5578 neon_store_reg64(cpu_V0, rd + pass);
5579 } else if (op == 4 || op == 6) {
5580 /* Narrowing operation. */
5581 tmp = tcg_temp_new_i32();
5585 gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
5588 gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
5591 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
5592 tcg_gen_trunc_i64_i32(tmp, cpu_V0);
5599 gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
5602 gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
5605 tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
5606 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
5607 tcg_gen_trunc_i64_i32(tmp, cpu_V0);
5615 neon_store_reg(rd, 0, tmp3);
5616 neon_store_reg(rd, 1, tmp);
5619 /* Write back the result. */
5620 neon_store_reg64(cpu_V0, rd + pass);
5624 /* Two registers and a scalar. NB that for ops of this form
5625 * the ARM ARM labels bit 24 as Q, but it is in our variable
5632 case 1: /* Float VMLA scalar */
5633 case 5: /* Floating point VMLS scalar */
5634 case 9: /* Floating point VMUL scalar */
5639 case 0: /* Integer VMLA scalar */
5640 case 4: /* Integer VMLS scalar */
5641 case 8: /* Integer VMUL scalar */
5642 case 12: /* VQDMULH scalar */
5643 case 13: /* VQRDMULH scalar */
5644 if (u && ((rd | rn) & 1)) {
5647 tmp = neon_get_scalar(size, rm);
5648 neon_store_scratch(0, tmp);
5649 for (pass = 0; pass < (u ? 4 : 2); pass++) {
5650 tmp = neon_load_scratch(0);
5651 tmp2 = neon_load_reg(rn, pass);
5654 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
5656 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
5658 } else if (op == 13) {
5660 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
5662 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
5664 } else if (op & 1) {
5665 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5666 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
5667 tcg_temp_free_ptr(fpstatus);
5670 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
5671 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
5672 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
5676 tcg_temp_free_i32(tmp2);
5679 tmp2 = neon_load_reg(rd, pass);
5682 gen_neon_add(size, tmp, tmp2);
5686 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5687 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5688 tcg_temp_free_ptr(fpstatus);
5692 gen_neon_rsb(size, tmp, tmp2);
5696 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5697 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
5698 tcg_temp_free_ptr(fpstatus);
5704 tcg_temp_free_i32(tmp2);
5706 neon_store_reg(rd, pass, tmp);
5709 case 3: /* VQDMLAL scalar */
5710 case 7: /* VQDMLSL scalar */
5711 case 11: /* VQDMULL scalar */
5716 case 2: /* VMLAL sclar */
5717 case 6: /* VMLSL scalar */
5718 case 10: /* VMULL scalar */
5722 tmp2 = neon_get_scalar(size, rm);
5723 /* We need a copy of tmp2 because gen_neon_mull
5724 * deletes it during pass 0. */
5725 tmp4 = tcg_temp_new_i32();
5726 tcg_gen_mov_i32(tmp4, tmp2);
5727 tmp3 = neon_load_reg(rn, 1);
5729 for (pass = 0; pass < 2; pass++) {
5731 tmp = neon_load_reg(rn, 0);
5736 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
5738 neon_load_reg64(cpu_V1, rd + pass);
5742 gen_neon_negl(cpu_V0, size);
5745 gen_neon_addl(size);
5748 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5750 gen_neon_negl(cpu_V0, size);
5752 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
5758 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5763 neon_store_reg64(cpu_V0, rd + pass);
5768 default: /* 14 and 15 are RESERVED */
5772 } else { /* size == 3 */
5775 imm = (insn >> 8) & 0xf;
5780 if (q && ((rd | rn | rm) & 1)) {
5785 neon_load_reg64(cpu_V0, rn);
5787 neon_load_reg64(cpu_V1, rn + 1);
5789 } else if (imm == 8) {
5790 neon_load_reg64(cpu_V0, rn + 1);
5792 neon_load_reg64(cpu_V1, rm);
5795 tmp64 = tcg_temp_new_i64();
5797 neon_load_reg64(cpu_V0, rn);
5798 neon_load_reg64(tmp64, rn + 1);
5800 neon_load_reg64(cpu_V0, rn + 1);
5801 neon_load_reg64(tmp64, rm);
5803 tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
5804 tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
5805 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
5807 neon_load_reg64(cpu_V1, rm);
5809 neon_load_reg64(cpu_V1, rm + 1);
5812 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
5813 tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
5814 tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
5815 tcg_temp_free_i64(tmp64);
5818 neon_load_reg64(cpu_V0, rn);
5819 tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
5820 neon_load_reg64(cpu_V1, rm);
5821 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
5822 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
5824 neon_store_reg64(cpu_V0, rd);
5826 neon_store_reg64(cpu_V1, rd + 1);
5828 } else if ((insn & (1 << 11)) == 0) {
5829 /* Two register misc. */
5830 op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
5831 size = (insn >> 18) & 3;
5832 /* UNDEF for unknown op values and bad op-size combinations */
5833 if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
5836 if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
5837 q && ((rm | rd) & 1)) {
5841 case NEON_2RM_VREV64:
5842 for (pass = 0; pass < (q ? 2 : 1); pass++) {
5843 tmp = neon_load_reg(rm, pass * 2);
5844 tmp2 = neon_load_reg(rm, pass * 2 + 1);
5846 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
5847 case 1: gen_swap_half(tmp); break;
5848 case 2: /* no-op */ break;
5851 neon_store_reg(rd, pass * 2 + 1, tmp);
5853 neon_store_reg(rd, pass * 2, tmp2);
5856 case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
5857 case 1: gen_swap_half(tmp2); break;
5860 neon_store_reg(rd, pass * 2, tmp2);
5864 case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
5865 case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
5866 for (pass = 0; pass < q + 1; pass++) {
5867 tmp = neon_load_reg(rm, pass * 2);
5868 gen_neon_widen(cpu_V0, tmp, size, op & 1);
5869 tmp = neon_load_reg(rm, pass * 2 + 1);
5870 gen_neon_widen(cpu_V1, tmp, size, op & 1);
5872 case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
5873 case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
5874 case 2: tcg_gen_add_i64(CPU_V001); break;
5877 if (op >= NEON_2RM_VPADAL) {
5879 neon_load_reg64(cpu_V1, rd + pass);
5880 gen_neon_addl(size);
5882 neon_store_reg64(cpu_V0, rd + pass);
5888 for (n = 0; n < (q ? 4 : 2); n += 2) {
5889 tmp = neon_load_reg(rm, n);
5890 tmp2 = neon_load_reg(rd, n + 1);
5891 neon_store_reg(rm, n, tmp2);
5892 neon_store_reg(rd, n + 1, tmp);
5899 if (gen_neon_unzip(rd, rm, size, q)) {
5904 if (gen_neon_zip(rd, rm, size, q)) {
5908 case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
5909 /* also VQMOVUN; op field and mnemonics don't line up */
5914 for (pass = 0; pass < 2; pass++) {
5915 neon_load_reg64(cpu_V0, rm + pass);
5916 tmp = tcg_temp_new_i32();
5917 gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
5922 neon_store_reg(rd, 0, tmp2);
5923 neon_store_reg(rd, 1, tmp);
5927 case NEON_2RM_VSHLL:
5928 if (q || (rd & 1)) {
5931 tmp = neon_load_reg(rm, 0);
5932 tmp2 = neon_load_reg(rm, 1);
5933 for (pass = 0; pass < 2; pass++) {
5936 gen_neon_widen(cpu_V0, tmp, size, 1);
5937 tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
5938 neon_store_reg64(cpu_V0, rd + pass);
5941 case NEON_2RM_VCVT_F16_F32:
5942 if (!arm_feature(env, ARM_FEATURE_VFP_FP16) ||
5946 tmp = tcg_temp_new_i32();
5947 tmp2 = tcg_temp_new_i32();
5948 tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0));
5949 gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
5950 tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1));
5951 gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
5952 tcg_gen_shli_i32(tmp2, tmp2, 16);
5953 tcg_gen_or_i32(tmp2, tmp2, tmp);
5954 tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2));
5955 gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
5956 tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3));
5957 neon_store_reg(rd, 0, tmp2);
5958 tmp2 = tcg_temp_new_i32();
5959 gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
5960 tcg_gen_shli_i32(tmp2, tmp2, 16);
5961 tcg_gen_or_i32(tmp2, tmp2, tmp);
5962 neon_store_reg(rd, 1, tmp2);
5963 tcg_temp_free_i32(tmp);
5965 case NEON_2RM_VCVT_F32_F16:
5966 if (!arm_feature(env, ARM_FEATURE_VFP_FP16) ||
5970 tmp3 = tcg_temp_new_i32();
5971 tmp = neon_load_reg(rm, 0);
5972 tmp2 = neon_load_reg(rm, 1);
5973 tcg_gen_ext16u_i32(tmp3, tmp);
5974 gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
5975 tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0));
5976 tcg_gen_shri_i32(tmp3, tmp, 16);
5977 gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
5978 tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1));
5979 tcg_temp_free_i32(tmp);
5980 tcg_gen_ext16u_i32(tmp3, tmp2);
5981 gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
5982 tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2));
5983 tcg_gen_shri_i32(tmp3, tmp2, 16);
5984 gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
5985 tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3));
5986 tcg_temp_free_i32(tmp2);
5987 tcg_temp_free_i32(tmp3);
5991 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5992 if (neon_2rm_is_float_op(op)) {
5993 tcg_gen_ld_f32(cpu_F0s, cpu_env,
5994 neon_reg_offset(rm, pass));
5997 tmp = neon_load_reg(rm, pass);
6000 case NEON_2RM_VREV32:
6002 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6003 case 1: gen_swap_half(tmp); break;
6007 case NEON_2RM_VREV16:
6012 case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
6013 case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
6014 case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
6020 case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
6021 case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
6022 case 2: gen_helper_clz(tmp, tmp); break;
6027 gen_helper_neon_cnt_u8(tmp, tmp);
6030 tcg_gen_not_i32(tmp, tmp);
6032 case NEON_2RM_VQABS:
6035 gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
6038 gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
6041 gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
6046 case NEON_2RM_VQNEG:
6049 gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
6052 gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
6055 gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
6060 case NEON_2RM_VCGT0: case NEON_2RM_VCLE0:
6061 tmp2 = tcg_const_i32(0);
6063 case 0: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break;
6064 case 1: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break;
6065 case 2: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break;
6068 tcg_temp_free(tmp2);
6069 if (op == NEON_2RM_VCLE0) {
6070 tcg_gen_not_i32(tmp, tmp);
6073 case NEON_2RM_VCGE0: case NEON_2RM_VCLT0:
6074 tmp2 = tcg_const_i32(0);
6076 case 0: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break;
6077 case 1: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break;
6078 case 2: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break;
6081 tcg_temp_free(tmp2);
6082 if (op == NEON_2RM_VCLT0) {
6083 tcg_gen_not_i32(tmp, tmp);
6086 case NEON_2RM_VCEQ0:
6087 tmp2 = tcg_const_i32(0);
6089 case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
6090 case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
6091 case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
6094 tcg_temp_free(tmp2);
6098 case 0: gen_helper_neon_abs_s8(tmp, tmp); break;
6099 case 1: gen_helper_neon_abs_s16(tmp, tmp); break;
6100 case 2: tcg_gen_abs_i32(tmp, tmp); break;
6105 tmp2 = tcg_const_i32(0);
6106 gen_neon_rsb(size, tmp, tmp2);
6107 tcg_temp_free(tmp2);
6109 case NEON_2RM_VCGT0_F:
6111 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6112 tmp2 = tcg_const_i32(0);
6113 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
6114 tcg_temp_free(tmp2);
6115 tcg_temp_free_ptr(fpstatus);
6118 case NEON_2RM_VCGE0_F:
6120 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6121 tmp2 = tcg_const_i32(0);
6122 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
6123 tcg_temp_free(tmp2);
6124 tcg_temp_free_ptr(fpstatus);
6127 case NEON_2RM_VCEQ0_F:
6129 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6130 tmp2 = tcg_const_i32(0);
6131 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
6132 tcg_temp_free(tmp2);
6133 tcg_temp_free_ptr(fpstatus);
6136 case NEON_2RM_VCLE0_F:
6138 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6139 tmp2 = tcg_const_i32(0);
6140 gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
6141 tcg_temp_free(tmp2);
6142 tcg_temp_free_ptr(fpstatus);
6145 case NEON_2RM_VCLT0_F:
6147 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6148 tmp2 = tcg_const_i32(0);
6149 gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
6150 tcg_temp_free(tmp2);
6151 tcg_temp_free_ptr(fpstatus);
6154 case NEON_2RM_VABS_F:
6157 case NEON_2RM_VNEG_F:
6161 tmp2 = neon_load_reg(rd, pass);
6162 neon_store_reg(rm, pass, tmp2);
6165 tmp2 = neon_load_reg(rd, pass);
6167 case 0: gen_neon_trn_u8(tmp, tmp2); break;
6168 case 1: gen_neon_trn_u16(tmp, tmp2); break;
6171 neon_store_reg(rm, pass, tmp2);
6173 case NEON_2RM_VRECPE:
6174 gen_helper_recpe_u32(tmp, tmp, cpu_env);
6176 case NEON_2RM_VRSQRTE:
6177 gen_helper_rsqrte_u32(tmp, tmp, cpu_env);
6179 case NEON_2RM_VRECPE_F:
6180 gen_helper_recpe_f32(cpu_F0s, cpu_F0s, cpu_env);
6182 case NEON_2RM_VRSQRTE_F:
6183 gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, cpu_env);
6185 case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
6188 case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
6191 case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
6192 gen_vfp_tosiz(0, 1);
6194 case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
6195 gen_vfp_touiz(0, 1);
6198 /* Reserved op values were caught by the
6199 * neon_2rm_sizes[] check earlier.
6203 if (neon_2rm_is_float_op(op)) {
6204 tcg_gen_st_f32(cpu_F0s, cpu_env,
6205 neon_reg_offset(rd, pass));
6207 neon_store_reg(rd, pass, tmp);
6212 } else if ((insn & (1 << 10)) == 0) {
6214 int n = ((insn >> 8) & 3) + 1;
6215 if ((rn + n) > 32) {
6216 /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
6217 * helper function running off the end of the register file.
6222 if (insn & (1 << 6)) {
6223 tmp = neon_load_reg(rd, 0);
6225 tmp = tcg_temp_new_i32();
6226 tcg_gen_movi_i32(tmp, 0);
6228 tmp2 = neon_load_reg(rm, 0);
6229 tmp4 = tcg_const_i32(rn);
6230 tmp5 = tcg_const_i32(n);
6231 gen_helper_neon_tbl(tmp2, tmp2, tmp, tmp4, tmp5);
6232 tcg_temp_free_i32(tmp);
6233 if (insn & (1 << 6)) {
6234 tmp = neon_load_reg(rd, 1);
6236 tmp = tcg_temp_new_i32();
6237 tcg_gen_movi_i32(tmp, 0);
6239 tmp3 = neon_load_reg(rm, 1);
6240 gen_helper_neon_tbl(tmp3, tmp3, tmp, tmp4, tmp5);
6241 tcg_temp_free_i32(tmp5);
6242 tcg_temp_free_i32(tmp4);
6243 neon_store_reg(rd, 0, tmp2);
6244 neon_store_reg(rd, 1, tmp3);
6245 tcg_temp_free_i32(tmp);
6246 } else if ((insn & 0x380) == 0) {
6248 if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
6251 if (insn & (1 << 19)) {
6252 tmp = neon_load_reg(rm, 1);
6254 tmp = neon_load_reg(rm, 0);
6256 if (insn & (1 << 16)) {
6257 gen_neon_dup_u8(tmp, ((insn >> 17) & 3) * 8);
6258 } else if (insn & (1 << 17)) {
6259 if ((insn >> 18) & 1)
6260 gen_neon_dup_high16(tmp);
6262 gen_neon_dup_low16(tmp);
6264 for (pass = 0; pass < (q ? 4 : 2); pass++) {
6265 tmp2 = tcg_temp_new_i32();
6266 tcg_gen_mov_i32(tmp2, tmp);
6267 neon_store_reg(rd, pass, tmp2);
6269 tcg_temp_free_i32(tmp);
/* Decode a read of a coprocessor 14 register into core register rt.
 *
 * The register is selected by the instruction fields op1 (insn[23:21]),
 * crn (insn[19:16]), crm (insn[3:0]) and op2 (insn[7:5]); rt is
 * insn[15:12].
 *
 * Cases visible below:
 *  - op1 == 0, crn == 0, op2 == 0: debug registers.  DBGDIDR reads as
 *    zero; DBGDRAR and DBGDSAR read as zero when ENABLE_ARCH_7 holds.
 *  - With ARM_FEATURE_THUMB2EE, op1 == 6, crm == 0, op2 == 0: crn == 0
 *    copies the teecr CPU field to rt and crn == 1 copies teehbr.  The
 *    teehbr case first tests IS_USER(s) && (env->teecr & 1).
 *  - Anything else is reported on stderr as an unknown cp14 read.
 *
 * NOTE(review): the return statements and the action taken when the
 * teehbr user-mode test succeeds are not visible in this excerpt --
 * confirm the return convention against the full file.
 */
6278 static int disas_cp14_read(CPUState * env, DisasContext *s, uint32_t insn)
6280 int crn = (insn >> 16) & 0xf;
6281 int crm = insn & 0xf;
6282 int op1 = (insn >> 21) & 7;
6283 int op2 = (insn >> 5) & 7;
6284 int rt = (insn >> 12) & 0xf;
6287 /* Minimal set of debug registers, since we don't support debug */
6288 if (op1 == 0 && crn == 0 && op2 == 0) {
6291 /* DBGDIDR: just RAZ. In particular this means the
6292 * "debug architecture version" bits will read as
6293 * a reserved value, which should cause Linux to
6294 * not try to use the debug hardware.
6296 tmp = tcg_const_i32(0);
6297 store_reg(s, rt, tmp);
6301 /* DBGDRAR and DBGDSAR: v7 only. Always RAZ since we
6302 * don't implement memory mapped debug components
6304 if (ENABLE_ARCH_7) {
6305 tmp = tcg_const_i32(0);
6306 store_reg(s, rt, tmp);
6315 if (arm_feature(env, ARM_FEATURE_THUMB2EE)) {
6316 if (op1 == 6 && crn == 0 && crm == 0 && op2 == 0) {
6320 tmp = load_cpu_field(teecr);
6321 store_reg(s, rt, tmp);
6324 if (op1 == 6 && crn == 1 && crm == 0 && op2 == 0) {
6326 if (IS_USER(s) && (env->teecr & 1))
6328 tmp = load_cpu_field(teehbr);
6329 store_reg(s, rt, tmp);
6333 fprintf(stderr, "Unknown cp14 read op1:%d crn:%d crm:%d op2:%d\n",
6334 op1, crn, crm, op2);
/* Decode a write of core register rt to a coprocessor 14 register.
 *
 * Field extraction matches the cp14 read path: op1 is insn[23:21],
 * crn is insn[19:16], crm is insn[3:0], op2 is insn[7:5] and rt is
 * insn[15:12].
 *
 * Only ThumbEE registers are handled, and only when the CPU has
 * ARM_FEATURE_THUMB2EE (op1 == 6, crm == 0, op2 == 0):
 *  - crn == 0: rt is passed to the set_teecr helper, then the
 *    temporary holding it is freed.
 *  - crn == 1: rt is stored to the teehbr CPU field, after a test of
 *    IS_USER(s) && (env->teecr & 1).
 * Anything else is reported on stderr as an unknown cp14 write.
 *
 * NOTE(review): the return statements and the action taken when the
 * teehbr user-mode test succeeds are not visible in this excerpt --
 * confirm the return convention against the full file.
 */
6338 static int disas_cp14_write(CPUState * env, DisasContext *s, uint32_t insn)
6340 int crn = (insn >> 16) & 0xf;
6341 int crm = insn & 0xf;
6342 int op1 = (insn >> 21) & 7;
6343 int op2 = (insn >> 5) & 7;
6344 int rt = (insn >> 12) & 0xf;
6347 if (arm_feature(env, ARM_FEATURE_THUMB2EE)) {
6348 if (op1 == 6 && crn == 0 && crm == 0 && op2 == 0) {
6352 tmp = load_reg(s, rt);
6353 gen_helper_set_teecr(cpu_env, tmp);
6354 tcg_temp_free_i32(tmp);
6357 if (op1 == 6 && crn == 1 && crm == 0 && op2 == 0) {
6359 if (IS_USER(s) && (env->teecr & 1))
6361 tmp = load_reg(s, rt);
6362 store_cpu_field(tmp, teehbr);
6366 fprintf(stderr, "Unknown cp14 write op1:%d crn:%d crm:%d op2:%d\n",
6367 op1, crn, crm, op2);
/* Dispatch a coprocessor instruction to the decoder for its coprocessor.
 *
 * The coprocessor number is taken from insn[11:8].  Depending on it and
 * on the CPU features, the instruction is handed to disas_iwmmxt_insn()
 * or disas_dsp_insn() (iWMMXt / XScale), disas_vfp_insn(),
 * disas_cp14_read() or disas_cp14_write() (chosen by insn bit 20),
 * disas_cp15_insn(), or, for an unknown coprocessor, disas_cp_insn() so
 * that a board-provided hook can handle it.  The result of the selected
 * decoder is returned.
 *
 * NOTE(review): the switch statement and its case labels are not
 * visible in this excerpt, so which coprocessor numbers map to which
 * decoder must be confirmed against the full file.
 */
6371 static int disas_coproc_insn(CPUState * env, DisasContext *s, uint32_t insn)
6375 cpnum = (insn >> 8) & 0xf;
/* On XScale the test below is nonzero when bit cpnum (0..13) of
   c15_cpar is clear; the action taken is not visible in this excerpt. */
6376 if (arm_feature(env, ARM_FEATURE_XSCALE)
6377 && ((env->cp15.c15_cpar ^ 0x3fff) & (1 << cpnum)))
6383 if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
6384 return disas_iwmmxt_insn(env, s, insn);
6385 } else if (arm_feature(env, ARM_FEATURE_XSCALE)) {
6386 return disas_dsp_insn(env, s, insn);
6391 return disas_vfp_insn (env, s, insn);
6393 /* Coprocessors 7-15 are architecturally reserved by ARM.
6394 Unfortunately Intel decided to ignore this. */
6395 if (arm_feature(env, ARM_FEATURE_XSCALE))
/* Bit 20 set selects the cp14 read path, clear selects the write path. */
6397 if (insn & (1 << 20))
6398 return disas_cp14_read(env, s, insn);
6400 return disas_cp14_write(env, s, insn);
6402 return disas_cp15_insn (env, s, insn);
6405 /* Unknown coprocessor. See if the board has hooked it. */
6406 return disas_cp_insn (env, s, insn);
6411 /* Store a 64-bit value to a register pair. Clobbers val.
 *
 * The low 32 bits of val are written to core register rlow.  val is
 * then shifted right by 32 in place (which is why it is clobbered) and
 * its low 32 bits are written to core register rhigh.  A fresh 32-bit
 * temporary is allocated for each store_reg() call and is not freed
 * here.
 * NOTE(review): presumably store_reg() takes ownership of the
 * temporary it is given -- confirm against its definition.
 */
6412 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
6415 tmp = tcg_temp_new_i32();
6416 tcg_gen_trunc_i64_i32(tmp, val);
6417 store_reg(s, rlow, tmp);
6418 tmp = tcg_temp_new_i32();
6419 tcg_gen_shri_i64(val, val, 32);
6420 tcg_gen_trunc_i64_i32(tmp, val);
6421 store_reg(s, rhigh, tmp);
6424 /* load a 32-bit value from a register and perform a 64-bit accumulate.
 *
 * Core register rlow is loaded, zero-extended to 64 bits and added to
 * val in place.  Both temporaries created here are freed before the
 * function returns.
 */
6425 static void gen_addq_lo(DisasContext *s, TCGv_i64 val, int rlow)
6430 /* Load value and extend to 64 bits. */
6431 tmp = tcg_temp_new_i64();
6432 tmp2 = load_reg(s, rlow);
6433 tcg_gen_extu_i32_i64(tmp, tmp2);
6434 tcg_temp_free_i32(tmp2);
6435 tcg_gen_add_i64(val, val, tmp);
6436 tcg_temp_free_i64(tmp);
6439 /* load and add a 64-bit value from a register pair.
 *
 * Core registers rlow and rhigh are loaded and concatenated into one
 * 64-bit temporary, rlow supplying the low half and rhigh the high
 * half; that temporary is added to val in place.  All temporaries
 * created here are freed before the function returns.
 */
6440 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
6446 /* Load 64-bit value rd:rn. */
6447 tmpl = load_reg(s, rlow);
6448 tmph = load_reg(s, rhigh);
6449 tmp = tcg_temp_new_i64();
6450 tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
6451 tcg_temp_free_i32(tmpl);
6452 tcg_temp_free_i32(tmph);
6453 tcg_gen_add_i64(val, val, tmp);
6454 tcg_temp_free_i64(tmp);
6457 /* Set N and Z flags from a 64-bit value.
 *
 * The logicq_cc helper reduces val to a 32-bit temporary, which is
 * freed at the end.  val itself is only read.
 * NOTE(review): the statement that consumes the temporary to update
 * the flags is not visible in this excerpt -- confirm in the full file.
 */
6458 static void gen_logicq_cc(TCGv_i64 val)
6460 TCGv tmp = tcg_temp_new_i32();
6461 gen_helper_logicq_cc(tmp, val);
6463 tcg_temp_free_i32(tmp);
6466 /* Load/Store exclusive instructions are implemented by remembering
6467 the value/address loaded, and seeing if these are the same
6468 when the store is performed. This should be sufficient to implement
6469 the architecturally mandated semantics, and avoids having to monitor
6472 In system emulation mode only one CPU will be running at once, so
6473 this sequence is effectively atomic. In user emulation mode we
6474 throw an exception and handle the atomic operation elsewhere.

   gen_load_exclusive emits the load half.  Going by the call sites in
   this file, size is 0 for a byte, 1 for a halfword, 2 for a word and
   3 for a doubleword.  The value loaded from addr is recorded in
   cpu_exclusive_val and written to core register rt.  For the two-word
   form the word at addr + 4 is also loaded, recorded in
   cpu_exclusive_high and written to rt2.  Finally addr is recorded in
   cpu_exclusive_addr.
   NOTE(review): the switch on size and the condition guarding the
   second load are not visible in this excerpt -- confirm them in the
   full file. */
6475 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
6476 TCGv addr, int size)
6482 tmp = gen_ld8u(addr, IS_USER(s));
6485 tmp = gen_ld16u(addr, IS_USER(s));
6489 tmp = gen_ld32(addr, IS_USER(s));
/* Remember the loaded value before handing it to the destination. */
6494 tcg_gen_mov_i32(cpu_exclusive_val, tmp);
6495 store_reg(s, rt, tmp);
/* Second word: loaded from addr + 4 through a scratch address. */
6497 TCGv tmp2 = tcg_temp_new_i32();
6498 tcg_gen_addi_i32(tmp2, addr, 4);
6499 tmp = gen_ld32(tmp2, IS_USER(s));
6500 tcg_temp_free_i32(tmp2);
6501 tcg_gen_mov_i32(cpu_exclusive_high, tmp);
6502 store_reg(s, rt2, tmp);
/* Remember the address the exclusive load was performed on. */
6504 tcg_gen_mov_i32(cpu_exclusive_addr, addr);
/* Emit code that overwrites the recorded exclusive address with -1.
 * NOTE(review): presumably -1 is a value that no later store-exclusive
 * address comparison will match -- confirm.
 */
6507 static void gen_clrex(DisasContext *s)
6509 tcg_gen_movi_i32(cpu_exclusive_addr, -1);
/* Emit the store half of a load/store exclusive pair.
 *
 * rd receives the status result, rt (and rt2 for the two-word form)
 * supply the data, addr is the target address and size uses the same
 * encoding as the call sites in this file: 0 byte, 1 halfword, 2 word,
 * 3 doubleword.
 *
 * Two variants are provided, selected by CONFIG_USER_ONLY.
 *
 * User-only variant: no store is generated here.  addr is copied to
 * cpu_exclusive_test, the operands are packed into cpu_exclusive_info
 * as size | rd << 4 | rt << 8 | rt2 << 12, and EXCP_STREX is raised so
 * the operation can be completed outside the translated code.
 */
6512 #ifdef CONFIG_USER_ONLY
6513 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
6514 TCGv addr, int size)
6516 tcg_gen_mov_i32(cpu_exclusive_test, addr);
6517 tcg_gen_movi_i32(cpu_exclusive_info,
6518 size | (rd << 4) | (rt << 8) | (rt2 << 12));
6519 gen_exception_insn(s, 4, EXCP_STREX);
/* Other variant: the check is generated inline.  The code branches to
 * fail_label when addr differs from cpu_exclusive_addr, or when memory
 * reloaded from addr (and from addr + 4 for the two-word form) differs
 * from cpu_exclusive_val (cpu_exclusive_high).  Otherwise rt (and rt2
 * at addr + 4) are stored and rd is set to 0; on the failure path rd
 * is set to 1.  Both paths then set cpu_exclusive_addr to -1.
 * NOTE(review): the switches on size and the tests guarding the
 * two-word steps are not visible in this excerpt -- confirm them.
 */
6522 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
6523 TCGv addr, int size)
6529 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
6535 fail_label = gen_new_label();
6536 done_label = gen_new_label();
6537 tcg_gen_brcond_i32(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
6540 tmp = gen_ld8u(addr, IS_USER(s));
6543 tmp = gen_ld16u(addr, IS_USER(s));
6547 tmp = gen_ld32(addr, IS_USER(s));
6552 tcg_gen_brcond_i32(TCG_COND_NE, tmp, cpu_exclusive_val, fail_label);
6553 tcg_temp_free_i32(tmp);
6555 TCGv tmp2 = tcg_temp_new_i32();
6556 tcg_gen_addi_i32(tmp2, addr, 4);
6557 tmp = gen_ld32(tmp2, IS_USER(s));
6558 tcg_temp_free_i32(tmp2);
6559 tcg_gen_brcond_i32(TCG_COND_NE, tmp, cpu_exclusive_high, fail_label);
6560 tcg_temp_free_i32(tmp);
6562 tmp = load_reg(s, rt);
6565 gen_st8(tmp, addr, IS_USER(s));
6568 gen_st16(tmp, addr, IS_USER(s));
6572 gen_st32(tmp, addr, IS_USER(s));
6578 tcg_gen_addi_i32(addr, addr, 4);
6579 tmp = load_reg(s, rt2);
6580 gen_st32(tmp, addr, IS_USER(s));
6582 tcg_gen_movi_i32(cpu_R[rd], 0);
6583 tcg_gen_br(done_label);
6584 gen_set_label(fail_label);
6585 tcg_gen_movi_i32(cpu_R[rd], 1);
6586 gen_set_label(done_label);
6587 tcg_gen_movi_i32(cpu_exclusive_addr, -1);
/* Translate the single ARM-mode instruction located at s->pc.
 *
 * The 32-bit opcode is fetched with ldl_code(s->pc) and decoded in two
 * stages, in the order of the code below:
 *
 *  - Unconditional encodings: NEON data processing and NEON load/store
 *    (each guarded by an ARM_FEATURE_NEON test), the PLD, PLI and v7MP
 *    memory hints, operations that are no-ops because caches are not
 *    emulated, a store of r14 and the SPSR through a banked r13, the
 *    load of PC and CPSR from memory that ends in gen_rfe(), the
 *    immediate form of BLX, iWMMXt and other coprocessor transfers,
 *    and CPS.
 *
 *  - Conditional encodings: a label is created and gen_test_cc() is
 *    emitted with the inverted condition so that generated code can
 *    jump over the instruction.  Decoding then covers 16-bit immediate
 *    moves, PSR moves and the other miscellaneous instructions, data
 *    processing, multiplies, load/store exclusive, SWP, the halfword,
 *    doubleword, byte and word loads and stores, the ARMv6 media
 *    instructions, load/store multiple, branches, coprocessor
 *    instructions and the software interrupt.
 *
 * The function ends with code that raises EXCP_UDEF through
 * gen_exception_insn().
 *
 * NOTE(review): many structural lines (braces, switch and case labels,
 * ARCH() checks, gotos and local declarations) are not visible in this
 * excerpt, so the exact nesting of the cases described above must be
 * confirmed against the full file.
 */
6591 static void disas_arm_insn(CPUState * env, DisasContext *s)
6593 unsigned int cond, insn, val, op1, i, shift, rm, rs, rn, rd, sh;
6600 insn = ldl_code(s->pc);
6603 /* M variants do not implement ARM mode. */
6608 /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
6609 * choose to UNDEF. In ARMv5 and above the space is used
6610 * for miscellaneous unconditional instructions.
6614 /* Unconditional instructions. */
6615 if (((insn >> 25) & 7) == 1) {
6616 /* NEON Data processing. */
6617 if (!arm_feature(env, ARM_FEATURE_NEON))
6620 if (disas_neon_data_insn(env, s, insn))
6624 if ((insn & 0x0f100000) == 0x04000000) {
6625 /* NEON load/store. */
6626 if (!arm_feature(env, ARM_FEATURE_NEON))
6629 if (disas_neon_ls_insn(env, s, insn))
6633 if (((insn & 0x0f30f000) == 0x0510f000) ||
6634 ((insn & 0x0f30f010) == 0x0710f000)) {
6635 if ((insn & (1 << 22)) == 0) {
6637 if (!arm_feature(env, ARM_FEATURE_V7MP)) {
6641 /* Otherwise PLD; v5TE+ */
6645 if (((insn & 0x0f70f000) == 0x0450f000) ||
6646 ((insn & 0x0f70f010) == 0x0650f000)) {
6648 return; /* PLI; V7 */
6650 if (((insn & 0x0f700000) == 0x04100000) ||
6651 ((insn & 0x0f700010) == 0x06100000)) {
6652 if (!arm_feature(env, ARM_FEATURE_V7MP)) {
6655 return; /* v7MP: Unallocated memory hint: must NOP */
6658 if ((insn & 0x0ffffdff) == 0x01010000) {
6661 if (insn & (1 << 9)) {
6662 /* BE8 mode not implemented. */
6666 } else if ((insn & 0x0fffff00) == 0x057ff000) {
6667 switch ((insn >> 4) & 0xf) {
6676 /* We don't emulate caches so these are a no-op. */
6681 } else if ((insn & 0x0e5fffe0) == 0x084d0500) {
6687 op1 = (insn & 0x1f);
6688 addr = tcg_temp_new_i32();
6689 tmp = tcg_const_i32(op1);
6690 gen_helper_get_r13_banked(addr, cpu_env, tmp);
6691 tcg_temp_free_i32(tmp);
6692 i = (insn >> 23) & 3;
6694 case 0: offset = -4; break; /* DA */
6695 case 1: offset = 0; break; /* IA */
6696 case 2: offset = -8; break; /* DB */
6697 case 3: offset = 4; break; /* IB */
6701 tcg_gen_addi_i32(addr, addr, offset);
6702 tmp = load_reg(s, 14);
6703 gen_st32(tmp, addr, 0);
6704 tmp = load_cpu_field(spsr);
6705 tcg_gen_addi_i32(addr, addr, 4);
6706 gen_st32(tmp, addr, 0);
6707 if (insn & (1 << 21)) {
6708 /* Base writeback. */
6710 case 0: offset = -8; break;
6711 case 1: offset = 4; break;
6712 case 2: offset = -4; break;
6713 case 3: offset = 0; break;
6717 tcg_gen_addi_i32(addr, addr, offset);
6718 tmp = tcg_const_i32(op1);
6719 gen_helper_set_r13_banked(cpu_env, tmp, addr);
6720 tcg_temp_free_i32(tmp);
6721 tcg_temp_free_i32(addr);
6723 tcg_temp_free_i32(addr);
6726 } else if ((insn & 0x0e50ffe0) == 0x08100a00) {
6732 rn = (insn >> 16) & 0xf;
6733 addr = load_reg(s, rn);
6734 i = (insn >> 23) & 3;
6736 case 0: offset = -4; break; /* DA */
6737 case 1: offset = 0; break; /* IA */
6738 case 2: offset = -8; break; /* DB */
6739 case 3: offset = 4; break; /* IB */
6743 tcg_gen_addi_i32(addr, addr, offset);
6744 /* Load PC into tmp and CPSR into tmp2. */
6745 tmp = gen_ld32(addr, 0);
6746 tcg_gen_addi_i32(addr, addr, 4);
6747 tmp2 = gen_ld32(addr, 0);
6748 if (insn & (1 << 21)) {
6749 /* Base writeback. */
6751 case 0: offset = -8; break;
6752 case 1: offset = 4; break;
6753 case 2: offset = -4; break;
6754 case 3: offset = 0; break;
6758 tcg_gen_addi_i32(addr, addr, offset);
6759 store_reg(s, rn, addr);
6761 tcg_temp_free_i32(addr);
6763 gen_rfe(s, tmp, tmp2);
6765 } else if ((insn & 0x0e000000) == 0x0a000000) {
6766 /* branch link and change to thumb (blx <offset>) */
6769 val = (uint32_t)s->pc;
6770 tmp = tcg_temp_new_i32();
6771 tcg_gen_movi_i32(tmp, val);
6772 store_reg(s, 14, tmp);
6773 /* Sign-extend the 24-bit offset */
6774 offset = (((int32_t)insn) << 8) >> 8;
6775 /* offset * 4 + bit24 * 2 + (thumb bit) */
6776 val += (offset << 2) | ((insn >> 23) & 2) | 1;
6777 /* pipeline offset */
6779 /* protected by ARCH(5); above, near the start of uncond block */
6782 } else if ((insn & 0x0e000f00) == 0x0c000100) {
6783 if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
6784 /* iWMMXt register transfer. */
6785 if (env->cp15.c15_cpar & (1 << 1))
6786 if (!disas_iwmmxt_insn(env, s, insn))
6789 } else if ((insn & 0x0fe00000) == 0x0c400000) {
6790 /* Coprocessor double register transfer. */
6792 } else if ((insn & 0x0f000010) == 0x0e000010) {
6793 /* Additional coprocessor register transfer. */
6794 } else if ((insn & 0x0ff10020) == 0x01000000) {
6797 /* cps (privileged) */
6801 if (insn & (1 << 19)) {
6802 if (insn & (1 << 8))
6804 if (insn & (1 << 7))
6806 if (insn & (1 << 6))
6808 if (insn & (1 << 18))
6811 if (insn & (1 << 17)) {
6813 val |= (insn & 0x1f);
6816 gen_set_psr_im(s, mask, 0, val);
6823 /* if not always execute, we generate a conditional jump to
6825 s->condlabel = gen_new_label();
6826 gen_test_cc(cond ^ 1, s->condlabel);
6829 if ((insn & 0x0f900000) == 0x03000000) {
6830 if ((insn & (1 << 21)) == 0) {
6832 rd = (insn >> 12) & 0xf;
6833 val = ((insn >> 4) & 0xf000) | (insn & 0xfff);
6834 if ((insn & (1 << 22)) == 0) {
6836 tmp = tcg_temp_new_i32();
6837 tcg_gen_movi_i32(tmp, val);
6840 tmp = load_reg(s, rd);
6841 tcg_gen_ext16u_i32(tmp, tmp);
6842 tcg_gen_ori_i32(tmp, tmp, val << 16);
6844 store_reg(s, rd, tmp);
6846 if (((insn >> 12) & 0xf) != 0xf)
6848 if (((insn >> 16) & 0xf) == 0) {
6849 gen_nop_hint(s, insn & 0xff);
6851 /* CPSR = immediate */
6853 shift = ((insn >> 8) & 0xf) * 2;
6855 val = (val >> shift) | (val << (32 - shift));
6856 i = ((insn & (1 << 22)) != 0);
6857 if (gen_set_psr_im(s, msr_mask(env, s, (insn >> 16) & 0xf, i), i, val))
6861 } else if ((insn & 0x0f900000) == 0x01000000
6862 && (insn & 0x00000090) != 0x00000090) {
6863 /* miscellaneous instructions */
6864 op1 = (insn >> 21) & 3;
6865 sh = (insn >> 4) & 0xf;
6868 case 0x0: /* move program status register */
6871 tmp = load_reg(s, rm);
6872 i = ((op1 & 2) != 0);
6873 if (gen_set_psr(s, msr_mask(env, s, (insn >> 16) & 0xf, i), i, tmp))
6877 rd = (insn >> 12) & 0xf;
6881 tmp = load_cpu_field(spsr);
6883 tmp = tcg_temp_new_i32();
6884 gen_helper_cpsr_read(tmp);
6886 store_reg(s, rd, tmp);
6891 /* branch/exchange thumb (bx). */
6893 tmp = load_reg(s, rm);
6895 } else if (op1 == 3) {
6898 rd = (insn >> 12) & 0xf;
6899 tmp = load_reg(s, rm);
6900 gen_helper_clz(tmp, tmp);
6901 store_reg(s, rd, tmp);
6909 /* Trivial implementation equivalent to bx. */
6910 tmp = load_reg(s, rm);
6921 /* branch link/exchange thumb (blx) */
6922 tmp = load_reg(s, rm);
6923 tmp2 = tcg_temp_new_i32();
6924 tcg_gen_movi_i32(tmp2, s->pc);
6925 store_reg(s, 14, tmp2);
6928 case 0x5: /* saturating add/subtract */
6930 rd = (insn >> 12) & 0xf;
6931 rn = (insn >> 16) & 0xf;
6932 tmp = load_reg(s, rm);
6933 tmp2 = load_reg(s, rn);
6935 gen_helper_double_saturate(tmp2, tmp2);
6937 gen_helper_sub_saturate(tmp, tmp, tmp2);
6939 gen_helper_add_saturate(tmp, tmp, tmp2);
6940 tcg_temp_free_i32(tmp2);
6941 store_reg(s, rd, tmp);
6944 /* SMC instruction (op1 == 3)
6945 and undefined instructions (op1 == 0 || op1 == 2)
6952 gen_exception_insn(s, 4, EXCP_BKPT);
6954 case 0x8: /* signed multiply */
6959 rs = (insn >> 8) & 0xf;
6960 rn = (insn >> 12) & 0xf;
6961 rd = (insn >> 16) & 0xf;
6963 /* (32 * 16) >> 16 */
6964 tmp = load_reg(s, rm);
6965 tmp2 = load_reg(s, rs);
6967 tcg_gen_sari_i32(tmp2, tmp2, 16);
6970 tmp64 = gen_muls_i64_i32(tmp, tmp2);
6971 tcg_gen_shri_i64(tmp64, tmp64, 16);
6972 tmp = tcg_temp_new_i32();
6973 tcg_gen_trunc_i64_i32(tmp, tmp64);
6974 tcg_temp_free_i64(tmp64);
6975 if ((sh & 2) == 0) {
6976 tmp2 = load_reg(s, rn);
6977 gen_helper_add_setq(tmp, tmp, tmp2);
6978 tcg_temp_free_i32(tmp2);
6980 store_reg(s, rd, tmp);
6983 tmp = load_reg(s, rm);
6984 tmp2 = load_reg(s, rs);
6985 gen_mulxy(tmp, tmp2, sh & 2, sh & 4);
6986 tcg_temp_free_i32(tmp2);
6988 tmp64 = tcg_temp_new_i64();
6989 tcg_gen_ext_i32_i64(tmp64, tmp);
6990 tcg_temp_free_i32(tmp);
6991 gen_addq(s, tmp64, rn, rd);
6992 gen_storeq_reg(s, rn, rd, tmp64);
6993 tcg_temp_free_i64(tmp64);
6996 tmp2 = load_reg(s, rn);
6997 gen_helper_add_setq(tmp, tmp, tmp2);
6998 tcg_temp_free_i32(tmp2);
7000 store_reg(s, rd, tmp);
7007 } else if (((insn & 0x0e000000) == 0 &&
7008 (insn & 0x00000090) != 0x90) ||
7009 ((insn & 0x0e000000) == (1 << 25))) {
7010 int set_cc, logic_cc, shiftop;
7012 op1 = (insn >> 21) & 0xf;
7013 set_cc = (insn >> 20) & 1;
7014 logic_cc = table_logic_cc[op1] & set_cc;
7016 /* data processing instruction */
7017 if (insn & (1 << 25)) {
7018 /* immediate operand */
7020 shift = ((insn >> 8) & 0xf) * 2;
7022 val = (val >> shift) | (val << (32 - shift));
7024 tmp2 = tcg_temp_new_i32();
7025 tcg_gen_movi_i32(tmp2, val);
7026 if (logic_cc && shift) {
7027 gen_set_CF_bit31(tmp2);
7032 tmp2 = load_reg(s, rm);
7033 shiftop = (insn >> 5) & 3;
7034 if (!(insn & (1 << 4))) {
7035 shift = (insn >> 7) & 0x1f;
7036 gen_arm_shift_im(tmp2, shiftop, shift, logic_cc);
7038 rs = (insn >> 8) & 0xf;
7039 tmp = load_reg(s, rs);
7040 gen_arm_shift_reg(tmp2, shiftop, tmp, logic_cc);
7043 if (op1 != 0x0f && op1 != 0x0d) {
7044 rn = (insn >> 16) & 0xf;
7045 tmp = load_reg(s, rn);
7049 rd = (insn >> 12) & 0xf;
7052 tcg_gen_and_i32(tmp, tmp, tmp2);
7056 store_reg_bx(env, s, rd, tmp);
7059 tcg_gen_xor_i32(tmp, tmp, tmp2);
7063 store_reg_bx(env, s, rd, tmp);
7066 if (set_cc && rd == 15) {
7067 /* SUBS r15, ... is used for exception return. */
7071 gen_helper_sub_cc(tmp, tmp, tmp2);
7072 gen_exception_return(s, tmp);
7075 gen_helper_sub_cc(tmp, tmp, tmp2);
7077 tcg_gen_sub_i32(tmp, tmp, tmp2);
7079 store_reg_bx(env, s, rd, tmp);
7084 gen_helper_sub_cc(tmp, tmp2, tmp);
7086 tcg_gen_sub_i32(tmp, tmp2, tmp);
7088 store_reg_bx(env, s, rd, tmp);
7092 gen_helper_add_cc(tmp, tmp, tmp2);
7094 tcg_gen_add_i32(tmp, tmp, tmp2);
7096 store_reg_bx(env, s, rd, tmp);
7100 gen_helper_adc_cc(tmp, tmp, tmp2);
7102 gen_add_carry(tmp, tmp, tmp2);
7104 store_reg_bx(env, s, rd, tmp);
7108 gen_helper_sbc_cc(tmp, tmp, tmp2);
7110 gen_sub_carry(tmp, tmp, tmp2);
7112 store_reg_bx(env, s, rd, tmp);
7116 gen_helper_sbc_cc(tmp, tmp2, tmp);
7118 gen_sub_carry(tmp, tmp2, tmp);
7120 store_reg_bx(env, s, rd, tmp);
7124 tcg_gen_and_i32(tmp, tmp, tmp2);
7127 tcg_temp_free_i32(tmp);
7131 tcg_gen_xor_i32(tmp, tmp, tmp2);
7134 tcg_temp_free_i32(tmp);
7138 gen_helper_sub_cc(tmp, tmp, tmp2);
7140 tcg_temp_free_i32(tmp);
7144 gen_helper_add_cc(tmp, tmp, tmp2);
7146 tcg_temp_free_i32(tmp);
7149 tcg_gen_or_i32(tmp, tmp, tmp2);
7153 store_reg_bx(env, s, rd, tmp);
7156 if (logic_cc && rd == 15) {
7157 /* MOVS r15, ... is used for exception return. */
7161 gen_exception_return(s, tmp2);
7166 store_reg_bx(env, s, rd, tmp2);
7170 tcg_gen_andc_i32(tmp, tmp, tmp2);
7174 store_reg_bx(env, s, rd, tmp);
7178 tcg_gen_not_i32(tmp2, tmp2);
7182 store_reg_bx(env, s, rd, tmp2);
7185 if (op1 != 0x0f && op1 != 0x0d) {
7186 tcg_temp_free_i32(tmp2);
7189 /* other instructions */
7190 op1 = (insn >> 24) & 0xf;
7194 /* multiplies, extra load/stores */
7195 sh = (insn >> 5) & 3;
7198 rd = (insn >> 16) & 0xf;
7199 rn = (insn >> 12) & 0xf;
7200 rs = (insn >> 8) & 0xf;
7202 op1 = (insn >> 20) & 0xf;
7204 case 0: case 1: case 2: case 3: case 6:
7206 tmp = load_reg(s, rs);
7207 tmp2 = load_reg(s, rm);
7208 tcg_gen_mul_i32(tmp, tmp, tmp2);
7209 tcg_temp_free_i32(tmp2);
7210 if (insn & (1 << 22)) {
7211 /* Subtract (mls) */
7213 tmp2 = load_reg(s, rn);
7214 tcg_gen_sub_i32(tmp, tmp2, tmp);
7215 tcg_temp_free_i32(tmp2);
7216 } else if (insn & (1 << 21)) {
7218 tmp2 = load_reg(s, rn);
7219 tcg_gen_add_i32(tmp, tmp, tmp2);
7220 tcg_temp_free_i32(tmp2);
7222 if (insn & (1 << 20))
7224 store_reg(s, rd, tmp);
7227 /* 64 bit mul double accumulate (UMAAL) */
7229 tmp = load_reg(s, rs);
7230 tmp2 = load_reg(s, rm);
7231 tmp64 = gen_mulu_i64_i32(tmp, tmp2);
7232 gen_addq_lo(s, tmp64, rn);
7233 gen_addq_lo(s, tmp64, rd);
7234 gen_storeq_reg(s, rn, rd, tmp64);
7235 tcg_temp_free_i64(tmp64);
7237 case 8: case 9: case 10: case 11:
7238 case 12: case 13: case 14: case 15:
7239 /* 64 bit mul: UMULL, UMLAL, SMULL, SMLAL. */
7240 tmp = load_reg(s, rs);
7241 tmp2 = load_reg(s, rm);
7242 if (insn & (1 << 22)) {
7243 tmp64 = gen_muls_i64_i32(tmp, tmp2);
7245 tmp64 = gen_mulu_i64_i32(tmp, tmp2);
7247 if (insn & (1 << 21)) { /* mult accumulate */
7248 gen_addq(s, tmp64, rn, rd);
7250 if (insn & (1 << 20)) {
7251 gen_logicq_cc(tmp64);
7253 gen_storeq_reg(s, rn, rd, tmp64);
7254 tcg_temp_free_i64(tmp64);
7260 rn = (insn >> 16) & 0xf;
7261 rd = (insn >> 12) & 0xf;
7262 if (insn & (1 << 23)) {
7263 /* load/store exclusive */
7264 op1 = (insn >> 21) & 0x3;
7269 addr = tcg_temp_local_new_i32();
7270 load_reg_var(s, addr, rn);
7271 if (insn & (1 << 20)) {
7274 gen_load_exclusive(s, rd, 15, addr, 2);
7276 case 1: /* ldrexd */
7277 gen_load_exclusive(s, rd, rd + 1, addr, 3);
7279 case 2: /* ldrexb */
7280 gen_load_exclusive(s, rd, 15, addr, 0);
7282 case 3: /* ldrexh */
7283 gen_load_exclusive(s, rd, 15, addr, 1);
7292 gen_store_exclusive(s, rd, rm, 15, addr, 2);
7294 case 1: /* strexd */
7295 gen_store_exclusive(s, rd, rm, rm + 1, addr, 3);
7297 case 2: /* strexb */
7298 gen_store_exclusive(s, rd, rm, 15, addr, 0);
7300 case 3: /* strexh */
7301 gen_store_exclusive(s, rd, rm, 15, addr, 1);
7307 tcg_temp_free(addr);
7309 /* SWP instruction */
7312 /* ??? This is not really atomic. However we know
7313 we never have multiple CPUs running in parallel,
7314 so it is good enough. */
7315 addr = load_reg(s, rn);
7316 tmp = load_reg(s, rm);
7317 if (insn & (1 << 22)) {
7318 tmp2 = gen_ld8u(addr, IS_USER(s));
7319 gen_st8(tmp, addr, IS_USER(s));
7321 tmp2 = gen_ld32(addr, IS_USER(s));
7322 gen_st32(tmp, addr, IS_USER(s));
7324 tcg_temp_free_i32(addr);
7325 store_reg(s, rd, tmp2);
7331 /* Misc load/store */
7332 rn = (insn >> 16) & 0xf;
7333 rd = (insn >> 12) & 0xf;
7334 addr = load_reg(s, rn);
7335 if (insn & (1 << 24))
7336 gen_add_datah_offset(s, insn, 0, addr);
7338 if (insn & (1 << 20)) {
7342 tmp = gen_ld16u(addr, IS_USER(s));
7345 tmp = gen_ld8s(addr, IS_USER(s));
7349 tmp = gen_ld16s(addr, IS_USER(s));
7353 } else if (sh & 2) {
7358 tmp = load_reg(s, rd);
7359 gen_st32(tmp, addr, IS_USER(s));
7360 tcg_gen_addi_i32(addr, addr, 4);
7361 tmp = load_reg(s, rd + 1);
7362 gen_st32(tmp, addr, IS_USER(s));
7366 tmp = gen_ld32(addr, IS_USER(s));
7367 store_reg(s, rd, tmp);
7368 tcg_gen_addi_i32(addr, addr, 4);
7369 tmp = gen_ld32(addr, IS_USER(s));
7373 address_offset = -4;
7376 tmp = load_reg(s, rd);
7377 gen_st16(tmp, addr, IS_USER(s));
7380 /* Perform base writeback before the loaded value to
7381 ensure correct behavior with overlapping index registers.
7382 ldrd with base writeback is undefined if the
7383 destination and index registers overlap. */
7384 if (!(insn & (1 << 24))) {
7385 gen_add_datah_offset(s, insn, address_offset, addr);
7386 store_reg(s, rn, addr);
7387 } else if (insn & (1 << 21)) {
7389 tcg_gen_addi_i32(addr, addr, address_offset);
7390 store_reg(s, rn, addr);
7392 tcg_temp_free_i32(addr);
7395 /* Complete the load. */
7396 store_reg(s, rd, tmp);
7405 if (insn & (1 << 4)) {
7407 /* Armv6 Media instructions. */
7409 rn = (insn >> 16) & 0xf;
7410 rd = (insn >> 12) & 0xf;
7411 rs = (insn >> 8) & 0xf;
7412 switch ((insn >> 23) & 3) {
7413 case 0: /* Parallel add/subtract. */
7414 op1 = (insn >> 20) & 7;
7415 tmp = load_reg(s, rn);
7416 tmp2 = load_reg(s, rm);
7417 sh = (insn >> 5) & 7;
7418 if ((op1 & 3) == 0 || sh == 5 || sh == 6)
7420 gen_arm_parallel_addsub(op1, sh, tmp, tmp2);
7421 tcg_temp_free_i32(tmp2);
7422 store_reg(s, rd, tmp);
7425 if ((insn & 0x00700020) == 0) {
7426 /* Halfword pack. */
7427 tmp = load_reg(s, rn);
7428 tmp2 = load_reg(s, rm);
7429 shift = (insn >> 7) & 0x1f;
7430 if (insn & (1 << 6)) {
7434 tcg_gen_sari_i32(tmp2, tmp2, shift);
7435 tcg_gen_andi_i32(tmp, tmp, 0xffff0000);
7436 tcg_gen_ext16u_i32(tmp2, tmp2);
7440 tcg_gen_shli_i32(tmp2, tmp2, shift);
7441 tcg_gen_ext16u_i32(tmp, tmp);
7442 tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
7444 tcg_gen_or_i32(tmp, tmp, tmp2);
7445 tcg_temp_free_i32(tmp2);
7446 store_reg(s, rd, tmp);
7447 } else if ((insn & 0x00200020) == 0x00200000) {
7449 tmp = load_reg(s, rm);
7450 shift = (insn >> 7) & 0x1f;
7451 if (insn & (1 << 6)) {
7454 tcg_gen_sari_i32(tmp, tmp, shift);
7456 tcg_gen_shli_i32(tmp, tmp, shift);
7458 sh = (insn >> 16) & 0x1f;
7459 tmp2 = tcg_const_i32(sh);
7460 if (insn & (1 << 22))
7461 gen_helper_usat(tmp, tmp, tmp2);
7463 gen_helper_ssat(tmp, tmp, tmp2);
7464 tcg_temp_free_i32(tmp2);
7465 store_reg(s, rd, tmp);
7466 } else if ((insn & 0x00300fe0) == 0x00200f20) {
7468 tmp = load_reg(s, rm);
7469 sh = (insn >> 16) & 0x1f;
7470 tmp2 = tcg_const_i32(sh);
7471 if (insn & (1 << 22))
7472 gen_helper_usat16(tmp, tmp, tmp2);
7474 gen_helper_ssat16(tmp, tmp, tmp2);
7475 tcg_temp_free_i32(tmp2);
7476 store_reg(s, rd, tmp);
7477 } else if ((insn & 0x00700fe0) == 0x00000fa0) {
7479 tmp = load_reg(s, rn);
7480 tmp2 = load_reg(s, rm);
7481 tmp3 = tcg_temp_new_i32();
7482 tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUState, GE));
7483 gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
7484 tcg_temp_free_i32(tmp3);
7485 tcg_temp_free_i32(tmp2);
7486 store_reg(s, rd, tmp);
7487 } else if ((insn & 0x000003e0) == 0x00000060) {
7488 tmp = load_reg(s, rm);
7489 shift = (insn >> 10) & 3;
7490 /* ??? In many cases it's not necessary to do a
7491 rotate, a shift is sufficient. */
7493 tcg_gen_rotri_i32(tmp, tmp, shift * 8);
7494 op1 = (insn >> 20) & 7;
7496 case 0: gen_sxtb16(tmp); break;
7497 case 2: gen_sxtb(tmp); break;
7498 case 3: gen_sxth(tmp); break;
7499 case 4: gen_uxtb16(tmp); break;
7500 case 6: gen_uxtb(tmp); break;
7501 case 7: gen_uxth(tmp); break;
7502 default: goto illegal_op;
7505 tmp2 = load_reg(s, rn);
7506 if ((op1 & 3) == 0) {
7507 gen_add16(tmp, tmp2);
7509 tcg_gen_add_i32(tmp, tmp, tmp2);
7510 tcg_temp_free_i32(tmp2);
7513 store_reg(s, rd, tmp);
7514 } else if ((insn & 0x003f0f60) == 0x003f0f20) {
7516 tmp = load_reg(s, rm);
7517 if (insn & (1 << 22)) {
7518 if (insn & (1 << 7)) {
7522 gen_helper_rbit(tmp, tmp);
7525 if (insn & (1 << 7))
7528 tcg_gen_bswap32_i32(tmp, tmp);
7530 store_reg(s, rd, tmp);
7535 case 2: /* Multiplies (Type 3). */
7536 tmp = load_reg(s, rm);
7537 tmp2 = load_reg(s, rs);
7538 if (insn & (1 << 20)) {
7539 /* Signed multiply most significant [accumulate].
7540 (SMMUL, SMMLA, SMMLS) */
7541 tmp64 = gen_muls_i64_i32(tmp, tmp2);
7544 tmp = load_reg(s, rd);
7545 if (insn & (1 << 6)) {
7546 tmp64 = gen_subq_msw(tmp64, tmp);
7548 tmp64 = gen_addq_msw(tmp64, tmp);
7551 if (insn & (1 << 5)) {
7552 tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
7554 tcg_gen_shri_i64(tmp64, tmp64, 32);
7555 tmp = tcg_temp_new_i32();
7556 tcg_gen_trunc_i64_i32(tmp, tmp64);
7557 tcg_temp_free_i64(tmp64);
7558 store_reg(s, rn, tmp);
7560 if (insn & (1 << 5))
7561 gen_swap_half(tmp2);
7562 gen_smul_dual(tmp, tmp2);
7563 if (insn & (1 << 6)) {
7564 /* This subtraction cannot overflow. */
7565 tcg_gen_sub_i32(tmp, tmp, tmp2);
7567 /* This addition cannot overflow 32 bits;
7568 * however it may overflow considered as a signed
7569 * operation, in which case we must set the Q flag.
7571 gen_helper_add_setq(tmp, tmp, tmp2);
7573 tcg_temp_free_i32(tmp2);
7574 if (insn & (1 << 22)) {
7575 /* smlald, smlsld */
7576 tmp64 = tcg_temp_new_i64();
7577 tcg_gen_ext_i32_i64(tmp64, tmp);
7578 tcg_temp_free_i32(tmp);
7579 gen_addq(s, tmp64, rd, rn);
7580 gen_storeq_reg(s, rd, rn, tmp64);
7581 tcg_temp_free_i64(tmp64);
7583 /* smuad, smusd, smlad, smlsd */
7586 tmp2 = load_reg(s, rd);
7587 gen_helper_add_setq(tmp, tmp, tmp2);
7588 tcg_temp_free_i32(tmp2);
7590 store_reg(s, rn, tmp);
7595 op1 = ((insn >> 17) & 0x38) | ((insn >> 5) & 7);
7597 case 0: /* Unsigned sum of absolute differences. */
7599 tmp = load_reg(s, rm);
7600 tmp2 = load_reg(s, rs);
7601 gen_helper_usad8(tmp, tmp, tmp2);
7602 tcg_temp_free_i32(tmp2);
7604 tmp2 = load_reg(s, rd);
7605 tcg_gen_add_i32(tmp, tmp, tmp2);
7606 tcg_temp_free_i32(tmp2);
7608 store_reg(s, rn, tmp);
7610 case 0x20: case 0x24: case 0x28: case 0x2c:
7611 /* Bitfield insert/clear. */
7613 shift = (insn >> 7) & 0x1f;
7614 i = (insn >> 16) & 0x1f;
7617 tmp = tcg_temp_new_i32();
7618 tcg_gen_movi_i32(tmp, 0);
7620 tmp = load_reg(s, rm);
7623 tmp2 = load_reg(s, rd);
7624 gen_bfi(tmp, tmp2, tmp, shift, (1u << i) - 1);
7625 tcg_temp_free_i32(tmp2);
7627 store_reg(s, rd, tmp);
7629 case 0x12: case 0x16: case 0x1a: case 0x1e: /* sbfx */
7630 case 0x32: case 0x36: case 0x3a: case 0x3e: /* ubfx */
7632 tmp = load_reg(s, rm);
7633 shift = (insn >> 7) & 0x1f;
7634 i = ((insn >> 16) & 0x1f) + 1;
7639 gen_ubfx(tmp, shift, (1u << i) - 1);
7641 gen_sbfx(tmp, shift, i);
7644 store_reg(s, rd, tmp);
7654 /* Check for undefined extension instructions
7655 * per the ARM Bible IE:
7656 * xxxx 0111 1111 xxxx xxxx xxxx 1111 xxxx
7658 sh = (0xf << 20) | (0xf << 4);
7659 if (op1 == 0x7 && ((insn & sh) == sh))
7663 /* load/store byte/word */
7664 rn = (insn >> 16) & 0xf;
7665 rd = (insn >> 12) & 0xf;
7666 tmp2 = load_reg(s, rn);
7667 i = (IS_USER(s) || (insn & 0x01200000) == 0x00200000);
7668 if (insn & (1 << 24))
7669 gen_add_data_offset(s, insn, tmp2);
7670 if (insn & (1 << 20)) {
7672 if (insn & (1 << 22)) {
7673 tmp = gen_ld8u(tmp2, i);
7675 tmp = gen_ld32(tmp2, i);
7679 tmp = load_reg(s, rd);
7680 if (insn & (1 << 22))
7681 gen_st8(tmp, tmp2, i);
7683 gen_st32(tmp, tmp2, i);
7685 if (!(insn & (1 << 24))) {
7686 gen_add_data_offset(s, insn, tmp2);
7687 store_reg(s, rn, tmp2);
7688 } else if (insn & (1 << 21)) {
7689 store_reg(s, rn, tmp2);
7691 tcg_temp_free_i32(tmp2);
7693 if (insn & (1 << 20)) {
7694 /* Complete the load. */
7695 store_reg_from_load(env, s, rd, tmp);
7701 int j, n, user, loaded_base;
7703 /* load/store multiple words */
7704 /* XXX: store correct base if write back */
7706 if (insn & (1 << 22)) {
7708 goto illegal_op; /* only usable in supervisor mode */
7710 if ((insn & (1 << 15)) == 0)
7713 rn = (insn >> 16) & 0xf;
7714 addr = load_reg(s, rn);
7716 /* compute total size */
7718 TCGV_UNUSED(loaded_var);
7721 if (insn & (1 << i))
7724 /* XXX: test invalid n == 0 case ? */
7725 if (insn & (1 << 23)) {
7726 if (insn & (1 << 24)) {
7728 tcg_gen_addi_i32(addr, addr, 4);
7730 /* post increment */
7733 if (insn & (1 << 24)) {
7735 tcg_gen_addi_i32(addr, addr, -(n * 4));
7737 /* post decrement */
7739 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7744 if (insn & (1 << i)) {
7745 if (insn & (1 << 20)) {
7747 tmp = gen_ld32(addr, IS_USER(s));
7749 tmp2 = tcg_const_i32(i);
7750 gen_helper_set_user_reg(tmp2, tmp);
7751 tcg_temp_free_i32(tmp2);
7752 tcg_temp_free_i32(tmp);
7753 } else if (i == rn) {
7757 store_reg_from_load(env, s, i, tmp);
7762 /* special case: r15 = PC + 8 */
7763 val = (long)s->pc + 4;
7764 tmp = tcg_temp_new_i32();
7765 tcg_gen_movi_i32(tmp, val);
7767 tmp = tcg_temp_new_i32();
7768 tmp2 = tcg_const_i32(i);
7769 gen_helper_get_user_reg(tmp, tmp2);
7770 tcg_temp_free_i32(tmp2);
7772 tmp = load_reg(s, i);
7774 gen_st32(tmp, addr, IS_USER(s));
7777 /* no need to add after the last transfer */
7779 tcg_gen_addi_i32(addr, addr, 4);
7782 if (insn & (1 << 21)) {
7784 if (insn & (1 << 23)) {
7785 if (insn & (1 << 24)) {
7788 /* post increment */
7789 tcg_gen_addi_i32(addr, addr, 4);
7792 if (insn & (1 << 24)) {
7795 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7797 /* post decrement */
7798 tcg_gen_addi_i32(addr, addr, -(n * 4));
7801 store_reg(s, rn, addr);
7803 tcg_temp_free_i32(addr);
7806 store_reg(s, rn, loaded_var);
7808 if ((insn & (1 << 22)) && !user) {
7809 /* Restore CPSR from SPSR. */
7810 tmp = load_cpu_field(spsr);
7811 gen_set_cpsr(tmp, 0xffffffff);
7812 tcg_temp_free_i32(tmp);
7813 s->is_jmp = DISAS_UPDATE;
7822 /* branch (and link) */
7823 val = (int32_t)s->pc;
7824 if (insn & (1 << 24)) {
7825 tmp = tcg_temp_new_i32();
7826 tcg_gen_movi_i32(tmp, val);
7827 store_reg(s, 14, tmp);
7829 offset = (((int32_t)insn << 8) >> 8);
7830 val += (offset << 2) + 4;
7838 if (disas_coproc_insn(env, s, insn))
7843 gen_set_pc_im(s->pc);
7844 s->is_jmp = DISAS_SWI;
7848 gen_exception_insn(s, 4, EXCP_UDEF);
7854 /* Return true if this is a Thumb-2 logical op. */
/* OP is a data-processing opcode.  The constant-shift decoder in
   disas_thumb2_insn() extracts it as (insn >> 21) & 0xf and ANDs the result
   of this predicate with the S bit (insn bit 20) to form the last argument it
   passes to gen_arm_shift_im(). */
7856 thumb2_logic_op(int op)
7861 /* Generate code for a Thumb-2 data processing operation. If CONDS is nonzero
7862 then set condition code flags based on the result of the operation.
7863 If SHIFTER_OUT is nonzero then set the carry flag for logical operations
7864 to the high bit of T1.
7865 Returns zero if the opcode is valid. */
/* Operand roles, as used by every operation below: T0 is the first operand
   and also receives the result; T1 is the second operand.  OP selects the
   operation.  Where a gen_helper_*_cc call sits next to a plain tcg_gen_* or
   gen_* call for the same arithmetic, the pair is presumably the
   flag-setting and non-flag-setting variant chosen by CONDS -- TODO confirm
   against the selecting conditions. */
7868 gen_thumb2_data_op(DisasContext *s, int op, int conds, uint32_t shifter_out, TCGv t0, TCGv t1)
/* t0 = t0 & t1 */
7875 tcg_gen_and_i32(t0, t0, t1);
/* t0 = t0 & ~t1 (and-with-complement) */
7879 tcg_gen_andc_i32(t0, t0, t1);
/* t0 = t0 | t1 */
7883 tcg_gen_or_i32(t0, t0, t1);
/* t0 = t0 | ~t1 (or-with-complement) */
7887 tcg_gen_orc_i32(t0, t0, t1);
/* t0 = t0 ^ t1 */
7891 tcg_gen_xor_i32(t0, t0, t1);
/* Addition: helper form, then plain form. */
7896 gen_helper_add_cc(t0, t0, t1);
7898 tcg_gen_add_i32(t0, t0, t1);
/* Add with carry (helper form). */
7902 gen_helper_adc_cc(t0, t0, t1);
/* Subtract with carry: helper form, then gen_sub_carry(). */
7908 gen_helper_sbc_cc(t0, t0, t1);
7910 gen_sub_carry(t0, t0, t1);
/* Subtraction t0 - t1: helper form, then plain form. */
7914 gen_helper_sub_cc(t0, t0, t1);
7916 tcg_gen_sub_i32(t0, t0, t1);
/* Reverse subtraction: operands are swapped, so the result is t1 - t0. */
7920 gen_helper_sub_cc(t0, t1, t0);
7922 tcg_gen_sub_i32(t0, t1, t0);
/* Opcode values with no operation assigned. */
7924 default: /* 5, 6, 7, 9, 12, 15. */
/* Carry flag taken from bit 31 of T1; per the header comment this is the
   SHIFTER_OUT handling for logical operations. */
7930 gen_set_CF_bit31(t1);
/* disas_thumb2_insn() decodes the 32-bit Thumb encoding whose first halfword
   is INSN_HW1.  This opening part:
     1. on cores with neither ARM_FEATURE_THUMB2 nor ARM_FEATURE_M, deals with
        the two halves of bl/blx individually;
     2. reads the second halfword at s->pc and merges it with INSN_HW1;
     3. extracts the register fields shared by the decode cases that follow. */
7935 /* Translate a 32-bit thumb instruction. Returns nonzero if the instruction
7937 static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
7939 uint32_t insn, imm, shift, offset;
7940 uint32_t rd, rn, rm, rs;
7951 if (!(arm_feature(env, ARM_FEATURE_THUMB2)
7952 || arm_feature (env, ARM_FEATURE_M))) {
7953 /* Thumb-1 cores may need to treat bl and blx as a pair of
7954 16-bit instructions to get correct prefetch abort behavior. */
7956 if ((insn & (1 << 12)) == 0) {
7958 /* Second half of blx. */
/* tmp = r14 + (insn[10:0] << 1), then forced to word alignment. */
7959 offset = ((insn & 0x7ff) << 1);
7960 tmp = load_reg(s, 14);
7961 tcg_gen_addi_i32(tmp, tmp, offset);
7962 tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
/* r14 is rewritten with s->pc, bit 0 set. */
7964 tmp2 = tcg_temp_new_i32();
7965 tcg_gen_movi_i32(tmp2, s->pc | 1);
7966 store_reg(s, 14, tmp2);
7970 if (insn & (1 << 11)) {
7971 /* Second half of bl. */
/* Same as above, except bit 0 is set in the offset and the sum is not
   realigned. */
7972 offset = ((insn & 0x7ff) << 1) | 1;
7973 tmp = load_reg(s, 14);
7974 tcg_gen_addi_i32(tmp, tmp, offset);
7976 tmp2 = tcg_temp_new_i32();
7977 tcg_gen_movi_i32(tmp2, s->pc | 1);
7978 store_reg(s, 14, tmp2);
/* True when s->pc has no offset within its page. */
7982 if ((s->pc & ~TARGET_PAGE_MASK) == 0) {
7983 /* Instruction spans a page boundary. Implement it as two
7984 16-bit instructions in case the second half causes an
7986 offset = ((int32_t)insn << 21) >> 9;
7987 tcg_gen_movi_i32(cpu_R[14], s->pc + 2 + offset);
7990 /* Fall through to 32-bit decode. */
/* Read the second halfword; INSN_HW1 goes into bits [31:16]. */
7993 insn = lduw_code(s->pc);
7995 insn |= (uint32_t)insn_hw1 << 16;
/* Taken for every encoding that does not match 0xf000e800 under mask
   0xf800e800.  NOTE(review): this looks like "anything other than the bl/blx
   pair" -- confirm. */
7997 if ((insn & 0xf800e800) != 0xf000e800) {
/* Common register fields: rn = insn[19:16], rs = insn[15:12],
   rd = insn[11:8]. */
8001 rn = (insn >> 16) & 0xf;
8002 rs = (insn >> 12) & 0xf;
8003 rd = (insn >> 8) & 0xf;
/* Top-level dispatch on insn[28:25]. */
8005 switch ((insn >> 25) & 0xf) {
8006 case 0: case 1: case 2: case 3:
8007 /* 16-bit instructions. Should never happen. */
/* Bit 22 set: doubleword / exclusive / table-branch group. */
8010 if (insn & (1 << 22)) {
8011 /* Other load/store, table branch. */
/* Either bit 24 or bit 21 set. */
8012 if (insn & 0x01200000) {
8013 /* Load/store doubleword. */
/* Base address: either the word-aligned s->pc or register rn.
   Presumably the s->pc form is the rn == 15 case -- TODO confirm. */
8015 addr = tcg_temp_new_i32();
8016 tcg_gen_movi_i32(addr, s->pc & ~3);
8018 addr = load_reg(s, rn);
/* Offset is insn[7:0] scaled by 4. */
8020 offset = (insn & 0xff) * 4;
8021 if ((insn & (1 << 23)) == 0)
/* Bit 24 set: the offset is applied before the access. */
8023 if (insn & (1 << 24)) {
8024 tcg_gen_addi_i32(addr, addr, offset);
/* Bit 20 set: load.  rs receives the word at addr, rd the word at
   addr + 4. */
8027 if (insn & (1 << 20)) {
8029 tmp = gen_ld32(addr, IS_USER(s));
8030 store_reg(s, rs, tmp);
8031 tcg_gen_addi_i32(addr, addr, 4);
8032 tmp = gen_ld32(addr, IS_USER(s));
8033 store_reg(s, rd, tmp);
/* Store: rs goes to addr, rd to addr + 4. */
8036 tmp = load_reg(s, rs);
8037 gen_st32(tmp, addr, IS_USER(s));
8038 tcg_gen_addi_i32(addr, addr, 4);
8039 tmp = load_reg(s, rd);
8040 gen_st32(tmp, addr, IS_USER(s));
8042 if (insn & (1 << 21)) {
8043 /* Base writeback. */
/* The "- 4" appears to undo the 4 added to addr between the two word
   accesses above. */
8046 tcg_gen_addi_i32(addr, addr, offset - 4);
8047 store_reg(s, rn, addr);
8049 tcg_temp_free_i32(addr);
8051 } else if ((insn & (1 << 23)) == 0) {
8052 /* Load/store exclusive word. */
/* Address is rn + (insn[7:0] << 2); bit 20 selects load versus store. */
8053 addr = tcg_temp_local_new();
8054 load_reg_var(s, addr, rn);
8055 tcg_gen_addi_i32(addr, addr, (insn & 0xff) << 2);
8056 if (insn & (1 << 20)) {
8057 gen_load_exclusive(s, rs, 15, addr, 2);
8059 gen_store_exclusive(s, rd, rs, 15, addr, 2);
8061 tcg_temp_free(addr);
8062 } else if ((insn & (1 << 6)) == 0) {
/* Bit 6 clear.  The code below loads a table entry from base + rm (rm is
   added a second time for the halfword form, bit 4), doubles the entry,
   adds s->pc and writes the result to r15.  The base is either s->pc or
   register rn. */
8065 addr = tcg_temp_new_i32();
8066 tcg_gen_movi_i32(addr, s->pc);
8068 addr = load_reg(s, rn);
8070 tmp = load_reg(s, rm);
8071 tcg_gen_add_i32(addr, addr, tmp);
8072 if (insn & (1 << 4)) {
8074 tcg_gen_add_i32(addr, addr, tmp);
8075 tcg_temp_free_i32(tmp);
8076 tmp = gen_ld16u(addr, IS_USER(s));
8078 tcg_temp_free_i32(tmp);
8079 tmp = gen_ld8u(addr, IS_USER(s));
8081 tcg_temp_free_i32(addr);
8082 tcg_gen_shli_i32(tmp, tmp, 1);
8083 tcg_gen_addi_i32(tmp, tmp, s->pc);
8084 store_reg(s, 15, tmp);
8086 /* Load/store exclusive byte/halfword/doubleword. */
/* op = insn[5:4] is passed on as the last argument of the exclusive
   load/store generators. */
8088 op = (insn >> 4) & 0x3;
8092 addr = tcg_temp_local_new();
8093 load_reg_var(s, addr, rn);
8094 if (insn & (1 << 20)) {
8095 gen_load_exclusive(s, rs, rd, addr, op);
8097 gen_store_exclusive(s, rm, rs, rd, addr, op);
8099 tcg_temp_free(addr);
8102 /* Load/store multiple, RFE, SRS. */
/* Bits 23 and 24 equal. */
8103 if (((insn >> 23) & 1) == ((insn >> 24) & 1)) {
8104 /* Not available in user mode. */
8107 if (insn & (1 << 20)) {
/* Two consecutive words are read starting at rn (or rn - 8 when bit 24 is
   clear) and handed to gen_rfe(). */
8109 addr = load_reg(s, rn);
8110 if ((insn & (1 << 24)) == 0)
8111 tcg_gen_addi_i32(addr, addr, -8);
8112 /* Load PC into tmp and CPSR into tmp2. */
8113 tmp = gen_ld32(addr, 0);
8114 tcg_gen_addi_i32(addr, addr, 4);
8115 tmp2 = gen_ld32(addr, 0);
8116 if (insn & (1 << 21)) {
8117 /* Base writeback. */
/* addr is 4 past the first word here, so +4 gives start + 8 and -4 gives
   back the start address. */
8118 if (insn & (1 << 24)) {
8119 tcg_gen_addi_i32(addr, addr, 4);
8121 tcg_gen_addi_i32(addr, addr, -4);
8123 store_reg(s, rn, addr);
8125 tcg_temp_free_i32(addr);
8127 gen_rfe(s, tmp, tmp2);
/* Store form: the base is the banked r13 selected by op; r14 and then the
   CPSR value are stored at consecutive words. */
8131 addr = tcg_temp_new_i32();
8132 tmp = tcg_const_i32(op);
8133 gen_helper_get_r13_banked(addr, cpu_env, tmp);
8134 tcg_temp_free_i32(tmp);
8135 if ((insn & (1 << 24)) == 0) {
8136 tcg_gen_addi_i32(addr, addr, -8);
8138 tmp = load_reg(s, 14);
8139 gen_st32(tmp, addr, 0);
8140 tcg_gen_addi_i32(addr, addr, 4);
8141 tmp = tcg_temp_new_i32();
8142 gen_helper_cpsr_read(tmp);
8143 gen_st32(tmp, addr, 0);
/* Bit 21: write the adjusted address back to the banked r13. */
8144 if (insn & (1 << 21)) {
8145 if ((insn & (1 << 24)) == 0) {
8146 tcg_gen_addi_i32(addr, addr, -4);
8148 tcg_gen_addi_i32(addr, addr, 4);
8150 tmp = tcg_const_i32(op);
8151 gen_helper_set_r13_banked(cpu_env, tmp, addr);
8152 tcg_temp_free_i32(tmp);
8154 tcg_temp_free_i32(addr);
8158 int i, loaded_base = 0;
8160 /* Load/store multiple. */
8161 addr = load_reg(s, rn);
/* First pass over the register list (insn[15:0]); offset is presumably
   accumulated here, 4 bytes per listed register -- TODO confirm. */
8163 for (i = 0; i < 16; i++) {
8164 if (insn & (1 << i))
/* Bit 24 set: start below the base by the total transfer size. */
8167 if (insn & (1 << 24)) {
8168 tcg_gen_addi_i32(addr, addr, -offset);
8171 TCGV_UNUSED(loaded_var);
/* Second pass: transfer each listed register, advancing addr by 4. */
8172 for (i = 0; i < 16; i++) {
8173 if ((insn & (1 << i)) == 0)
8175 if (insn & (1 << 20)) {
8177 tmp = gen_ld32(addr, IS_USER(s));
/* A load into the base register itself is special-cased (see the use of
   loaded_var after the loop). */
8180 } else if (i == rn) {
8184 store_reg(s, i, tmp);
8188 tmp = load_reg(s, i);
8189 gen_st32(tmp, addr, IS_USER(s));
8191 tcg_gen_addi_i32(addr, addr, 4);
8194 store_reg(s, rn, loaded_var);
8196 if (insn & (1 << 21)) {
8197 /* Base register writeback. */
8198 if (insn & (1 << 24)) {
8199 tcg_gen_addi_i32(addr, addr, -offset);
8201 /* Fault if writeback register is in register list. */
8202 if (insn & (1 << rn))
8204 store_reg(s, rn, addr);
8206 tcg_temp_free_i32(addr);
/* op = insn[24:21]. */
8213 op = (insn >> 21) & 0xf;
8215 /* Halfword pack. */
8216 tmp = load_reg(s, rn);
8217 tmp2 = load_reg(s, rm);
/* Shift amount: insn[14:12] -> shift[4:2], insn[7:6] -> shift[1:0]. */
8218 shift = ((insn >> 10) & 0x1c) | ((insn >> 6) & 0x3);
8219 if (insn & (1 << 5)) {
/* Bit 5 set: rm is shifted right arithmetically; the result keeps the top
   half of rn and the low half of the shifted rm. */
8223 tcg_gen_sari_i32(tmp2, tmp2, shift);
8224 tcg_gen_andi_i32(tmp, tmp, 0xffff0000);
8225 tcg_gen_ext16u_i32(tmp2, tmp2);
/* Otherwise rm is shifted left; the result keeps the low half of rn and the
   top half of the shifted rm. */
8229 tcg_gen_shli_i32(tmp2, tmp2, shift);
8230 tcg_gen_ext16u_i32(tmp, tmp);
8231 tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
8233 tcg_gen_or_i32(tmp, tmp, tmp2);
8234 tcg_temp_free_i32(tmp2);
8235 store_reg(s, rd, tmp);
8237 /* Data processing register constant shift. */
/* First operand: constant zero or register rn. */
8239 tmp = tcg_temp_new_i32();
8240 tcg_gen_movi_i32(tmp, 0);
8242 tmp = load_reg(s, rn);
8244 tmp2 = load_reg(s, rm);
/* shiftop = insn[5:4]; shift amount assembled as in the halfword-pack case;
   conds is the S bit (insn bit 20). */
8246 shiftop = (insn >> 4) & 3;
8247 shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
8248 conds = (insn & (1 << 20)) != 0;
8249 logic_cc = (conds && thumb2_logic_op(op));
8250 gen_arm_shift_im(tmp2, shiftop, shift, logic_cc);
/* A nonzero return from gen_thumb2_data_op() means the opcode was not
   valid. */
8251 if (gen_thumb2_data_op(s, op, conds, 0, tmp, tmp2))
8253 tcg_temp_free_i32(tmp2);
8255 store_reg(s, rd, tmp);
8257 tcg_temp_free_i32(tmp);
8261 case 13: /* Misc data processing. */
/* Sub-opcode: insn[24:23] -> op[2:1], insn[7] -> op[0].  For op < 4 the
   check below additionally requires insn[15:12] == 0xf. */
8262 op = ((insn >> 22) & 6) | ((insn >> 7) & 1);
8263 if (op < 4 && (insn & 0xf000) != 0xf000)
8266 case 0: /* Register controlled shift. */
8267 tmp = load_reg(s, rn);
8268 tmp2 = load_reg(s, rm);
8269 if ((insn & 0x70) != 0)
/* Shift type is insn[22:21]; bit 20 is the flag argument. */
8271 op = (insn >> 21) & 3;
8272 logic_cc = (insn & (1 << 20)) != 0;
8273 gen_arm_shift_reg(tmp, op, tmp2, logic_cc);
8276 store_reg_bx(env, s, rd, tmp);
8278 case 1: /* Sign/zero extend. */
8279 tmp = load_reg(s, rm);
/* Rotation is insn[5:4] bytes. */
8280 shift = (insn >> 4) & 3;
8281 /* ??? In many cases it's not necessary to do a
8282 rotate, a shift is sufficient. */
8284 tcg_gen_rotri_i32(tmp, tmp, shift * 8);
/* Extension kind is insn[22:20]. */
8285 op = (insn >> 20) & 7;
8287 case 0: gen_sxth(tmp); break;
8288 case 1: gen_uxth(tmp); break;
8289 case 2: gen_sxtb16(tmp); break;
8290 case 3: gen_uxtb16(tmp); break;
8291 case 4: gen_sxtb(tmp); break;
8292 case 5: gen_uxtb(tmp); break;
8293 default: goto illegal_op;
/* Accumulate rn into the extended value; op 2 and 3 (the *16 forms) use the
   gen_add16() helper instead of a plain 32-bit add. */
8296 tmp2 = load_reg(s, rn);
8297 if ((op >> 1) == 1) {
8298 gen_add16(tmp, tmp2);
8300 tcg_gen_add_i32(tmp, tmp, tmp2);
8301 tcg_temp_free_i32(tmp2);
8304 store_reg(s, rd, tmp);
8306 case 2: /* SIMD add/subtract. */
/* op = insn[22:20], shift = insn[6:4]; combinations with (op & 3) == 3 or
   (shift & 3) == 3 are filtered out by the check below. */
8307 op = (insn >> 20) & 7;
8308 shift = (insn >> 4) & 7;
8309 if ((op & 3) == 3 || (shift & 3) == 3)
8311 tmp = load_reg(s, rn);
8312 tmp2 = load_reg(s, rm);
8313 gen_thumb2_parallel_addsub(op, shift, tmp, tmp2);
8314 tcg_temp_free_i32(tmp2);
8315 store_reg(s, rd, tmp);
8317 case 3: /* Other data processing. */
/* op: insn[22:20] -> op[5:3], insn[6:4] -> op[2:0]. */
8318 op = ((insn >> 17) & 0x38) | ((insn >> 4) & 7);
8320 /* Saturating add/subtract. */
8321 tmp = load_reg(s, rn);
8322 tmp2 = load_reg(s, rm);
/* Note the operand order of the subtract helper: tmp2 - tmp. */
8324 gen_helper_double_saturate(tmp, tmp);
8326 gen_helper_sub_saturate(tmp, tmp2, tmp);
8328 gen_helper_add_saturate(tmp, tmp, tmp2);
8329 tcg_temp_free_i32(tmp2);
/* Single-operand operations on rn, selected by op. */
8331 tmp = load_reg(s, rn);
8333 case 0x0a: /* rbit */
8334 gen_helper_rbit(tmp, tmp);
8336 case 0x08: /* rev */
8337 tcg_gen_bswap32_i32(tmp, tmp);
8339 case 0x09: /* rev16 */
8342 case 0x0b: /* revsh */
8345 case 0x10: /* sel */
/* The GE field is read from the CPU state and passed to the helper together
   with both operands. */
8346 tmp2 = load_reg(s, rm);
8347 tmp3 = tcg_temp_new_i32();
8348 tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUState, GE));
8349 gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
8350 tcg_temp_free_i32(tmp3);
8351 tcg_temp_free_i32(tmp2);
8353 case 0x18: /* clz */
8354 gen_helper_clz(tmp, tmp);
8360 store_reg(s, rd, tmp);
8362 case 4: case 5: /* 32-bit multiply. Sum of absolute differences. */
/* op = insn[7:4]; the inner switch is on insn[22:20].  Operands are rn and
   rm; rs, where used, is the accumulator. */
8363 op = (insn >> 4) & 0xf;
8364 tmp = load_reg(s, rn);
8365 tmp2 = load_reg(s, rm);
8366 switch ((insn >> 20) & 7) {
8367 case 0: /* 32 x 32 -> 32 */
8368 tcg_gen_mul_i32(tmp, tmp, tmp2);
8369 tcg_temp_free_i32(tmp2);
/* Accumulate: either rs - product or product + rs. */
8371 tmp2 = load_reg(s, rs);
8373 tcg_gen_sub_i32(tmp, tmp2, tmp);
8375 tcg_gen_add_i32(tmp, tmp, tmp2);
8376 tcg_temp_free_i32(tmp2);
8379 case 1: /* 16 x 16 -> 32 */
/* op bits 1 and 0 are passed to gen_mulxy() as its two selector
   arguments. */
8380 gen_mulxy(tmp, tmp2, op & 2, op & 1);
8381 tcg_temp_free_i32(tmp2);
8383 tmp2 = load_reg(s, rs);
8384 gen_helper_add_setq(tmp, tmp, tmp2);
8385 tcg_temp_free_i32(tmp2);
8388 case 2: /* Dual multiply add. */
8389 case 4: /* Dual multiply subtract. */
8391 gen_swap_half(tmp2);
8392 gen_smul_dual(tmp, tmp2);
/* Bit 22 selects the subtracting form. */
8393 if (insn & (1 << 22)) {
8394 /* This subtraction cannot overflow. */
8395 tcg_gen_sub_i32(tmp, tmp, tmp2);
8397 /* This addition cannot overflow 32 bits;
8398 * however it may overflow considered as a signed
8399 * operation, in which case we must set the Q flag.
8401 gen_helper_add_setq(tmp, tmp, tmp2);
8403 tcg_temp_free_i32(tmp2);
8406 tmp2 = load_reg(s, rs);
8407 gen_helper_add_setq(tmp, tmp, tmp2);
8408 tcg_temp_free_i32(tmp2);
8411 case 3: /* 32 * 16 -> 32msb */
/* The arithmetic shift by 16 selects the top halfword of rm as the 16-bit
   factor. */
8413 tcg_gen_sari_i32(tmp2, tmp2, 16);
/* Signed 64-bit product, shifted right by 16 and truncated to 32 bits. */
8416 tmp64 = gen_muls_i64_i32(tmp, tmp2);
8417 tcg_gen_shri_i64(tmp64, tmp64, 16);
8418 tmp = tcg_temp_new_i32();
8419 tcg_gen_trunc_i64_i32(tmp, tmp64);
8420 tcg_temp_free_i64(tmp64);
8423 tmp2 = load_reg(s, rs);
8424 gen_helper_add_setq(tmp, tmp, tmp2);
8425 tcg_temp_free_i32(tmp2);
8428 case 5: case 6: /* 32 * 32 -> 32msb (SMMUL, SMMLA, SMMLS) */
8429 tmp64 = gen_muls_i64_i32(tmp, tmp2);
/* Optional accumulate with rs: bit 20 selects the adding helper, otherwise
   the subtracting helper is used. */
8431 tmp = load_reg(s, rs);
8432 if (insn & (1 << 20)) {
8433 tmp64 = gen_addq_msw(tmp64, tmp);
8435 tmp64 = gen_subq_msw(tmp64, tmp);
/* Bit 4: add 0x80000000 before taking the upper 32 bits, i.e. round rather
   than truncate. */
8438 if (insn & (1 << 4)) {
8439 tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
8441 tcg_gen_shri_i64(tmp64, tmp64, 32);
8442 tmp = tcg_temp_new_i32();
8443 tcg_gen_trunc_i64_i32(tmp, tmp64);
8444 tcg_temp_free_i64(tmp64);
8446 case 7: /* Unsigned sum of absolute differences. */
8447 gen_helper_usad8(tmp, tmp, tmp2);
8448 tcg_temp_free_i32(tmp2);
8450 tmp2 = load_reg(s, rs);
8451 tcg_gen_add_i32(tmp, tmp, tmp2);
8452 tcg_temp_free_i32(tmp2);
8456 store_reg(s, rd, tmp);
8458 case 6: case 7: /* 64-bit multiply, Divide. */
/* op: insn[7:4] -> op[3:0], insn[22:20] -> op[6:4]. */
8459 op = ((insn >> 4) & 0xf) | ((insn >> 16) & 0x70);
8460 tmp = load_reg(s, rn);
8461 tmp2 = load_reg(s, rm);
/* op bit 4 set and bit 6 clear: divide, which requires ARM_FEATURE_DIV. */
8462 if ((op & 0x50) == 0x10) {
8464 if (!arm_feature(env, ARM_FEATURE_DIV))
8467 gen_helper_udiv(tmp, tmp, tmp2);
8469 gen_helper_sdiv(tmp, tmp, tmp2);
8470 tcg_temp_free_i32(tmp2);
8471 store_reg(s, rd, tmp);
8472 } else if ((op & 0xe) == 0xc) {
8473 /* Dual multiply accumulate long. */
8475 gen_swap_half(tmp2);
8476 gen_smul_dual(tmp, tmp2);
8478 tcg_gen_sub_i32(tmp, tmp, tmp2);
8480 tcg_gen_add_i32(tmp, tmp, tmp2);
8482 tcg_temp_free_i32(tmp2);
/* Sign-extend the 32-bit result to 64 bits, accumulate the rs/rd register
   pair into it and store the sum back to that pair. */
8484 tmp64 = tcg_temp_new_i64();
8485 tcg_gen_ext_i32_i64(tmp64, tmp);
8486 tcg_temp_free_i32(tmp);
8487 gen_addq(s, tmp64, rs, rd);
8488 gen_storeq_reg(s, rs, rd, tmp64);
8489 tcg_temp_free_i64(tmp64);
8492 /* Unsigned 64-bit multiply */
8493 tmp64 = gen_mulu_i64_i32(tmp, tmp2);
/* 16 x 16 product, sign-extended to 64 bits. */
8497 gen_mulxy(tmp, tmp2, op & 2, op & 1);
8498 tcg_temp_free_i32(tmp2);
8499 tmp64 = tcg_temp_new_i64();
8500 tcg_gen_ext_i32_i64(tmp64, tmp);
8501 tcg_temp_free_i32(tmp);
8503 /* Signed 64-bit multiply */
8504 tmp64 = gen_muls_i64_i32(tmp, tmp2);
/* Two separate 32-bit values (rs, then rd) are added into the 64-bit
   product. */
8509 gen_addq_lo(s, tmp64, rs);
8510 gen_addq_lo(s, tmp64, rd);
8511 } else if (op & 0x40) {
8512 /* 64-bit accumulate. */
8513 gen_addq(s, tmp64, rs, rd);
8515 gen_storeq_reg(s, rs, rd, tmp64);
8516 tcg_temp_free_i64(tmp64);
/* Coprocessor group: encodings with insn[25:24] == 3 are rewritten and given
   to the Neon data-processing decoder; disas_coproc_insn() handles the
   coprocessor encodings.  A nonzero return from either decoder is tested by
   the enclosing if. */
8521 case 6: case 7: case 14: case 15:
8523 if (((insn >> 24) & 3) == 3) {
8524 /* Translate into the equivalent ARM encoding. */
/* The mask clears bits 28, 27, 26 and 24; the original bit 28 is moved down
   to bit 24 and bit 28 is then forced to 1. */
8525 insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
8526 if (disas_neon_data_insn(env, s, insn))
8529 if (insn & (1 << 28))
8531 if (disas_coproc_insn (env, s, insn))
/* insn[28:25] in 8..11: bit 15 separates branches / misc control from
   data-processing immediate. */
8535 case 8: case 9: case 10: case 11:
8536 if (insn & (1 << 15)) {
8537 /* Branches, misc control. */
/* Bit 14 or bit 12 set. */
8538 if (insn & 0x5000) {
8539 /* Unconditional branch. */
8540 /* signextend(hw1[10:0]) -> offset[:12]. */
8541 offset = ((int32_t)insn << 5) >> 9 & ~(int32_t)0xfff;
8542 /* hw1[10:0] -> offset[11:1]. */
8543 offset |= (insn & 0x7ff) << 1;
8544 /* (~hw2[13, 11] ^ offset[24]) -> offset[23,22]
8545 offset[24:22] already have the same value because of the
8546 sign extension above. */
8547 offset ^= ((~insn) & (1 << 13)) << 10;
8548 offset ^= ((~insn) & (1 << 11)) << 11;
8550 if (insn & (1 << 14)) {
8551 /* Branch and link. */
/* r14 = s->pc with bit 0 set. */
8552 tcg_gen_movi_i32(cpu_R[14], s->pc | 1);
8556 if (insn & (1 << 12)) {
/* Bit 1 of the target is cleared before the gen_bx_im() branch. */
8561 offset &= ~(uint32_t)2;
8562 /* thumb2 bx, no need to check */
8563 gen_bx_im(s, offset);
/* insn[25:23] all set: miscellaneous control. */
8565 } else if (((insn >> 23) & 7) == 7) {
8567 if (insn & (1 << 13))
8570 if (insn & (1 << 26)) {
8571 /* Secure monitor call (v6Z) */
8572 goto illegal_op; /* not implemented. */
/* Sub-operation is insn[22:20]. */
8574 op = (insn >> 20) & 7;
8576 case 0: /* msr cpsr. */
/* This path passes insn[7:0] and the value of rn to the v7m_msr helper. */
8578 tmp = load_reg(s, rn);
8579 addr = tcg_const_i32(insn & 0xff);
8580 gen_helper_v7m_msr(cpu_env, addr, tmp);
8581 tcg_temp_free_i32(addr);
8582 tcg_temp_free_i32(tmp);
8587 case 1: /* msr spsr. */
8590 tmp = load_reg(s, rn);
/* msr_mask() is given insn[11:8] and a flag that is true when op == 1. */
8592 msr_mask(env, s, (insn >> 8) & 0xf, op == 1),
8596 case 2: /* cps, nop-hint. */
/* insn[10:8] == 0: hint, identified by insn[7:0]. */
8597 if (((insn >> 8) & 7) == 0) {
8598 gen_nop_hint(s, insn & 0xff);
8600 /* Implemented as NOP in user mode. */
/* The tests on bits 10, 7, 6, 5, 9 and 8 build the two arguments given to
   gen_set_psr_im() below: offset (second argument) and imm (last argument).
   NOTE(review): offset looks like the set of PSR bits to change and imm
   like their new value -- confirm. */
8605 if (insn & (1 << 10)) {
8606 if (insn & (1 << 7))
8608 if (insn & (1 << 6))
8610 if (insn & (1 << 5))
8612 if (insn & (1 << 9))
8613 imm = CPSR_A | CPSR_I | CPSR_F;
8615 if (insn & (1 << 8)) {
8617 imm |= (insn & 0x1f);
8620 gen_set_psr_im(s, offset, 0, imm);
8623 case 3: /* Special control operations. */
/* Selector is insn[7:4]. */
8625 op = (insn >> 4) & 0xf;
8633 /* These execute as NOPs. */
8640 /* Trivial implementation equivalent to bx. */
8641 tmp = load_reg(s, rn);
8644 case 5: /* Exception return. */
/* Checked for rn == 14 and rd == 15; the return address is rn minus
   insn[7:0]. */
8648 if (rn != 14 || rd != 15) {
8651 tmp = load_reg(s, rn);
8652 tcg_gen_subi_i32(tmp, tmp, insn & 0xff);
8653 gen_exception_return(s, tmp);
8655 case 6: /* mrs cpsr. */
/* The value comes either from the v7m_mrs helper (given insn[7:0]) or from
   cpsr_read, and is written to rd. */
8656 tmp = tcg_temp_new_i32();
8658 addr = tcg_const_i32(insn & 0xff);
8659 gen_helper_v7m_mrs(tmp, cpu_env, addr);
8660 tcg_temp_free_i32(addr);
8662 gen_helper_cpsr_read(tmp);
8664 store_reg(s, rd, tmp);
8666 case 7: /* mrs spsr. */
8667 /* Not accessible in user mode. */
8668 if (IS_USER(s) || IS_M(env))
8670 tmp = load_cpu_field(spsr);
8671 store_reg(s, rd, tmp);
8676 /* Conditional branch. */
/* Condition code is insn[25:22]; testing op ^ 1 branches to condlabel when
   the inverted condition holds. */
8677 op = (insn >> 22) & 0xf;
8678 /* Generate a conditional jump to next instruction. */
8679 s->condlabel = gen_new_label();
8680 gen_test_cc(op ^ 1, s->condlabel);
8683 /* offset[11:1] = insn[10:0] */
8684 offset = (insn & 0x7ff) << 1;
8685 /* offset[17:12] = insn[21:16]. */
8686 offset |= (insn & 0x003f0000) >> 4;
8687 /* offset[31:20] = insn[26]. */
8688 offset |= ((int32_t)((insn << 5) & 0x80000000)) >> 11;
8689 /* offset[18] = insn[13]. */
8690 offset |= (insn & (1 << 13)) << 5;
8691 /* offset[19] = insn[11]. */
8692 offset |= (insn & (1 << 11)) << 8;
8694 /* jump to the offset */
8695 gen_jmp(s, s->pc + offset);
8698 /* Data processing immediate. */
8699 if (insn & (1 << 25)) {
8700 if (insn & (1 << 24)) {
8701 if (insn & (1 << 20))
8703 /* Bitfield/Saturate. */
/* op = insn[23:21]; shift: insn[14:12] -> shift[4:2],
   insn[7:6] -> shift[1:0]. */
8704 op = (insn >> 21) & 7;
8706 shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
/* Source value: constant zero or register rn. */
8708 tmp = tcg_temp_new_i32();
8709 tcg_gen_movi_i32(tmp, 0);
8711 tmp = load_reg(s, rn);
8714 case 2: /* Signed bitfield extract. */
/* The field must fit in 32 bits. */
8716 if (shift + imm > 32)
8719 gen_sbfx(tmp, shift, imm);
8721 case 6: /* Unsigned bitfield extract. */
8723 if (shift + imm > 32)
/* gen_ubfx() takes a mask of imm low bits rather than a width. */
8726 gen_ubfx(tmp, shift, (1u << imm) - 1);
8728 case 3: /* Bitfield insert/clear. */
/* imm becomes imm + 1 - shift, which is then used as the field width for
   the mask. */
8731 imm = imm + 1 - shift;
8733 tmp2 = load_reg(s, rd);
8734 gen_bfi(tmp, tmp2, tmp, shift, (1u << imm) - 1);
8735 tcg_temp_free_i32(tmp2);
8740 default: /* Saturate. */
/* The operand is first shifted, either right (arithmetic) or left. */
8743 tcg_gen_sari_i32(tmp, tmp, shift);
8745 tcg_gen_shli_i32(tmp, tmp, shift);
8747 tmp2 = tcg_const_i32(imm);
/* The 16-bit helper variants are chosen when op bit 0 is set and the shift
   amount is zero. */
8750 if ((op & 1) && shift == 0)
8751 gen_helper_usat16(tmp, tmp, tmp2);
8753 gen_helper_usat(tmp, tmp, tmp2);
8756 if ((op & 1) && shift == 0)
8757 gen_helper_ssat16(tmp, tmp, tmp2);
8759 gen_helper_ssat(tmp, tmp, tmp2);
8761 tcg_temp_free_i32(tmp2);
8764 store_reg(s, rd, tmp);
/* 12-bit immediate: insn[26] -> imm[11], insn[14:12] -> imm[10:8],
   insn[7:0] -> imm[7:0]. */
8766 imm = ((insn & 0x04000000) >> 15)
8767 | ((insn & 0x7000) >> 4) | (insn & 0xff);
8768 if (insn & (1 << 22)) {
8769 /* 16-bit immediate. */
/* insn[19:16] supplies imm[15:12]. */
8770 imm |= (insn >> 4) & 0xf000;
8771 if (insn & (1 << 23)) {
/* Bit 23 set: keep the low halfword of rd and place imm in the top
   halfword. */
8773 tmp = load_reg(s, rd);
8774 tcg_gen_ext16u_i32(tmp, tmp);
8775 tcg_gen_ori_i32(tmp, tmp, imm << 16);
/* Otherwise the result is simply imm. */
8778 tmp = tcg_temp_new_i32();
8779 tcg_gen_movi_i32(tmp, imm);
8782 /* Add/sub 12-bit immediate. */
/* One form works on the word-aligned s->pc at translation time, the other
   on register rn at run time; bit 23 selects subtraction in both. */
8784 offset = s->pc & ~(uint32_t)3;
8785 if (insn & (1 << 23))
8789 tmp = tcg_temp_new_i32();
8790 tcg_gen_movi_i32(tmp, offset);
8792 tmp = load_reg(s, rn);
8793 if (insn & (1 << 23))
8794 tcg_gen_subi_i32(tmp, tmp, imm);
8796 tcg_gen_addi_i32(tmp, tmp, imm);
8799 store_reg(s, rd, tmp);
8802 int shifter_out = 0;
8803 /* modified 12-bit immediate. */
/* Control field: insn[26] -> shift[3], insn[14:12] -> shift[2:0]; the
   payload byte is insn[7:0]. */
8804 shift = ((insn & 0x04000000) >> 23) | ((insn & 0x7000) >> 12);
8805 imm = (insn & 0xff);
8808 /* Nothing to do. */
8810 case 1: /* 00XY00XY */
8813 case 2: /* XY00XY00 */
8817 case 3: /* XYXYXYXY */
8821 default: /* Rotated constant. */
/* Rotation count is the control field extended with imm bit 7; the left
   shift by (32 - shift) positions the constant. */
8822 shift = (shift << 1) | (imm >> 7);
8824 imm = imm << (32 - shift);
/* Second operand is the expanded constant; first operand is zero or rn. */
8828 tmp2 = tcg_temp_new_i32();
8829 tcg_gen_movi_i32(tmp2, imm);
8830 rn = (insn >> 16) & 0xf;
8832 tmp = tcg_temp_new_i32();
8833 tcg_gen_movi_i32(tmp, 0);
8835 tmp = load_reg(s, rn);
/* op = insn[24:21]; bit 20 is the flag-setting request. */
8837 op = (insn >> 21) & 0xf;
8838 if (gen_thumb2_data_op(s, op, (insn & (1 << 20)) != 0,
8839 shifter_out, tmp, tmp2))
8841 tcg_temp_free_i32(tmp2);
8842 rd = (insn >> 8) & 0xf;
8844 store_reg(s, rd, tmp);
8846 tcg_temp_free_i32(tmp);
8851 case 12: /* Load/store single data item. */
/* Bit 24 set with bit 20 clear: handed to the Neon load/store decoder. */
8856 if ((insn & 0x01100000) == 0x01000000) {
8857 if (disas_neon_ls_insn(env, s, insn))
/* Access type: insn[22:21] -> op[1:0], insn[24] -> op[2].  The load and
   store switches further down show the meaning: 0 byte, 1 halfword, 2 word,
   with 4 and 5 the sign-extending byte and halfword loads. */
8861 op = ((insn >> 21) & 3) | ((insn >> 22) & 4);
8863 if (!(insn & (1 << 20))) {
8867 /* Byte or halfword load space with dest == r15 : memory hints.
8868 * Catch them early so we don't emit pointless addressing code.
8869 * This space is a mix of:
8870 * PLD/PLDW/PLI, which we implement as NOPs (note that unlike
8871 * the ARM encodings, PLDW space doesn't UNDEF for non-v7MP
8873 * unallocated hints, which must be treated as NOPs
8874 * UNPREDICTABLE space, which we NOP or UNDEF depending on
8875 * which is easiest for the decoding logic
8876 * Some space which must UNDEF
8878 int op1 = (insn >> 23) & 3;
8879 int op2 = (insn >> 6) & 0x3f;
8884 /* UNPREDICTABLE or unallocated hint */
8888 return 0; /* PLD* or unallocated hint */
8890 if ((op2 == 0) || ((op2 & 0x3c) == 0x30)) {
8891 return 0; /* PLD* or unallocated hint */
8893 /* UNDEF space, or an UNPREDICTABLE */
/* Address computation.  The first form is resolved at translation time from
   s->pc and insn[11:0] (bit 23 selects add or subtract). */
8899 addr = tcg_temp_new_i32();
8901 /* s->pc has already been incremented by 4. */
8902 imm = s->pc & 0xfffffffc;
8903 if (insn & (1 << 23))
8904 imm += insn & 0xfff;
8906 imm -= insn & 0xfff;
8907 tcg_gen_movi_i32(addr, imm);
/* Register-based forms start from rn. */
8909 addr = load_reg(s, rn);
8910 if (insn & (1 << 23)) {
8911 /* Positive offset. */
8913 tcg_gen_addi_i32(addr, addr, imm);
/* Remaining addressing modes are selected by insn[11:8]. */
8916 switch ((insn >> 8) & 0xf) {
8917 case 0x0: /* Shifted Register. */
/* shift = insn[7:4]; rm shifted left by that amount is added to the
   base. */
8918 shift = (insn >> 4) & 0xf;
8920 tcg_temp_free_i32(addr);
8923 tmp = load_reg(s, rm);
8925 tcg_gen_shli_i32(tmp, tmp, shift);
8926 tcg_gen_add_i32(addr, addr, tmp);
8927 tcg_temp_free_i32(tmp);
8929 case 0xc: /* Negative offset. */
8930 tcg_gen_addi_i32(addr, addr, -imm);
8932 case 0xe: /* User privilege. */
8933 tcg_gen_addi_i32(addr, addr, imm);
8936 case 0x9: /* Post-decrement. */
8939 case 0xb: /* Post-increment. */
8943 case 0xd: /* Pre-decrement. */
8946 case 0xf: /* Pre-increment. */
8947 tcg_gen_addi_i32(addr, addr, imm);
8951 tcg_temp_free_i32(addr);
/* Bit 20 set: load into rs using the access type in op. */
8956 if (insn & (1 << 20)) {
8959 case 0: tmp = gen_ld8u(addr, user); break;
8960 case 4: tmp = gen_ld8s(addr, user); break;
8961 case 1: tmp = gen_ld16u(addr, user); break;
8962 case 5: tmp = gen_ld16s(addr, user); break;
8963 case 2: tmp = gen_ld32(addr, user); break;
8965 tcg_temp_free_i32(addr);
8971 store_reg(s, rs, tmp);
/* Store of rs using the access type in op. */
8975 tmp = load_reg(s, rs);
8977 case 0: gen_st8(tmp, addr, user); break;
8978 case 1: gen_st16(tmp, addr, user); break;
8979 case 2: gen_st32(tmp, addr, user); break;
8981 tcg_temp_free_i32(addr);
/* Post-access address adjustment and base writeback to rn. */
8986 tcg_gen_addi_i32(addr, addr, imm);
8988 store_reg(s, rn, addr);
8990 tcg_temp_free_i32(addr);
9002 static void disas_thumb_insn(CPUState *env, DisasContext *s)
9004 uint32_t val, insn, op, rm, rn, rd, shift, cond;
9011 if (s->condexec_mask) {
9012 cond = s->condexec_cond;
9013 if (cond != 0x0e) { /* Skip conditional when condition is AL. */
9014 s->condlabel = gen_new_label();
9015 gen_test_cc(cond ^ 1, s->condlabel);
9020 insn = lduw_code(s->pc);
9023 switch (insn >> 12) {
9027 op = (insn >> 11) & 3;
9030 rn = (insn >> 3) & 7;
9031 tmp = load_reg(s, rn);
9032 if (insn & (1 << 10)) {
9034 tmp2 = tcg_temp_new_i32();
9035 tcg_gen_movi_i32(tmp2, (insn >> 6) & 7);
9038 rm = (insn >> 6) & 7;
9039 tmp2 = load_reg(s, rm);
9041 if (insn & (1 << 9)) {
9042 if (s->condexec_mask)
9043 tcg_gen_sub_i32(tmp, tmp, tmp2);
9045 gen_helper_sub_cc(tmp, tmp, tmp2);
9047 if (s->condexec_mask)
9048 tcg_gen_add_i32(tmp, tmp, tmp2);
9050 gen_helper_add_cc(tmp, tmp, tmp2);
9052 tcg_temp_free_i32(tmp2);
9053 store_reg(s, rd, tmp);
9055 /* shift immediate */
9056 rm = (insn >> 3) & 7;
9057 shift = (insn >> 6) & 0x1f;
9058 tmp = load_reg(s, rm);
9059 gen_arm_shift_im(tmp, op, shift, s->condexec_mask == 0);
9060 if (!s->condexec_mask)
9062 store_reg(s, rd, tmp);
9066 /* arithmetic large immediate */
9067 op = (insn >> 11) & 3;
9068 rd = (insn >> 8) & 0x7;
9069 if (op == 0) { /* mov */
9070 tmp = tcg_temp_new_i32();
9071 tcg_gen_movi_i32(tmp, insn & 0xff);
9072 if (!s->condexec_mask)
9074 store_reg(s, rd, tmp);
9076 tmp = load_reg(s, rd);
9077 tmp2 = tcg_temp_new_i32();
9078 tcg_gen_movi_i32(tmp2, insn & 0xff);
9081 gen_helper_sub_cc(tmp, tmp, tmp2);
9082 tcg_temp_free_i32(tmp);
9083 tcg_temp_free_i32(tmp2);
9086 if (s->condexec_mask)
9087 tcg_gen_add_i32(tmp, tmp, tmp2);
9089 gen_helper_add_cc(tmp, tmp, tmp2);
9090 tcg_temp_free_i32(tmp2);
9091 store_reg(s, rd, tmp);
9094 if (s->condexec_mask)
9095 tcg_gen_sub_i32(tmp, tmp, tmp2);
9097 gen_helper_sub_cc(tmp, tmp, tmp2);
9098 tcg_temp_free_i32(tmp2);
9099 store_reg(s, rd, tmp);
9105 if (insn & (1 << 11)) {
9106 rd = (insn >> 8) & 7;
9107 /* load pc-relative. Bit 1 of PC is ignored. */
9108 val = s->pc + 2 + ((insn & 0xff) * 4);
9109 val &= ~(uint32_t)2;
9110 addr = tcg_temp_new_i32();
9111 tcg_gen_movi_i32(addr, val);
9112 tmp = gen_ld32(addr, IS_USER(s));
9113 tcg_temp_free_i32(addr);
9114 store_reg(s, rd, tmp);
9117 if (insn & (1 << 10)) {
9118 /* data processing extended or blx */
9119 rd = (insn & 7) | ((insn >> 4) & 8);
9120 rm = (insn >> 3) & 0xf;
9121 op = (insn >> 8) & 3;
9124 tmp = load_reg(s, rd);
9125 tmp2 = load_reg(s, rm);
9126 tcg_gen_add_i32(tmp, tmp, tmp2);
9127 tcg_temp_free_i32(tmp2);
9128 store_reg(s, rd, tmp);
9131 tmp = load_reg(s, rd);
9132 tmp2 = load_reg(s, rm);
9133 gen_helper_sub_cc(tmp, tmp, tmp2);
9134 tcg_temp_free_i32(tmp2);
9135 tcg_temp_free_i32(tmp);
9137 case 2: /* mov/cpy */
9138 tmp = load_reg(s, rm);
9139 store_reg(s, rd, tmp);
9141 case 3:/* branch [and link] exchange thumb register */
9142 tmp = load_reg(s, rm);
9143 if (insn & (1 << 7)) {
9145 val = (uint32_t)s->pc | 1;
9146 tmp2 = tcg_temp_new_i32();
9147 tcg_gen_movi_i32(tmp2, val);
9148 store_reg(s, 14, tmp2);
9150 /* already thumb, no need to check */
9157 /* data processing register */
9159 rm = (insn >> 3) & 7;
9160 op = (insn >> 6) & 0xf;
9161 if (op == 2 || op == 3 || op == 4 || op == 7) {
9162 /* the shift/rotate ops want the operands backwards */
9171 if (op == 9) { /* neg */
9172 tmp = tcg_temp_new_i32();
9173 tcg_gen_movi_i32(tmp, 0);
9174 } else if (op != 0xf) { /* mvn doesn't read its first operand */
9175 tmp = load_reg(s, rd);
9180 tmp2 = load_reg(s, rm);
9183 tcg_gen_and_i32(tmp, tmp, tmp2);
9184 if (!s->condexec_mask)
9188 tcg_gen_xor_i32(tmp, tmp, tmp2);
9189 if (!s->condexec_mask)
9193 if (s->condexec_mask) {
9194 gen_helper_shl(tmp2, tmp2, tmp);
9196 gen_helper_shl_cc(tmp2, tmp2, tmp);
9201 if (s->condexec_mask) {
9202 gen_helper_shr(tmp2, tmp2, tmp);
9204 gen_helper_shr_cc(tmp2, tmp2, tmp);
9209 if (s->condexec_mask) {
9210 gen_helper_sar(tmp2, tmp2, tmp);
9212 gen_helper_sar_cc(tmp2, tmp2, tmp);
9217 if (s->condexec_mask)
9220 gen_helper_adc_cc(tmp, tmp, tmp2);
9223 if (s->condexec_mask)
9224 gen_sub_carry(tmp, tmp, tmp2);
9226 gen_helper_sbc_cc(tmp, tmp, tmp2);
9229 if (s->condexec_mask) {
9230 tcg_gen_andi_i32(tmp, tmp, 0x1f);
9231 tcg_gen_rotr_i32(tmp2, tmp2, tmp);
9233 gen_helper_ror_cc(tmp2, tmp2, tmp);
9238 tcg_gen_and_i32(tmp, tmp, tmp2);
9243 if (s->condexec_mask)
9244 tcg_gen_neg_i32(tmp, tmp2);
9246 gen_helper_sub_cc(tmp, tmp, tmp2);
9249 gen_helper_sub_cc(tmp, tmp, tmp2);
9253 gen_helper_add_cc(tmp, tmp, tmp2);
9257 tcg_gen_or_i32(tmp, tmp, tmp2);
9258 if (!s->condexec_mask)
9262 tcg_gen_mul_i32(tmp, tmp, tmp2);
9263 if (!s->condexec_mask)
9267 tcg_gen_andc_i32(tmp, tmp, tmp2);
9268 if (!s->condexec_mask)
9272 tcg_gen_not_i32(tmp2, tmp2);
9273 if (!s->condexec_mask)
9281 store_reg(s, rm, tmp2);
9283 tcg_temp_free_i32(tmp);
9285 store_reg(s, rd, tmp);
9286 tcg_temp_free_i32(tmp2);
9289 tcg_temp_free_i32(tmp);
9290 tcg_temp_free_i32(tmp2);
9295 /* load/store register offset. */
9297 rn = (insn >> 3) & 7;
9298 rm = (insn >> 6) & 7;
9299 op = (insn >> 9) & 7;
9300 addr = load_reg(s, rn);
9301 tmp = load_reg(s, rm);
9302 tcg_gen_add_i32(addr, addr, tmp);
9303 tcg_temp_free_i32(tmp);
9305 if (op < 3) /* store */
9306 tmp = load_reg(s, rd);
9310 gen_st32(tmp, addr, IS_USER(s));
9313 gen_st16(tmp, addr, IS_USER(s));
9316 gen_st8(tmp, addr, IS_USER(s));
9319 tmp = gen_ld8s(addr, IS_USER(s));
9322 tmp = gen_ld32(addr, IS_USER(s));
9325 tmp = gen_ld16u(addr, IS_USER(s));
9328 tmp = gen_ld8u(addr, IS_USER(s));
9331 tmp = gen_ld16s(addr, IS_USER(s));
9334 if (op >= 3) /* load */
9335 store_reg(s, rd, tmp);
9336 tcg_temp_free_i32(addr);
9340 /* load/store word immediate offset */
9342 rn = (insn >> 3) & 7;
9343 addr = load_reg(s, rn);
9344 val = (insn >> 4) & 0x7c;
9345 tcg_gen_addi_i32(addr, addr, val);
9347 if (insn & (1 << 11)) {
9349 tmp = gen_ld32(addr, IS_USER(s));
9350 store_reg(s, rd, tmp);
9353 tmp = load_reg(s, rd);
9354 gen_st32(tmp, addr, IS_USER(s));
9356 tcg_temp_free_i32(addr);
9360 /* load/store byte immediate offset */
9362 rn = (insn >> 3) & 7;
9363 addr = load_reg(s, rn);
9364 val = (insn >> 6) & 0x1f;
9365 tcg_gen_addi_i32(addr, addr, val);
9367 if (insn & (1 << 11)) {
9369 tmp = gen_ld8u(addr, IS_USER(s));
9370 store_reg(s, rd, tmp);
9373 tmp = load_reg(s, rd);
9374 gen_st8(tmp, addr, IS_USER(s));
9376 tcg_temp_free_i32(addr);
9380 /* load/store halfword immediate offset */
9382 rn = (insn >> 3) & 7;
9383 addr = load_reg(s, rn);
9384 val = (insn >> 5) & 0x3e;
9385 tcg_gen_addi_i32(addr, addr, val);
9387 if (insn & (1 << 11)) {
9389 tmp = gen_ld16u(addr, IS_USER(s));
9390 store_reg(s, rd, tmp);
9393 tmp = load_reg(s, rd);
9394 gen_st16(tmp, addr, IS_USER(s));
9396 tcg_temp_free_i32(addr);
9400 /* load/store from stack */
9401 rd = (insn >> 8) & 7;
9402 addr = load_reg(s, 13);
9403 val = (insn & 0xff) * 4;
9404 tcg_gen_addi_i32(addr, addr, val);
9406 if (insn & (1 << 11)) {
9408 tmp = gen_ld32(addr, IS_USER(s));
9409 store_reg(s, rd, tmp);
9412 tmp = load_reg(s, rd);
9413 gen_st32(tmp, addr, IS_USER(s));
9415 tcg_temp_free_i32(addr);
9419 /* add to high reg */
9420 rd = (insn >> 8) & 7;
9421 if (insn & (1 << 11)) {
9423 tmp = load_reg(s, 13);
9425 /* PC. bit 1 is ignored. */
9426 tmp = tcg_temp_new_i32();
9427 tcg_gen_movi_i32(tmp, (s->pc + 2) & ~(uint32_t)2);
9429 val = (insn & 0xff) * 4;
9430 tcg_gen_addi_i32(tmp, tmp, val);
9431 store_reg(s, rd, tmp);
9436 op = (insn >> 8) & 0xf;
9439 /* adjust stack pointer */
9440 tmp = load_reg(s, 13);
9441 val = (insn & 0x7f) * 4;
9442 if (insn & (1 << 7))
9443 val = -(int32_t)val;
9444 tcg_gen_addi_i32(tmp, tmp, val);
9445 store_reg(s, 13, tmp);
9448 case 2: /* sign/zero extend. */
9451 rm = (insn >> 3) & 7;
9452 tmp = load_reg(s, rm);
9453 switch ((insn >> 6) & 3) {
9454 case 0: gen_sxth(tmp); break;
9455 case 1: gen_sxtb(tmp); break;
9456 case 2: gen_uxth(tmp); break;
9457 case 3: gen_uxtb(tmp); break;
9459 store_reg(s, rd, tmp);
9461 case 4: case 5: case 0xc: case 0xd:
9463 addr = load_reg(s, 13);
9464 if (insn & (1 << 8))
9468 for (i = 0; i < 8; i++) {
9469 if (insn & (1 << i))
9472 if ((insn & (1 << 11)) == 0) {
9473 tcg_gen_addi_i32(addr, addr, -offset);
9475 for (i = 0; i < 8; i++) {
9476 if (insn & (1 << i)) {
9477 if (insn & (1 << 11)) {
9479 tmp = gen_ld32(addr, IS_USER(s));
9480 store_reg(s, i, tmp);
9483 tmp = load_reg(s, i);
9484 gen_st32(tmp, addr, IS_USER(s));
9486 /* advance to the next address. */
9487 tcg_gen_addi_i32(addr, addr, 4);
9491 if (insn & (1 << 8)) {
9492 if (insn & (1 << 11)) {
9494 tmp = gen_ld32(addr, IS_USER(s));
9495 /* don't set the pc until the rest of the instruction
9499 tmp = load_reg(s, 14);
9500 gen_st32(tmp, addr, IS_USER(s));
9502 tcg_gen_addi_i32(addr, addr, 4);
9504 if ((insn & (1 << 11)) == 0) {
9505 tcg_gen_addi_i32(addr, addr, -offset);
9507 /* write back the new stack pointer */
9508 store_reg(s, 13, addr);
9509 /* set the new PC value */
9510 if ((insn & 0x0900) == 0x0900) {
9511 store_reg_from_load(env, s, 15, tmp);
9515 case 1: case 3: case 9: case 11: /* czb */
9517 tmp = load_reg(s, rm);
9518 s->condlabel = gen_new_label();
9520 if (insn & (1 << 11))
9521 tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, s->condlabel);
9523 tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, s->condlabel);
9524 tcg_temp_free_i32(tmp);
9525 offset = ((insn & 0xf8) >> 2) | (insn & 0x200) >> 3;
9526 val = (uint32_t)s->pc + 2;
9531 case 15: /* IT, nop-hint. */
9532 if ((insn & 0xf) == 0) {
9533 gen_nop_hint(s, (insn >> 4) & 0xf);
9537 s->condexec_cond = (insn >> 4) & 0xe;
9538 s->condexec_mask = insn & 0x1f;
9539 /* No actual code generated for this insn, just setup state. */
9542 case 0xe: /* bkpt */
9544 gen_exception_insn(s, 2, EXCP_BKPT);
9549 rn = (insn >> 3) & 0x7;
9551 tmp = load_reg(s, rn);
9552 switch ((insn >> 6) & 3) {
9553 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
9554 case 1: gen_rev16(tmp); break;
9555 case 3: gen_revsh(tmp); break;
9556 default: goto illegal_op;
9558 store_reg(s, rd, tmp);
9566 tmp = tcg_const_i32((insn & (1 << 4)) != 0);
9569 addr = tcg_const_i32(16);
9570 gen_helper_v7m_msr(cpu_env, addr, tmp);
9571 tcg_temp_free_i32(addr);
9575 addr = tcg_const_i32(17);
9576 gen_helper_v7m_msr(cpu_env, addr, tmp);
9577 tcg_temp_free_i32(addr);
9579 tcg_temp_free_i32(tmp);
9582 if (insn & (1 << 4))
9583 shift = CPSR_A | CPSR_I | CPSR_F;
9586 gen_set_psr_im(s, ((insn & 7) << 6), 0, shift);
9597 /* load/store multiple */
9599 TCGV_UNUSED(loaded_var);
9600 rn = (insn >> 8) & 0x7;
9601 addr = load_reg(s, rn);
9602 for (i = 0; i < 8; i++) {
9603 if (insn & (1 << i)) {
9604 if (insn & (1 << 11)) {
9606 tmp = gen_ld32(addr, IS_USER(s));
9610 store_reg(s, i, tmp);
9614 tmp = load_reg(s, i);
9615 gen_st32(tmp, addr, IS_USER(s));
9617 /* advance to the next address */
9618 tcg_gen_addi_i32(addr, addr, 4);
9621 if ((insn & (1 << rn)) == 0) {
9622 /* base reg not in list: base register writeback */
9623 store_reg(s, rn, addr);
9625 /* base reg in list: if load, complete it now */
9626 if (insn & (1 << 11)) {
9627 store_reg(s, rn, loaded_var);
9629 tcg_temp_free_i32(addr);
9634 /* conditional branch or swi */
9635 cond = (insn >> 8) & 0xf;
9641 gen_set_pc_im(s->pc);
9642 s->is_jmp = DISAS_SWI;
9645 /* generate a conditional jump to next instruction */
9646 s->condlabel = gen_new_label();
9647 gen_test_cc(cond ^ 1, s->condlabel);
9650 /* jump to the offset */
9651 val = (uint32_t)s->pc + 2;
9652 offset = ((int32_t)insn << 24) >> 24;
9658 if (insn & (1 << 11)) {
9659 if (disas_thumb2_insn(env, s, insn))
9663 /* unconditional branch */
9664 val = (uint32_t)s->pc;
9665 offset = ((int32_t)insn << 21) >> 21;
9666 val += (offset << 1) + 2;
9671 if (disas_thumb2_insn(env, s, insn))
9677 gen_exception_insn(s, 4, EXCP_UDEF);
9681 gen_exception_insn(s, 2, EXCP_UDEF);
9684 /* generate intermediate code in gen_opc_buf and gen_opparam_buf for
9685 basic block 'tb'. If search_pc is TRUE, also generate PC
9686 information for each intermediate instruction. */
/* Translation driver for a single TB.
 *
 * env: CPU state; read here for singlestep_enabled and the breakpoint list.
 * tb:  block being translated.  tb->flags seeds the DisasContext (Thumb
 *      mode, condexec bits, VFP enable/length/stride) and tb->size and
 *      tb->icount are filled in at the end.
 *
 * Each instruction is decoded by disas_thumb_insn() or disas_arm_insn().
 * The loop test visible below keeps going only while dc->is_jmp is clear,
 * gen_opc_ptr is below gen_opc_end, single-stepping is off, dc->pc is
 * below next_page_start and num_insns is below max_insns.
 *
 * NOTE(review): this listing omits some lines of the function (the
 * embedded numbering skips, e.g. 9689-9690 where the remaining parameter
 * declaration would be); confirm details against the complete source.
 */
9687 static inline void gen_intermediate_code_internal(CPUState *env,
9688 TranslationBlock *tb,
9691 DisasContext dc1, *dc = &dc1;
9693 uint16_t *gen_opc_end;
9695 target_ulong pc_start;
9696 uint32_t next_page_start;
9700 /* generate intermediate code */
9705 gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
9707 dc->is_jmp = DISAS_NEXT;
9709 dc->singlestep_enabled = env->singlestep_enabled;
9711 dc->thumb = ARM_TBFLAG_THUMB(tb->flags);
9712 dc->condexec_mask = (ARM_TBFLAG_CONDEXEC(tb->flags) & 0xf) << 1;
9713 dc->condexec_cond = ARM_TBFLAG_CONDEXEC(tb->flags) >> 4;
9714 #if !defined(CONFIG_USER_ONLY)
9715 dc->user = (ARM_TBFLAG_PRIV(tb->flags) == 0);
9717 dc->vfp_enabled = ARM_TBFLAG_VFPEN(tb->flags);
9718 dc->vec_len = ARM_TBFLAG_VECLEN(tb->flags);
9719 dc->vec_stride = ARM_TBFLAG_VECSTRIDE(tb->flags);
9720 cpu_F0s = tcg_temp_new_i32();
9721 cpu_F1s = tcg_temp_new_i32();
9722 cpu_F0d = tcg_temp_new_i64();
9723 cpu_F1d = tcg_temp_new_i64();
9726 /* FIXME: cpu_M0 can probably be the same as cpu_V0. */
9727 cpu_M0 = tcg_temp_new_i64();
/* First address past the guest page that contains pc_start; the
   translation loop condition compares dc->pc against this value. */
9728 next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
9731 max_insns = tb->cflags & CF_COUNT_MASK;
9733 max_insns = CF_COUNT_MASK;
9737 tcg_clear_temp_count();
9739 /* A note on handling of the condexec (IT) bits:
9741 * We want to avoid the overhead of having to write the updated condexec
9742 * bits back to the CPUState for every instruction in an IT block. So:
9743 * (1) if the condexec bits are not already zero then we write
9744 * zero back into the CPUState now. This avoids complications trying
9745 * to do it at the end of the block. (For example if we don't do this
9746 * it's hard to identify whether we can safely skip writing condexec
9747 * at the end of the TB, which we definitely want to do for the case
9748 * where a TB doesn't do anything with the IT state at all.)
9749 * (2) if we are going to leave the TB then we call gen_set_condexec()
9750 * which will write the correct value into CPUState if zero is wrong.
9751 * This is done both for leaving the TB at the end, and for leaving
9752 * it because of an exception we know will happen, which is done in
9753 * gen_exception_insn(). The latter is necessary because we need to
9754 * leave the TB with the PC/IT state just prior to execution of the
9755 * instruction which caused the exception.
9756 * (3) if we leave the TB unexpectedly (eg a data abort on a load)
9757 * then the CPUState will be wrong and we need to reset it.
9758 * This is handled in the same way as restoration of the
9759 * PC in these situations: we will be called again with search_pc=1
9760 * and generate a mapping of the condexec bits for each PC in
9761 * gen_opc_condexec_bits[]. restore_state_to_opc() then uses
9762 * this to restore the condexec bits.
9764 * Note that there are no instructions which can read the condexec
9765 * bits, and none which can write non-static values to them, so
9766 * we don't need to care about whether CPUState is correct in the
9770 /* Reset the conditional execution bits immediately. This avoids
9771 complications trying to do it at the end of the block. */
9772 if (dc->condexec_mask || dc->condexec_cond)
9774 TCGv tmp = tcg_temp_new_i32();
9775 tcg_gen_movi_i32(tmp, 0);
9776 store_cpu_field(tmp, condexec_bits);
9779 #ifdef CONFIG_USER_ONLY
9780 /* Intercept jump to the magic kernel page. */
9781 if (dc->pc >= 0xffff0000) {
9782 /* We always get here via a jump, so know we are not in a
9783 conditional execution block. */
9784 gen_exception(EXCP_KERNEL_TRAP);
9785 dc->is_jmp = DISAS_UPDATE;
9789 if (dc->pc >= 0xfffffff0 && IS_M(env)) {
9790 /* We always get here via a jump, so know we are not in a
9791 conditional execution block. */
9792 gen_exception(EXCP_EXCEPTION_EXIT);
9793 dc->is_jmp = DISAS_UPDATE;
9798 if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) {
9799 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
9800 if (bp->pc == dc->pc) {
9801 gen_exception_insn(dc, 0, EXCP_DEBUG);
9802 /* Advance PC so that clearing the breakpoint will
9803 invalidate this TB. */
9805 goto done_generating;
9811 j = gen_opc_ptr - gen_opc_buf;
9815 gen_opc_instr_start[lj++] = 0;
/* Record the guest PC, the packed condexec bits (condition in the high
   nibble, mask shifted back down by one) and the instruction count for
   this slot; restore_state_to_opc() reads gen_opc_pc[] and
   gen_opc_condexec_bits[] back. */
9817 gen_opc_pc[lj] = dc->pc;
9818 gen_opc_condexec_bits[lj] = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
9819 gen_opc_instr_start[lj] = 1;
9820 gen_opc_icount[lj] = num_insns;
9823 if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
9826 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
9827 tcg_gen_debug_insn_start(dc->pc);
9831 disas_thumb_insn(env, dc);
/* Advance the IT state after a Thumb instruction: bit 4 of the mask
   becomes the low bit of the condition, the mask shifts left by one
   (kept to five bits), and the condition is cleared once the mask
   reaches zero. */
9832 if (dc->condexec_mask) {
9833 dc->condexec_cond = (dc->condexec_cond & 0xe)
9834 | ((dc->condexec_mask >> 4) & 1);
9835 dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
9836 if (dc->condexec_mask == 0) {
9837 dc->condexec_cond = 0;
9841 disas_arm_insn(env, dc);
9844 if (dc->condjmp && !dc->is_jmp) {
9845 gen_set_label(dc->condlabel);
9849 if (tcg_check_temp_count()) {
9850 fprintf(stderr, "TCG temporary leak before %08x\n", dc->pc);
9853 /* Translation stops when a conditional branch is encountered.
9854 * Otherwise the subsequent code could get translated several times.
9855 * Also stop translation when a page boundary is reached. This
9856 * ensures prefetch aborts occur at the right place. */
9858 } while (!dc->is_jmp && gen_opc_ptr < gen_opc_end &&
9859 !env->singlestep_enabled &&
9861 dc->pc < next_page_start &&
9862 num_insns < max_insns);
9864 if (tb->cflags & CF_LAST_IO) {
9866 /* FIXME: This can theoretically happen with self-modifying
9868 cpu_abort(env, "IO on conditional branch instruction");
9873 /* At this stage dc->condjmp will only be set when the skipped
9874 instruction was a conditional branch or trap, and the PC has
9875 already been written. */
9876 if (unlikely(env->singlestep_enabled)) {
9877 /* Make sure the pc is updated, and raise a debug exception. */
9879 gen_set_condexec(dc);
9880 if (dc->is_jmp == DISAS_SWI) {
9881 gen_exception(EXCP_SWI);
9883 gen_exception(EXCP_DEBUG);
9885 gen_set_label(dc->condlabel);
9887 if (dc->condjmp || !dc->is_jmp) {
9888 gen_set_pc_im(dc->pc);
9891 gen_set_condexec(dc);
9892 if (dc->is_jmp == DISAS_SWI && !dc->condjmp) {
9893 gen_exception(EXCP_SWI);
9895 /* FIXME: Single stepping a WFI insn will not halt
9897 gen_exception(EXCP_DEBUG);
9900 /* While branches must always occur at the end of an IT block,
9901 there are a few other things that can cause us to terminate
9902 the TB in the middle of an IT block:
9903 - Exception generating instructions (bkpt, swi, undefined).
9905 - Hardware watchpoints.
9906 Hardware breakpoints have already been handled and skip this code.
9908 gen_set_condexec(dc);
9909 switch(dc->is_jmp) {
9911 gen_goto_tb(dc, 1, dc->pc);
9916 /* indicate that the hash table must be used to find the next TB */
9920 /* nothing more to generate */
9926 gen_exception(EXCP_SWI);
9930 gen_set_label(dc->condlabel);
9931 gen_set_condexec(dc);
9932 gen_goto_tb(dc, 1, dc->pc);
9938 gen_icount_end(tb, num_insns);
9939 *gen_opc_ptr = INDEX_op_end;
9942 if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
9943 qemu_log("----------------\n");
9944 qemu_log("IN: %s\n", lookup_symbol(pc_start));
9945 log_target_disas(pc_start, dc->pc - pc_start, dc->thumb);
9950 j = gen_opc_ptr - gen_opc_buf;
9953 gen_opc_instr_start[lj++] = 0;
/* Store in the TB how far the guest PC advanced from pc_start and how
   many instructions were translated. */
9955 tb->size = dc->pc - pc_start;
9956 tb->icount = num_insns;
/* Translate 'tb' by calling gen_intermediate_code_internal() with 0 as
   its last argument.
   NOTE(review): the last argument is presumably search_pc (that
   parameter's declaration is not visible in this listing), in which case
   no per-instruction PC information is generated - confirm. */
9960 void gen_intermediate_code(CPUState *env, TranslationBlock *tb)
9962 gen_intermediate_code_internal(env, tb, 0);
/* Translate 'tb' by calling gen_intermediate_code_internal() with 1 as
   its last argument.
   NOTE(review): the last argument is presumably search_pc (that
   parameter's declaration is not visible in this listing), in which case
   the per-instruction PC and condexec tables read by
   restore_state_to_opc() are filled in - confirm. */
9965 void gen_intermediate_code_pc(CPUState *env, TranslationBlock *tb)
9967 gen_intermediate_code_internal(env, tb, 1);
/* Short mode names indexed by the low four bits of the PSR, as used by
   cpu_dump_state() (cpu_mode_names[psr & 0xf]).  Index values without a
   name here are shown as "???". */
9970 static const char *cpu_mode_names[16] = {
9971 "usr", "fiq", "irq", "svc", "???", "???", "???", "abt",
9972 "???", "???", "???", "und", "???", "???", "???", "sys"
/* Write a human-readable dump of the CPU state to 'f' via cpu_fprintf.
 *
 * Output, in order: the env->regs[] values as "Rnn=xxxxxxxx"; one PSR
 * line built from cpsr_read() showing the N/Z/C/V flags (bits 31..28),
 * 'T' or 'A' according to CPSR_T, the mode name selected by the low four
 * bits and "32" or "26" according to bit 0x10; then, for each of the 16
 * env->vfp.regs[] entries, two single-precision views and the
 * double-precision view; and finally the FPSCR value.
 *
 * NOTE(review): some lines of this function (remaining parameters, local
 * declarations, loop headers) are not visible in this listing.
 */
9975 void cpu_dump_state(CPUState *env, FILE *f, fprintf_function cpu_fprintf,
9985 /* ??? This assumes float64 and double have the same layout.
9986 Oh well, it's only debug dumps. */
9995 cpu_fprintf(f, "R%02d=%08x", i, env->regs[i]);
9997 cpu_fprintf(f, "\n");
9999 cpu_fprintf(f, " ");
10001 psr = cpsr_read(env);
10002 cpu_fprintf(f, "PSR=%08x %c%c%c%c %c %s%d\n",
10004 psr & (1 << 31) ? 'N' : '-',
10005 psr & (1 << 30) ? 'Z' : '-',
10006 psr & (1 << 29) ? 'C' : '-',
10007 psr & (1 << 28) ? 'V' : '-',
10008 psr & CPSR_T ? 'T' : 'A',
10009 cpu_mode_names[psr & 0xf], (psr & 0x10) ? 32 : 26);
10012 for (i = 0; i < 16; i++) {
10013 d.d = env->vfp.regs[i];
10017 cpu_fprintf(f, "s%02d=%08x(%8g) s%02d=%08x(%8g) d%02d=%08x%08x(%8g)\n",
10018 i * 2, (int)s0.i, s0.s,
10019 i * 2 + 1, (int)s1.i, s1.s,
10020 i, (int)(uint32_t)d.l.upper, (int)(uint32_t)d.l.lower,
10023 cpu_fprintf(f, "FPSCR: %08x\n", (int)env->vfp.xregs[ARM_VFP_FPSCR]);
/* Restore guest state from the per-op tables: sets env->regs[15] from
   gen_opc_pc[pc_pos] and env->condexec_bits from
   gen_opc_condexec_bits[pc_pos].  'tb' is not used by the statements
   visible here. */
10027 void restore_state_to_opc(CPUState *env, TranslationBlock *tb, int pc_pos)
10029 env->regs[15] = gen_opc_pc[pc_pos];
10030 env->condexec_bits = gen_opc_condexec_bits[pc_pos];