1 /* simulator.c -- Interface for the AArch64 simulator.
3 Copyright (C) 2015-2017 Free Software Foundation, Inc.
5 Contributed by Red Hat.
7 This file is part of GDB.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>. */
26 #include <sys/types.h>
31 #include "simulator.h"
/* Test a single CPSR flag bit of CPU.  */
#define TST(_flag)   (aarch64_test_CPSR_bit (cpu, _flag))
/* 1 if the CPSR flag is set, 0 otherwise.  */
#define IS_SET(_X)   (TST (( _X )) ? 1 : 0)
/* 1 if the CPSR flag is clear, 0 otherwise.  */
#define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)

/* Space saver macro: extract instruction bits [HIGH:LOW].  */
#define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))
/* Halt the simulation with SIGILL: the current instruction is
   architecturally unallocated.  */
#define HALT_UNALLOC							\
  do									\
    {									\
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));				\
      TRACE_INSN (cpu,							\
		  "Unallocated instruction detected at sim line %d,"	\
		  " exe addr %" PRIx64,					\
		  __LINE__, aarch64_get_PC (cpu));			\
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
		       sim_stopped, SIM_SIGILL);			\
    }									\
  while (0)

/* Halt the simulation with SIGABRT: the instruction is valid but the
   simulator has not implemented it yet.  */
#define HALT_NYI							\
  do									\
    {									\
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));				\
      TRACE_INSN (cpu,							\
		  "Unimplemented instruction detected at sim line %d,"	\
		  " exe addr %" PRIx64,					\
		  __LINE__, aarch64_get_PC (cpu));			\
      if (! TRACE_ANY_P (cpu))						\
	sim_io_eprintf (CPU_STATE (cpu), "SIM Error: Unimplemented instruction: %#08x\n", \
			aarch64_get_instr (cpu));			\
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
		       sim_stopped, SIM_SIGABRT);			\
    }									\
  while (0)

/* Sanity-check a fixed field of the current instruction; halt as
   not-yet-implemented on mismatch.  */
#define NYI_assert(HI, LO, EXPECTED)					\
  do									\
    {									\
      if (INSTR ((HI), (LO)) != (EXPECTED))				\
	HALT_NYI;							\
    }									\
  while (0)
/* Helper functions used by expandLogicalImmediate.  */

/* For i = 1, ... N result<i-1> = 1, other bits are zero.
   N must be in [0, 64].  */
static inline uint64_t
ones (int N)
{
  /* Use a 64-bit one so the shift is well-defined even on ILP32 hosts,
     where "1UL << N" would be undefined for N > 31.  */
  return (N == 64 ? (uint64_t) -1 : (((uint64_t) 1 << N) - 1));
}
/* result<0> = val<N>: extract a single bit as bit 0.  */
static inline uint64_t
pickbit (uint64_t val, int N)
{
  return pickbits64 (val, N, N);
}
/* Expand the (N, immr=R, imms=S) fields of a logical-immediate
   instruction into the 64-bit immediate they encode.  Returns 0 for
   encodings that are architecturally invalid (decode treats a zero
   entry as "unallocated").  */
static uint64_t
expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
{
  uint64_t mask;
  uint64_t imm;
  unsigned simd_size;

  /* The immediate value is S+1 bits to 1, left rotated by SIMDsize - R
     (in other words, right rotated by R), then replicated.  */
  if (N != 0)
    {
      simd_size = 64;
      mask = 0xffffffffffffffffull;
    }
  else
    {
      /* The leading-ones pattern of S's top bits selects the element
	 size (case ranges are a GCC extension used throughout GDB).  */
      switch (S)
	{
	case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32;           break;
	case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
	case 0x30 ... 0x37: /* 110xxx */ simd_size =  8; S &= 0x7; break;
	case 0x38 ... 0x3b: /* 1110xx */ simd_size =  4; S &= 0x3; break;
	case 0x3c ... 0x3d: /* 11110x */ simd_size =  2; S &= 0x1; break;
	default: return 0;
	}
      mask = (1ull << simd_size) - 1;
      /* Top bits are IGNORED.  */
      R &= simd_size - 1;
    }

  /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected.  */
  if (S == simd_size - 1)
    return 0;

  /* S+1 consecutive bits to 1.  */
  /* NOTE: S can't be 63 due to detection above.  */
  imm = (1ull << (S + 1)) - 1;

  /* Rotate to the left by simd_size - R.  */
  if (R != 0)
    imm = ((imm << (simd_size - R)) & mask) | (imm >> R);

  /* Replicate the value according to SIMD size.  Each case falls
     through, doubling the pattern width at every step.  */
  switch (simd_size)
    {
    case  2: imm = (imm <<  2) | imm; /* Fall through.  */
    case  4: imm = (imm <<  4) | imm; /* Fall through.  */
    case  8: imm = (imm <<  8) | imm; /* Fall through.  */
    case 16: imm = (imm << 16) | imm; /* Fall through.  */
    case 32: imm = (imm << 32) | imm; /* Fall through.  */
    case 64: break;
    default: return 0;
    }

  return imm;
}
/* Instr[22,10] encodes N immr and imms.  We want a lookup table
   for each possible combination i.e. 13 bits worth of int entries.  */
#define LI_TABLE_SIZE  (1 << 13)
static uint64_t LITable[LI_TABLE_SIZE];

/* Populate LITable with the expansion of every possible
   (N, immr, imms) logical-immediate encoding.  Invalid encodings
   expand to zero, which decode uses as the "unallocated" marker.  */
void
aarch64_init_LIT_table (void)
{
  unsigned index;

  for (index = 0; index < LI_TABLE_SIZE; index++)
    {
      uint32_t N    = uimm (index, 12, 12);
      uint32_t immr = uimm (index, 11, 6);
      uint32_t imms = uimm (index, 5, 0);

      LITable [index] = expand_logical_immediate (imms, immr, N);
    }
}
176 dexNotify (sim_cpu *cpu)
178 /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
179 2 ==> exit Java, 3 ==> start next bytecode. */
180 uint32_t type = INSTR (14, 0);
182 TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);
187 /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
188 aarch64_get_reg_u64 (cpu, R22, 0)); */
191 /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
192 aarch64_get_reg_u64 (cpu, R22, 0)); */
195 /* aarch64_notifyMethodExit (); */
198 /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
199 aarch64_get_reg_u64 (cpu, R22, 0)); */
204 /* secondary decode within top level groups */
207 dexPseudo (sim_cpu *cpu)
209 /* assert instr[28,27] = 00
211 We provide 2 pseudo instructions:
213 HALT stops execution of the simulator causing an immediate
214 return to the x86 code which entered it.
216 CALLOUT initiates recursive entry into x86 code. A register
217 argument holds the address of the x86 routine. Immediate
218 values in the instruction identify the number of general
219 purpose and floating point register arguments to be passed
220 and the type of any value to be returned. */
222 uint32_t PSEUDO_HALT = 0xE0000000U;
223 uint32_t PSEUDO_CALLOUT = 0x00018000U;
224 uint32_t PSEUDO_CALLOUTR = 0x00018001U;
225 uint32_t PSEUDO_NOTIFY = 0x00014000U;
228 if (aarch64_get_instr (cpu) == PSEUDO_HALT)
230 TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
231 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
232 sim_stopped, SIM_SIGTRAP);
235 dispatch = INSTR (31, 15);
237 /* We do not handle callouts at the moment. */
238 if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
240 TRACE_EVENTS (cpu, " Callout");
241 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
242 sim_stopped, SIM_SIGABRT);
245 else if (dispatch == PSEUDO_NOTIFY)
252 /* Load-store single register (unscaled offset)
253 These instructions employ a base register plus an unscaled signed
256 N.B. the base register (source) can be Xn or SP. all other
257 registers may not be SP. */
259 /* 32 bit load 32 bit unscaled signed 9 bit. */
261 ldur32 (sim_cpu *cpu, int32_t offset)
263 unsigned rn = INSTR (9, 5);
264 unsigned rt = INSTR (4, 0);
266 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
267 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
268 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
272 /* 64 bit load 64 bit unscaled signed 9 bit. */
274 ldur64 (sim_cpu *cpu, int32_t offset)
276 unsigned rn = INSTR (9, 5);
277 unsigned rt = INSTR (4, 0);
279 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
280 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
281 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
285 /* 32 bit load zero-extended byte unscaled signed 9 bit. */
287 ldurb32 (sim_cpu *cpu, int32_t offset)
289 unsigned rn = INSTR (9, 5);
290 unsigned rt = INSTR (4, 0);
292 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
293 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
294 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
298 /* 32 bit load sign-extended byte unscaled signed 9 bit. */
300 ldursb32 (sim_cpu *cpu, int32_t offset)
302 unsigned rn = INSTR (9, 5);
303 unsigned rt = INSTR (4, 0);
305 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
306 aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
307 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
311 /* 64 bit load sign-extended byte unscaled signed 9 bit. */
313 ldursb64 (sim_cpu *cpu, int32_t offset)
315 unsigned rn = INSTR (9, 5);
316 unsigned rt = INSTR (4, 0);
318 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
319 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
320 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
324 /* 32 bit load zero-extended short unscaled signed 9 bit */
326 ldurh32 (sim_cpu *cpu, int32_t offset)
328 unsigned rn = INSTR (9, 5);
329 unsigned rd = INSTR (4, 0);
331 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
332 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
333 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
337 /* 32 bit load sign-extended short unscaled signed 9 bit */
339 ldursh32 (sim_cpu *cpu, int32_t offset)
341 unsigned rn = INSTR (9, 5);
342 unsigned rd = INSTR (4, 0);
344 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
345 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
346 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
350 /* 64 bit load sign-extended short unscaled signed 9 bit */
352 ldursh64 (sim_cpu *cpu, int32_t offset)
354 unsigned rn = INSTR (9, 5);
355 unsigned rt = INSTR (4, 0);
357 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
358 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
359 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
363 /* 64 bit load sign-extended word unscaled signed 9 bit */
365 ldursw (sim_cpu *cpu, int32_t offset)
367 unsigned rn = INSTR (9, 5);
368 unsigned rd = INSTR (4, 0);
370 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
371 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s32
372 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
376 /* N.B. with stores the value in source is written to the address
377 identified by source2 modified by offset. */
379 /* 32 bit store 32 bit unscaled signed 9 bit. */
381 stur32 (sim_cpu *cpu, int32_t offset)
383 unsigned rn = INSTR (9, 5);
384 unsigned rd = INSTR (4, 0);
386 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
387 aarch64_set_mem_u32 (cpu,
388 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
389 aarch64_get_reg_u32 (cpu, rd, NO_SP));
392 /* 64 bit store 64 bit unscaled signed 9 bit */
394 stur64 (sim_cpu *cpu, int32_t offset)
396 unsigned rn = INSTR (9, 5);
397 unsigned rd = INSTR (4, 0);
399 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
400 aarch64_set_mem_u64 (cpu,
401 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
402 aarch64_get_reg_u64 (cpu, rd, NO_SP));
405 /* 32 bit store byte unscaled signed 9 bit */
407 sturb (sim_cpu *cpu, int32_t offset)
409 unsigned rn = INSTR (9, 5);
410 unsigned rd = INSTR (4, 0);
412 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
413 aarch64_set_mem_u8 (cpu,
414 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
415 aarch64_get_reg_u8 (cpu, rd, NO_SP));
418 /* 32 bit store short unscaled signed 9 bit */
420 sturh (sim_cpu *cpu, int32_t offset)
422 unsigned rn = INSTR (9, 5);
423 unsigned rd = INSTR (4, 0);
425 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
426 aarch64_set_mem_u16 (cpu,
427 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
428 aarch64_get_reg_u16 (cpu, rd, NO_SP));
431 /* Load single register pc-relative label
432 Offset is a signed 19 bit immediate count in words
435 /* 32 bit pc-relative load */
437 ldr32_pcrel (sim_cpu *cpu, int32_t offset)
439 unsigned rd = INSTR (4, 0);
441 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
442 aarch64_set_reg_u64 (cpu, rd, NO_SP,
444 (cpu, aarch64_get_PC (cpu) + offset * 4));
447 /* 64 bit pc-relative load */
449 ldr_pcrel (sim_cpu *cpu, int32_t offset)
451 unsigned rd = INSTR (4, 0);
453 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
454 aarch64_set_reg_u64 (cpu, rd, NO_SP,
456 (cpu, aarch64_get_PC (cpu) + offset * 4));
459 /* sign extended 32 bit pc-relative load */
461 ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
463 unsigned rd = INSTR (4, 0);
465 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
466 aarch64_set_reg_u64 (cpu, rd, NO_SP,
468 (cpu, aarch64_get_PC (cpu) + offset * 4));
471 /* float pc-relative load */
473 fldrs_pcrel (sim_cpu *cpu, int32_t offset)
475 unsigned int rd = INSTR (4, 0);
477 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
478 aarch64_set_vec_u32 (cpu, rd, 0,
480 (cpu, aarch64_get_PC (cpu) + offset * 4));
483 /* double pc-relative load */
485 fldrd_pcrel (sim_cpu *cpu, int32_t offset)
487 unsigned int st = INSTR (4, 0);
489 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
490 aarch64_set_vec_u64 (cpu, st, 0,
492 (cpu, aarch64_get_PC (cpu) + offset * 4));
495 /* long double pc-relative load. */
497 fldrq_pcrel (sim_cpu *cpu, int32_t offset)
499 unsigned int st = INSTR (4, 0);
500 uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
503 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
504 aarch64_get_mem_long_double (cpu, addr, & a);
505 aarch64_set_FP_long_double (cpu, st, a);
/* This can be used to scale an offset by applying
   the requisite shift.  The second argument is either
   16, 32 or 64.  */

#define SCALE(_offset, _elementSize) \
    ((_offset) << ScaleShift ## _elementSize)

/* This can be used to optionally scale a register derived offset
   by applying the requisite shift as indicated by the Scaling
   argument.  The second argument is either Byte, Short, Word
   or Long.  The third argument is either Scaled or Unscaled.
   N.B. when _Scaling is Scaled the shift gets ANDed with
   all 1s while when it is Unscaled it gets ANDed with 0.  */

#define OPT_SCALE(_offset, _elementType, _Scaling) \
  ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))
525 /* This can be used to zero or sign extend a 32 bit register derived
526 value to a 64 bit value. the first argument must be the value as
527 a uint32_t and the second must be either UXTW or SXTW. The result
528 is returned as an int64_t. */
530 static inline int64_t
531 extend (uint32_t value, Extension extension)
539 /* A branchless variant of this ought to be possible. */
540 if (extension == UXTW || extension == NoExtension)
547 /* Scalar Floating Point
549 FP load/store single register (4 addressing modes)
551 N.B. the base register (source) can be the stack pointer.
552 The secondary source register (source2) can only be an Xn register. */
554 /* Load 32 bit unscaled signed 9 bit with pre- or post-writeback. */
556 fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
558 unsigned rn = INSTR (9, 5);
559 unsigned st = INSTR (4, 0);
560 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
565 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
566 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));
570 if (wb != NoWriteBack)
571 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
574 /* Load 8 bit with unsigned 12 bit offset. */
576 fldrb_abs (sim_cpu *cpu, uint32_t offset)
578 unsigned rd = INSTR (4, 0);
579 unsigned rn = INSTR (9, 5);
580 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
582 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
583 aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
586 /* Load 16 bit scaled unsigned 12 bit. */
588 fldrh_abs (sim_cpu *cpu, uint32_t offset)
590 unsigned rd = INSTR (4, 0);
591 unsigned rn = INSTR (9, 5);
592 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);
594 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
595 aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
598 /* Load 32 bit scaled unsigned 12 bit. */
600 fldrs_abs (sim_cpu *cpu, uint32_t offset)
602 unsigned rd = INSTR (4, 0);
603 unsigned rn = INSTR (9, 5);
604 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);
606 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
607 aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
610 /* Load 64 bit scaled unsigned 12 bit. */
612 fldrd_abs (sim_cpu *cpu, uint32_t offset)
614 unsigned rd = INSTR (4, 0);
615 unsigned rn = INSTR (9, 5);
616 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);
618 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
619 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
622 /* Load 128 bit scaled unsigned 12 bit. */
624 fldrq_abs (sim_cpu *cpu, uint32_t offset)
626 unsigned rd = INSTR (4, 0);
627 unsigned rn = INSTR (9, 5);
628 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
630 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
631 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
632 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
635 /* Load 32 bit scaled or unscaled zero- or sign-extended
636 32-bit register offset. */
638 fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
640 unsigned rm = INSTR (20, 16);
641 unsigned rn = INSTR (9, 5);
642 unsigned st = INSTR (4, 0);
643 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
644 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
645 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
647 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
648 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
649 (cpu, address + displacement));
652 /* Load 64 bit unscaled signed 9 bit with pre- or post-writeback. */
654 fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
656 unsigned rn = INSTR (9, 5);
657 unsigned st = INSTR (4, 0);
658 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
663 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
664 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));
669 if (wb != NoWriteBack)
670 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
673 /* Load 64 bit scaled or unscaled zero- or sign-extended 32-bit register offset. */
675 fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
677 unsigned rm = INSTR (20, 16);
678 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
679 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
681 fldrd_wb (cpu, displacement, NoWriteBack);
684 /* Load 128 bit unscaled signed 9 bit with pre- or post-writeback. */
686 fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
689 unsigned rn = INSTR (9, 5);
690 unsigned st = INSTR (4, 0);
691 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
696 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
697 aarch64_get_mem_long_double (cpu, address, & a);
698 aarch64_set_FP_long_double (cpu, st, a);
703 if (wb != NoWriteBack)
704 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
707 /* Load 128 bit scaled or unscaled zero- or sign-extended 32-bit register offset */
709 fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
711 unsigned rm = INSTR (20, 16);
712 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
713 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
715 fldrq_wb (cpu, displacement, NoWriteBack);
/* Memory Operations

   Load-store single register.
   There are four addressing modes available here which all employ a
   64 bit source (base) register.

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.

   Scaled, 12-bit, unsigned immediate offset, without pre- and
   post-index options.
   Unscaled, 9-bit, signed immediate offset with pre- or post-index
   writeback.
   Scaled or unscaled 64-bit register offset.
   Scaled or unscaled 32-bit extended register offset.

   All offsets are assumed to be raw from the decode i.e. the
   simulator is expected to adjust scaled offsets based on the
   accessed data size.  With register or extended register offset
   versions the same applies, except that in the latter case the
   operation may also require a sign extend.

   A separate method is provided for each possible addressing mode.  */
742 /* 32 bit load 32 bit scaled unsigned 12 bit */
744 ldr32_abs (sim_cpu *cpu, uint32_t offset)
746 unsigned rn = INSTR (9, 5);
747 unsigned rt = INSTR (4, 0);
749 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
750 /* The target register may not be SP but the source may be. */
751 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
752 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
753 + SCALE (offset, 32)));
756 /* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback. */
758 ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
760 unsigned rn = INSTR (9, 5);
761 unsigned rt = INSTR (4, 0);
764 if (rn == rt && wb != NoWriteBack)
767 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
772 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
773 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
778 if (wb != NoWriteBack)
779 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
782 /* 32 bit load 32 bit scaled or unscaled
783 zero- or sign-extended 32-bit register offset */
785 ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
787 unsigned rm = INSTR (20, 16);
788 unsigned rn = INSTR (9, 5);
789 unsigned rt = INSTR (4, 0);
790 /* rn may reference SP, rm and rt must reference ZR */
792 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
793 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
794 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
796 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
797 aarch64_set_reg_u64 (cpu, rt, NO_SP,
798 aarch64_get_mem_u32 (cpu, address + displacement));
801 /* 64 bit load 64 bit scaled unsigned 12 bit */
803 ldr_abs (sim_cpu *cpu, uint32_t offset)
805 unsigned rn = INSTR (9, 5);
806 unsigned rt = INSTR (4, 0);
808 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
809 /* The target register may not be SP but the source may be. */
810 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
811 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
812 + SCALE (offset, 64)));
815 /* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback. */
817 ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
819 unsigned rn = INSTR (9, 5);
820 unsigned rt = INSTR (4, 0);
823 if (rn == rt && wb != NoWriteBack)
826 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
831 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
832 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
837 if (wb != NoWriteBack)
838 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
841 /* 64 bit load 64 bit scaled or unscaled zero-
842 or sign-extended 32-bit register offset. */
844 ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
846 unsigned rm = INSTR (20, 16);
847 unsigned rn = INSTR (9, 5);
848 unsigned rt = INSTR (4, 0);
849 /* rn may reference SP, rm and rt must reference ZR */
851 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
852 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
853 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
855 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
856 aarch64_set_reg_u64 (cpu, rt, NO_SP,
857 aarch64_get_mem_u64 (cpu, address + displacement));
860 /* 32 bit load zero-extended byte scaled unsigned 12 bit. */
862 ldrb32_abs (sim_cpu *cpu, uint32_t offset)
864 unsigned rn = INSTR (9, 5);
865 unsigned rt = INSTR (4, 0);
867 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
868 /* The target register may not be SP but the source may be
869 there is no scaling required for a byte load. */
870 aarch64_set_reg_u64 (cpu, rt, NO_SP,
872 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
875 /* 32 bit load zero-extended byte unscaled signed 9 bit with pre- or post-writeback. */
877 ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
879 unsigned rn = INSTR (9, 5);
880 unsigned rt = INSTR (4, 0);
883 if (rn == rt && wb != NoWriteBack)
886 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
891 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
892 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
897 if (wb != NoWriteBack)
898 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
901 /* 32 bit load zero-extended byte scaled or unscaled zero-
902 or sign-extended 32-bit register offset. */
904 ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
906 unsigned rm = INSTR (20, 16);
907 unsigned rn = INSTR (9, 5);
908 unsigned rt = INSTR (4, 0);
909 /* rn may reference SP, rm and rt must reference ZR */
911 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
912 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
915 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
916 /* There is no scaling required for a byte load. */
917 aarch64_set_reg_u64 (cpu, rt, NO_SP,
918 aarch64_get_mem_u8 (cpu, address + displacement));
921 /* 64 bit load sign-extended byte unscaled signed 9 bit
922 with pre- or post-writeback. */
924 ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
926 unsigned rn = INSTR (9, 5);
927 unsigned rt = INSTR (4, 0);
931 if (rn == rt && wb != NoWriteBack)
934 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
939 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
940 val = aarch64_get_mem_s8 (cpu, address);
941 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
946 if (wb != NoWriteBack)
947 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
950 /* 64 bit load sign-extended byte scaled unsigned 12 bit. */
952 ldrsb_abs (sim_cpu *cpu, uint32_t offset)
954 ldrsb_wb (cpu, offset, NoWriteBack);
957 /* 64 bit load sign-extended byte scaled or unscaled zero-
958 or sign-extended 32-bit register offset. */
960 ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
962 unsigned rm = INSTR (20, 16);
963 unsigned rn = INSTR (9, 5);
964 unsigned rt = INSTR (4, 0);
965 /* rn may reference SP, rm and rt must reference ZR */
967 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
968 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
970 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
971 /* There is no scaling required for a byte load. */
972 aarch64_set_reg_s64 (cpu, rt, NO_SP,
973 aarch64_get_mem_s8 (cpu, address + displacement));
976 /* 32 bit load zero-extended short scaled unsigned 12 bit. */
978 ldrh32_abs (sim_cpu *cpu, uint32_t offset)
980 unsigned rn = INSTR (9, 5);
981 unsigned rt = INSTR (4, 0);
984 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
985 /* The target register may not be SP but the source may be. */
986 val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
987 + SCALE (offset, 16));
988 aarch64_set_reg_u32 (cpu, rt, NO_SP, val);
991 /* 32 bit load zero-extended short unscaled signed 9 bit
992 with pre- or post-writeback. */
994 ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
996 unsigned rn = INSTR (9, 5);
997 unsigned rt = INSTR (4, 0);
1000 if (rn == rt && wb != NoWriteBack)
1003 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1008 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1009 aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
1014 if (wb != NoWriteBack)
1015 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1018 /* 32 bit load zero-extended short scaled or unscaled zero-
1019 or sign-extended 32-bit register offset. */
1021 ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1023 unsigned rm = INSTR (20, 16);
1024 unsigned rn = INSTR (9, 5);
1025 unsigned rt = INSTR (4, 0);
1026 /* rn may reference SP, rm and rt must reference ZR */
1028 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1029 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1030 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1032 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1033 aarch64_set_reg_u32 (cpu, rt, NO_SP,
1034 aarch64_get_mem_u16 (cpu, address + displacement));
1037 /* 32 bit load sign-extended short scaled unsigned 12 bit. */
1039 ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
1041 unsigned rn = INSTR (9, 5);
1042 unsigned rt = INSTR (4, 0);
1045 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1046 /* The target register may not be SP but the source may be. */
1047 val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1048 + SCALE (offset, 16));
1049 aarch64_set_reg_s32 (cpu, rt, NO_SP, val);
1052 /* 32 bit load sign-extended short unscaled signed 9 bit
1053 with pre- or post-writeback. */
1055 ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1057 unsigned rn = INSTR (9, 5);
1058 unsigned rt = INSTR (4, 0);
1061 if (rn == rt && wb != NoWriteBack)
1064 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1069 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1070 aarch64_set_reg_s32 (cpu, rt, NO_SP,
1071 (int32_t) aarch64_get_mem_s16 (cpu, address));
1076 if (wb != NoWriteBack)
1077 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1080 /* 32 bit load sign-extended short scaled or unscaled zero-
1081 or sign-extended 32-bit register offset. */
1083 ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1085 unsigned rm = INSTR (20, 16);
1086 unsigned rn = INSTR (9, 5);
1087 unsigned rt = INSTR (4, 0);
1088 /* rn may reference SP, rm and rt must reference ZR */
1090 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1091 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1092 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1094 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1095 aarch64_set_reg_s32 (cpu, rt, NO_SP,
1096 (int32_t) aarch64_get_mem_s16
1097 (cpu, address + displacement));
1100 /* 64 bit load sign-extended short scaled unsigned 12 bit. */
1102 ldrsh_abs (sim_cpu *cpu, uint32_t offset)
1104 unsigned rn = INSTR (9, 5);
1105 unsigned rt = INSTR (4, 0);
1108 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1109 /* The target register may not be SP but the source may be. */
1110 val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1111 + SCALE (offset, 16));
1112 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1115 /* 64 bit load sign-extended short unscaled signed 9 bit
1116 with pre- or post-writeback. */
1118 ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1120 unsigned rn = INSTR (9, 5);
1121 unsigned rt = INSTR (4, 0);
1125 if (rn == rt && wb != NoWriteBack)
1128 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1129 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1134 val = aarch64_get_mem_s16 (cpu, address);
1135 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1140 if (wb != NoWriteBack)
1141 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1144 /* 64 bit load sign-extended short scaled or unscaled zero-
1145 or sign-extended 32-bit register offset. */
1147 ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1149 unsigned rm = INSTR (20, 16);
1150 unsigned rn = INSTR (9, 5);
1151 unsigned rt = INSTR (4, 0);
1153 /* rn may reference SP, rm and rt must reference ZR */
1155 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1156 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1157 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1160 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1161 val = aarch64_get_mem_s16 (cpu, address + displacement);
1162 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1165 /* 64 bit load sign-extended 32 bit scaled unsigned 12 bit. */
/* Reads a 32-bit word at [Xn|SP + offset*4] and writes it sign-extended
   into Xt.  (Elided listing: declarations/braces are not shown here.)  */
1167 ldrsw_abs (sim_cpu *cpu, uint32_t offset)
1169 unsigned rn = INSTR (9, 5);
1170 unsigned rt = INSTR (4, 0);
1173 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1174 val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1175 + SCALE (offset, 32));
1176 /* The target register may not be SP but the source may be. */
1177 return aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1180 /* 64 bit load sign-extended 32 bit unscaled signed 9 bit
1181 with pre- or post-writeback. */
/* rn == rt with writeback is rejected (unpredictable per the ISA);
   address is updated back into Xn|SP when wb != NoWriteBack.  */
1183 ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1185 unsigned rn = INSTR (9, 5);
1186 unsigned rt = INSTR (4, 0);
1189 if (rn == rt && wb != NoWriteBack)
1192 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1197 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1198 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));
1203 if (wb != NoWriteBack)
1204 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1207 /* 64 bit load sign-extended 32 bit scaled or unscaled zero-
1208 or sign-extended 32-bit register offset. */
/* Offset = extend (Wm, extension), optionally scaled by the access size
   (32 bits) via OPT_SCALE.  */
1210 ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1212 unsigned rm = INSTR (20, 16);
1213 unsigned rn = INSTR (9, 5);
1214 unsigned rt = INSTR (4, 0);
1215 /* rn may reference SP, rm and rt must reference ZR */
1217 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1218 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1219 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
1221 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1222 aarch64_set_reg_s64 (cpu, rt, NO_SP,
1223 aarch64_get_mem_s32 (cpu, address + displacement));
1226 /* N.B. with stores the value in source is written to the
1227 address identified by source2 modified by source3/offset. */
1229 /* 32 bit store scaled unsigned 12 bit. */
/* Stores Wt to [Xn|SP + offset*4].  */
1231 str32_abs (sim_cpu *cpu, uint32_t offset)
1233 unsigned rn = INSTR (9, 5);
1234 unsigned rt = INSTR (4, 0);
1236 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1237 /* The target register may not be SP but the source may be. */
1238 aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
1239 + SCALE (offset, 32)),
1240 aarch64_get_reg_u32 (cpu, rt, NO_SP));
1243 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
/* rn == rt with writeback is rejected; Xn|SP is updated afterwards.  */
1245 str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1247 unsigned rn = INSTR (9, 5);
1248 unsigned rt = INSTR (4, 0);
1251 if (rn == rt && wb != NoWriteBack)
1254 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1258 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1259 aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));
1264 if (wb != NoWriteBack)
1265 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1268 /* 32 bit store scaled or unscaled zero- or
1269 sign-extended 32-bit register offset. */
1271 str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1273 unsigned rm = INSTR (20, 16);
1274 unsigned rn = INSTR (9, 5);
1275 unsigned rt = INSTR (4, 0);
1277 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1278 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1279 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
1281 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1282 aarch64_set_mem_u32 (cpu, address + displacement,
/* NOTE(review): fetches the full 64-bit register where str32_abs uses
   aarch64_get_reg_u32; aarch64_set_mem_u32 truncates so the stored value
   is the same, but the inconsistency is worth confirming upstream.  */
1283 aarch64_get_reg_u64 (cpu, rt, NO_SP));
1286 /* 64 bit store scaled unsigned 12 bit. */
/* Stores Xt to [Xn|SP + offset*8].  */
1288 str_abs (sim_cpu *cpu, uint32_t offset)
1290 unsigned rn = INSTR (9, 5);
1291 unsigned rt = INSTR (4, 0);
1293 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1294 aarch64_set_mem_u64 (cpu,
1295 aarch64_get_reg_u64 (cpu, rn, SP_OK)
1296 + SCALE (offset, 64),
1297 aarch64_get_reg_u64 (cpu, rt, NO_SP));
1300 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
/* rn == rt with writeback is rejected; Xn|SP is updated afterwards.  */
1302 str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1304 unsigned rn = INSTR (9, 5);
1305 unsigned rt = INSTR (4, 0);
1308 if (rn == rt && wb != NoWriteBack)
1311 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1316 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1317 aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));
1322 if (wb != NoWriteBack)
1323 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1326 /* 64 bit store scaled or unscaled zero-
1327 or sign-extended 32-bit register offset. */
1329 str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1331 unsigned rm = INSTR (20, 16);
1332 unsigned rn = INSTR (9, 5);
1333 unsigned rt = INSTR (4, 0);
1334 /* rn may reference SP, rm and rt must reference ZR */
1336 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
/* Second argument of extend () (the extension) is on an elided line.  */
1337 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1339 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
1341 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1342 aarch64_set_mem_u64 (cpu, address + displacement,
1343 aarch64_get_reg_u64 (cpu, rt, NO_SP));
1346 /* 32 bit store byte scaled unsigned 12 bit. */
/* Stores the low byte of Wt to [Xn|SP + offset]; byte accesses are
   never scaled.  */
1348 strb_abs (sim_cpu *cpu, uint32_t offset)
1350 unsigned rn = INSTR (9, 5);
1351 unsigned rt = INSTR (4, 0);
1353 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1354 /* The target register may not be SP but the source may be.
1355 There is no scaling required for a byte load. */
1356 aarch64_set_mem_u8 (cpu,
1357 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
1358 aarch64_get_reg_u8 (cpu, rt, NO_SP));
1361 /* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback. */
/* rn == rt with writeback is rejected; Xn|SP is updated afterwards.  */
1363 strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1365 unsigned rn = INSTR (9, 5);
1366 unsigned rt = INSTR (4, 0);
1369 if (rn == rt && wb != NoWriteBack)
1372 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1377 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1378 aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));
1383 if (wb != NoWriteBack)
1384 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1387 /* 32 bit store byte scaled or unscaled zero-
1388 or sign-extended 32-bit register offset. */
1390 strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1392 unsigned rm = INSTR (20, 16);
1393 unsigned rn = INSTR (9, 5);
1394 unsigned rt = INSTR (4, 0);
1395 /* rn may reference SP, rm and rt must reference ZR */
1397 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
/* Second argument of extend () (the extension) is on an elided line.  */
1398 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1401 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1402 /* There is no scaling required for a byte load. */
1403 aarch64_set_mem_u8 (cpu, address + displacement,
1404 aarch64_get_reg_u8 (cpu, rt, NO_SP));
1407 /* 32 bit store short scaled unsigned 12 bit. */
/* Stores the low halfword of Wt to [Xn|SP + offset*2].  */
1409 strh_abs (sim_cpu *cpu, uint32_t offset)
1411 unsigned rn = INSTR (9, 5);
1412 unsigned rt = INSTR (4, 0);
1414 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1415 /* The target register may not be SP but the source may be. */
1416 aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1417 + SCALE (offset, 16),
1418 aarch64_get_reg_u16 (cpu, rt, NO_SP));
1421 /* 32 bit store short unscaled signed 9 bit with pre- or post-writeback. */
/* rn == rt with writeback is rejected; Xn|SP is updated afterwards.  */
1423 strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1425 unsigned rn = INSTR (9, 5);
1426 unsigned rt = INSTR (4, 0);
1429 if (rn == rt && wb != NoWriteBack)
1432 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1437 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1438 aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));
1443 if (wb != NoWriteBack)
1444 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1447 /* 32 bit store short scaled or unscaled zero-
1448 or sign-extended 32-bit register offset. */
1450 strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1452 unsigned rm = INSTR (20, 16);
1453 unsigned rn = INSTR (9, 5);
1454 unsigned rt = INSTR (4, 0);
1455 /* rn may reference SP, rm and rt must reference ZR */
1457 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1458 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1459 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1461 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1462 aarch64_set_mem_u16 (cpu, address + displacement,
1463 aarch64_get_reg_u16 (cpu, rt, NO_SP));
1466 /* Prefetch unsigned 12 bit. */
/* All three prefetch forms below are deliberate no-ops: a functional
   simulator has no cache to prime, so only the decode is performed.
   The bodies consist entirely of commented-out pseudocode.  */
1468 prfm_abs (sim_cpu *cpu, uint32_t offset)
1470 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1471 00010 ==> PLDL2KEEP, 00001 ==> PLDL2STRM,
1472 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1473 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1474 10010 ==> PSTL2KEEP, 10001 ==> PSTL2STRM,
1475 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1477 PrfOp prfop = prfop (instr, 4, 0);
1478 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
1479 + SCALE (offset, 64). */
1481 /* TODO : implement prefetch of address. */
1484 /* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset. */
1486 prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1488 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1489 00010 ==> PLDL2KEEP, 00001 ==> PLDL2STRM,
1490 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1491 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1492 10010 ==> PSTL2KEEP, 10001 ==> PSTL2STRM,
1493 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1495 rn may reference SP, rm may only reference ZR
1496 PrfOp prfop = prfop (instr, 4, 0);
1497 uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1498 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1500 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
1501 uint64_t address = base + displacement. */
1503 /* TODO : implement prefetch of address */
1506 /* 64 bit pc-relative prefetch. */
1508 prfm_pcrel (sim_cpu *cpu, int32_t offset)
1510 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1511 00010 ==> PLDL2KEEP, 00001 ==> PLDL2STRM,
1512 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1513 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1514 10010 ==> PSTL2KEEP, 10001 ==> PSTL2STRM,
1515 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1517 PrfOp prfop = prfop (instr, 4, 0);
1518 uint64_t address = aarch64_get_PC (cpu) + offset. */
1520 /* TODO : implement this */
1523 /* Load-store exclusive. */
/* NOTE(review): the two function signatures in this region are elided
   from the listing; by content these appear to be the load-exclusive
   (LDXR family) and store-exclusive (STXR family) handlers.  Size field
   instr[31,30] selects byte/half/word/dword.  The acquire/release and
   exclusive-monitor bits are decoded but deliberately ignored, and STXR
   always reports success (Ws = 0) — valid for a single-CPU simulator.  */
1528 unsigned rn = INSTR (9, 5);
1529 unsigned rt = INSTR (4, 0);
1530 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1531 int size = INSTR (31, 30);
1532 /* int ordered = INSTR (15, 15); */
1533 /* int exclusive = ! INSTR (23, 23); */
1535 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* Dispatch on size (switch/case labels elided in this listing).  */
1539 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
1542 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
1545 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
1548 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
1556 unsigned rn = INSTR (9, 5);
1557 unsigned rt = INSTR (4, 0);
1558 unsigned rs = INSTR (20, 16);
1559 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1560 int size = INSTR (31, 30);
1561 uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);
1565 case 0: aarch64_set_mem_u8 (cpu, address, data); break;
1566 case 1: aarch64_set_mem_u16 (cpu, address, data); break;
1567 case 2: aarch64_set_mem_u32 (cpu, address, data); break;
1568 case 3: aarch64_set_mem_u64 (cpu, address, data); break;
1571 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1572 aarch64_set_reg_u64 (cpu, rs, NO_SP, 0); /* Always exclusive... */
/* Decode and execute a PC-relative (literal) load.  Dispatch key is
   opc:V (instr[31,30] and instr[26]); value 7 is unallocated (the
   HALT_UNALLOC default arm is elided from this listing).  */
1576 dexLoadLiteral (sim_cpu *cpu)
1578 /* instr[29,27] == 011
1580 instr[31,30:26] = opc: 000 ==> LDRW, 001 ==> FLDRS
1581 010 ==> LDRX, 011 ==> FLDRD
1582 100 ==> LDRSW, 101 ==> FLDRQ
1583 110 ==> PRFM, 111 ==> UNALLOC
1584 instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
1585 instr[23, 5] == simm19 */
1587 /* unsigned rt = INSTR (4, 0); */
1588 uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26);
1589 int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);
1593 case 0: ldr32_pcrel (cpu, imm); break;
1594 case 1: fldrs_pcrel (cpu, imm); break;
1595 case 2: ldr_pcrel (cpu, imm); break;
1596 case 3: fldrd_pcrel (cpu, imm); break;
1597 case 4: ldrsw_pcrel (cpu, imm); break;
1598 case 5: fldrq_pcrel (cpu, imm); break;
1599 case 6: prfm_pcrel (cpu, imm); break;
1606 /* Immediate arithmetic
1607 The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
1608 value left shifted by 12 bits (done at decode).
1610 N.B. the register args (dest, source) can normally be Xn or SP.
1611 the exception occurs for flag setting instructions which may
1612 only use Xn for the output (dest). */
1614 /* 32 bit add immediate. */
/* Wd|WSP = Wn|WSP + aimm; the u64 setter zero-extends the 32-bit sum.  */
1616 add32 (sim_cpu *cpu, uint32_t aimm)
1618 unsigned rn = INSTR (9, 5);
1619 unsigned rd = INSTR (4, 0);
1621 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1622 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1623 aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
1626 /* 64 bit add immediate. */
/* Xd|SP = Xn|SP + aimm.  */
1628 add64 (sim_cpu *cpu, uint32_t aimm)
1630 unsigned rn = INSTR (9, 5);
1631 unsigned rd = INSTR (4, 0);
1633 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1634 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1635 aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
/* Compute NZCV for a 32-bit addition: C from comparing the 64-bit
   unsigned sum against the truncated result, V from the 64-bit signed
   sum.  (Flag-accumulation and CPSR lines are partly elided here.)  */
1639 set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
1641 int32_t result = value1 + value2;
1642 int64_t sresult = (int64_t) value1 + (int64_t) value2;
1643 uint64_t uresult = (uint64_t)(uint32_t) value1
1644 + (uint64_t)(uint32_t) value2;
/* NOTE(review): '1 << 31' shifts into the sign bit of a signed int;
   '1U << 31' would be the pedantically well-defined spelling.  */
1650 if (result & (1 << 31))
1653 if (uresult != result)
1656 if (sresult != result)
1659 aarch64_set_CPSR (cpu, flags);
1662 #define NEG(a) (((a) & signbit) == signbit)
1663 #define POS(a) (((a) & signbit) == 0)
/* 64-bit add flags: no wider type is available, so C and V are derived
   from the operand/result sign patterns (NEG/POS above).  */
1666 set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1668 uint64_t result = value1 + value2;
1670 uint64_t signbit = 1ULL << 63;
1678 if ( (NEG (value1) && NEG (value2))
1679 || (NEG (value1) && POS (result))
1680 || (NEG (value2) && POS (result)))
1683 if ( (NEG (value1) && NEG (value2) && POS (result))
1684 || (POS (value1) && POS (value2) && NEG (result)))
1687 aarch64_set_CPSR (cpu, flags);
/* 32-bit subtract flags; C follows the AArch64 convention
   (set when there is NO borrow).  */
1691 set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
1693 uint32_t result = value1 - value2;
1695 uint32_t signbit = 1U << 31;
1703 if ( (NEG (value1) && POS (value2))
1704 || (NEG (value1) && POS (result))
1705 || (POS (value2) && POS (result)))
1708 if ( (NEG (value1) && POS (value2) && POS (result))
1709 || (POS (value1) && NEG (value2) && NEG (result)))
1712 aarch64_set_CPSR (cpu, flags);
/* 64-bit subtract flags, same sign-pattern scheme as above.  */
1716 set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1718 uint64_t result = value1 - value2;
1720 uint64_t signbit = 1ULL << 63;
1728 if ( (NEG (value1) && POS (value2))
1729 || (NEG (value1) && POS (result))
1730 || (POS (value2) && POS (result)))
1733 if ( (NEG (value1) && POS (value2) && POS (result))
1734 || (POS (value1) && NEG (value2) && NEG (result)))
1737 aarch64_set_CPSR (cpu, flags);
/* Logical-op flags: only N and Z are data-dependent (C and V handling
   is on elided lines).  */
1741 set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
1750 if (result & (1 << 31))
1755 aarch64_set_CPSR (cpu, flags);
1759 set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
1768 if (result & (1ULL << 63))
1773 aarch64_set_CPSR (cpu, flags);
1776 /* 32 bit add immediate set flags. */
/* ADDS Wd, Wn|WSP, #aimm — destination may not be SP (NO_SP).  */
1778 adds32 (sim_cpu *cpu, uint32_t aimm)
1780 unsigned rn = INSTR (9, 5);
1781 unsigned rd = INSTR (4, 0);
1782 /* TODO : do we need to worry about signs here? */
1783 int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);
1785 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1786 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
1787 set_flags_for_add32 (cpu, value1, aimm);
1790 /* 64 bit add immediate set flags. */
1792 adds64 (sim_cpu *cpu, uint32_t aimm)
1794 unsigned rn = INSTR (9, 5);
1795 unsigned rd = INSTR (4, 0);
1796 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1797 uint64_t value2 = aimm;
1799 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1800 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1801 set_flags_for_add64 (cpu, value1, value2);
1804 /* 32 bit sub immediate. */
1806 sub32 (sim_cpu *cpu, uint32_t aimm)
1808 unsigned rn = INSTR (9, 5);
1809 unsigned rd = INSTR (4, 0);
1811 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1812 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1813 aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
1816 /* 64 bit sub immediate. */
1818 sub64 (sim_cpu *cpu, uint32_t aimm)
1820 unsigned rn = INSTR (9, 5);
1821 unsigned rd = INSTR (4, 0);
1823 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1824 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1825 aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
1828 /* 32 bit sub immediate set flags. */
1830 subs32 (sim_cpu *cpu, uint32_t aimm)
1832 unsigned rn = INSTR (9, 5);
1833 unsigned rd = INSTR (4, 0);
/* NOTE(review): fetched with the u64 accessor then truncated to
   uint32_t — equivalent to get_reg_u32, but inconsistent with its
   siblings; confirm against upstream.  */
1834 uint32_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1835 uint32_t value2 = aimm;
1837 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1838 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1839 set_flags_for_sub32 (cpu, value1, value2);
1842 /* 64 bit sub immediate set flags. */
1844 subs64 (sim_cpu *cpu, uint32_t aimm)
1846 unsigned rn = INSTR (9, 5);
1847 unsigned rd = INSTR (4, 0);
1848 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1849 uint32_t value2 = aimm;
1851 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1852 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1853 set_flags_for_sub64 (cpu, value1, value2);
1856 /* Data Processing Register. */
1858 /* First two helpers to perform the shift operations. */
/* Apply LSL/LSR/ASR/ROR by 'count' to a 32-bit value (switch/case
   labels are on elided lines; the final arm is ROR).  Callers ensure
   count < 32, so the shifts are well-defined.  */
1860 static inline uint32_t
1861 shifted32 (uint32_t value, Shift shift, uint32_t count)
1867 return (value << count);
1869 return (value >> count);
/* ASR: convert to signed so the right shift replicates the sign bit.  */
1872 int32_t svalue = value;
1873 return (svalue >> count);
/* ROR composed from two shifts.  NOTE(review): when count == 0 the
   'value << (32 - count)' term shifts by 32 (undefined in C); confirm
   callers never request ROR #0.  */
1877 uint32_t top = value >> count;
1878 uint32_t bottom = value << (32 - count);
1879 return (bottom | top);
1884 static inline uint64_t
/* 64-bit counterpart of shifted32; same structure and same ROR #0
   caveat with a 64-bit shift width.  */
1885 shifted64 (uint64_t value, Shift shift, uint32_t count)
1891 return (value << count);
1893 return (value >> count);
1896 int64_t svalue = value;
1897 return (svalue >> count);
1901 uint64_t top = value >> count;
1902 uint64_t bottom = value << (64 - count);
1903 return (bottom | top);
1908 /* Arithmetic shifted register.
1909 These allow an optional LSL, ASR or LSR to the second source
1910 register with a count up to the register bit count.
1912 N.B register args may not be SP. */
1914 /* 32 bit ADD shifted register. */
/* Wd = Wn + shifted (Wm); the trailing 'shift, count' arguments to
   shifted32/shifted64 are on elided lines throughout this group.  */
1916 add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1918 unsigned rm = INSTR (20, 16);
1919 unsigned rn = INSTR (9, 5);
1920 unsigned rd = INSTR (4, 0);
1922 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1923 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1924 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1925 + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1929 /* 64 bit ADD shifted register. */
1931 add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1933 unsigned rm = INSTR (20, 16);
1934 unsigned rn = INSTR (9, 5);
1935 unsigned rd = INSTR (4, 0);
1937 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1938 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1939 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1940 + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1944 /* 32 bit ADD shifted register setting flags. */
1946 adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1948 unsigned rm = INSTR (20, 16);
1949 unsigned rn = INSTR (9, 5);
1950 unsigned rd = INSTR (4, 0);
1952 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1953 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1956 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1957 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1958 set_flags_for_add32 (cpu, value1, value2);
1961 /* 64 bit ADD shifted register setting flags. */
1963 adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1965 unsigned rm = INSTR (20, 16);
1966 unsigned rn = INSTR (9, 5);
1967 unsigned rd = INSTR (4, 0);
1969 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1970 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1973 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1974 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1975 set_flags_for_add64 (cpu, value1, value2);
1978 /* 32 bit SUB shifted register. */
1980 sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1982 unsigned rm = INSTR (20, 16);
1983 unsigned rn = INSTR (9, 5);
1984 unsigned rd = INSTR (4, 0);
1986 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1987 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1988 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1989 - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1993 /* 64 bit SUB shifted register. */
1995 sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1997 unsigned rm = INSTR (20, 16);
1998 unsigned rn = INSTR (9, 5);
1999 unsigned rd = INSTR (4, 0);
2001 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2002 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2003 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2004 - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2008 /* 32 bit SUB shifted register setting flags. */
2010 subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2012 unsigned rm = INSTR (20, 16);
2013 unsigned rn = INSTR (9, 5);
2014 unsigned rd = INSTR (4, 0);
2016 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2017 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
2020 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2021 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2022 set_flags_for_sub32 (cpu, value1, value2);
2025 /* 64 bit SUB shifted register setting flags. */
2027 subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2029 unsigned rm = INSTR (20, 16);
2030 unsigned rn = INSTR (9, 5);
2031 unsigned rd = INSTR (4, 0);
2033 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2034 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2037 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2038 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2039 set_flags_for_sub64 (cpu, value1, value2);
2042 /* First a couple more helpers to fetch the
2043 relevant source register element either
2044 sign or zero extended as required by the
/* Fetch register 'lo' with the requested zero/sign extension applied,
   returned as a 32-bit quantity.  UXTW/UXTX both degrade to a plain
   32-bit read here; SXTW/SXTX fall to the signed 32-bit default
   (the switch header/result-width lines are elided).  */
2048 extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
2052 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2053 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2054 case UXTW: /* Fall through. */
2055 case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2056 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2057 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2058 case SXTW: /* Fall through. */
2059 case SXTX: /* Fall through. */
2060 default: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
/* 64-bit counterpart: every extension gets its own width here.  */
2065 extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
2069 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2070 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2071 case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2072 case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
2073 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2074 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2075 case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2077 default: return aarch64_get_reg_s64 (cpu, lo, NO_SP);
2081 /* Arithmetic extending register
2082 These allow an optional sign extension of some portion of the
2083 second source register followed by an optional left shift of
2084 between 1 and 4 bits (i.e. a shift of 0-4 bits???)
2086 N.B output (dest) and first input arg (source) may normally be Xn
2087 or SP. However, for flag setting operations dest can only be
2088 Xn. Second input registers are always Xn. */
2090 /* 32 bit ADD extending register. */
/* Wd|WSP = Wn|WSP + (extend (Wm) << shift); shift is 0-4, validated by
   the decoder.  */
2092 add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2094 unsigned rm = INSTR (20, 16);
2095 unsigned rn = INSTR (9, 5);
2096 unsigned rd = INSTR (4, 0);
2098 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2099 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2100 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2101 + (extreg32 (cpu, rm, extension) << shift));
2104 /* 64 bit ADD extending register.
2105 N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2107 add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2109 unsigned rm = INSTR (20, 16);
2110 unsigned rn = INSTR (9, 5);
2111 unsigned rd = INSTR (4, 0);
2113 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2114 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2115 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2116 + (extreg64 (cpu, rm, extension) << shift));
2119 /* 32 bit ADD extending register setting flags. */
/* Flag-setting variants: source may still be SP, destination may not.  */
2121 adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2123 unsigned rm = INSTR (20, 16);
2124 unsigned rn = INSTR (9, 5);
2125 unsigned rd = INSTR (4, 0);
2127 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2128 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2130 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2131 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2132 set_flags_for_add32 (cpu, value1, value2);
2135 /* 64 bit ADD extending register setting flags */
2136 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2138 adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2140 unsigned rm = INSTR (20, 16);
2141 unsigned rn = INSTR (9, 5);
2142 unsigned rd = INSTR (4, 0);
2144 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2145 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2147 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2148 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2149 set_flags_for_add64 (cpu, value1, value2);
2152 /* 32 bit SUB extending register. */
2154 sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2156 unsigned rm = INSTR (20, 16);
2157 unsigned rn = INSTR (9, 5);
2158 unsigned rd = INSTR (4, 0);
2160 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2161 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2162 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2163 - (extreg32 (cpu, rm, extension) << shift));
2166 /* 64 bit SUB extending register. */
2167 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2169 sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2171 unsigned rm = INSTR (20, 16);
2172 unsigned rn = INSTR (9, 5);
2173 unsigned rd = INSTR (4, 0);
2175 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2176 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2177 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2178 - (extreg64 (cpu, rm, extension) << shift));
2181 /* 32 bit SUB extending register setting flags. */
2183 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2185 unsigned rm = INSTR (20, 16);
2186 unsigned rn = INSTR (9, 5);
2187 unsigned rd = INSTR (4, 0);
2189 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2190 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2192 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2193 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2194 set_flags_for_sub32 (cpu, value1, value2);
2197 /* 64 bit SUB extending register setting flags */
2198 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2200 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2202 unsigned rm = INSTR (20, 16);
2203 unsigned rn = INSTR (9, 5);
2204 unsigned rd = INSTR (4, 0);
2206 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2207 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2209 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2210 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2211 set_flags_for_sub64 (cpu, value1, value2);
/* Decode add/subtract-immediate: verify the fixed field, apply the
   optional LSL #12 to the immediate (on elided lines), then dispatch
   on size:op:set (instr[31,29]).  */
2215 dexAddSubtractImmediate (sim_cpu *cpu)
2217 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2218 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2219 instr[29] = set : 0 ==> no flags, 1 ==> set flags
2220 instr[28,24] = 10001
2221 instr[23,22] = shift : 00 == LSL#0, 01 = LSL#12 1x = UNALLOC
2222 instr[21,10] = uimm12
2226 /* N.B. the shift is applied at decode before calling the add/sub routine. */
2227 uint32_t shift = INSTR (23, 22);
2228 uint32_t imm = INSTR (21, 10);
2229 uint32_t dispatch = INSTR (31, 29);
2231 NYI_assert (28, 24, 0x11);
2241 case 0: add32 (cpu, imm); break;
2242 case 1: adds32 (cpu, imm); break;
2243 case 2: sub32 (cpu, imm); break;
2244 case 3: subs32 (cpu, imm); break;
2245 case 4: add64 (cpu, imm); break;
2246 case 5: adds64 (cpu, imm); break;
2247 case 6: sub64 (cpu, imm); break;
2248 case 7: subs64 (cpu, imm); break;
/* Decode add/subtract shifted-register: reject ROR encodings and
   count[5] set in 32-bit mode (both UNALLOC; the HALT_UNALLOC arms
   are elided), then dispatch on size:op (instr[31,29]).  */
2253 dexAddSubtractShiftedRegister (sim_cpu *cpu)
2255 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2256 instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
2257 instr[28,24] = 01011
2258 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
2261 instr[15,10] = count : must be 0xxxxx for 32 bit
2265 uint32_t size = INSTR (31, 31);
2266 uint32_t count = INSTR (15, 10);
2267 Shift shiftType = INSTR (23, 22);
2269 NYI_assert (28, 24, 0x0B);
2270 NYI_assert (21, 21, 0);
2272 /* Shift encoded as ROR is unallocated. */
2273 if (shiftType == ROR)
2276 /* 32 bit operations must have count[5] = 0
2277 or else we have an UNALLOC. */
2278 if (size == 0 && uimm (count, 5, 5))
2281 /* Dispatch on size:op i.e instr [31,29]. */
2282 switch (INSTR (31, 29))
2284 case 0: add32_shift (cpu, shiftType, count); break;
2285 case 1: adds32_shift (cpu, shiftType, count); break;
2286 case 2: sub32_shift (cpu, shiftType, count); break;
2287 case 3: subs32_shift (cpu, shiftType, count); break;
2288 case 4: add64_shift (cpu, shiftType, count); break;
2289 case 5: adds64_shift (cpu, shiftType, count); break;
2290 case 6: sub64_shift (cpu, shiftType, count); break;
2291 case 7: subs64_shift (cpu, shiftType, count); break;
/* Decode add/subtract extended-register: shift amounts above 4 are
   UNALLOC (check on an elided line), then dispatch on size:op:set.  */
2296 dexAddSubtractExtendedRegister (sim_cpu *cpu)
2298 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2299 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2300 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2301 instr[28,24] = 01011
2302 instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
2305 instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
2306 000 ==> LSL|UXTW, 001 ==> UXTZ,
2307 000 ==> SXTB, 001 ==> SXTH,
2308 000 ==> SXTW, 001 ==> SXTX,
2309 instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
2313 Extension extensionType = INSTR (15, 13);
2314 uint32_t shift = INSTR (12, 10);
2316 NYI_assert (28, 24, 0x0B);
2317 NYI_assert (21, 21, 1);
2319 /* Shift may not exceed 4. */
2323 /* Dispatch on size:op:set?. */
2324 switch (INSTR (31, 29))
2326 case 0: add32_ext (cpu, extensionType, shift); break;
2327 case 1: adds32_ext (cpu, extensionType, shift); break;
2328 case 2: sub32_ext (cpu, extensionType, shift); break;
2329 case 3: subs32_ext (cpu, extensionType, shift); break;
2330 case 4: add64_ext (cpu, extensionType, shift); break;
2331 case 5: adds64_ext (cpu, extensionType, shift); break;
2332 case 6: sub64_ext (cpu, extensionType, shift); break;
2333 case 7: subs64_ext (cpu, extensionType, shift); break;
2337 /* Conditional data processing
2338 Condition register is implicit 3rd source. */
2340 /* 32 bit add with carry. */
2341 /* N.B register args may not be SP. */
/* Wd = Wn + Wm + C (the '+ IS_SET (C)' term is on an elided line).  */
2344 adc32 (sim_cpu *cpu)
2346 unsigned rm = INSTR (20, 16);
2347 unsigned rn = INSTR (9, 5);
2348 unsigned rd = INSTR (4, 0);
2350 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2351 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2352 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2353 + aarch64_get_reg_u32 (cpu, rm, NO_SP)
2357 /* 64 bit add with carry */
2359 adc64 (sim_cpu *cpu)
2361 unsigned rm = INSTR (20, 16);
2362 unsigned rn = INSTR (9, 5);
2363 unsigned rd = INSTR (4, 0);
2365 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2366 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2367 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2368 + aarch64_get_reg_u64 (cpu, rm, NO_SP)
2372 /* 32 bit add with carry setting flags. */
/* ADCS: flags computed by folding the carry into value2 — exact for
   N/Z, approximate at the extremes for C/V (inherited behaviour).  */
2374 adcs32 (sim_cpu *cpu)
2376 unsigned rm = INSTR (20, 16);
2377 unsigned rn = INSTR (9, 5);
2378 unsigned rd = INSTR (4, 0);
2380 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2381 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2382 uint32_t carry = IS_SET (C);
2384 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2385 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2386 set_flags_for_add32 (cpu, value1, value2 + carry);
2389 /* 64 bit add with carry setting flags. */
2391 adcs64 (sim_cpu *cpu)
2393 unsigned rm = INSTR (20, 16);
2394 unsigned rn = INSTR (9, 5);
2395 unsigned rd = INSTR (4, 0);
2397 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2398 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2399 uint64_t carry = IS_SET (C);
2401 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2402 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2403 set_flags_for_add64 (cpu, value1, value2 + carry);
2406 /* 32 bit sub with carry. */
/* SBC: Wd = Wn - Wm - 1 + C (the '- 1 + IS_SET (C)' term is on an
   elided line).  With Rn == ZR this is NGC.  */
2408 sbc32 (sim_cpu *cpu)
2410 unsigned rm = INSTR (20, 16);
2411 unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. */
2412 unsigned rd = INSTR (4, 0);
2414 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2415 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2416 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2417 - aarch64_get_reg_u32 (cpu, rm, NO_SP)
2421 /* 64 bit sub with carry */
2423 sbc64 (sim_cpu *cpu)
2425 unsigned rm = INSTR (20, 16);
2426 unsigned rn = INSTR (9, 5);
2427 unsigned rd = INSTR (4, 0);
2429 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2430 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2431 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2432 - aarch64_get_reg_u64 (cpu, rm, NO_SP)
2436 /* 32 bit sub with carry setting flags */
2438 sbcs32 (sim_cpu *cpu)
2440 unsigned rm = INSTR (20, 16);
2441 unsigned rn = INSTR (9, 5);
2442 unsigned rd = INSTR (4, 0);
2444 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2445 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2446 uint32_t carry = IS_SET (C);
2447 uint32_t result = value1 - value2 + 1 - carry;
2449 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2450 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2451 set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
2454 /* 64 bit sub with carry setting flags */
2456 sbcs64 (sim_cpu *cpu)
2458 unsigned rm = INSTR (20, 16);
2459 unsigned rn = INSTR (9, 5);
2460 unsigned rd = INSTR (4, 0);
2462 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2463 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2464 uint64_t carry = IS_SET (C);
2465 uint64_t result = value1 - value2 + 1 - carry;
2467 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2468 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2469 set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
/* Decode and dispatch the add/subtract-with-carry group.
   NOTE(review): the "op2 != 0 ==> HALT_UNALLOC" check implied by the
   comment is not visible in this excerpt -- confirm it exists.  */
2473 dexAddSubtractWithCarry (sim_cpu *cpu)
2475 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2476 instr[30] = op : 0 ==> ADC, 1 ==> SBC
2477 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2478 instr[28,21] = 1 1010 000
2480 instr[15,10] = op2 : 00000 ==> ok, ow ==> UNALLOC
2484 uint32_t op2 = INSTR (15, 10);
2486 NYI_assert (28, 21, 0xD0);
2491 /* Dispatch on size:op:set?. */
2492 switch (INSTR (31, 29))
2494 case 0: adc32 (cpu); break;
2495 case 1: adcs32 (cpu); break;
2496 case 2: sbc32 (cpu); break;
2497 case 3: sbcs32 (cpu); break;
2498 case 4: adc64 (cpu); break;
2499 case 5: adcs64 (cpu); break;
2500 case 6: sbc64 (cpu); break;
2501 case 7: sbcs64 (cpu); break;
/* Evaluate condition code CC against the current N/Z/C/V flags and
   return non-zero when the condition holds.  The switch header and
   the AL/NV default are outside this excerpt.  */
2506 testConditionCode (sim_cpu *cpu, CondCode cc)
2508 /* This should be reduceable to branchless logic
2509 by some careful testing of bits in CC followed
2510 by the requisite masking and combining of bits
2511 from the flag register.
2513 For now we do it with a switch. */
2518 case EQ: res = IS_SET (Z); break;
2519 case NE: res = IS_CLEAR (Z); break;
2520 case CS: res = IS_SET (C); break;
2521 case CC: res = IS_CLEAR (C); break;
2522 case MI: res = IS_SET (N); break;
2523 case PL: res = IS_CLEAR (N); break;
2524 case VS: res = IS_SET (V); break;
2525 case VC: res = IS_CLEAR (V); break;
2526 case HI: res = IS_SET (C) && IS_CLEAR (Z); break;
2527 case LS: res = IS_CLEAR (C) || IS_SET (Z); break;
2528 case GE: res = IS_SET (N) == IS_SET (V); break;
2529 case LT: res = IS_SET (N) != IS_SET (V); break;
2530 case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break;
2531 case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break;
/* Conditional compare (CCMP/CCMN): when the condition holds, set the
   flags from a compare of Rn against Rm (register) or a 5-bit
   immediate; otherwise load NZCV directly from instr[3,0].  */
2542 CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */
2544 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2545 instr[30] = compare with positive (1) or negative value (0)
2546 instr[29,21] = 1 1101 0010
2547 instr[20,16] = Rm or const
2549 instr[11] = compare reg (0) or const (1)
2553 instr[3,0] = value for CPSR bits if the comparison does not take place. */
2558 NYI_assert (29, 21, 0x1d2);
2559 NYI_assert (10, 10, 0);
2560 NYI_assert (4, 4, 0);
2562 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2563 if (! testConditionCode (cpu, INSTR (15, 12)))
2565 aarch64_set_CPSR (cpu, INSTR (3, 0));
/* negate == -1 flips the subtrahend's sign so CCMN is handled by the
   same subtraction helpers as CCMP.  */
2569 negate = INSTR (30, 30) ? 1 : -1;
2570 rm = INSTR (20, 16);
/* Immediate form: rm is the 5-bit constant itself.  */
2576 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2577 negate * (uint64_t) rm);
2579 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2580 negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
2585 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2588 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2589 negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
/* Whole-vector MOV: copies Vs into Vd 64 bits at a time; the upper
   half is copied only for the full (128-bit) form.  Rejects encodings
   where Vn != Vm, since MOV is the ORR alias with identical sources.  */
2594 do_vec_MOV_whole_vector (sim_cpu *cpu)
2596 /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)
2599 instr[30] = half(0)/full(1)
2600 instr[29,21] = 001110101
2602 instr[15,10] = 000111
2606 unsigned vs = INSTR (9, 5);
2607 unsigned vd = INSTR (4, 0);
2609 NYI_assert (29, 21, 0x075);
2610 NYI_assert (15, 10, 0x07);
2612 if (INSTR (20, 16) != vs)
2615 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2617 aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));
2619 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
/* SMOV: move a sign-extended vector element into a general register.
   The element size is encoded unary in the low bits of imm5; the
   remaining high bits of imm5 select the lane.  */
2623 do_vec_SMOV_into_scalar (sim_cpu *cpu)
2626 instr[30] = word(0)/long(1)
2627 instr[29,21] = 00 1110 000
2628 instr[20,16] = element size and index
2629 instr[15,10] = 00 0010 11
2630 instr[9,5] = V source
2631 instr[4,0] = R dest */
2633 unsigned vs = INSTR (9, 5);
2634 unsigned rd = INSTR (4, 0);
2635 unsigned imm5 = INSTR (20, 16);
2636 unsigned full = INSTR (30, 30);
2639 NYI_assert (29, 21, 0x070);
2640 NYI_assert (15, 10, 0x0B);
2642 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* imm5 & 1: byte element, lane in imm5[4:1].  */
2647 index = (imm5 >> 1) & 0xF;
2649 else if (imm5 & 0x2)
/* Half-word element, lane in imm5[4:2].  */
2652 index = (imm5 >> 2) & 0x7;
2654 else if (full && (imm5 & 0x4))
/* Word element (64-bit destination only), lane in imm5[4:3].  */
2657 index = (imm5 >> 3) & 0x3;
2666 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2667 aarch64_get_vec_s8 (cpu, vs, index));
2669 aarch64_set_reg_s32 (cpu, rd, NO_SP,
2670 aarch64_get_vec_s8 (cpu, vs, index));
2675 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2676 aarch64_get_vec_s16 (cpu, vs, index));
2678 aarch64_set_reg_s32 (cpu, rd, NO_SP,
2679 aarch64_get_vec_s16 (cpu, vs, index));
2683 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2684 aarch64_get_vec_s32 (cpu, vs, index));
/* UMOV: move a zero-extended vector element into a general register.
   Same imm5 unary element-size encoding as SMOV, plus a 64-bit
   element case gated on imm5 & 0x8.  */
2693 do_vec_UMOV_into_scalar (sim_cpu *cpu)
2696 instr[30] = word(0)/long(1)
2697 instr[29,21] = 00 1110 000
2698 instr[20,16] = element size and index
2699 instr[15,10] = 00 0011 11
2700 instr[9,5] = V source
2701 instr[4,0] = R dest */
2703 unsigned vs = INSTR (9, 5);
2704 unsigned rd = INSTR (4, 0);
2705 unsigned imm5 = INSTR (20, 16);
2706 unsigned full = INSTR (30, 30);
2709 NYI_assert (29, 21, 0x070);
2710 NYI_assert (15, 10, 0x0F);
2712 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* Byte lane: imm5[4:1].  */
2719 index = (imm5 >> 1) & 0xF;
2721 else if (imm5 & 0x2)
/* Half-word lane: imm5[4:2].  */
2724 index = (imm5 >> 2) & 0x7;
2726 else if (imm5 & 0x4)
/* Word lane: imm5[4:3].  */
2729 index = (imm5 >> 3) & 0x3;
2734 else if (imm5 & 0x8)
/* Double-word lane: imm5[4].  */
2737 index = (imm5 >> 4) & 0x1;
2745 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2746 aarch64_get_vec_u8 (cpu, vs, index));
2750 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2751 aarch64_get_vec_u16 (cpu, vs, index));
2755 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2756 aarch64_get_vec_u32 (cpu, vs, index));
2760 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2761 aarch64_get_vec_u64 (cpu, vs, index));
/* INS Vd.T[index], Wn/Xn: insert a general register into one vector
   lane.  The lowest set bit of instr[20,16] encodes the element size;
   the bits above it give the lane index.  */
2770 do_vec_INS (sim_cpu *cpu)
2772 /* instr[31,21] = 01001110000
2773 instr[20,16] = element size and index
2774 instr[15,10] = 000111
2775 instr[9,5] = W source
2776 instr[4,0] = V dest */
2779 unsigned rs = INSTR (9, 5);
2780 unsigned vd = INSTR (4, 0);
2782 NYI_assert (31, 21, 0x270);
2783 NYI_assert (15, 10, 0x07);
2785 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* Bit 16 set: byte insert, lane index in bits [20,17].  */
2788 index = INSTR (20, 17);
2789 aarch64_set_vec_u8 (cpu, vd, index,
2790 aarch64_get_reg_u8 (cpu, rs, NO_SP));
2792 else if (INSTR (17, 17))
2794 index = INSTR (20, 18);
2795 aarch64_set_vec_u16 (cpu, vd, index,
2796 aarch64_get_reg_u16 (cpu, rs, NO_SP));
2798 else if (INSTR (18, 18))
2800 index = INSTR (20, 19);
2801 aarch64_set_vec_u32 (cpu, vd, index,
2802 aarch64_get_reg_u32 (cpu, rs, NO_SP));
2804 else if (INSTR (19, 19))
2806 index = INSTR (20, 20);
2807 aarch64_set_vec_u64 (cpu, vd, index,
2808 aarch64_get_reg_u64 (cpu, rs, NO_SP));
/* DUP Vd.T, Vs.T[index]: replicate one lane of Vs across every lane
   of Vd.  Element size comes from the lowest set bit of instr[20,16].  */
2815 do_vec_DUP_vector_into_vector (sim_cpu *cpu)
2818 instr[30] = half(0)/full(1)
2819 instr[29,21] = 00 1110 000
2820 instr[20,16] = element size and index
2821 instr[15,10] = 0000 01
2822 instr[9,5] = V source
2823 instr[4,0] = V dest. */
2825 unsigned full = INSTR (30, 30);
2826 unsigned vs = INSTR (9, 5);
2827 unsigned vd = INSTR (4, 0);
2830 NYI_assert (29, 21, 0x070);
2831 NYI_assert (15, 10, 0x01);
2833 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2836 index = INSTR (20, 17);
2838 for (i = 0; i < (full ? 16 : 8); i++)
2839 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index));
2841 else if (INSTR (17, 17))
2843 index = INSTR (20, 18);
2845 for (i = 0; i < (full ? 8 : 4); i++)
2846 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index));
2848 else if (INSTR (18, 18))
2850 index = INSTR (20, 19);
2852 for (i = 0; i < (full ? 4 : 2); i++)
2853 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index));
/* 64-bit elements: only the full-width (2D) form is meaningful.  */
2857 if (INSTR (19, 19) == 0)
2863 index = INSTR (20, 20);
2865 for (i = 0; i < 2; i++)
2866 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index));
/* TBL: table lookup.  Each byte of Vm selects a byte from a table of
   1-4 consecutive registers starting at Vn; selectors beyond the
   table yield 0 (the 'val' default, outside this excerpt).  */
2871 do_vec_TBL (sim_cpu *cpu)
2874 instr[30] = half(0)/full(1)
2875 instr[29,21] = 00 1110 000
2878 instr[14,13] = vec length
2880 instr[9,5] = V start
2881 instr[4,0] = V dest */
2883 int full = INSTR (30, 30);
2884 int len = INSTR (14, 13) + 1;
2885 unsigned vm = INSTR (20, 16);
2886 unsigned vn = INSTR (9, 5);
2887 unsigned vd = INSTR (4, 0);
2890 NYI_assert (29, 21, 0x070);
2891 NYI_assert (12, 10, 0);
2893 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2894 for (i = 0; i < (full ? 16 : 8); i++)
2896 unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i);
2900 val = aarch64_get_vec_u8 (cpu, vn, selector);
2901 else if (selector < 32)
2902 val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16);
2903 else if (selector < 48)
2904 val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32);
2905 else if (selector < 64)
2906 val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48);
/* NOTE(review): vn + 1 .. vn + 3 do not wrap modulo 32 here --
   confirm whether register-file wrap-around is required.  */
2910 aarch64_set_vec_u8 (cpu, vd, i, val);
2915 do_vec_TRN (sim_cpu *cpu)
2918 instr[30] = half(0)/full(1)
2919 instr[29,24] = 00 1110
2924 instr[14] = TRN1 (0) / TRN2 (1)
2926 instr[9,5] = V source
2927 instr[4,0] = V dest. */
2929 int full = INSTR (30, 30);
2930 int second = INSTR (14, 14);
2931 unsigned vm = INSTR (20, 16);
2932 unsigned vn = INSTR (9, 5);
2933 unsigned vd = INSTR (4, 0);
2936 NYI_assert (29, 24, 0x0E);
2937 NYI_assert (13, 10, 0xA);
2939 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2940 switch (INSTR (23, 22))
2943 for (i = 0; i < (full ? 8 : 4); i++)
2947 aarch64_get_vec_u8 (cpu, second ? vm : vn, i * 2));
2949 (cpu, vd, 1 * 2 + 1,
2950 aarch64_get_vec_u8 (cpu, second ? vn : vm, i * 2 + 1));
2955 for (i = 0; i < (full ? 4 : 2); i++)
2959 aarch64_get_vec_u16 (cpu, second ? vm : vn, i * 2));
2961 (cpu, vd, 1 * 2 + 1,
2962 aarch64_get_vec_u16 (cpu, second ? vn : vm, i * 2 + 1));
2968 (cpu, vd, 0, aarch64_get_vec_u32 (cpu, second ? vm : vn, 0));
2970 (cpu, vd, 1, aarch64_get_vec_u32 (cpu, second ? vn : vm, 1));
2972 (cpu, vd, 2, aarch64_get_vec_u32 (cpu, second ? vm : vn, 2));
2974 (cpu, vd, 3, aarch64_get_vec_u32 (cpu, second ? vn : vm, 3));
2981 aarch64_set_vec_u64 (cpu, vd, 0,
2982 aarch64_get_vec_u64 (cpu, second ? vm : vn, 0));
2983 aarch64_set_vec_u64 (cpu, vd, 1,
2984 aarch64_get_vec_u64 (cpu, second ? vn : vm, 1));
/* DUP Vd.T, Wn/Xn: broadcast a general register into every lane of
   Vd.  instr[19,16] is a one-hot element-size selector.  */
2990 do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
2993 instr[30] = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
2994 [must be 1 for 64-bit xfer]
2995 instr[29,20] = 00 1110 0000
2996 instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
2997 0100=> 32-bits. 1000=>64-bits
2998 instr[15,10] = 0000 11
2999 instr[9,5] = W source
3000 instr[4,0] = V dest. */
3003 unsigned Vd = INSTR (4, 0);
3004 unsigned Rs = INSTR (9, 5);
3005 int both = INSTR (30, 30);
3007 NYI_assert (29, 20, 0x0E0);
3008 NYI_assert (15, 10, 0x03);
3010 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3011 switch (INSTR (19, 16))
3014 for (i = 0; i < (both ? 16 : 8); i++)
3015 aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
3019 for (i = 0; i < (both ? 8 : 4); i++)
3020 aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
3024 for (i = 0; i < (both ? 4 : 2); i++)
3025 aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
/* 64-bit transfer writes both halves; the "both == 0" reject is
   outside this excerpt.  */
3031 aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
3032 aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
/* UZP1/UZP2: unzip -- gather the even (lower, instr[14]==0) or odd
   (upper) numbered elements of Vn:Vm.  Implemented as 64-bit shift
   and mask shuffles; val1 holds the low half of the result, val2 the
   high half.  */
3041 do_vec_UZP (sim_cpu *cpu)
3044 instr[30] = half(0)/full(1)
3045 instr[29,24] = 00 1110
3046 instr[23,22] = size: byte(00), half(01), word (10), long (11)
3050 instr[14] = lower (0) / upper (1)
3055 int full = INSTR (30, 30);
3056 int upper = INSTR (14, 14);
3058 unsigned vm = INSTR (20, 16);
3059 unsigned vn = INSTR (9, 5);
3060 unsigned vd = INSTR (4, 0);
3062 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3063 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3064 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3065 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
/* For the half-width form the second input is Vm's low half.  */
3070 uint64_t input2 = full ? val_n2 : val_m1;
3072 NYI_assert (29, 24, 0x0E);
3073 NYI_assert (21, 21, 0);
3074 NYI_assert (15, 15, 0);
3075 NYI_assert (13, 10, 6);
3077 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3078 switch (INSTR (23, 22))
3081 val1 = (val_n1 >> (upper * 8)) & 0xFFULL;
3082 val1 |= (val_n1 >> ((upper * 8) + 8)) & 0xFF00ULL;
3083 val1 |= (val_n1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
3084 val1 |= (val_n1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
3086 val1 |= (input2 << (32 - (upper * 8))) & 0xFF00000000ULL;
3087 val1 |= (input2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
3088 val1 |= (input2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
3089 val1 |= (input2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
3093 val2 = (val_m1 >> (upper * 8)) & 0xFFULL;
3094 val2 |= (val_m1 >> ((upper * 8) + 8)) & 0xFF00ULL;
3095 val2 |= (val_m1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
3096 val2 |= (val_m1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
3098 val2 |= (val_m2 << (32 - (upper * 8))) & 0xFF00000000ULL;
3099 val2 |= (val_m2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
3100 val2 |= (val_m2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
3101 val2 |= (val_m2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
3106 val1 = (val_n1 >> (upper * 16)) & 0xFFFFULL;
3107 val1 |= (val_n1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3109 val1 |= (input2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;;
3110 val1 |= (input2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3114 val2 = (val_m1 >> (upper * 16)) & 0xFFFFULL;
3115 val2 |= (val_m1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3117 val2 |= (val_m2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3118 val2 |= (val_m2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3123 val1 = (val_n1 >> (upper * 32)) & 0xFFFFFFFF;
3124 val1 |= (input2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3128 val2 = (val_m1 >> (upper * 32)) & 0xFFFFFFFF;
3129 val2 |= (val_m2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
/* 64-bit elements: simple selection, no shuffling needed.  */
3137 val1 = upper ? val_n2 : val_n1;
3138 val2 = upper ? val_m2 : val_m1;
3142 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3144 aarch64_set_vec_u64 (cpu, vd, 1, val2);
/* ZIP1/ZIP2: interleave the lower (instr[14]==0) or upper halves of
   Vn and Vm into Vd, again via 64-bit shift/mask shuffles.
   NOTE(review): only instr[23] is switched on here, so byte and
   half-word sizes share the paths below -- confirm the half-word
   (01) case is handled elsewhere or intentionally folded.  */
3148 do_vec_ZIP (sim_cpu *cpu)
3151 instr[30] = half(0)/full(1)
3152 instr[29,24] = 00 1110
3153 instr[23,22] = size: byte(00), hald(01), word (10), long (11)
3157 instr[14] = lower (0) / upper (1)
3162 int full = INSTR (30, 30);
3163 int upper = INSTR (14, 14);
3165 unsigned vm = INSTR (20, 16);
3166 unsigned vn = INSTR (9, 5);
3167 unsigned vd = INSTR (4, 0);
3169 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3170 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3171 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3172 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3177 uint64_t input1 = upper ? val_n1 : val_m1;
3178 uint64_t input2 = upper ? val_n2 : val_m2;
3180 NYI_assert (29, 24, 0x0E);
3181 NYI_assert (21, 21, 0);
3182 NYI_assert (15, 15, 0);
3183 NYI_assert (13, 10, 0xE);
3185 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3186 switch (INSTR (23, 23))
3190 ((input1 << 0) & (0xFF << 0))
3191 | ((input2 << 8) & (0xFF << 8))
3192 | ((input1 << 8) & (0xFF << 16))
3193 | ((input2 << 16) & (0xFF << 24))
3194 | ((input1 << 16) & (0xFFULL << 32))
3195 | ((input2 << 24) & (0xFFULL << 40))
3196 | ((input1 << 24) & (0xFFULL << 48))
3197 | ((input2 << 32) & (0xFFULL << 56));
3200 ((input1 >> 32) & (0xFF << 0))
3201 | ((input2 >> 24) & (0xFF << 8))
3202 | ((input1 >> 24) & (0xFF << 16))
3203 | ((input2 >> 16) & (0xFF << 24))
3204 | ((input1 >> 16) & (0xFFULL << 32))
3205 | ((input2 >> 8) & (0xFFULL << 40))
3206 | ((input1 >> 8) & (0xFFULL << 48))
3207 | ((input2 >> 0) & (0xFFULL << 56));
3212 ((input1 << 0) & (0xFFFF << 0))
3213 | ((input2 << 16) & (0xFFFF << 16))
3214 | ((input1 << 16) & (0xFFFFULL << 32))
3215 | ((input2 << 32) & (0xFFFFULL << 48));
3218 ((input1 >> 32) & (0xFFFF << 0))
3219 | ((input2 >> 16) & (0xFFFF << 16))
3220 | ((input1 >> 16) & (0xFFFFULL << 32))
3221 | ((input2 >> 0) & (0xFFFFULL << 48));
3225 val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
3226 val2 = (input2 & 0xFFFFFFFFULL) | (input1 << 32);
3235 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3237 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3240 /* Floating point immediates are encoded in 8 bits.
3241 fpimm[7] = sign bit.
3242 fpimm[6:4] = signed exponent.
3243 fpimm[3:0] = fraction (assuming leading 1).
3244 i.e. F = s * 1.f * 2^(e - b). */
/* Decode an 8-bit FP immediate into a float by building the mantissa
   1.f (as (16 + f) / 16) and then applying the signed exponent via
   repeated doubling/halving loops (bodies partly outside excerpt).  */
3247 fp_immediate_for_encoding_32 (uint32_t imm8)
3250 uint32_t s, e, f, i;
3252 s = (imm8 >> 7) & 0x1;
3253 e = (imm8 >> 4) & 0x7;
3256 /* The fp value is s * n/16 * 2r where n is 16+e. */
3257 u = (16.0 + f) / 16.0;
3259 /* N.B. exponent is signed. */
3264 for (i = 0; i <= epos; i++)
3271 for (i = 0; i < eneg; i++)
/* Double-precision twin of fp_immediate_for_encoding_32: decode an
   8-bit FP immediate (sign / 3-bit signed exponent / 4-bit fraction)
   into a double.  */
3282 fp_immediate_for_encoding_64 (uint32_t imm8)
3285 uint32_t s, e, f, i;
3287 s = (imm8 >> 7) & 0x1;
3288 e = (imm8 >> 4) & 0x7;
3291 /* The fp value is s * n/16 * 2r where n is 16+e. */
3292 u = (16.0 + f) / 16.0;
3294 /* N.B. exponent is signed. */
3299 for (i = 0; i <= epos; i++)
3306 for (i = 0; i < eneg; i++)
/* MOVI/FMOV (vector, immediate): build an 8-bit immediate from
   instr[18,16]:instr[9,5], optionally shift it per the cmode field in
   instr[15,12], and replicate it across the destination lanes.  */
3317 do_vec_MOV_immediate (sim_cpu *cpu)
3320 instr[30] = full/half selector
3321 instr[29,19] = 00111100000
3322 instr[18,16] = high 3 bits of uimm8
3323 instr[15,12] = size & shift:
3325 0010 => 32-bit + LSL#8
3326 0100 => 32-bit + LSL#16
3327 0110 => 32-bit + LSL#24
3328 1010 => 16-bit + LSL#8
3330 1101 => 32-bit + MSL#16
3331 1100 => 32-bit + MSL#8
3335 instr[9,5] = low 5-bits of uimm8
3338 int full = INSTR (30, 30);
3339 unsigned vd = INSTR (4, 0);
3340 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3343 NYI_assert (29, 19, 0x1E0);
3344 NYI_assert (11, 10, 1);
3346 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3347 switch (INSTR (15, 12))
3349 case 0x0: /* 32-bit, no shift. */
3350 case 0x2: /* 32-bit, shift by 8. */
3351 case 0x4: /* 32-bit, shift by 16. */
3352 case 0x6: /* 32-bit, shift by 24. */
/* instr[14,13] encodes the LSL amount in units of 8 bits.  */
3353 val <<= (8 * INSTR (14, 13));
3354 for (i = 0; i < (full ? 4 : 2); i++)
3355 aarch64_set_vec_u32 (cpu, vd, i, val);
3358 case 0xa: /* 16-bit, shift by 8. */
3361 case 0x8: /* 16-bit, no shift. */
3362 for (i = 0; i < (full ? 8 : 4); i++)
3363 aarch64_set_vec_u16 (cpu, vd, i, val);
3366 case 0xd: /* 32-bit, mask shift by 16. */
3370 case 0xc: /* 32-bit, mask shift by 8. */
3373 for (i = 0; i < (full ? 4 : 2); i++)
3374 aarch64_set_vec_u32 (cpu, vd, i, val);
3377 case 0xe: /* 8-bit, no shift. */
3378 for (i = 0; i < (full ? 16 : 8); i++)
3379 aarch64_set_vec_u8 (cpu, vd, i, val);
3382 case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */
3384 float u = fp_immediate_for_encoding_32 (val);
3385 for (i = 0; i < (full ? 4 : 2); i++)
3386 aarch64_set_vec_float (cpu, vd, i, u);
/* MVNI / MOVI Dn,#mask64 / FMOV Vd.2D: as do_vec_MOV_immediate but
   the replicated value is inverted (inversion lines fall outside
   this excerpt for the shifted cases).  */
3396 do_vec_MVNI (sim_cpu *cpu)
3399 instr[30] = full/half selector
3400 instr[29,19] = 10111100000
3401 instr[18,16] = high 3 bits of uimm8
3402 instr[15,12] = selector
3404 instr[9,5] = low 5-bits of uimm8
3407 int full = INSTR (30, 30);
3408 unsigned vd = INSTR (4, 0);
3409 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3412 NYI_assert (29, 19, 0x5E0);
3413 NYI_assert (11, 10, 1);
3415 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3416 switch (INSTR (15, 12))
3418 case 0x0: /* 32-bit, no shift. */
3419 case 0x2: /* 32-bit, shift by 8. */
3420 case 0x4: /* 32-bit, shift by 16. */
3421 case 0x6: /* 32-bit, shift by 24. */
3422 val <<= (8 * INSTR (14, 13));
3424 for (i = 0; i < (full ? 4 : 2); i++)
3425 aarch64_set_vec_u32 (cpu, vd, i, val);
3428 case 0xa: /* 16-bit, 8 bit shift. */
3430 case 0x8: /* 16-bit, no shift. */
3432 for (i = 0; i < (full ? 8 : 4); i++)
3433 aarch64_set_vec_u16 (cpu, vd, i, val);
3436 case 0xd: /* 32-bit, mask shift by 16. */
3439 case 0xc: /* 32-bit, mask shift by 8. */
3443 for (i = 0; i < (full ? 4 : 2); i++)
3444 aarch64_set_vec_u32 (cpu, vd, i, val);
3447 case 0xE: /* MOVI Dn, #mask64 */
/* Expand each set bit of the 8-bit immediate into a full byte of the
   64-bit mask.  NOTE(review): the per-bit test guarding the |= is
   outside this excerpt -- confirm it exists.  */
3451 for (i = 0; i < 8; i++)
3453 mask |= (0xFFUL << (i * 8));
3454 aarch64_set_vec_u64 (cpu, vd, 0, mask);
3455 aarch64_set_vec_u64 (cpu, vd, 1, mask);
3459 case 0xf: /* FMOV Vd.2D, #fpimm. */
3461 double u = fp_immediate_for_encoding_64 (val);
3466 aarch64_set_vec_double (cpu, vd, 0, u);
3467 aarch64_set_vec_double (cpu, vd, 1, u);
/* NOTE(review): ABS evaluates its argument twice and, like the ABS
   instruction itself, leaves the most-negative value unchanged due
   to two's-complement wrap-around.  Keep arguments side-effect free.  */
3476 #define ABS(A) ((A) < 0 ? - (A) : (A))
/* Vector ABS: per-lane absolute value of signed elements.  */
3479 do_vec_ABS (sim_cpu *cpu)
3482 instr[30] = half(0)/full(1)
3483 instr[29,24] = 00 1110
3484 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3485 instr[21,10] = 10 0000 1011 10
3489 unsigned vn = INSTR (9, 5);
3490 unsigned vd = INSTR (4, 0);
3491 unsigned full = INSTR (30, 30);
3494 NYI_assert (29, 24, 0x0E);
3495 NYI_assert (21, 10, 0x82E);
3497 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3498 switch (INSTR (23, 22))
3501 for (i = 0; i < (full ? 16 : 8); i++)
3502 aarch64_set_vec_s8 (cpu, vd, i,
3503 ABS (aarch64_get_vec_s8 (cpu, vn, i)))
3507 for (i = 0; i < (full ? 8 : 4); i++)
3508 aarch64_set_vec_s16 (cpu, vd, i,
3509 ABS (aarch64_get_vec_s16 (cpu, vn, i)));
3513 for (i = 0; i < (full ? 4 : 2); i++)
3514 aarch64_set_vec_s32 (cpu, vd, i,
3515 ABS (aarch64_get_vec_s32 (cpu, vn, i)));
3521 for (i = 0; i < 2; i++)
3522 aarch64_set_vec_s64 (cpu, vd, i,
3523 ABS (aarch64_get_vec_s64 (cpu, vn, i)));
/* ADDV: sum every element of the source vector into a scalar placed
   in element 0 of the destination.  */
3529 do_vec_ADDV (sim_cpu *cpu)
3532 instr[30] = full/half selector
3533 instr[29,24] = 00 1110
3534 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3535 instr[21,10] = 11 0001 1011 10
3539 unsigned vm = INSTR (9, 5);
3540 unsigned rd = INSTR (4, 0);
3542 int full = INSTR (30, 30);
3544 NYI_assert (29, 24, 0x0E);
3545 NYI_assert (21, 10, 0xC6E);
3547 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3548 switch (INSTR (23, 22))
3553 for (i = 0; i < (full ? 16 : 8); i++)
3554 val += aarch64_get_vec_u8 (cpu, vm, i);
3555 aarch64_set_vec_u64 (cpu, rd, 0, val);
3562 for (i = 0; i < (full ? 8 : 4); i++)
3563 val += aarch64_get_vec_u16 (cpu, vm, i);
3564 aarch64_set_vec_u64 (cpu, rd, 0, val);
/* 32-bit elements: loop is fixed at 4 -- only the 4S form is valid;
   the !full reject is outside this excerpt.  */
3573 for (i = 0; i < 4; i++)
3574 val += aarch64_get_vec_u32 (cpu, vm, i);
3575 aarch64_set_vec_u64 (cpu, rd, 0, val);
/* Transfers between a general register and a 32- or 64-bit vector
   lane; instr[13] selects direction (1 = vector to register).  */
3585 do_vec_ins_2 (sim_cpu *cpu)
3587 /* instr[31,21] = 01001110000
3588 instr[20,18] = size & element selector
3590 instr[13] = direction: to vec(0), from vec (1)
3596 unsigned vm = INSTR (9, 5);
3597 unsigned vd = INSTR (4, 0);
3599 NYI_assert (31, 21, 0x270);
3600 NYI_assert (17, 14, 0);
3601 NYI_assert (12, 10, 7);
3603 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3604 if (INSTR (13, 13) == 1)
3606 if (INSTR (18, 18) == 1)
/* 32-bit element -> general register; lane in bits [20,19].  */
3609 elem = INSTR (20, 19);
3610 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3611 aarch64_get_vec_u32 (cpu, vm, elem))
3616 if (INSTR (19, 19) != 1)
/* 64-bit element -> general register; lane in bit 20.  */
3619 elem = INSTR (20, 20);
3620 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3621 aarch64_get_vec_u64 (cpu, vm, elem));
3626 if (INSTR (18, 18) == 1)
/* General register -> 32-bit vector lane.  */
3629 elem = INSTR (20, 19);
3630 aarch64_set_vec_u32 (cpu, vd, elem,
3631 aarch64_get_reg_u32 (cpu, vm, NO_SP));
3636 if (INSTR (19, 19) != 1)
/* General register -> 64-bit vector lane.  */
3639 elem = INSTR (20, 20);
3640 aarch64_set_vec_u64 (cpu, vd, elem,
3641 aarch64_get_reg_u64 (cpu, vm, NO_SP));
/* Element-wise multiply helper: snapshots N elements of vn and vm
   (offset by the caller's `bias') into temporaries before writing,
   so the macro is safe when vd aliases a source vector.  Relies on
   cpu/vn/vm/vd/bias being in scope at the expansion site.  */
3646 #define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE) \
3649 DST_TYPE a[N], b[N]; \
3651 for (i = 0; i < (N); i++) \
3653 a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias); \
3654 b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias); \
3656 for (i = 0; i < (N); i++) \
3657 aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]); \
/* SMULL/SMULL2/UMULL/UMULL2: widening multiply of the lower (bias 0)
   or upper (bias 1) half of the sources.
   NOTE(review): bias is used directly as the element offset inside
   DO_VEC_WIDENING_MUL; the scaling of bias to N (e.g. bias *= 8 for
   bytes) is outside this excerpt -- confirm it happens.  */
3662 do_vec_mull (sim_cpu *cpu)
3665 instr[30] = lower(0)/upper(1) selector
3666 instr[29] = signed(0)/unsigned(1)
3667 instr[28,24] = 0 1110
3668 instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
3671 instr[15,10] = 11 0000
3675 int unsign = INSTR (29, 29);
3676 int bias = INSTR (30, 30);
3677 unsigned vm = INSTR (20, 16);
3678 unsigned vn = INSTR ( 9, 5);
3679 unsigned vd = INSTR ( 4, 0);
3682 NYI_assert (28, 24, 0x0E);
3683 NYI_assert (15, 10, 0x30);
3685 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3686 /* NB: Read source values before writing results, in case
3687 the source and destination vectors are the same. */
3688 switch (INSTR (23, 22))
3694 DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16);
3696 DO_VEC_WIDENING_MUL (8, int16_t, s8, s16);
3703 DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32);
3705 DO_VEC_WIDENING_MUL (4, int32_t, s16, s32);
3712 DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64);
3714 DO_VEC_WIDENING_MUL (2, int64_t, s32, s64);
/* FADD/FSUB (vector): per-lane float or double add/subtract,
   selected by instr[23] (sub) and instr[22] (double).  */
3723 do_vec_fadd (sim_cpu *cpu)
3726 instr[30] = half(0)/full(1)
3727 instr[29,24] = 001110
3728 instr[23] = FADD(0)/FSUB(1)
3729 instr[22] = float (0)/double(1)
3732 instr[15,10] = 110101
3736 unsigned vm = INSTR (20, 16);
3737 unsigned vn = INSTR (9, 5);
3738 unsigned vd = INSTR (4, 0);
3740 int full = INSTR (30, 30);
3742 NYI_assert (29, 24, 0x0E);
3743 NYI_assert (21, 21, 1);
3744 NYI_assert (15, 10, 0x35);
3746 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* FSUB path: vn - vm.  */
3754 for (i = 0; i < 2; i++)
3755 aarch64_set_vec_double (cpu, vd, i,
3756 aarch64_get_vec_double (cpu, vn, i)
3757 - aarch64_get_vec_double (cpu, vm, i));
3761 for (i = 0; i < (full ? 4 : 2); i++)
3762 aarch64_set_vec_float (cpu, vd, i,
3763 aarch64_get_vec_float (cpu, vn, i)
3764 - aarch64_get_vec_float (cpu, vm, i));
/* FADD path: vm + vn.  */
3774 for (i = 0; i < 2; i++)
3775 aarch64_set_vec_double (cpu, vd, i,
3776 aarch64_get_vec_double (cpu, vm, i)
3777 + aarch64_get_vec_double (cpu, vn, i));
3781 for (i = 0; i < (full ? 4 : 2); i++)
3782 aarch64_set_vec_float (cpu, vd, i,
3783 aarch64_get_vec_float (cpu, vm, i)
3784 + aarch64_get_vec_float (cpu, vn, i));
/* ADD (vector): per-lane integer addition for all four element
   sizes; wrap-around is the architected behavior.  */
3790 do_vec_add (sim_cpu *cpu)
3793 instr[30] = full/half selector
3794 instr[29,24] = 001110
3795 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3798 instr[15,10] = 100001
3802 unsigned vm = INSTR (20, 16);
3803 unsigned vn = INSTR (9, 5);
3804 unsigned vd = INSTR (4, 0);
3806 int full = INSTR (30, 30);
3808 NYI_assert (29, 24, 0x0E);
3809 NYI_assert (21, 21, 1);
3810 NYI_assert (15, 10, 0x21);
3812 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3813 switch (INSTR (23, 22))
3816 for (i = 0; i < (full ? 16 : 8); i++)
3817 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
3818 + aarch64_get_vec_u8 (cpu, vm, i));
3822 for (i = 0; i < (full ? 8 : 4); i++)
3823 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
3824 + aarch64_get_vec_u16 (cpu, vm, i));
3828 for (i = 0; i < (full ? 4 : 2); i++)
3829 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
3830 + aarch64_get_vec_u32 (cpu, vm, i));
/* 64-bit elements: 2D form; the !full reject is outside excerpt.  */
3836 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0)
3837 + aarch64_get_vec_u64 (cpu, vm, 0));
3838 aarch64_set_vec_u64 (cpu, vd, 1,
3839 aarch64_get_vec_u64 (cpu, vn, 1)
3840 + aarch64_get_vec_u64 (cpu, vm, 1));
/* MUL (vector): per-lane non-widening multiply; reuses
   DO_VEC_WIDENING_MUL with identical read/write types.
   NOTE(review): the macro references `bias', whose declaration and
   zero-initialisation are outside this excerpt -- confirm.  */
3846 do_vec_mul (sim_cpu *cpu)
3849 instr[30] = full/half selector
3850 instr[29,24] = 00 1110
3851 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3854 instr[15,10] = 10 0111
3858 unsigned vm = INSTR (20, 16);
3859 unsigned vn = INSTR (9, 5);
3860 unsigned vd = INSTR (4, 0);
3862 int full = INSTR (30, 30);
3865 NYI_assert (29, 24, 0x0E);
3866 NYI_assert (21, 21, 1);
3867 NYI_assert (15, 10, 0x27);
3869 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3870 switch (INSTR (23, 22))
3873 DO_VEC_WIDENING_MUL (full ? 16 : 8, uint8_t, u8, u8);
3877 DO_VEC_WIDENING_MUL (full ? 8 : 4, uint16_t, u16, u16);
3881 DO_VEC_WIDENING_MUL (full ? 4 : 2, uint32_t, u32, u32);
/* MLA (vector): per-lane multiply-accumulate, vd += vn * vm.
   Reads vd before writing, so accumulation is safe in place.  */
3890 do_vec_MLA (sim_cpu *cpu)
3893 instr[30] = full/half selector
3894 instr[29,24] = 00 1110
3895 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3898 instr[15,10] = 1001 01
3902 unsigned vm = INSTR (20, 16);
3903 unsigned vn = INSTR (9, 5);
3904 unsigned vd = INSTR (4, 0);
3906 int full = INSTR (30, 30);
3908 NYI_assert (29, 24, 0x0E);
3909 NYI_assert (21, 21, 1);
3910 NYI_assert (15, 10, 0x25);
3912 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3913 switch (INSTR (23, 22))
3916 for (i = 0; i < (full ? 16 : 8); i++)
3917 aarch64_set_vec_u8 (cpu, vd, i,
3918 aarch64_get_vec_u8 (cpu, vd, i)
3919 + (aarch64_get_vec_u8 (cpu, vn, i)
3920 * aarch64_get_vec_u8 (cpu, vm, i)));
3924 for (i = 0; i < (full ? 8 : 4); i++)
3925 aarch64_set_vec_u16 (cpu, vd, i,
3926 aarch64_get_vec_u16 (cpu, vd, i)
3927 + (aarch64_get_vec_u16 (cpu, vn, i)
3928 * aarch64_get_vec_u16 (cpu, vm, i)));
3932 for (i = 0; i < (full ? 4 : 2); i++)
3933 aarch64_set_vec_u32 (cpu, vd, i,
3934 aarch64_get_vec_u32 (cpu, vd, i)
3935 + (aarch64_get_vec_u32 (cpu, vn, i)
3936 * aarch64_get_vec_u32 (cpu, vm, i)));
/* Return the larger of A and B, preferring the non-NaN operand when
   exactly one of them is a NaN (IEEE 754 maxNum / FMAXNM semantics).
   The excerpt was missing the NaN-preference branches; restored here.  */
static float
fmaxnm (float a, float b)
{
  if (! isnan (a))
    {
      if (! isnan (b))
	return a > b ? a : b;
      return a;
    }
  else if (! isnan (b))
    return b;
  return a;
}
/* Return the smaller of A and B, preferring the non-NaN operand when
   exactly one of them is a NaN (IEEE 754 minNum / FMINNM semantics).
   The excerpt was missing the NaN-preference branches; restored here.  */
static float
fminnm (float a, float b)
{
  if (! isnan (a))
    {
      if (! isnan (b))
	return a < b ? a : b;
      return a;
    }
  else if (! isnan (b))
    return b;
  return a;
}
/* Double-precision maxNum: larger of A and B, preferring the non-NaN
   operand when exactly one is a NaN (FMAXNM semantics).  */
static double
dmaxnm (double a, double b)
{
  if (! isnan (a))
    {
      if (! isnan (b))
	return a > b ? a : b;
      return a;
    }
  else if (! isnan (b))
    return b;
  return a;
}
/* Double-precision minNum: smaller of A and B, preferring the
   non-NaN operand when exactly one is a NaN (FMINNM semantics).  */
static double
dminnm (double a, double b)
{
  if (! isnan (a))
    {
      if (! isnan (b))
	return a < b ? a : b;
      return a;
    }
  else if (! isnan (b))
    return b;
  return a;
}
4001 do_vec_FminmaxNMP (sim_cpu *cpu)
4004 instr [30] = half (0)/full (1)
4005 instr [29,24] = 10 1110
4006 instr [23] = max(0)/min(1)
4007 instr [22] = float (0)/double (1)
4010 instr [15,10] = 1100 01
4012 instr [4.0] = Vd. */
4014 unsigned vm = INSTR (20, 16);
4015 unsigned vn = INSTR (9, 5);
4016 unsigned vd = INSTR (4, 0);
4017 int full = INSTR (30, 30);
4019 NYI_assert (29, 24, 0x2E);
4020 NYI_assert (21, 21, 1);
4021 NYI_assert (15, 10, 0x31);
4023 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4026 double (* fn)(double, double) = INSTR (23, 23)
4031 aarch64_set_vec_double (cpu, vd, 0,
4032 fn (aarch64_get_vec_double (cpu, vn, 0),
4033 aarch64_get_vec_double (cpu, vn, 1)));
4034 aarch64_set_vec_double (cpu, vd, 0,
4035 fn (aarch64_get_vec_double (cpu, vm, 0),
4036 aarch64_get_vec_double (cpu, vm, 1)));
4040 float (* fn)(float, float) = INSTR (23, 23)
4043 aarch64_set_vec_float (cpu, vd, 0,
4044 fn (aarch64_get_vec_float (cpu, vn, 0),
4045 aarch64_get_vec_float (cpu, vn, 1)));
4047 aarch64_set_vec_float (cpu, vd, 1,
4048 fn (aarch64_get_vec_float (cpu, vn, 2),
4049 aarch64_get_vec_float (cpu, vn, 3)));
4051 aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
4052 fn (aarch64_get_vec_float (cpu, vm, 0),
4053 aarch64_get_vec_float (cpu, vm, 1)));
4055 aarch64_set_vec_float (cpu, vd, 3,
4056 fn (aarch64_get_vec_float (cpu, vm, 2),
4057 aarch64_get_vec_float (cpu, vm, 3)));
/* AND (vector): bitwise AND of Vn and Vm, processed as 32-bit
   chunks (element size is irrelevant for bitwise ops).  */
4062 do_vec_AND (sim_cpu *cpu)
4065 instr[30] = half (0)/full (1)
4066 instr[29,21] = 001110001
4068 instr[15,10] = 000111
4072 unsigned vm = INSTR (20, 16);
4073 unsigned vn = INSTR (9, 5);
4074 unsigned vd = INSTR (4, 0);
4076 int full = INSTR (30, 30);
4078 NYI_assert (29, 21, 0x071);
4079 NYI_assert (15, 10, 0x07);
4081 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4082 for (i = 0; i < (full ? 4 : 2); i++)
4083 aarch64_set_vec_u32 (cpu, vd, i,
4084 aarch64_get_vec_u32 (cpu, vn, i)
4085 & aarch64_get_vec_u32 (cpu, vm, i));
/* BSL: bitwise select -- for each bit, take Vn where the current Vd
   (mask) bit is 1, else Vm.  Vd is both mask input and destination.  */
4089 do_vec_BSL (sim_cpu *cpu)
4092 instr[30] = half (0)/full (1)
4093 instr[29,21] = 101110011
4095 instr[15,10] = 000111
4099 unsigned vm = INSTR (20, 16);
4100 unsigned vn = INSTR (9, 5);
4101 unsigned vd = INSTR (4, 0);
4103 int full = INSTR (30, 30);
4105 NYI_assert (29, 21, 0x173);
4106 NYI_assert (15, 10, 0x07);
4108 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4109 for (i = 0; i < (full ? 16 : 8); i++)
4110 aarch64_set_vec_u8 (cpu, vd, i,
4111 ( aarch64_get_vec_u8 (cpu, vd, i)
4112 & aarch64_get_vec_u8 (cpu, vn, i))
4113 | ((~ aarch64_get_vec_u8 (cpu, vd, i))
4114 & aarch64_get_vec_u8 (cpu, vm, i)));
/* EOR (vector): bitwise exclusive-OR of Vn and Vm, processed as
   32-bit chunks.  */
4118 do_vec_EOR (sim_cpu *cpu)
4121 instr[30] = half (0)/full (1)
4122 instr[29,21] = 10 1110 001
4124 instr[15,10] = 000111
4128 unsigned vm = INSTR (20, 16);
4129 unsigned vn = INSTR (9, 5);
4130 unsigned vd = INSTR (4, 0);
4132 int full = INSTR (30, 30);
4134 NYI_assert (29, 21, 0x171);
4135 NYI_assert (15, 10, 0x07);
4137 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4138 for (i = 0; i < (full ? 4 : 2); i++)
4139 aarch64_set_vec_u32 (cpu, vd, i,
4140 aarch64_get_vec_u32 (cpu, vn, i)
4141 ^ aarch64_get_vec_u32 (cpu, vm, i));
/* Vector BIT/BIF (bitwise insert if true / if false).  vm is the mask:
   BIF (instr[22] == 1): vd = (vd & vm)  | (vn & ~vm)
   BIT (instr[22] == 0): vd = (vd & ~vm) | (vn & vm).  */
4145 do_vec_bit (sim_cpu *cpu)
4148 instr[30] = half (0)/full (1)
4149 instr[29,23] = 10 1110 1
4150 instr[22] = BIT (0) / BIF (1)
4153 instr[15,10] = 0001 11
4157 unsigned vm = INSTR (20, 16);
4158 unsigned vn = INSTR (9, 5);
4159 unsigned vd = INSTR (4, 0);
4160 unsigned full = INSTR (30, 30);
4161 unsigned test_false = INSTR (22, 22);
4164 NYI_assert (29, 23, 0x5D);
4165 NYI_assert (21, 21, 1);
4166 NYI_assert (15, 10, 0x07);
4168 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4169 for (i = 0; i < (full ? 4 : 2); i++)
/* All three operands are snapshot before either write, so vd aliasing
   vn or vm is harmless.  */
4171 uint32_t vd_val = aarch64_get_vec_u32 (cpu, vd, i);
4172 uint32_t vn_val = aarch64_get_vec_u32 (cpu, vn, i);
4173 uint32_t vm_val = aarch64_get_vec_u32 (cpu, vm, i);
/* BIF path (test_false set).  */
4175 aarch64_set_vec_u32 (cpu, vd, i,
4176 (vd_val & vm_val) | (vn_val & ~vm_val));
/* BIT path.  */
4178 aarch64_set_vec_u32 (cpu, vd, i,
4179 (vd_val & ~vm_val) | (vn_val & vm_val));
/* Vector ORN: vd = vn | ~vm, processed bytewise.  */
4184 do_vec_ORN (sim_cpu *cpu)
4187 instr[30] = half (0)/full (1)
4188 instr[29,21] = 00 1110 111
4190 instr[15,10] = 00 0111
4194 unsigned vm = INSTR (20, 16);
4195 unsigned vn = INSTR (9, 5);
4196 unsigned vd = INSTR (4, 0);
4198 int full = INSTR (30, 30);
4200 NYI_assert (29, 21, 0x077);
4201 NYI_assert (15, 10, 0x07);
4203 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4204 for (i = 0; i < (full ? 16 : 8); i++)
4205 aarch64_set_vec_u8 (cpu, vd, i,
4206 aarch64_get_vec_u8 (cpu, vn, i)
4207 | ~ aarch64_get_vec_u8 (cpu, vm, i));
/* Vector ORR: vd = vn | vm, processed bytewise.  */
4211 do_vec_ORR (sim_cpu *cpu)
4214 instr[30] = half (0)/full (1)
4215 instr[29,21] = 00 1110 101
4217 instr[15,10] = 0001 11
4221 unsigned vm = INSTR (20, 16);
4222 unsigned vn = INSTR (9, 5);
4223 unsigned vd = INSTR (4, 0);
4225 int full = INSTR (30, 30);
4227 NYI_assert (29, 21, 0x075);
4228 NYI_assert (15, 10, 0x07);
4230 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4231 for (i = 0; i < (full ? 16 : 8); i++)
4232 aarch64_set_vec_u8 (cpu, vd, i,
4233 aarch64_get_vec_u8 (cpu, vn, i)
4234 | aarch64_get_vec_u8 (cpu, vm, i));
/* Vector BIC (bit clear): vd = vn & ~vm, processed bytewise.  */
4238 do_vec_BIC (sim_cpu *cpu)
4241 instr[30] = half (0)/full (1)
4242 instr[29,21] = 00 1110 011
4244 instr[15,10] = 00 0111
4248 unsigned vm = INSTR (20, 16);
4249 unsigned vn = INSTR (9, 5);
4250 unsigned vd = INSTR (4, 0);
4252 int full = INSTR (30, 30);
4254 NYI_assert (29, 21, 0x073);
4255 NYI_assert (15, 10, 0x07);
4257 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4258 for (i = 0; i < (full ? 16 : 8); i++)
4259 aarch64_set_vec_u8 (cpu, vd, i,
4260 aarch64_get_vec_u8 (cpu, vn, i)
4261 & ~ aarch64_get_vec_u8 (cpu, vm, i));
4265 do_vec_XTN (sim_cpu *cpu)
4268 instr[30] = first part (0)/ second part (1)
4269 instr[29,24] = 00 1110
4270 instr[23,22] = size: byte(00), half(01), word (10)
4271 instr[21,10] = 1000 0100 1010
4275 unsigned vs = INSTR (9, 5);
4276 unsigned vd = INSTR (4, 0);
4277 unsigned bias = INSTR (30, 30);
4280 NYI_assert (29, 24, 0x0E);
4281 NYI_assert (21, 10, 0x84A);
4283 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4284 switch (INSTR (23, 22))
4287 for (i = 0; i < 8; i++)
4288 aarch64_set_vec_u8 (cpu, vd, i + (bias * 8),
4289 aarch64_get_vec_u16 (cpu, vs, i));
4293 for (i = 0; i < 4; i++)
4294 aarch64_set_vec_u16 (cpu, vd, i + (bias * 4),
4295 aarch64_get_vec_u32 (cpu, vs, i));
4299 for (i = 0; i < 2; i++)
4300 aarch64_set_vec_u32 (cpu, vd, i + (bias * 2),
4301 aarch64_get_vec_u64 (cpu, vs, i));
4306 /* Return the number of bits set in the input value. */
/* Prefer the compiler builtin when available (GCC >= 3.4); otherwise
   fall back to a nibble lookup table.  */
4307 #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
4308 # define popcount __builtin_popcount
4311 popcount (unsigned char x)
/* 16-entry table: bit count of each 4-bit value (initializer elided
   in this view).  */
4313 static const unsigned char popcnt[16] =
4321 /* Only counts the low 8 bits of the input as that is all we need. */
/* Low nibble (x % 16) plus high nibble (x / 16).  */
4322 return popcnt[x % 16] + popcnt[x / 16];
/* Vector CNT: per-byte population count, vd.b[i] = popcount (vs.b[i]).  */
4327 do_vec_CNT (sim_cpu *cpu)
4330 instr[30] = half (0)/ full (1)
4331 instr[29,24] = 00 1110
4332 instr[23,22] = size: byte(00)
4333 instr[21,10] = 1000 0001 0110
4337 unsigned vs = INSTR (9, 5);
4338 unsigned vd = INSTR (4, 0);
4339 int full = INSTR (30, 30);
/* NOTE(review): `size' is decoded but its use (only 00 is valid per the
   header comment) is elided in this view — presumably checked before the
   loop; confirm against the full source.  */
4340 int size = INSTR (23, 22);
4343 NYI_assert (29, 24, 0x0E);
4344 NYI_assert (21, 10, 0x816);
4349 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4351 for (i = 0; i < (full ? 16 : 8); i++)
4352 aarch64_set_vec_u8 (cpu, vd, i,
4353 popcount (aarch64_get_vec_u8 (cpu, vs, i)));
/* Across-lanes integer min/max reduction: SMAXV/SMINV/UMAXV/UMINV.
   Reduces all lanes of Vs to a scalar written to general register Rd.
   Selector: (instr[29] << 1) | instr[16] => 0 SMAXV, 1 SMINV,
   2 UMAXV, 3 UMINV.  */
4357 do_vec_maxv (sim_cpu *cpu)
4360 instr[30] = half(0)/full(1)
4361 instr[29] = signed (0)/unsigned(1)
4362 instr[28,24] = 0 1110
4363 instr[23,22] = size: byte(00), half(01), word (10)
4365 instr[20,17] = 1 000
4366 instr[16] = max(0)/min(1)
4367 instr[15,10] = 1010 10
4368 instr[9,5] = V source
4369 instr[4.0] = R dest. */
4371 unsigned vs = INSTR (9, 5);
4372 unsigned rd = INSTR (4, 0);
4373 unsigned full = INSTR (30, 30);
4376 NYI_assert (28, 24, 0x0E);
4377 NYI_assert (21, 21, 1);
4378 NYI_assert (20, 17, 8);
4379 NYI_assert (15, 10, 0x2A);
4381 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4382 switch ((INSTR (29, 29) << 1) | INSTR (16, 16))
4384 case 0: /* SMAXV. */
/* Seed the accumulator with lane 0, then fold in the remaining lanes;
   lane count depends on element size and half/full register width.  */
4387 switch (INSTR (23, 22))
4390 smax = aarch64_get_vec_s8 (cpu, vs, 0);
4391 for (i = 1; i < (full ? 16 : 8); i++)
4392 smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i));
4395 smax = aarch64_get_vec_s16 (cpu, vs, 0);
4396 for (i = 1; i < (full ? 8 : 4); i++)
4397 smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i));
4400 smax = aarch64_get_vec_s32 (cpu, vs, 0);
4401 for (i = 1; i < (full ? 4 : 2); i++)
4402 smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i));
4407 aarch64_set_reg_s64 (cpu, rd, NO_SP, smax);
4411 case 1: /* SMINV. */
4414 switch (INSTR (23, 22))
4417 smin = aarch64_get_vec_s8 (cpu, vs, 0);
4418 for (i = 1; i < (full ? 16 : 8); i++)
4419 smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i));
4422 smin = aarch64_get_vec_s16 (cpu, vs, 0);
4423 for (i = 1; i < (full ? 8 : 4); i++)
4424 smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i));
4427 smin = aarch64_get_vec_s32 (cpu, vs, 0);
4428 for (i = 1; i < (full ? 4 : 2); i++)
4429 smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i));
4435 aarch64_set_reg_s64 (cpu, rd, NO_SP, smin);
4439 case 2: /* UMAXV. */
4442 switch (INSTR (23, 22))
4445 umax = aarch64_get_vec_u8 (cpu, vs, 0);
4446 for (i = 1; i < (full ? 16 : 8); i++)
4447 umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i));
4450 umax = aarch64_get_vec_u16 (cpu, vs, 0);
4451 for (i = 1; i < (full ? 8 : 4); i++)
4452 umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i));
4455 umax = aarch64_get_vec_u32 (cpu, vs, 0);
4456 for (i = 1; i < (full ? 4 : 2); i++)
4457 umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i));
4463 aarch64_set_reg_u64 (cpu, rd, NO_SP, umax);
4467 case 3: /* UMINV. */
4470 switch (INSTR (23, 22))
4473 umin = aarch64_get_vec_u8 (cpu, vs, 0);
4474 for (i = 1; i < (full ? 16 : 8); i++)
4475 umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i));
4478 umin = aarch64_get_vec_u16 (cpu, vs, 0);
4479 for (i = 1; i < (full ? 8 : 4); i++)
4480 umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i));
4483 umin = aarch64_get_vec_u32 (cpu, vs, 0);
4484 for (i = 1; i < (full ? 4 : 2); i++)
4485 umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i));
4491 aarch64_set_reg_u64 (cpu, rd, NO_SP, umin);
/* Across-lane single-precision min/max reduction over 4 floats:
   FMAXV/FMINV (instr[13,12] == 11) and FMAXNMV/FMINNMV (== 00),
   selected by instr[23] (max 0 / min 1).  Result goes to FP reg rd.  */
4498 do_vec_fminmaxV (sim_cpu *cpu)
4500 /* instr[31,24] = 0110 1110
4501 instr[23] = max(0)/min(1)
4502 instr[22,14] = 011 0000 11
4503 instr[13,12] = nm(00)/normal(11)
4505 instr[9,5] = V source
4506 instr[4.0] = R dest. */
4508 unsigned vs = INSTR (9, 5);
4509 unsigned rd = INSTR (4, 0);
/* Seed with lane 0; loops below fold in lanes 1..3.  */
4511 float res = aarch64_get_vec_float (cpu, vs, 0);
4513 NYI_assert (31, 24, 0x6E);
4514 NYI_assert (22, 14, 0x0C3);
4515 NYI_assert (11, 10, 2);
4517 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4520 switch (INSTR (13, 12))
4522 case 0: /* FMINNMV. */
4523 for (i = 1; i < 4; i++)
4524 res = fminnm (res, aarch64_get_vec_float (cpu, vs, i));
4527 case 3: /* FMINV. */
4528 for (i = 1; i < 4; i++)
4529 res = min (res, aarch64_get_vec_float (cpu, vs, i));
4538 switch (INSTR (13, 12))
4540 case 0: /* FMAXNMV. */
4541 for (i = 1; i < 4; i++)
4542 res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
4545 case 3: /* FMAXV. */
4546 for (i = 1; i < 4; i++)
4547 res = max (res, aarch64_get_vec_float (cpu, vs, i));
4555 aarch64_set_FP_float (cpu, rd, res);
/* Element-wise FP min/max: FMIN/FMAX (instr[13,12] == 11) and
   FMINNM/FMAXNM (== 00), on float or double lanes per instr[22].
   instr[23] selects min (1) vs max (0).  */
4559 do_vec_Fminmax (sim_cpu *cpu)
4562 instr[30] = half(0)/full(1)
4563 instr[29,24] = 00 1110
4564 instr[23] = max(0)/min(1)
4565 instr[22] = float(0)/double(1)
4569 instr[13,12] = nm(00)/normal(11)
4574 unsigned vm = INSTR (20, 16);
4575 unsigned vn = INSTR (9, 5);
4576 unsigned vd = INSTR (4, 0);
4577 unsigned full = INSTR (30, 30);
/* Local named `min' — shadows any min() macro/helper in this scope.  */
4578 unsigned min = INSTR (23, 23);
4581 NYI_assert (29, 24, 0x0E);
4582 NYI_assert (21, 21, 1);
4583 NYI_assert (15, 14, 3);
4584 NYI_assert (11, 10, 1);
4586 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* Double-precision path (instr[22] == 1).  */
4589 double (* func)(double, double);
/* NM variants use the simulator's dminnm/dmaxnm helpers; the normal
   variants use libm's fmin/fmax (double).  */
4594 if (INSTR (13, 12) == 0)
4595 func = min ? dminnm : dmaxnm;
4596 else if (INSTR (13, 12) == 3)
4597 func = min ? fmin : fmax;
4601 for (i = 0; i < 2; i++)
4602 aarch64_set_vec_double (cpu, vd, i,
4603 func (aarch64_get_vec_double (cpu, vn, i),
4604 aarch64_get_vec_double (cpu, vm, i)));
/* Single-precision path.  */
4608 float (* func)(float, float);
4610 if (INSTR (13, 12) == 0)
4611 func = min ? fminnm : fmaxnm;
4612 else if (INSTR (13, 12) == 3)
4613 func = min ? fminf : fmaxf;
4617 for (i = 0; i < (full ? 4 : 2); i++)
4618 aarch64_set_vec_float (cpu, vd, i,
4619 func (aarch64_get_vec_float (cpu, vn, i),
4620 aarch64_get_vec_float (cpu, vm, i)));
4625 do_vec_SCVTF (sim_cpu *cpu)
4629 instr[29,23] = 00 1110 0
4630 instr[22] = float(0)/double(1)
4631 instr[21,10] = 10 0001 1101 10
4635 unsigned vn = INSTR (9, 5);
4636 unsigned vd = INSTR (4, 0);
4637 unsigned full = INSTR (30, 30);
4638 unsigned size = INSTR (22, 22);
4641 NYI_assert (29, 23, 0x1C);
4642 NYI_assert (21, 10, 0x876);
4644 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4650 for (i = 0; i < 2; i++)
4652 double val = (double) aarch64_get_vec_u64 (cpu, vn, i);
4653 aarch64_set_vec_double (cpu, vd, i, val);
4658 for (i = 0; i < (full ? 4 : 2); i++)
4660 float val = (float) aarch64_get_vec_u32 (cpu, vn, i);
4661 aarch64_set_vec_float (cpu, vd, i, val);
/* Element-wise two-register compare: for each lane, set the destination
   lane to all-ones if (vn CMP vm) else all-zeros.  SOURCE is the lane
   accessor prefix (s/u for signed/unsigned), CMP the comparison operator.
   Expanded inside do_vec_compare where vn/vm/vd/full are in scope; the
   size switch scaffolding is elided in this view.  */
4666 #define VEC_CMP(SOURCE, CMP) \
4672 for (i = 0; i < (full ? 16 : 8); i++) \
4673 aarch64_set_vec_u8 (cpu, vd, i, \
4674 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4676 aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \
4680 for (i = 0; i < (full ? 8 : 4); i++) \
4681 aarch64_set_vec_u16 (cpu, vd, i, \
4682 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4684 aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \
4688 for (i = 0; i < (full ? 4 : 2); i++) \
4689 aarch64_set_vec_u32 (cpu, vd, i, \
4690 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4692 aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \
4698 for (i = 0; i < 2; i++) \
4699 aarch64_set_vec_u64 (cpu, vd, i, \
4700 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4702 aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \
/* Element-wise compare against zero: destination lane = all-ones if
   (vn CMP 0) else all-zeros.  Same expansion context as VEC_CMP.  */
4709 #define VEC_CMP0(SOURCE, CMP) \
4715 for (i = 0; i < (full ? 16 : 8); i++) \
4716 aarch64_set_vec_u8 (cpu, vd, i, \
4717 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4721 for (i = 0; i < (full ? 8 : 4); i++) \
4722 aarch64_set_vec_u16 (cpu, vd, i, \
4723 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4727 for (i = 0; i < (full ? 4 : 2); i++) \
4728 aarch64_set_vec_u32 (cpu, vd, i, \
4729 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4735 for (i = 0; i < 2; i++) \
4736 aarch64_set_vec_u64 (cpu, vd, i, \
4737 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4738 CMP 0 ? -1ULL : 0); \
/* Floating-point compare against zero: per lane, destination = all-ones
   if (vn CMP 0.0) else zero.  instr[22] selects double (1) or float (0)
   lanes; expanded inside do_vec_compare.  */
4744 #define VEC_FCMP0(CMP) \
4749 if (INSTR (22, 22)) \
4753 for (i = 0; i < 2; i++) \
4754 aarch64_set_vec_u64 (cpu, vd, i, \
4755 aarch64_get_vec_double (cpu, vn, i) \
4756 CMP 0.0 ? -1 : 0); \
4760 for (i = 0; i < (full ? 4 : 2); i++) \
4761 aarch64_set_vec_u32 (cpu, vd, i, \
4762 aarch64_get_vec_float (cpu, vn, i) \
4763 CMP 0.0 ? -1 : 0); \
/* Floating-point two-register compare: per lane, destination = all-ones
   if (vn CMP vm) else zero.  instr[22] selects double/float lanes.  */
4769 #define VEC_FCMP(CMP) \
4772 if (INSTR (22, 22)) \
4776 for (i = 0; i < 2; i++) \
4777 aarch64_set_vec_u64 (cpu, vd, i, \
4778 aarch64_get_vec_double (cpu, vn, i) \
4780 aarch64_get_vec_double (cpu, vm, i) \
4785 for (i = 0; i < (full ? 4 : 2); i++) \
4786 aarch64_set_vec_u32 (cpu, vd, i, \
4787 aarch64_get_vec_float (cpu, vn, i) \
4789 aarch64_get_vec_float (cpu, vm, i) \
/* Decode and emulate the vector compare family.  Also re-dispatches a
   few unrelated instructions that alias this decode slot (maxv,
   fminmaxV, SCVTF) before handling FP and integer compares via the
   VEC_FCMP*/VEC_CMP* macros.  */
4797 do_vec_compare (sim_cpu *cpu)
4800 instr[30] = half(0)/full(1)
4801 instr[29] = part-of-comparison-type
4802 instr[28,24] = 0 1110
4803 instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
4804 type of float compares: single (-0) / double (-1)
4806 instr[20,16] = Vm or 00000 (compare vs 0)
4807 instr[15,10] = part-of-comparison-type
4811 int full = INSTR (30, 30);
4812 int size = INSTR (23, 22);
4813 unsigned vm = INSTR (20, 16);
4814 unsigned vn = INSTR (9, 5);
4815 unsigned vd = INSTR (4, 0);
4818 NYI_assert (28, 24, 0x0E);
4819 NYI_assert (21, 21, 1);
4821 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4824 || ((INSTR (11, 11) == 0
4825 && INSTR (10, 10) == 0)))
4827 /* A compare vs 0. */
/* Instructions that share this encoding slot but are not compares are
   forwarded to their own handlers.  */
4830 if (INSTR (15, 10) == 0x2A)
4832 else if (INSTR (15, 10) == 0x32
4833 || INSTR (15, 10) == 0x3E)
4834 do_vec_fminmaxV (cpu);
4835 else if (INSTR (29, 23) == 0x1C
4836 && INSTR (21, 10) == 0x876)
4846 /* A floating point compare. */
/* Key: instr[29], instr[23] and low decode bits pick the FP compare.  */
4847 unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4)
4850 NYI_assert (15, 15, 1);
4854 case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
4855 case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
4856 case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
4857 case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
4858 case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
4859 case /* 0b111001: GT */ 0x39: VEC_FCMP (>);
4860 case /* 0b101001: GE */ 0x29: VEC_FCMP (>=);
4861 case /* 0b001001: EQ */ 0x09: VEC_FCMP (==);
/* Integer compares: key combines instr[29] (U bit) with instr[15,10].  */
4869 unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10);
4873 case 0x0D: /* 0001101 GT */ VEC_CMP (s, > );
4874 case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= );
4875 case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > );
4876 case 0x23: /* 0100011 TST */ VEC_CMP (u, & );
4877 case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == );
4878 case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < );
4879 case 0x4D: /* 1001101 HI */ VEC_CMP (u, > );
4880 case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= );
4881 case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= );
4882 case 0x63: /* 1100011 EQ */ VEC_CMP (u, == );
4883 case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= );
/* Vector SSHL: per-lane shift of signed elements of vn by the signed
   byte shift amount taken from the corresponding lane of vm (the lowest
   byte of each vm lane).  A negative amount shifts right (the if/else
   on the sign of `shift' is elided in this view).  */
4893 do_vec_SSHL (sim_cpu *cpu)
4896 instr[30] = first part (0)/ second part (1)
4897 instr[29,24] = 00 1110
4898 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4901 instr[15,10] = 0100 01
4905 unsigned full = INSTR (30, 30);
4906 unsigned vm = INSTR (20, 16);
4907 unsigned vn = INSTR (9, 5);
4908 unsigned vd = INSTR (4, 0);
4912 NYI_assert (29, 24, 0x0E);
4913 NYI_assert (21, 21, 1);
4914 NYI_assert (15, 10, 0x11);
4916 /* FIXME: What is a signed shift left in this context ?. */
4918 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4919 switch (INSTR (23, 22))
4922 for (i = 0; i < (full ? 16 : 8); i++)
4924 shift = aarch64_get_vec_s8 (cpu, vm, i);
4926 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4929 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
/* 16-bit lanes: the shift byte is the low byte of each vm lane,
   hence index i * 2 into vm's bytes.  */
4935 for (i = 0; i < (full ? 8 : 4); i++)
4937 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4939 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4942 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4948 for (i = 0; i < (full ? 4 : 2); i++)
4950 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4952 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4955 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4963 for (i = 0; i < 2; i++)
4965 shift = aarch64_get_vec_s8 (cpu, vm, i * 8)
4967 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4970 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
/* Vector USHL: unsigned counterpart of do_vec_SSHL — per-lane shift of
   unsigned elements of vn by the signed byte amount from vm's
   corresponding lane; negative amounts shift right (elided if/else).  */
4978 do_vec_USHL (sim_cpu *cpu)
4981 instr[30] = first part (0)/ second part (1)
4982 instr[29,24] = 10 1110
4983 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4986 instr[15,10] = 0100 01
4990 unsigned full = INSTR (30, 30);
4991 unsigned vm = INSTR (20, 16);
4992 unsigned vn = INSTR (9, 5);
4993 unsigned vd = INSTR (4, 0);
4997 NYI_assert (29, 24, 0x2E);
4998 NYI_assert (15, 10, 0x11);
5000 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5001 switch (INSTR (23, 22))
5004 for (i = 0; i < (full ? 16 : 8); i++)
5006 shift = aarch64_get_vec_s8 (cpu, vm, i);
5008 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
5011 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
5017 for (i = 0; i < (full ? 8 : 4); i++)
5019 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
5021 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
5024 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
5030 for (i = 0; i < (full ? 4 : 2); i++)
5032 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
5034 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
5037 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
5045 for (i = 0; i < 2; i++)
5047 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
5049 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
5052 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
/* Vector FMLA: vd += vn * vm per FP lane (float or double per
   instr[22]).
   NOTE(review): computed as separate multiply then add; architectural
   FMLA is a fused multiply-add (single rounding).  Results can differ
   in the last ulp from real hardware — confirm whether this matters
   for the simulator's accuracy goals.  */
5060 do_vec_FMLA (sim_cpu *cpu)
5063 instr[30] = full/half selector
5064 instr[29,23] = 0011100
5065 instr[22] = size: 0=>float, 1=>double
5068 instr[15,10] = 1100 11
5072 unsigned vm = INSTR (20, 16);
5073 unsigned vn = INSTR (9, 5);
5074 unsigned vd = INSTR (4, 0);
5076 int full = INSTR (30, 30);
5078 NYI_assert (29, 23, 0x1C);
5079 NYI_assert (21, 21, 1);
5080 NYI_assert (15, 10, 0x33);
5082 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5087 for (i = 0; i < 2; i++)
5088 aarch64_set_vec_double (cpu, vd, i,
5089 aarch64_get_vec_double (cpu, vn, i) *
5090 aarch64_get_vec_double (cpu, vm, i) +
5091 aarch64_get_vec_double (cpu, vd, i));
5095 for (i = 0; i < (full ? 4 : 2); i++)
5096 aarch64_set_vec_float (cpu, vd, i,
5097 aarch64_get_vec_float (cpu, vn, i) *
5098 aarch64_get_vec_float (cpu, vm, i) +
5099 aarch64_get_vec_float (cpu, vd, i));
/* Vector SMAX/UMAX: element-wise maximum, vd[i] = max (vn[i], vm[i]);
   instr[29] selects unsigned (1, first branch) vs signed (0).  */
5104 do_vec_max (sim_cpu *cpu)
5107 instr[30] = full/half selector
5108 instr[29] = SMAX (0) / UMAX (1)
5109 instr[28,24] = 0 1110
5110 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5113 instr[15,10] = 0110 01
5117 unsigned vm = INSTR (20, 16);
5118 unsigned vn = INSTR (9, 5);
5119 unsigned vd = INSTR (4, 0);
5121 int full = INSTR (30, 30);
5123 NYI_assert (28, 24, 0x0E);
5124 NYI_assert (21, 21, 1);
5125 NYI_assert (15, 10, 0x19);
5127 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* UMAX: unsigned lanes.  */
5130 switch (INSTR (23, 22))
5133 for (i = 0; i < (full ? 16 : 8); i++)
5134 aarch64_set_vec_u8 (cpu, vd, i,
5135 aarch64_get_vec_u8 (cpu, vn, i)
5136 > aarch64_get_vec_u8 (cpu, vm, i)
5137 ? aarch64_get_vec_u8 (cpu, vn, i)
5138 : aarch64_get_vec_u8 (cpu, vm, i));
5142 for (i = 0; i < (full ? 8 : 4); i++)
5143 aarch64_set_vec_u16 (cpu, vd, i,
5144 aarch64_get_vec_u16 (cpu, vn, i)
5145 > aarch64_get_vec_u16 (cpu, vm, i)
5146 ? aarch64_get_vec_u16 (cpu, vn, i)
5147 : aarch64_get_vec_u16 (cpu, vm, i));
5151 for (i = 0; i < (full ? 4 : 2); i++)
5152 aarch64_set_vec_u32 (cpu, vd, i,
5153 aarch64_get_vec_u32 (cpu, vn, i)
5154 > aarch64_get_vec_u32 (cpu, vm, i)
5155 ? aarch64_get_vec_u32 (cpu, vn, i)
5156 : aarch64_get_vec_u32 (cpu, vm, i));
/* SMAX: signed lanes.  */
5165 switch (INSTR (23, 22))
5168 for (i = 0; i < (full ? 16 : 8); i++)
5169 aarch64_set_vec_s8 (cpu, vd, i,
5170 aarch64_get_vec_s8 (cpu, vn, i)
5171 > aarch64_get_vec_s8 (cpu, vm, i)
5172 ? aarch64_get_vec_s8 (cpu, vn, i)
5173 : aarch64_get_vec_s8 (cpu, vm, i));
5177 for (i = 0; i < (full ? 8 : 4); i++)
5178 aarch64_set_vec_s16 (cpu, vd, i,
5179 aarch64_get_vec_s16 (cpu, vn, i)
5180 > aarch64_get_vec_s16 (cpu, vm, i)
5181 ? aarch64_get_vec_s16 (cpu, vn, i)
5182 : aarch64_get_vec_s16 (cpu, vm, i));
5186 for (i = 0; i < (full ? 4 : 2); i++)
5187 aarch64_set_vec_s32 (cpu, vd, i,
5188 aarch64_get_vec_s32 (cpu, vn, i)
5189 > aarch64_get_vec_s32 (cpu, vm, i)
5190 ? aarch64_get_vec_s32 (cpu, vn, i)
5191 : aarch64_get_vec_s32 (cpu, vm, i));
/* Vector SMIN/UMIN: element-wise minimum, vd[i] = min (vn[i], vm[i]);
   instr[29] selects unsigned (1, first branch) vs signed (0).
   Mirror image of do_vec_max with `<' instead of `>'.  */
5201 do_vec_min (sim_cpu *cpu)
5204 instr[30] = full/half selector
5205 instr[29] = SMIN (0) / UMIN (1)
5206 instr[28,24] = 0 1110
5207 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5210 instr[15,10] = 0110 11
5214 unsigned vm = INSTR (20, 16);
5215 unsigned vn = INSTR (9, 5);
5216 unsigned vd = INSTR (4, 0);
5218 int full = INSTR (30, 30);
5220 NYI_assert (28, 24, 0x0E);
5221 NYI_assert (21, 21, 1);
5222 NYI_assert (15, 10, 0x1B);
5224 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* UMIN: unsigned lanes.  */
5227 switch (INSTR (23, 22))
5230 for (i = 0; i < (full ? 16 : 8); i++)
5231 aarch64_set_vec_u8 (cpu, vd, i,
5232 aarch64_get_vec_u8 (cpu, vn, i)
5233 < aarch64_get_vec_u8 (cpu, vm, i)
5234 ? aarch64_get_vec_u8 (cpu, vn, i)
5235 : aarch64_get_vec_u8 (cpu, vm, i));
5239 for (i = 0; i < (full ? 8 : 4); i++)
5240 aarch64_set_vec_u16 (cpu, vd, i,
5241 aarch64_get_vec_u16 (cpu, vn, i)
5242 < aarch64_get_vec_u16 (cpu, vm, i)
5243 ? aarch64_get_vec_u16 (cpu, vn, i)
5244 : aarch64_get_vec_u16 (cpu, vm, i));
5248 for (i = 0; i < (full ? 4 : 2); i++)
5249 aarch64_set_vec_u32 (cpu, vd, i,
5250 aarch64_get_vec_u32 (cpu, vn, i)
5251 < aarch64_get_vec_u32 (cpu, vm, i)
5252 ? aarch64_get_vec_u32 (cpu, vn, i)
5253 : aarch64_get_vec_u32 (cpu, vm, i));
/* SMIN: signed lanes.  */
5262 switch (INSTR (23, 22))
5265 for (i = 0; i < (full ? 16 : 8); i++)
5266 aarch64_set_vec_s8 (cpu, vd, i,
5267 aarch64_get_vec_s8 (cpu, vn, i)
5268 < aarch64_get_vec_s8 (cpu, vm, i)
5269 ? aarch64_get_vec_s8 (cpu, vn, i)
5270 : aarch64_get_vec_s8 (cpu, vm, i));
5274 for (i = 0; i < (full ? 8 : 4); i++)
5275 aarch64_set_vec_s16 (cpu, vd, i,
5276 aarch64_get_vec_s16 (cpu, vn, i)
5277 < aarch64_get_vec_s16 (cpu, vm, i)
5278 ? aarch64_get_vec_s16 (cpu, vn, i)
5279 : aarch64_get_vec_s16 (cpu, vm, i));
5283 for (i = 0; i < (full ? 4 : 2); i++)
5284 aarch64_set_vec_s32 (cpu, vd, i,
5285 aarch64_get_vec_s32 (cpu, vn, i)
5286 < aarch64_get_vec_s32 (cpu, vm, i)
5287 ? aarch64_get_vec_s32 (cpu, vn, i)
5288 : aarch64_get_vec_s32 (cpu, vm, i));
/* SSUBL/SSUBL2/USUBL/USUBL2: widening subtract.  Subtracts the lower
   (bias 0) or upper (bias — SUBL2 variants) half-lanes of vm from vn,
   producing double-width results in vd.  The bias assignment for the
   "2" variants is elided in this view.  */
5298 do_vec_sub_long (sim_cpu *cpu)
5301 instr[30] = lower (0) / upper (1)
5302 instr[29] = signed (0) / unsigned (1)
5303 instr[28,24] = 0 1110
5304 instr[23,22] = size: bytes (00), half (01), word (10)
5307 instr[15,10] = 0010 00
5309 instr[4,0] = V dest. */
5311 unsigned size = INSTR (23, 22);
5312 unsigned vm = INSTR (20, 16);
5313 unsigned vn = INSTR (9, 5);
5314 unsigned vd = INSTR (4, 0);
5318 NYI_assert (28, 24, 0x0E);
5319 NYI_assert (21, 21, 1);
5320 NYI_assert (15, 10, 0x08);
5325 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5326 switch (INSTR (30, 29))
5328 case 2: /* SSUBL2. */
/* Falls through to the SSUBL body after adjusting bias (elided).  */
5330 case 0: /* SSUBL. */
5335 for (i = 0; i < 8; i++)
5336 aarch64_set_vec_s16 (cpu, vd, i,
5337 aarch64_get_vec_s8 (cpu, vn, i + bias)
5338 - aarch64_get_vec_s8 (cpu, vm, i + bias));
5343 for (i = 0; i < 4; i++)
5344 aarch64_set_vec_s32 (cpu, vd, i,
5345 aarch64_get_vec_s16 (cpu, vn, i + bias)
5346 - aarch64_get_vec_s16 (cpu, vm, i + bias));
5350 for (i = 0; i < 2; i++)
5351 aarch64_set_vec_s64 (cpu, vd, i,
5352 aarch64_get_vec_s32 (cpu, vn, i + bias)
5353 - aarch64_get_vec_s32 (cpu, vm, i + bias));
5361 case 3: /* USUBL2. */
5363 case 1: /* USUBL. */
5368 for (i = 0; i < 8; i++)
5369 aarch64_set_vec_u16 (cpu, vd, i,
5370 aarch64_get_vec_u8 (cpu, vn, i + bias)
5371 - aarch64_get_vec_u8 (cpu, vm, i + bias))
5376 for (i = 0; i < 4; i++)
5377 aarch64_set_vec_u32 (cpu, vd, i,
5378 aarch64_get_vec_u16 (cpu, vn, i + bias)
5379 - aarch64_get_vec_u16 (cpu, vm, i + bias));
5383 for (i = 0; i < 2; i++)
5384 aarch64_set_vec_u64 (cpu, vd, i,
5385 aarch64_get_vec_u32 (cpu, vn, i + bias)
5386 - aarch64_get_vec_u32 (cpu, vm, i + bias));
/* Vector ADDP: pairwise add.  Adjacent element pairs of vn fill the low
   half of vd; adjacent pairs of vm fill the high half.  */
5397 do_vec_ADDP (sim_cpu *cpu)
5400 instr[30] = half(0)/full(1)
5401 instr[29,24] = 00 1110
5402 instr[23,22] = size: bytes (00), half (01), word (10), long (11)
5405 instr[15,10] = 1011 11
5407 instr[4,0] = V dest. */
5411 unsigned full = INSTR (30, 30);
5412 unsigned size = INSTR (23, 22);
5413 unsigned vm = INSTR (20, 16);
5414 unsigned vn = INSTR (9, 5);
5415 unsigned vd = INSTR (4, 0);
5418 NYI_assert (29, 24, 0x0E);
5419 NYI_assert (21, 21, 1);
5420 NYI_assert (15, 10, 0x2F);
5422 /* Make copies of the source registers in case vd == vn/vm. */
5423 copy_vn = cpu->fr[vn];
5424 copy_vm = cpu->fr[vm];
5426 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* `range' is the number of result elements produced from each source
   register; results from vn go to [0, range), from vm to
   [range, 2*range).  */
5430 range = full ? 8 : 4;
5431 for (i = 0; i < range; i++)
5433 aarch64_set_vec_u8 (cpu, vd, i,
5434 copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
5435 aarch64_set_vec_u8 (cpu, vd, i + range,
5436 copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);
5441 range = full ? 4 : 2;
5442 for (i = 0; i < range; i++)
5444 aarch64_set_vec_u16 (cpu, vd, i,
5445 copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
5446 aarch64_set_vec_u16 (cpu, vd, i + range,
5447 copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);
5452 range = full ? 2 : 1;
5453 for (i = 0; i < range; i++)
5455 aarch64_set_vec_u32 (cpu, vd, i,
5456 copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
5457 aarch64_set_vec_u32 (cpu, vd, i + range,
5458 copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);
/* 64-bit lanes: exactly one pair per source register.  */
5465 aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
5466 aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
/* Vector FABS: per-lane floating-point absolute value, on float or
   double lanes per instr[22].  */
5472 do_vec_FABS (sim_cpu *cpu)
5475 instr[30] = half(0)/full(1)
5476 instr[29,23] = 00 1110 1
5477 instr[22] = float(0)/double(1)
5478 instr[21,16] = 10 0000
5479 instr[15,10] = 1111 10
5483 unsigned vn = INSTR (9, 5);
5484 unsigned vd = INSTR (4, 0);
5485 unsigned full = INSTR (30, 30);
5488 NYI_assert (29, 23, 0x1D);
5489 NYI_assert (21, 10, 0x83E);
5491 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5497 for (i = 0; i < 2; i++)
5498 aarch64_set_vec_double (cpu, vd, i,
5499 fabs (aarch64_get_vec_double (cpu, vn, i)));
5503 for (i = 0; i < (full ? 4 : 2); i++)
5504 aarch64_set_vec_float (cpu, vd, i,
5505 fabsf (aarch64_get_vec_float (cpu, vn, i)));
/* Vector FCVTZS: per-lane FP to signed integer conversion, rounding
   toward zero (C cast truncation gives round-to-zero).
   NOTE(review): out-of-range/NaN inputs rely on the host cast, which is
   undefined behavior in C rather than the architected saturation —
   confirm against the full source whether this is handled elsewhere.  */
5510 do_vec_FCVTZS (sim_cpu *cpu)
5513 instr[30] = half (0) / all (1)
5514 instr[29,23] = 00 1110 1
5515 instr[22] = single (0) / double (1)
5516 instr[21,10] = 10 0001 1011 10
5520 unsigned rn = INSTR (9, 5);
5521 unsigned rd = INSTR (4, 0);
5522 unsigned full = INSTR (30, 30);
5525 NYI_assert (31, 31, 0);
5526 NYI_assert (29, 23, 0x1D);
5527 NYI_assert (21, 10, 0x86E);
5529 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5535 for (i = 0; i < 2; i++)
5536 aarch64_set_vec_s64 (cpu, rd, i,
5537 (int64_t) aarch64_get_vec_double (cpu, rn, i));
5540 for (i = 0; i < (full ? 4 : 2); i++)
5541 aarch64_set_vec_s32 (cpu, rd, i,
5542 (int32_t) aarch64_get_vec_float (cpu, rn, i));
/* Vector REV64: reverse the order of elements within each 64-bit
   doubleword.  Implemented with an XOR index trick into a temporary
   (bytes: i ^ 7, halves: i ^ 3, words: i ^ 1), then copied to rd.  */
5546 do_vec_REV64 (sim_cpu *cpu)
5549 instr[30] = full/half
5550 instr[29,24] = 00 1110
5552 instr[21,10] = 10 0000 0000 10
5556 unsigned rn = INSTR (9, 5);
5557 unsigned rd = INSTR (4, 0);
5558 unsigned size = INSTR (23, 22);
5559 unsigned full = INSTR (30, 30);
5563 NYI_assert (29, 24, 0x0E);
5564 NYI_assert (21, 10, 0x802);
5566 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5570 for (i = 0; i < (full ? 16 : 8); i++)
5571 val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i);
5575 for (i = 0; i < (full ? 8 : 4); i++)
5576 val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i);
5580 for (i = 0; i < (full ? 4 : 2); i++)
5581 val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i);
/* Write back the temporary; the upper doubleword only for the full
   (128-bit) form.  */
5588 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5590 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
/* Vector REV16: swap the two bytes within each 16-bit halfword
   (byte index i ^ 1), via a temporary then copied to rd.  Only the
   byte-element form is shown; handling of other size values is elided
   in this view.  */
5594 do_vec_REV16 (sim_cpu *cpu)
5597 instr[30] = full/half
5598 instr[29,24] = 00 1110
5600 instr[21,10] = 10 0000 0001 10
5604 unsigned rn = INSTR (9, 5);
5605 unsigned rd = INSTR (4, 0);
5606 unsigned size = INSTR (23, 22);
5607 unsigned full = INSTR (30, 30);
5611 NYI_assert (29, 24, 0x0E);
5612 NYI_assert (21, 10, 0x806);
5614 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5618 for (i = 0; i < (full ? 16 : 8); i++)
5619 val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i);
5626 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5628 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
/* Secondary decoder for the vector-op group with instr[28,24] == 01110.
   Splits on instr[21], then on sub-opcode fields, and forwards to the
   individual do_vec_* emulators.  Many case labels and the surrounding
   control structure are elided in this view.  */
5632 do_vec_op1 (sim_cpu *cpu)
5635 instr[30] = half/full
5636 instr[29,24] = 00 1110
5639 instr[15,10] = sub-opcode
5642 NYI_assert (29, 24, 0x0E)
5644 if (INSTR (21, 21) == 0)
5646 if (INSTR (23, 22) == 0)
/* INS (element) variant with a second register index.  */
5648 if (INSTR (30, 30) == 1
5649 && INSTR (17, 14) == 0
5650 && INSTR (12, 10) == 7)
5651 return do_vec_ins_2 (cpu);
5653 switch (INSTR (15, 10))
5655 case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
5656 case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
5657 case 0x07: do_vec_INS (cpu); return;
5658 case 0x0B: do_vec_SMOV_into_scalar (cpu); return;
5659 case 0x0F: do_vec_UMOV_into_scalar (cpu); return;
5665 do_vec_TBL (cpu); return;
5669 do_vec_UZP (cpu); return;
5671 case 0x0A: do_vec_TRN (cpu); return;
5675 do_vec_ZIP (cpu); return;
/* Permute ops for non-zero size fields.  */
5682 switch (INSTR (13, 10))
5684 case 0x6: do_vec_UZP (cpu); return;
5685 case 0xE: do_vec_ZIP (cpu); return;
5686 case 0xA: do_vec_TRN (cpu); return;
/* instr[21] == 1: three-register and two-register misc ops.  */
5691 switch (INSTR (15, 10))
5693 case 0x02: do_vec_REV64 (cpu); return;
5694 case 0x06: do_vec_REV16 (cpu); return;
/* Sub-opcode 0x07: bitwise logical ops selected by instr[23,21].  */
5697 switch (INSTR (23, 21))
5699 case 1: do_vec_AND (cpu); return;
5700 case 3: do_vec_BIC (cpu); return;
5701 case 5: do_vec_ORR (cpu); return;
5702 case 7: do_vec_ORN (cpu); return;
5706 case 0x08: do_vec_sub_long (cpu); return;
5707 case 0x0a: do_vec_XTN (cpu); return;
5708 case 0x11: do_vec_SSHL (cpu); return;
5709 case 0x16: do_vec_CNT (cpu); return;
5710 case 0x19: do_vec_max (cpu); return;
5711 case 0x1B: do_vec_min (cpu); return;
5712 case 0x21: do_vec_add (cpu); return;
5713 case 0x25: do_vec_MLA (cpu); return;
5714 case 0x27: do_vec_mul (cpu); return;
5715 case 0x2F: do_vec_ADDP (cpu); return;
5716 case 0x30: do_vec_mull (cpu); return;
5717 case 0x33: do_vec_FMLA (cpu); return;
5718 case 0x35: do_vec_fadd (cpu); return;
/* Two-register misc group, discriminated by instr[20,16].  */
5721 switch (INSTR (20, 16))
5723 case 0x00: do_vec_ABS (cpu); return;
5724 case 0x01: do_vec_FCVTZS (cpu); return;
5725 case 0x11: do_vec_ADDV (cpu); return;
5731 do_vec_Fminmax (cpu); return;
5743 do_vec_compare (cpu); return;
5746 do_vec_FABS (cpu); return;
/* SXTL/UXTL and SSHLL/USHLL (and their "2" high-half variants):
   widen each source lane to double width, shifting left by the
   immediate encoded in instr[21,16] (leading-one position selects the
   element size; remaining bits are the shift amount).  */
5754 do_vec_xtl (sim_cpu *cpu)
5757 instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
5758 instr[28,22] = 0 1111 00
5759 instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
5760 instr[15,10] = 1010 01
5761 instr[9,5] = V source
5762 instr[4,0] = V dest. */
5764 unsigned vs = INSTR (9, 5);
5765 unsigned vd = INSTR (4, 0);
/* bias selects the upper source half for the "2" variants.  */
5766 unsigned i, shift, bias = 0;
5768 NYI_assert (28, 22, 0x3C);
5769 NYI_assert (15, 10, 0x29);
5771 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5772 switch (INSTR (30, 29))
5774 case 2: /* SXTL2, SSHLL2. */
5776 case 0: /* SXTL, SSHLL. */
/* 32 -> 64 bit widening (instr[21] set).  */
5781 shift = INSTR (20, 16);
5782 /* Get the source values before setting the destination values
5783 in case the source and destination are the same. */
5784 val1 = aarch64_get_vec_s32 (cpu, vs, bias) << shift;
5785 val2 = aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift;
5786 aarch64_set_vec_s64 (cpu, vd, 0, val1);
5787 aarch64_set_vec_s64 (cpu, vd, 1, val2);
/* 16 -> 32 bit widening.  */
5789 else if (INSTR (20, 20))
5792 int32_t v1,v2,v3,v4;
5794 shift = INSTR (19, 16);
5796 for (i = 0; i < 4; i++)
5797 v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift;
5798 for (i = 0; i < 4; i++)
5799 aarch64_set_vec_s32 (cpu, vd, i, v[i]);
/* 8 -> 16 bit widening.  */
5804 NYI_assert (19, 19, 1);
5806 shift = INSTR (18, 16);
5808 for (i = 0; i < 8; i++)
5809 v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
5810 for (i = 0; i < 8; i++)
5811 aarch64_set_vec_s16 (cpu, vd, i, v[i]);
5815 case 3: /* UXTL2, USHLL2. */
5817 case 1: /* UXTL, USHLL. */
/* Unsigned variants — same structure with unsigned accessors.  */
5821 shift = INSTR (20, 16);
5822 v1 = aarch64_get_vec_u32 (cpu, vs, bias) << shift;
5823 v2 = aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift;
5824 aarch64_set_vec_u64 (cpu, vd, 0, v1);
5825 aarch64_set_vec_u64 (cpu, vd, 1, v2);
5827 else if (INSTR (20, 20))
5830 shift = INSTR (19, 16);
5832 for (i = 0; i < 4; i++)
5833 v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift;
5834 for (i = 0; i < 4; i++)
5835 aarch64_set_vec_u32 (cpu, vd, i, v[i]);
5840 NYI_assert (19, 19, 1);
5842 shift = INSTR (18, 16);
5844 for (i = 0; i < 8; i++)
5845 v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
5846 for (i = 0; i < 8; i++)
5847 aarch64_set_vec_u16 (cpu, vd, i, v[i]);
5854 do_vec_SHL (sim_cpu *cpu)
5857 instr [30] = half(0)/full(1)
5858 instr [29,23] = 001 1110
5859 instr [22,16] = size and shift amount
5860 instr [15,10] = 01 0101
5862 instr [4, 0] = Vd. */
5865 int full = INSTR (30, 30);
5866 unsigned vs = INSTR (9, 5);
5867 unsigned vd = INSTR (4, 0);
5870 NYI_assert (29, 23, 0x1E);
5871 NYI_assert (15, 10, 0x15);
5873 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5876 shift = INSTR (21, 16);
5881 for (i = 0; i < 2; i++)
5883 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5884 aarch64_set_vec_u64 (cpu, vd, i, val << shift);
5892 shift = INSTR (20, 16);
5894 for (i = 0; i < (full ? 4 : 2); i++)
5896 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5897 aarch64_set_vec_u32 (cpu, vd, i, val << shift);
5905 shift = INSTR (19, 16);
5907 for (i = 0; i < (full ? 8 : 4); i++)
5909 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5910 aarch64_set_vec_u16 (cpu, vd, i, val << shift);
5916 if (INSTR (19, 19) == 0)
5919 shift = INSTR (18, 16);
5921 for (i = 0; i < (full ? 16 : 8); i++)
5923 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5924 aarch64_set_vec_u8 (cpu, vd, i, val << shift);
5929 do_vec_SSHR_USHR (sim_cpu *cpu)
5932 instr [30] = half(0)/full(1)
5933 instr [29] = signed(0)/unsigned(1)
5934 instr [28,23] = 0 1111 0
5935 instr [22,16] = size and shift amount
5936 instr [15,10] = 0000 01
5938 instr [4, 0] = Vd. */
5940 int full = INSTR (30, 30);
5941 int sign = ! INSTR (29, 29);
5942 unsigned shift = INSTR (22, 16);
5943 unsigned vs = INSTR (9, 5);
5944 unsigned vd = INSTR (4, 0);
5947 NYI_assert (28, 23, 0x1E);
5948 NYI_assert (15, 10, 0x01);
5950 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5953 shift = 128 - shift;
5959 for (i = 0; i < 2; i++)
5961 int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
5962 aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
5965 for (i = 0; i < 2; i++)
5967 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5968 aarch64_set_vec_u64 (cpu, vd, i, val >> shift);
5979 for (i = 0; i < (full ? 4 : 2); i++)
5981 int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
5982 aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
5985 for (i = 0; i < (full ? 4 : 2); i++)
5987 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5988 aarch64_set_vec_u32 (cpu, vd, i, val >> shift);
5999 for (i = 0; i < (full ? 8 : 4); i++)
6001 int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
6002 aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
6005 for (i = 0; i < (full ? 8 : 4); i++)
6007 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
6008 aarch64_set_vec_u16 (cpu, vd, i, val >> shift);
6014 if (INSTR (19, 19) == 0)
6020 for (i = 0; i < (full ? 16 : 8); i++)
6022 int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
6023 aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
6026 for (i = 0; i < (full ? 16 : 8); i++)
6028 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
6029 aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
6034 do_vec_MUL_by_element (sim_cpu *cpu)
6037 instr[30] = half/full
6038 instr[29,24] = 00 1111
6049 unsigned full = INSTR (30, 30);
6050 unsigned L = INSTR (21, 21);
6051 unsigned H = INSTR (11, 11);
6052 unsigned vn = INSTR (9, 5);
6053 unsigned vd = INSTR (4, 0);
6054 unsigned size = INSTR (23, 22);
6059 NYI_assert (29, 24, 0x0F);
6060 NYI_assert (15, 12, 0x8);
6061 NYI_assert (10, 10, 0);
6063 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6068 /* 16 bit products. */
6073 index = (H << 2) | (L << 1) | INSTR (20, 20);
6074 vm = INSTR (19, 16);
6075 element2 = aarch64_get_vec_u16 (cpu, vm, index);
6077 for (e = 0; e < (full ? 8 : 4); e ++)
6079 element1 = aarch64_get_vec_u16 (cpu, vn, e);
6080 product = element1 * element2;
6081 aarch64_set_vec_u16 (cpu, vd, e, product);
6088 /* 32 bit products. */
6093 index = (H << 1) | L;
6094 vm = INSTR (20, 16);
6095 element2 = aarch64_get_vec_u32 (cpu, vm, index);
6097 for (e = 0; e < (full ? 4 : 2); e ++)
6099 element1 = aarch64_get_vec_u32 (cpu, vn, e);
6100 product = element1 * element2;
6101 aarch64_set_vec_u32 (cpu, vd, e, product);
6112 do_FMLA_by_element (sim_cpu *cpu)
6115 instr[30] = half/full
6116 instr[29,23] = 00 1111 1
6126 unsigned full = INSTR (30, 30);
6127 unsigned size = INSTR (22, 22);
6128 unsigned L = INSTR (21, 21);
6129 unsigned vm = INSTR (20, 16);
6130 unsigned H = INSTR (11, 11);
6131 unsigned vn = INSTR (9, 5);
6132 unsigned vd = INSTR (4, 0);
6135 NYI_assert (29, 23, 0x1F);
6136 NYI_assert (15, 12, 0x1);
6137 NYI_assert (10, 10, 0);
6139 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6142 double element1, element2;
6147 element2 = aarch64_get_vec_double (cpu, vm, H);
6149 for (e = 0; e < 2; e++)
6151 element1 = aarch64_get_vec_double (cpu, vn, e);
6152 element1 *= element2;
6153 element1 += aarch64_get_vec_double (cpu, vd, e);
6154 aarch64_set_vec_double (cpu, vd, e, element1);
6160 float element2 = aarch64_get_vec_float (cpu, vm, (H << 1) | L);
6162 for (e = 0; e < (full ? 4 : 2); e++)
6164 element1 = aarch64_get_vec_float (cpu, vn, e);
6165 element1 *= element2;
6166 element1 += aarch64_get_vec_float (cpu, vd, e);
6167 aarch64_set_vec_float (cpu, vd, e, element1);
6173 do_vec_op2 (sim_cpu *cpu)
6176 instr[30] = half/full
6177 instr[29,24] = 00 1111
6179 instr[22,16] = element size & index
6180 instr[15,10] = sub-opcode
6184 NYI_assert (29, 24, 0x0F);
6186 if (INSTR (23, 23) != 0)
6188 switch (INSTR (15, 10))
6192 do_FMLA_by_element (cpu);
6197 do_vec_MUL_by_element (cpu);
6206 switch (INSTR (15, 10))
6208 case 0x01: do_vec_SSHR_USHR (cpu); return;
6209 case 0x15: do_vec_SHL (cpu); return;
6211 case 0x22: do_vec_MUL_by_element (cpu); return;
6212 case 0x29: do_vec_xtl (cpu); return;
6219 do_vec_neg (sim_cpu *cpu)
6222 instr[30] = full(1)/half(0)
6223 instr[29,24] = 10 1110
6224 instr[23,22] = size: byte(00), half (01), word (10), long (11)
6225 instr[21,10] = 1000 0010 1110
6229 int full = INSTR (30, 30);
6230 unsigned vs = INSTR (9, 5);
6231 unsigned vd = INSTR (4, 0);
6234 NYI_assert (29, 24, 0x2E);
6235 NYI_assert (21, 10, 0x82E);
6237 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6238 switch (INSTR (23, 22))
6241 for (i = 0; i < (full ? 16 : 8); i++)
6242 aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));
6246 for (i = 0; i < (full ? 8 : 4); i++)
6247 aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));
6251 for (i = 0; i < (full ? 4 : 2); i++)
6252 aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));
6258 for (i = 0; i < 2; i++)
6259 aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
6265 do_vec_sqrt (sim_cpu *cpu)
6268 instr[30] = full(1)/half(0)
6269 instr[29,23] = 101 1101
6270 instr[22] = single(0)/double(1)
6271 instr[21,10] = 1000 0111 1110
6275 int full = INSTR (30, 30);
6276 unsigned vs = INSTR (9, 5);
6277 unsigned vd = INSTR (4, 0);
6280 NYI_assert (29, 23, 0x5B);
6281 NYI_assert (21, 10, 0x87E);
6283 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6284 if (INSTR (22, 22) == 0)
6285 for (i = 0; i < (full ? 4 : 2); i++)
6286 aarch64_set_vec_float (cpu, vd, i,
6287 sqrtf (aarch64_get_vec_float (cpu, vs, i)));
6289 for (i = 0; i < 2; i++)
6290 aarch64_set_vec_double (cpu, vd, i,
6291 sqrt (aarch64_get_vec_double (cpu, vs, i)));
6295 do_vec_mls_indexed (sim_cpu *cpu)
6298 instr[30] = half(0)/full(1)
6299 instr[29,24] = 10 1111
6300 instr[23,22] = 16-bit(01)/32-bit(10)
6301 instr[21,20+11] = index (if 16-bit)
6302 instr[21+11] = index (if 32-bit)
6305 instr[11] = part of index
6310 int full = INSTR (30, 30);
6311 unsigned vs = INSTR (9, 5);
6312 unsigned vd = INSTR (4, 0);
6313 unsigned vm = INSTR (20, 16);
6316 NYI_assert (15, 12, 4);
6317 NYI_assert (10, 10, 0);
6319 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6320 switch (INSTR (23, 22))
6330 elem = (INSTR (21, 20) << 1) | INSTR (11, 11);
6331 val = aarch64_get_vec_u16 (cpu, vm, elem);
6333 for (i = 0; i < (full ? 8 : 4); i++)
6334 aarch64_set_vec_u32 (cpu, vd, i,
6335 aarch64_get_vec_u32 (cpu, vd, i) -
6336 (aarch64_get_vec_u32 (cpu, vs, i) * val));
6342 unsigned elem = (INSTR (21, 21) << 1) | INSTR (11, 11);
6343 uint64_t val = aarch64_get_vec_u32 (cpu, vm, elem);
6345 for (i = 0; i < (full ? 4 : 2); i++)
6346 aarch64_set_vec_u64 (cpu, vd, i,
6347 aarch64_get_vec_u64 (cpu, vd, i) -
6348 (aarch64_get_vec_u64 (cpu, vs, i) * val));
6360 do_vec_SUB (sim_cpu *cpu)
6363 instr [30] = half(0)/full(1)
6364 instr [29,24] = 10 1110
6365 instr [23,22] = size: byte(00, half(01), word (10), long (11)
6368 instr [15,10] = 10 0001
6370 instr [4, 0] = Vd. */
6372 unsigned full = INSTR (30, 30);
6373 unsigned vm = INSTR (20, 16);
6374 unsigned vn = INSTR (9, 5);
6375 unsigned vd = INSTR (4, 0);
6378 NYI_assert (29, 24, 0x2E);
6379 NYI_assert (21, 21, 1);
6380 NYI_assert (15, 10, 0x21);
6382 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6383 switch (INSTR (23, 22))
6386 for (i = 0; i < (full ? 16 : 8); i++)
6387 aarch64_set_vec_s8 (cpu, vd, i,
6388 aarch64_get_vec_s8 (cpu, vn, i)
6389 - aarch64_get_vec_s8 (cpu, vm, i));
6393 for (i = 0; i < (full ? 8 : 4); i++)
6394 aarch64_set_vec_s16 (cpu, vd, i,
6395 aarch64_get_vec_s16 (cpu, vn, i)
6396 - aarch64_get_vec_s16 (cpu, vm, i));
6400 for (i = 0; i < (full ? 4 : 2); i++)
6401 aarch64_set_vec_s32 (cpu, vd, i,
6402 aarch64_get_vec_s32 (cpu, vn, i)
6403 - aarch64_get_vec_s32 (cpu, vm, i));
6410 for (i = 0; i < 2; i++)
6411 aarch64_set_vec_s64 (cpu, vd, i,
6412 aarch64_get_vec_s64 (cpu, vn, i)
6413 - aarch64_get_vec_s64 (cpu, vm, i));
6419 do_vec_MLS (sim_cpu *cpu)
6422 instr [30] = half(0)/full(1)
6423 instr [29,24] = 10 1110
6424 instr [23,22] = size: byte(00, half(01), word (10)
6427 instr [15,10] = 10 0101
6429 instr [4, 0] = Vd. */
6431 unsigned full = INSTR (30, 30);
6432 unsigned vm = INSTR (20, 16);
6433 unsigned vn = INSTR (9, 5);
6434 unsigned vd = INSTR (4, 0);
6437 NYI_assert (29, 24, 0x2E);
6438 NYI_assert (21, 21, 1);
6439 NYI_assert (15, 10, 0x25);
6441 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6442 switch (INSTR (23, 22))
6445 for (i = 0; i < (full ? 16 : 8); i++)
6446 aarch64_set_vec_u8 (cpu, vd, i,
6447 aarch64_get_vec_u8 (cpu, vd, i)
6448 - (aarch64_get_vec_u8 (cpu, vn, i)
6449 * aarch64_get_vec_u8 (cpu, vm, i)));
6453 for (i = 0; i < (full ? 8 : 4); i++)
6454 aarch64_set_vec_u16 (cpu, vd, i,
6455 aarch64_get_vec_u16 (cpu, vd, i)
6456 - (aarch64_get_vec_u16 (cpu, vn, i)
6457 * aarch64_get_vec_u16 (cpu, vm, i)));
6461 for (i = 0; i < (full ? 4 : 2); i++)
6462 aarch64_set_vec_u32 (cpu, vd, i,
6463 aarch64_get_vec_u32 (cpu, vd, i)
6464 - (aarch64_get_vec_u32 (cpu, vn, i)
6465 * aarch64_get_vec_u32 (cpu, vm, i)));
6474 do_vec_FDIV (sim_cpu *cpu)
6477 instr [30] = half(0)/full(1)
6478 instr [29,23] = 10 1110 0
6479 instr [22] = float()/double(1)
6482 instr [15,10] = 1111 11
6484 instr [4, 0] = Vd. */
6486 unsigned full = INSTR (30, 30);
6487 unsigned vm = INSTR (20, 16);
6488 unsigned vn = INSTR (9, 5);
6489 unsigned vd = INSTR (4, 0);
6492 NYI_assert (29, 23, 0x5C);
6493 NYI_assert (21, 21, 1);
6494 NYI_assert (15, 10, 0x3F);
6496 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6502 for (i = 0; i < 2; i++)
6503 aarch64_set_vec_double (cpu, vd, i,
6504 aarch64_get_vec_double (cpu, vn, i)
6505 / aarch64_get_vec_double (cpu, vm, i));
6508 for (i = 0; i < (full ? 4 : 2); i++)
6509 aarch64_set_vec_float (cpu, vd, i,
6510 aarch64_get_vec_float (cpu, vn, i)
6511 / aarch64_get_vec_float (cpu, vm, i));
6515 do_vec_FMUL (sim_cpu *cpu)
6518 instr [30] = half(0)/full(1)
6519 instr [29,23] = 10 1110 0
6520 instr [22] = float(0)/double(1)
6523 instr [15,10] = 1101 11
6525 instr [4, 0] = Vd. */
6527 unsigned full = INSTR (30, 30);
6528 unsigned vm = INSTR (20, 16);
6529 unsigned vn = INSTR (9, 5);
6530 unsigned vd = INSTR (4, 0);
6533 NYI_assert (29, 23, 0x5C);
6534 NYI_assert (21, 21, 1);
6535 NYI_assert (15, 10, 0x37);
6537 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6543 for (i = 0; i < 2; i++)
6544 aarch64_set_vec_double (cpu, vd, i,
6545 aarch64_get_vec_double (cpu, vn, i)
6546 * aarch64_get_vec_double (cpu, vm, i));
6549 for (i = 0; i < (full ? 4 : 2); i++)
6550 aarch64_set_vec_float (cpu, vd, i,
6551 aarch64_get_vec_float (cpu, vn, i)
6552 * aarch64_get_vec_float (cpu, vm, i));
6556 do_vec_FADDP (sim_cpu *cpu)
6559 instr [30] = half(0)/full(1)
6560 instr [29,23] = 10 1110 0
6561 instr [22] = float(0)/double(1)
6564 instr [15,10] = 1101 01
6566 instr [4, 0] = Vd. */
6568 unsigned full = INSTR (30, 30);
6569 unsigned vm = INSTR (20, 16);
6570 unsigned vn = INSTR (9, 5);
6571 unsigned vd = INSTR (4, 0);
6573 NYI_assert (29, 23, 0x5C);
6574 NYI_assert (21, 21, 1);
6575 NYI_assert (15, 10, 0x35);
6577 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6580 /* Extract values before adding them incase vd == vn/vm. */
6581 double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
6582 double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
6583 double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
6584 double tmp4 = aarch64_get_vec_double (cpu, vm, 1);
6589 aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
6590 aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
6594 /* Extract values before adding them incase vd == vn/vm. */
6595 float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
6596 float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
6597 float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
6598 float tmp6 = aarch64_get_vec_float (cpu, vm, 1);
6602 float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
6603 float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
6604 float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
6605 float tmp8 = aarch64_get_vec_float (cpu, vm, 3);
6607 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6608 aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
6609 aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
6610 aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
6614 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6615 aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
6621 do_vec_FSQRT (sim_cpu *cpu)
6624 instr[30] = half(0)/full(1)
6625 instr[29,23] = 10 1110 1
6626 instr[22] = single(0)/double(1)
6627 instr[21,10] = 10 0001 1111 10
6629 instr[4,0] = Vdest. */
6631 unsigned vn = INSTR (9, 5);
6632 unsigned vd = INSTR (4, 0);
6633 unsigned full = INSTR (30, 30);
6636 NYI_assert (29, 23, 0x5D);
6637 NYI_assert (21, 10, 0x87E);
6639 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6645 for (i = 0; i < 2; i++)
6646 aarch64_set_vec_double (cpu, vd, i,
6647 sqrt (aarch64_get_vec_double (cpu, vn, i)));
6651 for (i = 0; i < (full ? 4 : 2); i++)
6652 aarch64_set_vec_float (cpu, vd, i,
6653 sqrtf (aarch64_get_vec_float (cpu, vn, i)));
6658 do_vec_FNEG (sim_cpu *cpu)
6661 instr[30] = half (0)/full (1)
6662 instr[29,23] = 10 1110 1
6663 instr[22] = single (0)/double (1)
6664 instr[21,10] = 10 0000 1111 10
6666 instr[4,0] = Vdest. */
6668 unsigned vn = INSTR (9, 5);
6669 unsigned vd = INSTR (4, 0);
6670 unsigned full = INSTR (30, 30);
6673 NYI_assert (29, 23, 0x5D);
6674 NYI_assert (21, 10, 0x83E);
6676 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6682 for (i = 0; i < 2; i++)
6683 aarch64_set_vec_double (cpu, vd, i,
6684 - aarch64_get_vec_double (cpu, vn, i));
6688 for (i = 0; i < (full ? 4 : 2); i++)
6689 aarch64_set_vec_float (cpu, vd, i,
6690 - aarch64_get_vec_float (cpu, vn, i));
6695 do_vec_NOT (sim_cpu *cpu)
6698 instr[30] = half (0)/full (1)
6699 instr[29,10] = 10 1110 0010 0000 0101 10
6703 unsigned vn = INSTR (9, 5);
6704 unsigned vd = INSTR (4, 0);
6706 int full = INSTR (30, 30);
6708 NYI_assert (29, 10, 0xB8816);
6710 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6711 for (i = 0; i < (full ? 16 : 8); i++)
6712 aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
/* Count leading zero bits of VAL within its low SIZE bits.
   Returns SIZE when VAL has no set bit inside that window.  */
static uint64_t
clz (uint64_t val, unsigned size)
{
  /* Start from the window's most-significant bit and walk down until
     a set bit is found.  */
  uint64_t mask = (uint64_t) 1 << (size - 1);
  uint64_t count = 0;

  while (mask && ! (val & mask))
    {
      mask >>= 1;
      count ++;
    }

  return count;
}
6736 do_vec_CLZ (sim_cpu *cpu)
6739 instr[30] = half (0)/full (1)
6740 instr[29,24] = 10 1110
6742 instr[21,10] = 10 0000 0100 10
6746 unsigned vn = INSTR (9, 5);
6747 unsigned vd = INSTR (4, 0);
6749 int full = INSTR (30,30);
6751 NYI_assert (29, 24, 0x2E);
6752 NYI_assert (21, 10, 0x812);
6754 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6755 switch (INSTR (23, 22))
6758 for (i = 0; i < (full ? 16 : 8); i++)
6759 aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8));
6762 for (i = 0; i < (full ? 8 : 4); i++)
6763 aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16));
6766 for (i = 0; i < (full ? 4 : 2); i++)
6767 aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32));
6772 aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
6773 aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
6779 do_vec_MOV_element (sim_cpu *cpu)
6781 /* instr[31,21] = 0110 1110 000
6782 instr[20,16] = size & dest index
6784 instr[14,11] = source index
6789 unsigned vs = INSTR (9, 5);
6790 unsigned vd = INSTR (4, 0);
6794 NYI_assert (31, 21, 0x370);
6795 NYI_assert (15, 15, 0);
6796 NYI_assert (10, 10, 1);
6798 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6802 src_index = INSTR (14, 11);
6803 dst_index = INSTR (20, 17);
6804 aarch64_set_vec_u8 (cpu, vd, dst_index,
6805 aarch64_get_vec_u8 (cpu, vs, src_index));
6807 else if (INSTR (17, 17))
6810 NYI_assert (11, 11, 0);
6811 src_index = INSTR (14, 12);
6812 dst_index = INSTR (20, 18);
6813 aarch64_set_vec_u16 (cpu, vd, dst_index,
6814 aarch64_get_vec_u16 (cpu, vs, src_index));
6816 else if (INSTR (18, 18))
6819 NYI_assert (12, 11, 0);
6820 src_index = INSTR (14, 13);
6821 dst_index = INSTR (20, 19);
6822 aarch64_set_vec_u32 (cpu, vd, dst_index,
6823 aarch64_get_vec_u32 (cpu, vs, src_index));
6827 NYI_assert (19, 19, 1);
6828 NYI_assert (13, 11, 0);
6829 src_index = INSTR (14, 14);
6830 dst_index = INSTR (20, 20);
6831 aarch64_set_vec_u64 (cpu, vd, dst_index,
6832 aarch64_get_vec_u64 (cpu, vs, src_index));
6837 do_vec_REV32 (sim_cpu *cpu)
6840 instr[30] = full/half
6841 instr[29,24] = 10 1110
6843 instr[21,10] = 10 0000 0000 10
6847 unsigned rn = INSTR (9, 5);
6848 unsigned rd = INSTR (4, 0);
6849 unsigned size = INSTR (23, 22);
6850 unsigned full = INSTR (30, 30);
6854 NYI_assert (29, 24, 0x2E);
6855 NYI_assert (21, 10, 0x802);
6857 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6861 for (i = 0; i < (full ? 16 : 8); i++)
6862 val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i);
6866 for (i = 0; i < (full ? 8 : 4); i++)
6867 val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i);
6874 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
6876 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
6880 do_vec_EXT (sim_cpu *cpu)
6883 instr[30] = full/half
6884 instr[29,21] = 10 1110 000
6887 instr[14,11] = source index
6892 unsigned vm = INSTR (20, 16);
6893 unsigned vn = INSTR (9, 5);
6894 unsigned vd = INSTR (4, 0);
6895 unsigned src_index = INSTR (14, 11);
6896 unsigned full = INSTR (30, 30);
6901 NYI_assert (31, 21, 0x370);
6902 NYI_assert (15, 15, 0);
6903 NYI_assert (10, 10, 0);
6905 if (!full && (src_index & 0x8))
6910 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6911 for (i = src_index; i < (full ? 16 : 8); i++)
6912 val.b[j ++] = aarch64_get_vec_u8 (cpu, vn, i);
6913 for (i = 0; i < src_index; i++)
6914 val.b[j ++] = aarch64_get_vec_u8 (cpu, vm, i);
6916 aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]);
6918 aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]);
6922 dexAdvSIMD0 (sim_cpu *cpu)
6924 /* instr [28,25] = 0 111. */
6925 if ( INSTR (15, 10) == 0x07
6929 if (INSTR (31, 21) == 0x075
6930 || INSTR (31, 21) == 0x275)
6932 do_vec_MOV_whole_vector (cpu);
6937 if (INSTR (29, 19) == 0x1E0)
6939 do_vec_MOV_immediate (cpu);
6943 if (INSTR (29, 19) == 0x5E0)
6949 if (INSTR (29, 19) == 0x1C0
6950 || INSTR (29, 19) == 0x1C1)
6952 if (INSTR (15, 10) == 0x03)
6954 do_vec_DUP_scalar_into_vector (cpu);
6959 switch (INSTR (29, 24))
6961 case 0x0E: do_vec_op1 (cpu); return;
6962 case 0x0F: do_vec_op2 (cpu); return;
6965 if (INSTR (21, 21) == 1)
6967 switch (INSTR (15, 10))
6974 switch (INSTR (23, 22))
6976 case 0: do_vec_EOR (cpu); return;
6977 case 1: do_vec_BSL (cpu); return;
6979 case 3: do_vec_bit (cpu); return;
6983 case 0x08: do_vec_sub_long (cpu); return;
6984 case 0x11: do_vec_USHL (cpu); return;
6985 case 0x12: do_vec_CLZ (cpu); return;
6986 case 0x16: do_vec_NOT (cpu); return;
6987 case 0x19: do_vec_max (cpu); return;
6988 case 0x1B: do_vec_min (cpu); return;
6989 case 0x21: do_vec_SUB (cpu); return;
6990 case 0x25: do_vec_MLS (cpu); return;
6991 case 0x31: do_vec_FminmaxNMP (cpu); return;
6992 case 0x35: do_vec_FADDP (cpu); return;
6993 case 0x37: do_vec_FMUL (cpu); return;
6994 case 0x3F: do_vec_FDIV (cpu); return;
6997 switch (INSTR (20, 16))
6999 case 0x00: do_vec_FNEG (cpu); return;
7000 case 0x01: do_vec_FSQRT (cpu); return;
7014 do_vec_compare (cpu); return;
7021 if (INSTR (31, 21) == 0x370)
7024 do_vec_MOV_element (cpu);
7030 switch (INSTR (21, 10))
7032 case 0x82E: do_vec_neg (cpu); return;
7033 case 0x87E: do_vec_sqrt (cpu); return;
7035 if (INSTR (15, 10) == 0x30)
7045 switch (INSTR (15, 10))
7047 case 0x01: do_vec_SSHR_USHR (cpu); return;
7049 case 0x12: do_vec_mls_indexed (cpu); return;
7050 case 0x29: do_vec_xtl (cpu); return;
7064 /* Float multiply add. */
7066 fmadds (sim_cpu *cpu)
7068 unsigned sa = INSTR (14, 10);
7069 unsigned sm = INSTR (20, 16);
7070 unsigned sn = INSTR ( 9, 5);
7071 unsigned sd = INSTR ( 4, 0);
7073 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7074 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7075 + aarch64_get_FP_float (cpu, sn)
7076 * aarch64_get_FP_float (cpu, sm));
7079 /* Double multiply add. */
7081 fmaddd (sim_cpu *cpu)
7083 unsigned sa = INSTR (14, 10);
7084 unsigned sm = INSTR (20, 16);
7085 unsigned sn = INSTR ( 9, 5);
7086 unsigned sd = INSTR ( 4, 0);
7088 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7089 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7090 + aarch64_get_FP_double (cpu, sn)
7091 * aarch64_get_FP_double (cpu, sm));
7094 /* Float multiply subtract. */
7096 fmsubs (sim_cpu *cpu)
7098 unsigned sa = INSTR (14, 10);
7099 unsigned sm = INSTR (20, 16);
7100 unsigned sn = INSTR ( 9, 5);
7101 unsigned sd = INSTR ( 4, 0);
7103 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7104 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7105 - aarch64_get_FP_float (cpu, sn)
7106 * aarch64_get_FP_float (cpu, sm));
7109 /* Double multiply subtract. */
7111 fmsubd (sim_cpu *cpu)
7113 unsigned sa = INSTR (14, 10);
7114 unsigned sm = INSTR (20, 16);
7115 unsigned sn = INSTR ( 9, 5);
7116 unsigned sd = INSTR ( 4, 0);
7118 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7119 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7120 - aarch64_get_FP_double (cpu, sn)
7121 * aarch64_get_FP_double (cpu, sm));
7124 /* Float negative multiply add. */
7126 fnmadds (sim_cpu *cpu)
7128 unsigned sa = INSTR (14, 10);
7129 unsigned sm = INSTR (20, 16);
7130 unsigned sn = INSTR ( 9, 5);
7131 unsigned sd = INSTR ( 4, 0);
7133 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7134 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7135 + (- aarch64_get_FP_float (cpu, sn))
7136 * aarch64_get_FP_float (cpu, sm));
7139 /* Double negative multiply add. */
7141 fnmaddd (sim_cpu *cpu)
7143 unsigned sa = INSTR (14, 10);
7144 unsigned sm = INSTR (20, 16);
7145 unsigned sn = INSTR ( 9, 5);
7146 unsigned sd = INSTR ( 4, 0);
7148 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7149 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7150 + (- aarch64_get_FP_double (cpu, sn))
7151 * aarch64_get_FP_double (cpu, sm));
7154 /* Float negative multiply subtract. */
7156 fnmsubs (sim_cpu *cpu)
7158 unsigned sa = INSTR (14, 10);
7159 unsigned sm = INSTR (20, 16);
7160 unsigned sn = INSTR ( 9, 5);
7161 unsigned sd = INSTR ( 4, 0);
7163 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7164 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7165 + aarch64_get_FP_float (cpu, sn)
7166 * aarch64_get_FP_float (cpu, sm));
7169 /* Double negative multiply subtract. */
7171 fnmsubd (sim_cpu *cpu)
7173 unsigned sa = INSTR (14, 10);
7174 unsigned sm = INSTR (20, 16);
7175 unsigned sn = INSTR ( 9, 5);
7176 unsigned sd = INSTR ( 4, 0);
7178 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7179 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7180 + aarch64_get_FP_double (cpu, sn)
7181 * aarch64_get_FP_double (cpu, sm));
7185 dexSimpleFPDataProc3Source (sim_cpu *cpu)
7187 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7189 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7192 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
7193 instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated
7194 instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */
7196 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7197 /* dispatch on combined type:o1:o2. */
7198 uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15);
7205 case 0: fmadds (cpu); return;
7206 case 1: fmsubs (cpu); return;
7207 case 2: fnmadds (cpu); return;
7208 case 3: fnmsubs (cpu); return;
7209 case 4: fmaddd (cpu); return;
7210 case 5: fmsubd (cpu); return;
7211 case 6: fnmaddd (cpu); return;
7212 case 7: fnmsubd (cpu); return;
7214 /* type > 1 is currently unallocated. */
7220 dexSimpleFPFixedConvert (sim_cpu *cpu)
7226 dexSimpleFPCondCompare (sim_cpu *cpu)
7228 /* instr [31,23] = 0001 1110 0
7232 instr [15,12] = condition
7236 instr [3,0] = nzcv */
7238 unsigned rm = INSTR (20, 16);
7239 unsigned rn = INSTR (9, 5);
7241 NYI_assert (31, 23, 0x3C);
7242 NYI_assert (11, 10, 0x1);
7243 NYI_assert (4, 4, 0);
7245 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7246 if (! testConditionCode (cpu, INSTR (15, 12)))
7248 aarch64_set_CPSR (cpu, INSTR (3, 0));
7254 /* Double precision. */
7255 double val1 = aarch64_get_vec_double (cpu, rn, 0);
7256 double val2 = aarch64_get_vec_double (cpu, rm, 0);
7258 /* FIXME: Check for NaNs. */
7260 aarch64_set_CPSR (cpu, (Z | C));
7261 else if (val1 < val2)
7262 aarch64_set_CPSR (cpu, N);
7263 else /* val1 > val2 */
7264 aarch64_set_CPSR (cpu, C);
7268 /* Single precision. */
7269 float val1 = aarch64_get_vec_float (cpu, rn, 0);
7270 float val2 = aarch64_get_vec_float (cpu, rm, 0);
7272 /* FIXME: Check for NaNs. */
7274 aarch64_set_CPSR (cpu, (Z | C));
7275 else if (val1 < val2)
7276 aarch64_set_CPSR (cpu, N);
7277 else /* val1 > val2 */
7278 aarch64_set_CPSR (cpu, C);
7286 fadds (sim_cpu *cpu)
7288 unsigned sm = INSTR (20, 16);
7289 unsigned sn = INSTR ( 9, 5);
7290 unsigned sd = INSTR ( 4, 0);
7292 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7293 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7294 + aarch64_get_FP_float (cpu, sm));
7299 faddd (sim_cpu *cpu)
7301 unsigned sm = INSTR (20, 16);
7302 unsigned sn = INSTR ( 9, 5);
7303 unsigned sd = INSTR ( 4, 0);
7305 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7306 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7307 + aarch64_get_FP_double (cpu, sm));
7312 fdivs (sim_cpu *cpu)
7314 unsigned sm = INSTR (20, 16);
7315 unsigned sn = INSTR ( 9, 5);
7316 unsigned sd = INSTR ( 4, 0);
7318 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7319 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7320 / aarch64_get_FP_float (cpu, sm));
7323 /* Double divide. */
7325 fdivd (sim_cpu *cpu)
7327 unsigned sm = INSTR (20, 16);
7328 unsigned sn = INSTR ( 9, 5);
7329 unsigned sd = INSTR ( 4, 0);
7331 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7332 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7333 / aarch64_get_FP_double (cpu, sm));
7336 /* Float multiply. */
7338 fmuls (sim_cpu *cpu)
7340 unsigned sm = INSTR (20, 16);
7341 unsigned sn = INSTR ( 9, 5);
7342 unsigned sd = INSTR ( 4, 0);
7344 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7345 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7346 * aarch64_get_FP_float (cpu, sm));
7349 /* Double multiply. */
7351 fmuld (sim_cpu *cpu)
7353 unsigned sm = INSTR (20, 16);
7354 unsigned sn = INSTR ( 9, 5);
7355 unsigned sd = INSTR ( 4, 0);
7357 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7358 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7359 * aarch64_get_FP_double (cpu, sm));
7362 /* Float negate and multiply. */
7364 fnmuls (sim_cpu *cpu)
7366 unsigned sm = INSTR (20, 16);
7367 unsigned sn = INSTR ( 9, 5);
7368 unsigned sd = INSTR ( 4, 0);
7370 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7371 aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
7372 * aarch64_get_FP_float (cpu, sm)));
7375 /* Double negate and multiply. */
7377 fnmuld (sim_cpu *cpu)
7379 unsigned sm = INSTR (20, 16);
7380 unsigned sn = INSTR ( 9, 5);
7381 unsigned sd = INSTR ( 4, 0);
7383 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7384 aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
7385 * aarch64_get_FP_double (cpu, sm)));
7388 /* Float subtract. */
7390 fsubs (sim_cpu *cpu)
7392 unsigned sm = INSTR (20, 16);
7393 unsigned sn = INSTR ( 9, 5);
7394 unsigned sd = INSTR ( 4, 0);
7396 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7397 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7398 - aarch64_get_FP_float (cpu, sm));
7401 /* Double subtract. */
7403 fsubd (sim_cpu *cpu)
7405 unsigned sm = INSTR (20, 16);
7406 unsigned sn = INSTR ( 9, 5);
7407 unsigned sd = INSTR ( 4, 0);
7409 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7410 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7411 - aarch64_get_FP_double (cpu, sm));
7415 do_FMINNM (sim_cpu *cpu)
7417 /* instr[31,23] = 0 0011 1100
7418 instr[22] = float(0)/double(1)
7421 instr[15,10] = 01 1110
7425 unsigned sm = INSTR (20, 16);
7426 unsigned sn = INSTR ( 9, 5);
7427 unsigned sd = INSTR ( 4, 0);
7429 NYI_assert (31, 23, 0x03C);
7430 NYI_assert (15, 10, 0x1E);
7432 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7434 aarch64_set_FP_double (cpu, sd,
7435 dminnm (aarch64_get_FP_double (cpu, sn),
7436 aarch64_get_FP_double (cpu, sm)));
7438 aarch64_set_FP_float (cpu, sd,
7439 fminnm (aarch64_get_FP_float (cpu, sn),
7440 aarch64_get_FP_float (cpu, sm)));
7444 do_FMAXNM (sim_cpu *cpu)
7446 /* instr[31,23] = 0 0011 1100
7447 instr[22] = float(0)/double(1)
7450 instr[15,10] = 01 1010
7454 unsigned sm = INSTR (20, 16);
7455 unsigned sn = INSTR ( 9, 5);
7456 unsigned sd = INSTR ( 4, 0);
7458 NYI_assert (31, 23, 0x03C);
7459 NYI_assert (15, 10, 0x1A);
7461 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7463 aarch64_set_FP_double (cpu, sd,
7464 dmaxnm (aarch64_get_FP_double (cpu, sn),
7465 aarch64_get_FP_double (cpu, sm)));
7467 aarch64_set_FP_float (cpu, sd,
7468 fmaxnm (aarch64_get_FP_float (cpu, sn),
7469 aarch64_get_FP_float (cpu, sm)));
7473 dexSimpleFPDataProc2Source (sim_cpu *cpu)
7475 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7477 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7480 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
7483 instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
7484 0010 ==> FADD, 0011 ==> FSUB,
7485 0100 ==> FMAX, 0101 ==> FMIN
7486 0110 ==> FMAXNM, 0111 ==> FMINNM
7487 1000 ==> FNMUL, ow ==> UNALLOC
7492 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7493 uint32_t type = INSTR (23, 22);
7494 /* Dispatch on opcode. */
7495 uint32_t dispatch = INSTR (15, 12);
7506 case 0: fmuld (cpu); return;
7507 case 1: fdivd (cpu); return;
7508 case 2: faddd (cpu); return;
7509 case 3: fsubd (cpu); return;
7510 case 6: do_FMAXNM (cpu); return;
7511 case 7: do_FMINNM (cpu); return;
7512 case 8: fnmuld (cpu); return;
7514 /* Have not yet implemented fmax and fmin. */
7522 else /* type == 0 => floats. */
7525 case 0: fmuls (cpu); return;
7526 case 1: fdivs (cpu); return;
7527 case 2: fadds (cpu); return;
7528 case 3: fsubs (cpu); return;
7529 case 6: do_FMAXNM (cpu); return;
7530 case 7: do_FMINNM (cpu); return;
7531 case 8: fnmuls (cpu); return;
7543 dexSimpleFPCondSelect (sim_cpu *cpu)
7546 instr[31,23] = 0 0011 1100
7547 instr[22] = 0=>single 1=>double
7554 unsigned sm = INSTR (20, 16);
7555 unsigned sn = INSTR ( 9, 5);
7556 unsigned sd = INSTR ( 4, 0);
7557 uint32_t set = testConditionCode (cpu, INSTR (15, 12));
7559 NYI_assert (31, 23, 0x03C);
7560 NYI_assert (11, 10, 0x3);
7562 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7564 aarch64_set_FP_double (cpu, sd, (set ? aarch64_get_FP_double (cpu, sn)
7565 : aarch64_get_FP_double (cpu, sm)));
7567 aarch64_set_FP_float (cpu, sd, (set ? aarch64_get_FP_float (cpu, sn)
7568 : aarch64_get_FP_float (cpu, sm)));
7571 /* Store 32 bit unscaled signed 9 bit. */
7573 fsturs (sim_cpu *cpu, int32_t offset)
7575 unsigned int rn = INSTR (9, 5);
7576 unsigned int st = INSTR (4, 0);
7578 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7579 aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, 1) + offset,
7580 aarch64_get_vec_u32 (cpu, st, 0));
7583 /* Store 64 bit unscaled signed 9 bit. */
7585 fsturd (sim_cpu *cpu, int32_t offset)
7587 unsigned int rn = INSTR (9, 5);
7588 unsigned int st = INSTR (4, 0);
7590 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7591 aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, 1) + offset,
7592 aarch64_get_vec_u64 (cpu, st, 0));
7595 /* Store 128 bit unscaled signed 9 bit. */
7597 fsturq (sim_cpu *cpu, int32_t offset)
7599 unsigned int rn = INSTR (9, 5);
7600 unsigned int st = INSTR (4, 0);
7603 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7604 aarch64_get_FP_long_double (cpu, st, & a);
7605 aarch64_set_mem_long_double (cpu,
7606 aarch64_get_reg_u64 (cpu, rn, 1)
7610 /* TODO FP move register. */
7612 /* 32 bit fp to fp move register. */
7614 ffmovs (sim_cpu *cpu)
7616 unsigned int rn = INSTR (9, 5);
7617 unsigned int st = INSTR (4, 0);
7619 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7620 aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
7623 /* 64 bit fp to fp move register. */
7625 ffmovd (sim_cpu *cpu)
7627 unsigned int rn = INSTR (9, 5);
7628 unsigned int st = INSTR (4, 0);
7630 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7631 aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
7634 /* 32 bit GReg to Vec move register. */
7636 fgmovs (sim_cpu *cpu)
7638 unsigned int rn = INSTR (9, 5);
7639 unsigned int st = INSTR (4, 0);
7641 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7642 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
7645 /* 64 bit g to fp move register. */
7647 fgmovd (sim_cpu *cpu)
7649 unsigned int rn = INSTR (9, 5);
7650 unsigned int st = INSTR (4, 0);
7652 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7653 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7656 /* 32 bit fp to g move register. */
7658 gfmovs (sim_cpu *cpu)
7660 unsigned int rn = INSTR (9, 5);
7661 unsigned int st = INSTR (4, 0);
7663 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7664 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
7667 /* 64 bit fp to g move register. */
7669 gfmovd (sim_cpu *cpu)
7671 unsigned int rn = INSTR (9, 5);
7672 unsigned int st = INSTR (4, 0);
7674 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7675 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
7678 /* FP move immediate
7680 These install an immediate 8 bit value in the target register
7681 where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
7685 fmovs (sim_cpu *cpu)
7687 unsigned int sd = INSTR (4, 0);
7688 uint32_t imm = INSTR (20, 13);
7689 float f = fp_immediate_for_encoding_32 (imm);
7691 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7692 aarch64_set_FP_float (cpu, sd, f);
7696 fmovd (sim_cpu *cpu)
7698 unsigned int sd = INSTR (4, 0);
7699 uint32_t imm = INSTR (20, 13);
7700 double d = fp_immediate_for_encoding_64 (imm);
7702 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7703 aarch64_set_FP_double (cpu, sd, d);
7707 dexSimpleFPImmediate (sim_cpu *cpu)
7709 /* instr[31,23] == 00111100
7710 instr[22] == type : single(0)/double(1)
7712 instr[20,13] == imm8
7714 instr[9,5] == imm5 : 00000 ==> PK, ow ==> UNALLOC
7716 uint32_t imm5 = INSTR (9, 5);
7718 NYI_assert (31, 23, 0x3C);
7729 /* TODO specific decode and execute for group Load Store. */
7731 /* TODO FP load/store single register (unscaled offset). */
7733 /* TODO load 8 bit unscaled signed 9 bit. */
7734 /* TODO load 16 bit unscaled signed 9 bit. */
7736 /* Load 32 bit unscaled signed 9 bit. */
7738 fldurs (sim_cpu *cpu, int32_t offset)
7740 unsigned int rn = INSTR (9, 5);
7741 unsigned int st = INSTR (4, 0);
7743 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7744 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
7745 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7748 /* Load 64 bit unscaled signed 9 bit. */
7750 fldurd (sim_cpu *cpu, int32_t offset)
7752 unsigned int rn = INSTR (9, 5);
7753 unsigned int st = INSTR (4, 0);
7755 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7756 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
7757 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7760 /* Load 128 bit unscaled signed 9 bit. */
7762 fldurq (sim_cpu *cpu, int32_t offset)
7764 unsigned int rn = INSTR (9, 5);
7765 unsigned int st = INSTR (4, 0);
7767 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
7769 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7770 aarch64_get_mem_long_double (cpu, addr, & a);
7771 aarch64_set_FP_long_double (cpu, st, a);
7774 /* TODO store 8 bit unscaled signed 9 bit. */
7775 /* TODO store 16 bit unscaled signed 9 bit. */
7780 /* Float absolute value. */
7782 fabss (sim_cpu *cpu)
7784 unsigned sn = INSTR (9, 5);
7785 unsigned sd = INSTR (4, 0);
7786 float value = aarch64_get_FP_float (cpu, sn);
7788 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7789 aarch64_set_FP_float (cpu, sd, fabsf (value));
7792 /* Double absolute value. */
7794 fabcpu (sim_cpu *cpu)
7796 unsigned sn = INSTR (9, 5);
7797 unsigned sd = INSTR (4, 0);
7798 double value = aarch64_get_FP_double (cpu, sn);
7800 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7801 aarch64_set_FP_double (cpu, sd, fabs (value));
7804 /* Float negative value. */
7806 fnegs (sim_cpu *cpu)
7808 unsigned sn = INSTR (9, 5);
7809 unsigned sd = INSTR (4, 0);
7811 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7812 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
7815 /* Double negative value. */
7817 fnegd (sim_cpu *cpu)
7819 unsigned sn = INSTR (9, 5);
7820 unsigned sd = INSTR (4, 0);
7822 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7823 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
7826 /* Float square root. */
7828 fsqrts (sim_cpu *cpu)
7830 unsigned sn = INSTR (9, 5);
7831 unsigned sd = INSTR (4, 0);
7833 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7834 aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn)));
7837 /* Double square root. */
7839 fsqrtd (sim_cpu *cpu)
7841 unsigned sn = INSTR (9, 5);
7842 unsigned sd = INSTR (4, 0);
7844 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7845 aarch64_set_FP_double (cpu, sd,
7846 sqrt (aarch64_get_FP_double (cpu, sn)));
7849 /* Convert double to float. */
7851 fcvtds (sim_cpu *cpu)
7853 unsigned sn = INSTR (9, 5);
7854 unsigned sd = INSTR (4, 0);
7856 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7857 aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
7860 /* Convert float to double. */
7862 fcvtcpu (sim_cpu *cpu)
7864 unsigned sn = INSTR (9, 5);
7865 unsigned sd = INSTR (4, 0);
7867 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7868 aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
7872 do_FRINT (sim_cpu *cpu)
7874 /* instr[31,23] = 0001 1110 0
7875 instr[22] = single(0)/double(1)
7877 instr[17,15] = rounding mode
7878 instr[14,10] = 10000
7880 instr[4,0] = dest */
7883 unsigned rs = INSTR (9, 5);
7884 unsigned rd = INSTR (4, 0);
7885 unsigned int rmode = INSTR (17, 15);
7887 NYI_assert (31, 23, 0x03C);
7888 NYI_assert (21, 18, 0x9);
7889 NYI_assert (14, 10, 0x10);
7891 if (rmode == 6 || rmode == 7)
7892 /* FIXME: Add support for rmode == 6 exactness check. */
7893 rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
7895 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7898 double val = aarch64_get_FP_double (cpu, rs);
7902 case 0: /* mode N: nearest or even. */
7904 double rval = round (val);
7906 if (val - rval == 0.5)
7908 if (((rval / 2.0) * 2.0) != rval)
7912 aarch64_set_FP_double (cpu, rd, round (val));
7916 case 1: /* mode P: towards +inf. */
7918 aarch64_set_FP_double (cpu, rd, trunc (val));
7920 aarch64_set_FP_double (cpu, rd, round (val));
7923 case 2: /* mode M: towards -inf. */
7925 aarch64_set_FP_double (cpu, rd, round (val));
7927 aarch64_set_FP_double (cpu, rd, trunc (val));
7930 case 3: /* mode Z: towards 0. */
7931 aarch64_set_FP_double (cpu, rd, trunc (val));
7934 case 4: /* mode A: away from 0. */
7935 aarch64_set_FP_double (cpu, rd, round (val));
7938 case 6: /* mode X: use FPCR with exactness check. */
7939 case 7: /* mode I: use FPCR mode. */
7947 val = aarch64_get_FP_float (cpu, rs);
7951 case 0: /* mode N: nearest or even. */
7953 float rval = roundf (val);
7955 if (val - rval == 0.5)
7957 if (((rval / 2.0) * 2.0) != rval)
7961 aarch64_set_FP_float (cpu, rd, rval);
7965 case 1: /* mode P: towards +inf. */
7967 aarch64_set_FP_float (cpu, rd, truncf (val));
7969 aarch64_set_FP_float (cpu, rd, roundf (val));
7972 case 2: /* mode M: towards -inf. */
7974 aarch64_set_FP_float (cpu, rd, truncf (val));
7976 aarch64_set_FP_float (cpu, rd, roundf (val));
7979 case 3: /* mode Z: towards 0. */
7980 aarch64_set_FP_float (cpu, rd, truncf (val));
7983 case 4: /* mode A: away from 0. */
7984 aarch64_set_FP_float (cpu, rd, roundf (val));
7987 case 6: /* mode X: use FPCR with exactness check. */
7988 case 7: /* mode I: use FPCR mode. */
7996 /* Convert half to float. */
7998 do_FCVT_half_to_single (sim_cpu *cpu)
8000 unsigned rn = INSTR (9, 5);
8001 unsigned rd = INSTR (4, 0);
8003 NYI_assert (31, 10, 0x7B890);
8005 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8006 aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn));
8009 /* Convert half to double. */
8011 do_FCVT_half_to_double (sim_cpu *cpu)
8013 unsigned rn = INSTR (9, 5);
8014 unsigned rd = INSTR (4, 0);
8016 NYI_assert (31, 10, 0x7B8B0);
8018 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8019 aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn));
8023 do_FCVT_single_to_half (sim_cpu *cpu)
8025 unsigned rn = INSTR (9, 5);
8026 unsigned rd = INSTR (4, 0);
8028 NYI_assert (31, 10, 0x788F0);
8030 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8031 aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn));
8034 /* Convert double to half. */
8036 do_FCVT_double_to_half (sim_cpu *cpu)
8038 unsigned rn = INSTR (9, 5);
8039 unsigned rd = INSTR (4, 0);
8041 NYI_assert (31, 10, 0x798F0);
8043 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8044 aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn));
8048 dexSimpleFPDataProc1Source (sim_cpu *cpu)
8050 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
8052 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8055 instr[23,22] ==> type : 00 ==> source is single,
8056 01 ==> source is double
8058 11 ==> UNALLOC or source is half
8060 instr[20,15] ==> opcode : with type 00 or 01
8061 000000 ==> FMOV, 000001 ==> FABS,
8062 000010 ==> FNEG, 000011 ==> FSQRT,
8063 000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double)
8064 000110 ==> UNALLOC, 000111 ==> FCVT (to half)
8065 001000 ==> FRINTN, 001001 ==> FRINTP,
8066 001010 ==> FRINTM, 001011 ==> FRINTZ,
8067 001100 ==> FRINTA, 001101 ==> UNALLOC
8068 001110 ==> FRINTX, 001111 ==> FRINTI
8070 000100 ==> FCVT (half-to-single)
8071 000101 ==> FCVT (half-to-double)
8072 instr[14,10] = 10000. */
8074 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8075 uint32_t type = INSTR (23, 22);
8076 uint32_t opcode = INSTR (20, 15);
8084 do_FCVT_half_to_single (cpu);
8085 else if (opcode == 5)
8086 do_FCVT_half_to_double (cpu);
8138 case 8: /* FRINTN etc. */
8150 do_FCVT_double_to_half (cpu);
8152 do_FCVT_single_to_half (cpu);
8163 /* 32 bit signed int to float. */
8165 scvtf32 (sim_cpu *cpu)
8167 unsigned rn = INSTR (9, 5);
8168 unsigned sd = INSTR (4, 0);
8170 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8171 aarch64_set_FP_float
8172 (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8175 /* signed int to float. */
8177 scvtf (sim_cpu *cpu)
8179 unsigned rn = INSTR (9, 5);
8180 unsigned sd = INSTR (4, 0);
8182 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8183 aarch64_set_FP_float
8184 (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8187 /* 32 bit signed int to double. */
8189 scvtd32 (sim_cpu *cpu)
8191 unsigned rn = INSTR (9, 5);
8192 unsigned sd = INSTR (4, 0);
8194 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8195 aarch64_set_FP_double
8196 (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8199 /* signed int to double. */
8201 scvtd (sim_cpu *cpu)
8203 unsigned rn = INSTR (9, 5);
8204 unsigned sd = INSTR (4, 0);
8206 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8207 aarch64_set_FP_double
8208 (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
/* Saturation bounds for FP -> integer conversion, expressed in the
   source FP type.  Used via token pasting by the RAISE_EXCEPTIONS
   macro (FTYPE##_##ITYPE##_MAX / _MIN).  */
static const float  FLOAT_INT_MAX   = (float)  INT_MAX;
static const float  FLOAT_INT_MIN   = (float)  INT_MIN;
static const double DOUBLE_INT_MAX  = (double) INT_MAX;
static const double DOUBLE_INT_MIN  = (double) INT_MIN;
static const float  FLOAT_LONG_MAX  = (float)  LONG_MAX;
static const float  FLOAT_LONG_MIN  = (float)  LONG_MIN;
static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
static const double DOUBLE_LONG_MIN = (double) LONG_MIN;

/* Unsigned minima are zero; define them so the ITYPE##_MIN token
   pasting works uniformly for unsigned types too.  */
#ifndef UINT_MIN
#define UINT_MIN 0
#endif
#ifndef ULONG_MIN
#define ULONG_MIN 0
#endif
static const float  FLOAT_UINT_MAX   = (float)  UINT_MAX;
static const float  FLOAT_UINT_MIN   = (float)  UINT_MIN;
static const double DOUBLE_UINT_MAX  = (double) UINT_MAX;
static const double DOUBLE_UINT_MIN  = (double) UINT_MIN;
static const float  FLOAT_ULONG_MAX  = (float)  ULONG_MAX;
static const float  FLOAT_ULONG_MIN  = (float)  ULONG_MIN;
static const double DOUBLE_ULONG_MAX = (double) ULONG_MAX;
static const double DOUBLE_ULONG_MIN = (double) ULONG_MIN;
/* Check for FP exception conditions:
     NaN raises IO
     Infinity raises IO
     Out of Range raises IO and IX and saturates value
     Denormal raises ID and IX and sets to zero.
   NOTE(review): in the NaN/Inf case a negative sign selects ITYPE##_MAX,
   positive selects ITYPE##_MIN, matching the original ordering — the
   polarity looks inverted; confirm against upstream before changing.  */
#define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE)                \
  do                                                            \
    {                                                           \
      switch (fpclassify (F))                                   \
        {                                                       \
        case FP_INFINITE:                                       \
        case FP_NAN:                                            \
          aarch64_set_FPSR (cpu, IO);                           \
          if (signbit (F))                                      \
            VALUE = ITYPE##_MAX;                                \
          else                                                  \
            VALUE = ITYPE##_MIN;                                \
          break;                                                \
                                                                \
        case FP_NORMAL:                                         \
          if (F >= FTYPE##_##ITYPE##_MAX)                       \
            {                                                   \
              aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX);    \
              VALUE = ITYPE##_MAX;                              \
            }                                                   \
          else if (F <= FTYPE##_##ITYPE##_MIN)                  \
            {                                                   \
              aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX);    \
              VALUE = ITYPE##_MIN;                              \
            }                                                   \
          break;                                                \
                                                                \
        case FP_SUBNORMAL:                                      \
          aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID);   \
          VALUE = 0;                                            \
          break;                                                \
                                                                \
        default:                                                \
          break;                                                \
        }                                                       \
    }                                                           \
  while (0)
8276 /* 32 bit convert float to signed int truncate towards zero. */
8278 fcvtszs32 (sim_cpu *cpu)
8280 unsigned sn = INSTR (9, 5);
8281 unsigned rd = INSTR (4, 0);
8282 /* TODO : check that this rounds toward zero. */
8283 float f = aarch64_get_FP_float (cpu, sn);
8284 int32_t value = (int32_t) f;
8286 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
8288 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8289 /* Avoid sign extension to 64 bit. */
8290 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8293 /* 64 bit convert float to signed int truncate towards zero. */
8295 fcvtszs (sim_cpu *cpu)
8297 unsigned sn = INSTR (9, 5);
8298 unsigned rd = INSTR (4, 0);
8299 float f = aarch64_get_FP_float (cpu, sn);
8300 int64_t value = (int64_t) f;
8302 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
8304 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8305 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8308 /* 32 bit convert double to signed int truncate towards zero. */
8310 fcvtszd32 (sim_cpu *cpu)
8312 unsigned sn = INSTR (9, 5);
8313 unsigned rd = INSTR (4, 0);
8314 /* TODO : check that this rounds toward zero. */
8315 double d = aarch64_get_FP_double (cpu, sn);
8316 int32_t value = (int32_t) d;
8318 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
8320 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8321 /* Avoid sign extension to 64 bit. */
8322 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8325 /* 64 bit convert double to signed int truncate towards zero. */
8327 fcvtszd (sim_cpu *cpu)
8329 unsigned sn = INSTR (9, 5);
8330 unsigned rd = INSTR (4, 0);
8331 /* TODO : check that this rounds toward zero. */
8332 double d = aarch64_get_FP_double (cpu, sn);
8335 value = (int64_t) d;
8337 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
8339 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8340 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8344 do_fcvtzu (sim_cpu *cpu)
8346 /* instr[31] = size: 32-bit (0), 64-bit (1)
8347 instr[30,23] = 00111100
8348 instr[22] = type: single (0)/ double (1)
8349 instr[21] = enable (0)/disable(1) precision
8350 instr[20,16] = 11001
8351 instr[15,10] = precision
8355 unsigned rs = INSTR (9, 5);
8356 unsigned rd = INSTR (4, 0);
8358 NYI_assert (30, 23, 0x3C);
8359 NYI_assert (20, 16, 0x19);
8361 if (INSTR (21, 21) != 1)
8362 /* Convert to fixed point. */
8365 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8368 /* Convert to unsigned 64-bit integer. */
8371 double d = aarch64_get_FP_double (cpu, rs);
8372 uint64_t value = (uint64_t) d;
8374 /* Do not raise an exception if we have reached ULONG_MAX. */
8375 if (value != (1UL << 63))
8376 RAISE_EXCEPTIONS (d, value, DOUBLE, ULONG);
8378 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8382 float f = aarch64_get_FP_float (cpu, rs);
8383 uint64_t value = (uint64_t) f;
8385 /* Do not raise an exception if we have reached ULONG_MAX. */
8386 if (value != (1UL << 63))
8387 RAISE_EXCEPTIONS (f, value, FLOAT, ULONG);
8389 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8396 /* Convert to unsigned 32-bit integer. */
8399 double d = aarch64_get_FP_double (cpu, rs);
8401 value = (uint32_t) d;
8402 /* Do not raise an exception if we have reached UINT_MAX. */
8403 if (value != (1UL << 31))
8404 RAISE_EXCEPTIONS (d, value, DOUBLE, UINT);
8408 float f = aarch64_get_FP_float (cpu, rs);
8410 value = (uint32_t) f;
8411 /* Do not raise an exception if we have reached UINT_MAX. */
8412 if (value != (1UL << 31))
8413 RAISE_EXCEPTIONS (f, value, FLOAT, UINT);
8416 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8421 do_UCVTF (sim_cpu *cpu)
8423 /* instr[31] = size: 32-bit (0), 64-bit (1)
8424 instr[30,23] = 001 1110 0
8425 instr[22] = type: single (0)/ double (1)
8426 instr[21] = enable (0)/disable(1) precision
8427 instr[20,16] = 0 0011
8428 instr[15,10] = precision
8432 unsigned rs = INSTR (9, 5);
8433 unsigned rd = INSTR (4, 0);
8435 NYI_assert (30, 23, 0x3C);
8436 NYI_assert (20, 16, 0x03);
8438 if (INSTR (21, 21) != 1)
8441 /* FIXME: Add exception raising. */
8442 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8445 uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);
8448 aarch64_set_FP_double (cpu, rd, (double) value);
8450 aarch64_set_FP_float (cpu, rd, (float) value);
8454 uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP);
8457 aarch64_set_FP_double (cpu, rd, (double) value);
8459 aarch64_set_FP_float (cpu, rd, (float) value);
8464 float_vector_move (sim_cpu *cpu)
8466 /* instr[31,17] == 100 1111 0101 0111
8467 instr[16] ==> direction 0=> to GR, 1=> from GR
8469 instr[9,5] ==> source
8470 instr[4,0] ==> dest. */
8472 unsigned rn = INSTR (9, 5);
8473 unsigned rd = INSTR (4, 0);
8475 NYI_assert (31, 17, 0x4F57);
8477 if (INSTR (15, 10) != 0)
8480 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8482 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
8484 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
8488 dexSimpleFPIntegerConvert (sim_cpu *cpu)
8490 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
8492 instr[29] = S : 0 ==> OK, 1 ==> UNALLOC
8495 instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8497 instr[20,19] = rmode
8498 instr[18,16] = opcode
8499 instr[15,10] = 10 0000 */
8501 uint32_t rmode_opcode;
8507 if (INSTR (31, 17) == 0x4F57)
8509 float_vector_move (cpu);
8513 size = INSTR (31, 31);
8518 type = INSTR (23, 22);
8522 rmode_opcode = INSTR (20, 16);
8523 size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */
8525 switch (rmode_opcode)
8527 case 2: /* SCVTF. */
8530 case 0: scvtf32 (cpu); return;
8531 case 1: scvtd32 (cpu); return;
8532 case 2: scvtf (cpu); return;
8533 case 3: scvtd (cpu); return;
8536 case 6: /* FMOV GR, Vec. */
8539 case 0: gfmovs (cpu); return;
8540 case 3: gfmovd (cpu); return;
8541 default: HALT_UNALLOC;
8544 case 7: /* FMOV vec, GR. */
8547 case 0: fgmovs (cpu); return;
8548 case 3: fgmovd (cpu); return;
8549 default: HALT_UNALLOC;
8552 case 24: /* FCVTZS. */
8555 case 0: fcvtszs32 (cpu); return;
8556 case 1: fcvtszd32 (cpu); return;
8557 case 2: fcvtszs (cpu); return;
8558 case 3: fcvtszd (cpu); return;
8561 case 25: do_fcvtzu (cpu); return;
8562 case 3: do_UCVTF (cpu); return;
8564 case 0: /* FCVTNS. */
8565 case 1: /* FCVTNU. */
8566 case 4: /* FCVTAS. */
8567 case 5: /* FCVTAU. */
8568 case 8: /* FCVPTS. */
8569 case 9: /* FCVTPU. */
8570 case 16: /* FCVTMS. */
8571 case 17: /* FCVTMU. */
8578 set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
8582 /* FIXME: Add exception raising. */
8583 if (isnan (fvalue1) || isnan (fvalue2))
8585 else if (isinf (fvalue1) && isinf (fvalue2))
8587 /* Subtracting two infinities may give a NaN. We only need to compare
8588 the signs, which we can get from isinf. */
8589 int result = isinf (fvalue1) - isinf (fvalue2);
8593 else if (result < 0)
8595 else /* (result > 0). */
8600 float result = fvalue1 - fvalue2;
8604 else if (result < 0)
8606 else /* (result > 0). */
8610 aarch64_set_CPSR (cpu, flags);
8614 fcmps (sim_cpu *cpu)
8616 unsigned sm = INSTR (20, 16);
8617 unsigned sn = INSTR ( 9, 5);
8619 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8620 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8622 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8623 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8626 /* Float compare to zero -- Invalid Operation exception
8627 only on signaling NaNs. */
8629 fcmpzs (sim_cpu *cpu)
8631 unsigned sn = INSTR ( 9, 5);
8632 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8634 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8635 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8638 /* Float compare -- Invalid Operation exception on all NaNs. */
8640 fcmpes (sim_cpu *cpu)
8642 unsigned sm = INSTR (20, 16);
8643 unsigned sn = INSTR ( 9, 5);
8645 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8646 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8648 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8649 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8652 /* Float compare to zero -- Invalid Operation exception on all NaNs. */
8654 fcmpzes (sim_cpu *cpu)
8656 unsigned sn = INSTR ( 9, 5);
8657 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8659 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8660 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8664 set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
8668 /* FIXME: Add exception raising. */
8669 if (isnan (dval1) || isnan (dval2))
8671 else if (isinf (dval1) && isinf (dval2))
8673 /* Subtracting two infinities may give a NaN. We only need to compare
8674 the signs, which we can get from isinf. */
8675 int result = isinf (dval1) - isinf (dval2);
8679 else if (result < 0)
8681 else /* (result > 0). */
8686 double result = dval1 - dval2;
8690 else if (result < 0)
8692 else /* (result > 0). */
8696 aarch64_set_CPSR (cpu, flags);
8699 /* Double compare -- Invalid Operation exception only on signaling NaNs. */
8701 fcmpd (sim_cpu *cpu)
8703 unsigned sm = INSTR (20, 16);
8704 unsigned sn = INSTR ( 9, 5);
8706 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8707 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8709 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8710 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8713 /* Double compare to zero -- Invalid Operation exception
8714 only on signaling NaNs. */
8716 fcmpzd (sim_cpu *cpu)
8718 unsigned sn = INSTR ( 9, 5);
8719 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8721 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8722 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8725 /* Double compare -- Invalid Operation exception on all NaNs. */
8727 fcmped (sim_cpu *cpu)
8729 unsigned sm = INSTR (20, 16);
8730 unsigned sn = INSTR ( 9, 5);
8732 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8733 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8735 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8736 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8739 /* Double compare to zero -- Invalid Operation exception on all NaNs. */
8741 fcmpzed (sim_cpu *cpu)
8743 unsigned sn = INSTR ( 9, 5);
8744 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8746 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8747 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8751 dexSimpleFPCompare (sim_cpu *cpu)
8753 /* assert instr[28,25] == 1111
8754 instr[30:24:21:13,10] = 0011000
8755 instr[31] = M : 0 ==> OK, 1 ==> UNALLOC
8756 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8757 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
8758 instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC
8759 instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
8760 01000 ==> FCMPZ, 11000 ==> FCMPEZ,
8763 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8764 uint32_t type = INSTR (23, 22);
8765 uint32_t op = INSTR (15, 14);
8766 uint32_t op2_2_0 = INSTR (2, 0);
8780 /* dispatch on type and top 2 bits of opcode. */
8781 dispatch = (type << 2) | INSTR (4, 3);
8785 case 0: fcmps (cpu); return;
8786 case 1: fcmpzs (cpu); return;
8787 case 2: fcmpes (cpu); return;
8788 case 3: fcmpzes (cpu); return;
8789 case 4: fcmpd (cpu); return;
8790 case 5: fcmpzd (cpu); return;
8791 case 6: fcmped (cpu); return;
8792 case 7: fcmpzed (cpu); return;
8797 do_scalar_FADDP (sim_cpu *cpu)
8799 /* instr [31,23] = 0111 1110 0
8800 instr [22] = single(0)/double(1)
8801 instr [21,10] = 11 0000 1101 10
8803 instr [4,0] = Fd. */
8805 unsigned Fn = INSTR (9, 5);
8806 unsigned Fd = INSTR (4, 0);
8808 NYI_assert (31, 23, 0x0FC);
8809 NYI_assert (21, 10, 0xC36);
8811 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8814 double val1 = aarch64_get_vec_double (cpu, Fn, 0);
8815 double val2 = aarch64_get_vec_double (cpu, Fn, 1);
8817 aarch64_set_FP_double (cpu, Fd, val1 + val2);
8821 float val1 = aarch64_get_vec_float (cpu, Fn, 0);
8822 float val2 = aarch64_get_vec_float (cpu, Fn, 1);
8824 aarch64_set_FP_float (cpu, Fd, val1 + val2);
8828 /* Floating point absolute difference. */
8831 do_scalar_FABD (sim_cpu *cpu)
8833 /* instr [31,23] = 0111 1110 1
8834 instr [22] = float(0)/double(1)
8837 instr [15,10] = 1101 01
8839 instr [4, 0] = Rd. */
8841 unsigned rm = INSTR (20, 16);
8842 unsigned rn = INSTR (9, 5);
8843 unsigned rd = INSTR (4, 0);
8845 NYI_assert (31, 23, 0x0FD);
8846 NYI_assert (21, 21, 1);
8847 NYI_assert (15, 10, 0x35);
8849 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8851 aarch64_set_FP_double (cpu, rd,
8852 fabs (aarch64_get_FP_double (cpu, rn)
8853 - aarch64_get_FP_double (cpu, rm)));
8855 aarch64_set_FP_float (cpu, rd,
8856 fabsf (aarch64_get_FP_float (cpu, rn)
8857 - aarch64_get_FP_float (cpu, rm)));
8861 do_scalar_CMGT (sim_cpu *cpu)
8863 /* instr [31,21] = 0101 1110 111
8865 instr [15,10] = 00 1101
8867 instr [4, 0] = Rd. */
8869 unsigned rm = INSTR (20, 16);
8870 unsigned rn = INSTR (9, 5);
8871 unsigned rd = INSTR (4, 0);
8873 NYI_assert (31, 21, 0x2F7);
8874 NYI_assert (15, 10, 0x0D);
8876 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8877 aarch64_set_vec_u64 (cpu, rd, 0,
8878 aarch64_get_vec_u64 (cpu, rn, 0) >
8879 aarch64_get_vec_u64 (cpu, rm, 0) ? -1L : 0L);
8883 do_scalar_USHR (sim_cpu *cpu)
8885 /* instr [31,23] = 0111 1111 0
8886 instr [22,16] = shift amount
8887 instr [15,10] = 0000 01
8889 instr [4, 0] = Rd. */
8891 unsigned amount = 128 - INSTR (22, 16);
8892 unsigned rn = INSTR (9, 5);
8893 unsigned rd = INSTR (4, 0);
8895 NYI_assert (31, 23, 0x0FE);
8896 NYI_assert (15, 10, 0x01);
8898 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8899 aarch64_set_vec_u64 (cpu, rd, 0,
8900 aarch64_get_vec_u64 (cpu, rn, 0) >> amount);
8904 do_scalar_SSHL (sim_cpu *cpu)
8906 /* instr [31,21] = 0101 1110 111
8908 instr [15,10] = 0100 01
8910 instr [4, 0] = Rd. */
8912 unsigned rm = INSTR (20, 16);
8913 unsigned rn = INSTR (9, 5);
8914 unsigned rd = INSTR (4, 0);
8915 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8917 NYI_assert (31, 21, 0x2F7);
8918 NYI_assert (15, 10, 0x11);
8920 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8922 aarch64_set_vec_s64 (cpu, rd, 0,
8923 aarch64_get_vec_s64 (cpu, rn, 0) << shift);
8925 aarch64_set_vec_s64 (cpu, rd, 0,
8926 aarch64_get_vec_s64 (cpu, rn, 0) >> - shift);
8930 do_scalar_shift (sim_cpu *cpu)
8932 /* instr [31,23] = 0101 1111 0
8933 instr [22,16] = shift amount
8934 instr [15,10] = 0101 01 [SHL]
8935 instr [15,10] = 0000 01 [SSHR]
8937 instr [4, 0] = Rd. */
8939 unsigned rn = INSTR (9, 5);
8940 unsigned rd = INSTR (4, 0);
8943 NYI_assert (31, 23, 0x0BE);
8945 if (INSTR (22, 22) == 0)
8948 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8949 switch (INSTR (15, 10))
8951 case 0x01: /* SSHR */
8952 amount = 128 - INSTR (22, 16);
8953 aarch64_set_vec_s64 (cpu, rd, 0,
8954 aarch64_get_vec_s64 (cpu, rn, 0) >> amount);
8956 case 0x15: /* SHL */
8957 amount = INSTR (22, 16) - 64;
8958 aarch64_set_vec_u64 (cpu, rd, 0,
8959 aarch64_get_vec_u64 (cpu, rn, 0) << amount);
8966 /* FCMEQ FCMGT FCMGE. */
8968 do_scalar_FCM (sim_cpu *cpu)
8970 /* instr [31,30] = 01
8972 instr [28,24] = 1 1110
8977 instr [15,12] = 1110
8981 instr [4, 0] = Rd. */
8983 unsigned rm = INSTR (20, 16);
8984 unsigned rn = INSTR (9, 5);
8985 unsigned rd = INSTR (4, 0);
8986 unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11);
8991 NYI_assert (31, 30, 1);
8992 NYI_assert (28, 24, 0x1E);
8993 NYI_assert (21, 21, 1);
8994 NYI_assert (15, 12, 0xE);
8995 NYI_assert (10, 10, 1);
8997 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9000 double val1 = aarch64_get_FP_double (cpu, rn);
9001 double val2 = aarch64_get_FP_double (cpu, rm);
9006 result = val1 == val2;
9014 result = val1 >= val2;
9022 result = val1 > val2;
9029 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
9033 val1 = aarch64_get_FP_float (cpu, rn);
9034 val2 = aarch64_get_FP_float (cpu, rm);
9039 result = val1 == val2;
9043 val1 = fabsf (val1);
9044 val2 = fabsf (val2);
9047 result = val1 >= val2;
9051 val1 = fabsf (val1);
9052 val2 = fabsf (val2);
9055 result = val1 > val2;
9062 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
/* An alias of DUP: copy one vector element of Rn to element 0 of Rd.  */
do_scalar_MOV (sim_cpu *cpu)
  /* instr [31,21] = 0101 1110 000
     instr [20,16] = imm5
     instr [15,10] = 0000 01
     instr [4, 0]  = Rd.  */
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  NYI_assert (31, 21, 0x2F0);
  NYI_assert (15, 10, 0x01);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The lowest set bit of imm5 selects the element size; the bits
     above it are the element index.  Byte case: imm5<0> set.  */
  index = INSTR (20, 17);
  (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index));
  else if (INSTR (17, 17))
  /* Half-word element.  */
  index = INSTR (20, 18);
  (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index));
  else if (INSTR (18, 18))
  /* Word element.  */
  index = INSTR (20, 19);
  (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index));
  else if (INSTR (19, 19))
  /* Double-word element.  */
  index = INSTR (20, 20);
  (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index));
do_scalar_NEG (sim_cpu *cpu)
  /* NEG <Dd>, <Dn> -- two's complement negate of the 64 bit scalar.
     instr [31,10] = 0111 1110 1110 0000 1011 10
     instr [4, 0]  = Rd.  */
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  NYI_assert (31, 10, 0x1FB82E);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Unsigned negation wraps modulo 2^64, so no signed-overflow UB.  */
  aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0));
do_scalar_USHL (sim_cpu *cpu)
  /* USHL <Dd>, <Dn>, <Dm> -- shift amount comes from a register.
     instr [31,21] = 0111 1110 111
     instr [15,10] = 0100 01
     instr [4, 0]  = Rd.  */
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  /* Signed byte from Rm: positive = shift left, negative = shift right.  */
  signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
  NYI_assert (31, 21, 0x3F7);
  NYI_assert (15, 10, 0x11);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift);
  /* Negative amount: logical right shift by the magnitude.  */
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift);
do_double_add (sim_cpu *cpu)
  /* FADD <Dd>, <Dn>, <Dm> (scalar double add).
     instr [31,21] = 0101 1110 111
     instr [15,10] = 1000 01
     instr [4,0]   = Fd.  */
  NYI_assert (31, 21, 0x2F7);
  NYI_assert (15, 10, 0x21);
  /* NOTE: the register named Fn here is extracted from bits [20,16]
     (the Rm field); harmless since addition is commutative.  */
  Fn = INSTR (20, 16);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val1 = aarch64_get_FP_double (cpu, Fm);
  val2 = aarch64_get_FP_double (cpu, Fn);
  aarch64_set_FP_double (cpu, Fd, val1 + val2);
do_scalar_UCVTF (sim_cpu *cpu)
  /* UCVTF (vector form): unsigned integer to floating point convert.
     instr [31,23] = 0111 1110 0
     instr [22]    = single(0)/double(1)
     instr [21,10] = 10 0001 1101 10
     instr [4,0]   = rd.  */
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  NYI_assert (31, 23, 0x0FC);
  NYI_assert (21, 10, 0x876);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Double variant: 64 bit unsigned -> double.  */
  uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0);
  aarch64_set_vec_double (cpu, rd, 0, (double) val);
  /* Single variant: 32 bit unsigned -> float.  */
  uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0);
  aarch64_set_vec_float (cpu, rd, 0, (float) val);
do_scalar_vec (sim_cpu *cpu)
  /* Secondary decode for AdvSIMD scalar operations: dispatch on
     instr[31,23], then instr[15,10] (and instr[21,16] in one group).
     instr [30] = 1.  */
  /* instr [28,25] = 1111.  */
  switch (INSTR (31, 23))
    switch (INSTR (15, 10))
      case 0x01: do_scalar_MOV (cpu); return;
      case 0x39: do_scalar_FCM (cpu); return;
      case 0x3B: do_scalar_FCM (cpu); return;
    case 0xBE: do_scalar_shift (cpu); return;
    switch (INSTR (15, 10))
      switch (INSTR (21, 16))
        case 0x30: do_scalar_FADDP (cpu); return;
        case 0x21: do_scalar_UCVTF (cpu); return;
      case 0x39: do_scalar_FCM (cpu); return;
      case 0x3B: do_scalar_FCM (cpu); return;
    switch (INSTR (15, 10))
      case 0x0D: do_scalar_CMGT (cpu); return;
      case 0x11: do_scalar_USHL (cpu); return;
      case 0x2E: do_scalar_NEG (cpu); return;
      case 0x35: do_scalar_FABD (cpu); return;
      case 0x39: do_scalar_FCM (cpu); return;
      case 0x3B: do_scalar_FCM (cpu); return;
    case 0xFE: do_scalar_USHR (cpu); return;
    switch (INSTR (15, 10))
      case 0x21: do_double_add (cpu); return;
      case 0x11: do_scalar_SSHL (cpu); return;
dexAdvSIMD1 (sim_cpu *cpu)
  /* Top-level decode for the AdvSIMD/FP instruction class: peel off
     the scalar-vector group, then the basic scalar FP groups.
     instr [28,25] = 1 111.  */
  /* We are currently only interested in the basic
     scalar fp routines which all have bit 30 = 0.  */
  do_scalar_vec (cpu);
  /* instr[24] is set for FP data processing 3-source and clear for
     all other basic scalar fp instruction groups.  */
  else if (INSTR (24, 24))
    dexSimpleFPDataProc3Source (cpu);
  /* instr[21] is clear for floating <-> fixed conversions and set for
     all other basic scalar fp instruction groups.  */
  else if (!INSTR (21, 21))
    dexSimpleFPFixedConvert (cpu);
  /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
     11 ==> cond select, 00 ==> other.  */
  switch (INSTR (11, 10))
    case 1: dexSimpleFPCondCompare (cpu); return;
    case 2: dexSimpleFPDataProc2Source (cpu); return;
    case 3: dexSimpleFPCondSelect (cpu); return;
  /* Now an ordered cascade of tests.
     FP immediate has instr [12] == 1.
     FP compare has instr [13] == 1.
     FP Data Proc 1 Source has instr [14] == 1.
     FP floating <--> integer conversions has instr [15] == 0.  */
  dexSimpleFPImmediate (cpu);
  else if (INSTR (13, 13))
    dexSimpleFPCompare (cpu);
  else if (INSTR (14, 14))
    dexSimpleFPDataProc1Source (cpu);
  else if (!INSTR (15, 15))
    dexSimpleFPIntegerConvert (cpu);
  /* If we get here then instr[15] == 1 which means UNALLOC.  */
9328 /* PC relative addressing. */
9331 pcadr (sim_cpu *cpu)
9333 /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP
9334 instr[30,29] = immlo
9335 instr[23,5] = immhi. */
9337 unsigned rd = INSTR (4, 0);
9338 uint32_t isPage = INSTR (31, 31);
9339 union { int64_t u64; uint64_t s64; } imm;
9342 imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
9344 offset = (offset << 2) | INSTR (30, 29);
9346 address = aarch64_get_PC (cpu);
9354 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9355 aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
/* Specific decode and execute for group Data Processing Immediate.  */

dexPCRelAddressing (sim_cpu *cpu)
  /* assert instr[28,24] = 10000.  */
  /* NOTE(review): body elided in this excerpt -- presumably forwards
     to pcadr; confirm against the full source.  */
9367 /* Immediate logical.
9368 The bimm32/64 argument is constructed by replicating a 2, 4, 8,
9369 16, 32 or 64 bit sequence pulled out at decode and possibly
9372 N.B. the output register (dest) can normally be Xn or SP
9373 the exception occurs for flag setting instructions which may
9374 only use Xn for the output (dest). The input register can
/* 32 bit and immediate: Rd = Rn & bimm (no flags).  */
and32 (sim_cpu *cpu, uint32_t bimm)
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Non-flag-setting immediate form may target SP, hence SP_OK.  */
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
/* 64 bit and immediate: Xd = Xn & bimm (no flags).  */
and64 (sim_cpu *cpu, uint64_t bimm)
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Non-flag-setting immediate form may target SP, hence SP_OK.  */
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
/* 32 bit and immediate set flags (ANDS/TST).  */
ands32 (sim_cpu *cpu, uint32_t bimm)
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
  uint32_t value2 = bimm;
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Flag-setting form: destination must be Xn/ZR, never SP (NO_SP).  */
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
  set_flags_for_binop32 (cpu, value1 & value2);
/* 64 bit and immediate set flags (ANDS/TST).  */
ands64 (sim_cpu *cpu, uint64_t bimm)
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  uint64_t value2 = bimm;
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Flag-setting form: destination must be Xn/ZR, never SP (NO_SP).  */
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
  set_flags_for_binop64 (cpu, value1 & value2);
/* 32 bit exclusive or immediate: Rd = Rn ^ bimm.  */
eor32 (sim_cpu *cpu, uint32_t bimm)
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Destination may be SP for the immediate form.  */
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
/* 64 bit exclusive or immediate: Xd = Xn ^ bimm.  */
eor64 (sim_cpu *cpu, uint64_t bimm)
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Destination may be SP for the immediate form.  */
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
/* 32 bit or immediate: Rd = Rn | bimm.  */
orr32 (sim_cpu *cpu, uint32_t bimm)
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Destination may be SP for the immediate form.  */
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm);
/* 64 bit or immediate: Xd = Xn | bimm.  */
orr64 (sim_cpu *cpu, uint64_t bimm)
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Destination may be SP for the immediate form.  */
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm);
9479 /* Logical shifted register.
9480 These allow an optional LSL, ASR, LSR or ROR to the second source
9481 register with a count up to the register bit count.
9482 N.B register args may not be SP. */
/* 32 bit AND shifted register: Rd = Rn & (Rm <shift> count).  */
and32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Register forms never use SP.  */
  (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
   & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
/* 64 bit AND shifted register: Xd = Xn & (Xm <shift> count).  */
and64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Register forms never use SP.  */
  (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
   & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
/* 32 bit AND shifted register setting flags (ANDS).  */
ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
  uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
  /* NZCV computed from the 32 bit result.  */
  set_flags_for_binop32 (cpu, value1 & value2);
/* 64 bit AND shifted register setting flags (ANDS).  */
ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
  /* NZCV computed from the 64 bit result.  */
  set_flags_for_binop64 (cpu, value1 & value2);
/* 32 bit BIC shifted register: Rd = Rn & ~(Rm <shift> count).  */
bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
   & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
/* 64 bit BIC shifted register: Xd = Xn & ~(Xm <shift> count).  */
bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
   & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
/* 32 bit BIC shifted register setting flags (BICS).  */
bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
  /* Operand 2 is the bitwise complement of the shifted Rm.  */
  uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
  set_flags_for_binop32 (cpu, value1 & value2);
/* 64 bit BIC shifted register setting flags (BICS).  */
bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  /* Operand 2 is the bitwise complement of the shifted Rm.  */
  uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
  set_flags_for_binop64 (cpu, value1 & value2);
/* 32 bit EON shifted register: Rd = Rn ^ ~(Rm <shift> count).  */
eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
   ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
/* 64 bit EON shifted register: Xd = Xn ^ ~(Xm <shift> count).  */
eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
   ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
/* 32 bit EOR shifted register: Rd = Rn ^ (Rm <shift> count).  */
eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
   ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
/* 64 bit EOR shifted register: Xd = Xn ^ (Xm <shift> count).  */
eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
   ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
/* 32 bit ORR shifted register: Rd = Rn | (Rm <shift> count).  */
orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
   | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
/* 64 bit ORR shifted register: Xd = Xn | (Xm <shift> count).  */
orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
   | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
/* 32 bit ORN shifted register: Rd = Rn | ~(Rm <shift> count).  */
orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
   | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
/* 64 bit ORN shifted register: Xd = Xn | ~(Xm <shift> count).  */
orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
   | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
dexLogicalImmediate (sim_cpu *cpu)
  /* Decode and dispatch the logical (immediate) group.
     assert instr[28,23] = 1001000
     instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
     instr[22] = N : used to construct immediate mask.  */
  /* 32 bit operations must have N = 0 or else we have an UNALLOC.  */
  uint32_t size = INSTR (31, 31);
  uint32_t N = INSTR (22, 22);
  /* uint32_t immr = INSTR (21, 16);.  */
  /* uint32_t imms = INSTR (15, 10);.  */
  /* The 13 bit N:immr:imms field indexes LITable, a table of
     pre-expanded logical immediates.  */
  uint32_t index = INSTR (22, 10);
  uint64_t bimm64 = LITable [index];
  uint32_t dispatch = INSTR (30, 29);
  /* 32 bit ops truncate the expanded immediate to its low half.  */
  uint32_t bimm = (uint32_t) bimm64;
  case 0: and32 (cpu, bimm); return;
  case 1: orr32 (cpu, bimm); return;
  case 2: eor32 (cpu, bimm); return;
  case 3: ands32 (cpu, bimm); return;
  /* 64 bit variants.  */
  case 0: and64 (cpu, bimm64); return;
  case 1: orr64 (cpu, bimm64); return;
  case 2: eor64 (cpu, bimm64); return;
  case 3: ands64 (cpu, bimm64); return;
9773 The uimm argument is a 16 bit value to be inserted into the
9774 target register the pos argument locates the 16 bit word in the
9775 dest register i.e. it is in {0, 1} for 32 bit and {0, 1, 2,
9777 N.B register arg may not be SP so it should be.
9778 accessed using the setGZRegisterXXX accessors. */
/* 32 bit move 16 bit immediate zero remaining shorts (MOVZ).
   pos is the 16 bit lane index ({0,1} for the 32 bit form).  */
movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
  unsigned rd = INSTR (4, 0);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
/* 64 bit move 16 bit immediate zero remaining shorts (MOVZ).
   pos is the 16 bit lane index ({0..3} for the 64 bit form).  */
movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
  unsigned rd = INSTR (4, 0);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Widen before shifting so pos >= 2 does not overflow 32 bits.  */
  aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
/* 32 bit move 16 bit immediate negated (MOVN): Rd = ~(val << pos*16).  */
movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
  unsigned rd = INSTR (4, 0);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* XOR with all-ones implements the bitwise NOT.  */
  aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
/* 64 bit move 16 bit immediate negated (MOVN): Xd = ~(val << pos*16).  */
movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
  unsigned rd = INSTR (4, 0);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Widen before shifting; XOR with all-ones implements NOT.  */
  (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
		    ^ 0xffffffffffffffffULL));
/* 32 bit move 16 bit immediate keep remaining shorts (MOVK).  */
movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
  unsigned rd = INSTR (4, 0);
  uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
  uint32_t value = val << (pos * 16);
  /* Mask clears only the 16 bit lane being replaced.  */
  uint32_t mask = ~(0xffffU << (pos * 16));
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
/* 64 bit move 16 bit immediate keep remaining shorts (MOVK).  */
movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
  unsigned rd = INSTR (4, 0);
  uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
  uint64_t value = (uint64_t) val << (pos * 16);
  /* Mask clears only the 16 bit lane being replaced.  */
  uint64_t mask = ~(0xffffULL << (pos * 16));
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
dexMoveWideImmediate (sim_cpu *cpu)
  /* Decode and dispatch MOVN/MOVZ/MOVK.
     assert instr[28:23] = 100101
     instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
     instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
     instr[20,5] = uimm16.  */
  /* N.B. the (multiple of 16) shift is applied by the called routine,
     we just pass the multiplier.  */
  uint32_t size = INSTR (31, 31);
  uint32_t op = INSTR (30, 29);
  uint32_t shift = INSTR (22, 21);
  /* 32 bit can only shift 0 or 1 lot of 16.
     anything else is an unallocated instruction.  */
  if (size == 0 && (shift > 1))
  imm = INSTR (20, 5);
  /* 32 bit variants.  */
  movn32 (cpu, imm, shift);
  movz32 (cpu, imm, shift);
  movk32 (cpu, imm, shift);
  /* 64 bit variants.  */
  movn64 (cpu, imm, shift);
  movz64 (cpu, imm, shift);
  movk64 (cpu, imm, shift);
9896 /* Bitfield operations.
9897 These take a pair of bit positions r and s which are in {0..31}
9898 or {0..63} depending on the instruction word size.
9899 N.B register args may not be SP. */
9901 /* OK, we start with ubfm which just needs to pick
9902 some bits out of source zero the rest and write
9903 the result to dest. Just need two logical shifts. */
/* 32 bit bitfield move, left and right of affected zeroed
   if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>.  */
ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
  unsigned rn = INSTR (9, 5);
  uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
  /* Pick either s+1-r or s+1 consecutive bits out of the original word.  */
  /* Case r <= s:
     31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
     We want only bits s:xxx:r at the bottom of the word
     so we LSL bit s up to bit 31 i.e. by 31 - s
     and then we LSR to bring bit 31 down to bit s - r
     i.e. by 31 + r - s.  */
  value >>= 31 + r - s;
  /* Case r > s:
     31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
     We want only bits s:xxx:0 starting at it 31-(r-1)
     so we LSL bit s up to bit 31 i.e. by 31 - s
     and then we LSL to bring bit 31 down to 31-(r-1)+s
     i.e. by r - (s + 1).  */
  value >>= r - (s + 1);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
/* 64 bit bitfield move, left and right of affected zeroed
   if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>.  */
ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
  unsigned rn = INSTR (9, 5);
  uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  /* Case r <= s:
     63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
     We want only bits s:xxx:r at the bottom of the word.
     So we LSL bit s up to bit 63 i.e. by 63 - s
     and then we LSR to bring bit 63 down to bit s - r
     i.e. by 63 + r - s.  */
  value >>= 63 + r - s;
  /* Case r > s:
     63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
     We want only bits s:xxx:0 starting at it 63-(r-1).
     So we LSL bit s up to bit 63 i.e. by 63 - s
     and then we LSL to bring bit 63 down to 63-(r-1)+s
     i.e. by r - (s + 1).  */
  value >>= r - (s + 1);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9976 /* The signed versions need to insert sign bits
9977 on the left of the inserted bit field. so we do
9978 much the same as the unsigned version except we
9979 use an arithmetic shift right -- this just means
9980 we need to operate on signed values. */
/* 32 bit bitfield move, left of affected sign-extended, right zeroed.  */
/* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>.  */
sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
  unsigned rn = INSTR (9, 5);
  /* as per ubfm32 but use an ASR instead of an LSR.  */
  int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);
  /* r <= s case (see ubfm32 for the shift derivation).  */
  value >>= 31 + r - s;
  /* r > s case.  */
  value >>= r - (s + 1);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Cast through uint32_t so the top half of the X register is zeroed,
     as the architecture requires for a W destination.  */
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
/* 64 bit bitfield move, left of affected sign-extended, right zeroed.  */
/* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>.  */
sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
  unsigned rn = INSTR (9, 5);
  /* As per ubfm but use an ASR instead of an LSR.  */
  int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);
  /* r <= s case (see ubfm for the shift derivation).  */
  value >>= 63 + r - s;
  /* r > s case.  */
  value >>= r - (s + 1);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
10034 /* Finally, these versions leave non-affected bits
10035 as is. so we need to generate the bits as per
10036 ubfm and also generate a mask to pick the
10037 bits from the original and computed values. */
/* 32 bit bitfield move, non-affected bits left as is.
   If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>.  */
bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
  unsigned rn = INSTR (9, 5);
  uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
  /* mask tracks which destination bits are replaced.  */
  uint32_t mask = -1;
  /* Pick either s+1-r or s+1 consecutive bits out of the original word.  */
  /* Case r <= s:
     31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
     We want only bits s:xxx:r at the bottom of the word
     so we LSL bit s up to bit 31 i.e. by 31 - s
     and then we LSR to bring bit 31 down to bit s - r
     i.e. by 31 + r - s.  */
  value >>= 31 + r - s;
  /* the mask must include the same bits.  */
  mask >>= 31 + r - s;
  /* Case r > s:
     31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
     We want only bits s:xxx:0 starting at it 31-(r-1)
     so we LSL bit s up to bit 31 i.e. by 31 - s
     and then we LSL to bring bit 31 down to 31-(r-1)+s
     i.e. by r - (s + 1).  */
  value >>= r - (s + 1);
  /* The mask must include the same bits.  */
  mask >>= r - (s + 1);
  /* NOTE(review): value2 appears to be computed but never used in the
     final store below -- looks like dead code; confirm.  */
  value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Merge the extracted field into the untouched destination bits.  */
  aarch64_set_reg_u64
    (cpu, rd, NO_SP, (aarch64_get_reg_u32 (cpu, rd, NO_SP) & ~mask) | value);
/* 64 bit bitfield move, non-affected bits left as is.
   If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>.  */
bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
  unsigned rn = INSTR (9, 5);
  uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  /* mask tracks which destination bits are replaced.  */
  uint64_t mask = 0xffffffffffffffffULL;
  /* Case r <= s:
     63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
     We want only bits s:xxx:r at the bottom of the word
     so we LSL bit s up to bit 63 i.e. by 63 - s
     and then we LSR to bring bit 63 down to bit s - r
     i.e. by 63 + r - s.  */
  value >>= 63 + r - s;
  /* The mask must include the same bits.  */
  mask >>= 63 + r - s;
  /* Case r > s:
     63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
     We want only bits s:xxx:0 starting at it 63-(r-1)
     so we LSL bit s up to bit 63 i.e. by 63 - s
     and then we LSL to bring bit 63 down to 63-(r-1)+s
     i.e. by r - (s + 1).  */
  value >>= r - (s + 1);
  /* The mask must include the same bits.  */
  mask >>= r - (s + 1);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Merge the extracted field into the untouched destination bits.  */
  aarch64_set_reg_u64
    (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
dexBitfieldImmediate (sim_cpu *cpu)
  /* Decode and dispatch SBFM/BFM/UBFM.
     assert instr[28:23] = 100110
     instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
     instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
     instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
     instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit.  */
  /* 32 bit operations must have N = 0 or else we have an UNALLOC.  */
  uint32_t size = INSTR (31, 31);
  uint32_t N = INSTR (22, 22);
  /* 32 bit operations must have immr[5] = 0 and imms[5] = 0.  */
  /* or else we have an UNALLOC.  */
  uint32_t immr = INSTR (21, 16);
  /* uimm here re-extracts bit 5 of the already-decoded 6 bit field.  */
  if (!size && uimm (immr, 5, 5))
  imms = INSTR (15, 10);
  if (!size && uimm (imms, 5, 5))
  /* Switch on combined size and op.  */
  dispatch = INSTR (31, 29);
  case 0: sbfm32 (cpu, immr, imms); return;
  case 1: bfm32 (cpu, immr, imms); return;
  case 2: ubfm32 (cpu, immr, imms); return;
  case 4: sbfm (cpu, immr, imms); return;
  case 5: bfm (cpu, immr, imms); return;
  case 6: ubfm (cpu, immr, imms); return;
  default: HALT_UNALLOC;
10178 do_EXTR_32 (sim_cpu *cpu)
10180 /* instr[31:21] = 00010011100
10182 instr[15,10] = imms : 0xxxxx for 32 bit
10185 unsigned rm = INSTR (20, 16);
10186 unsigned imms = INSTR (15, 10) & 31;
10187 unsigned rn = INSTR ( 9, 5);
10188 unsigned rd = INSTR ( 4, 0);
10192 val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
10194 val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10195 val2 <<= (32 - imms);
10197 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10198 aarch64_set_reg_u64 (cpu, rd, NO_SP, val1 | val2);
10202 do_EXTR_64 (sim_cpu *cpu)
10204 /* instr[31:21] = 10010011100
10206 instr[15,10] = imms
10209 unsigned rm = INSTR (20, 16);
10210 unsigned imms = INSTR (15, 10) & 63;
10211 unsigned rn = INSTR ( 9, 5);
10212 unsigned rd = INSTR ( 4, 0);
10215 val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
10217 val |= (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms));
10219 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
dexExtractImmediate (sim_cpu *cpu)
  /* Decode and dispatch EXTR.
     assert instr[28:23] = 100111
     instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
     instr[22] = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC
     instr[21] = op0 : must be 0 or UNALLOC
     instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit.  */
  /* 32 bit operations must have N = 0 or else we have an UNALLOC.  */
  /* 64 bit operations must have N = 1 or else we have an UNALLOC.  */
  uint32_t size = INSTR (31, 31);
  uint32_t N = INSTR (22, 22);
  /* 32 bit operations must have imms[5] = 0
     or else we have an UNALLOC.  */
  uint32_t imms = INSTR (15, 10);
  if (!size && uimm (imms, 5, 5))
  /* Switch on combined size and op; 0 selects the 32 bit EXTR,
     4 the 64 bit EXTR (handlers on the elided lines).  */
  dispatch = INSTR (31, 29);
  else if (dispatch == 4)
  else if (dispatch == 1)
dexDPImm (sim_cpu *cpu)
  /* Secondary dispatch for the Data Processing Immediate group.
     uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
     assert group == GROUP_DPIMM_1000 || group == GROUP_DPIMM_1001
     bits [25,23] of a DPImm are the secondary dispatch vector.  */
  uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));
  case DPIMM_PCADR_000:
  case DPIMM_PCADR_001:
    dexPCRelAddressing (cpu);
  case DPIMM_ADDSUB_010:
  case DPIMM_ADDSUB_011:
    dexAddSubtractImmediate (cpu);
  case DPIMM_LOG_100:
    dexLogicalImmediate (cpu);
  case DPIMM_MOV_101:
    dexMoveWideImmediate (cpu);
  case DPIMM_BITF_110:
    dexBitfieldImmediate (cpu);
  case DPIMM_EXTR_111:
    dexExtractImmediate (cpu);
  /* Should never reach here.  */
dexLoadUnscaledImmediate (sim_cpu *cpu)
  /* Decode and dispatch LDUR/STUR (unscaled signed 9 bit offset).
     instr[29,24] == 111_00
     instr[31,30] = size
     instr[20,12] = simm9
     instr[9,5] = rn may be SP.  */
  /* unsigned rt = INSTR (4, 0); */
  /* V selects SIMD&FP (1) versus general (0) register file.  */
  uint32_t V = INSTR (26, 26);
  /* Dispatch index combines size (2 bits) with opc (bits 23,22).  */
  uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
  int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
  /* GReg operations.  */
  case 0: sturb (cpu, imm); return;
  case 1: ldurb32 (cpu, imm); return;
  case 2: ldursb64 (cpu, imm); return;
  case 3: ldursb32 (cpu, imm); return;
  case 4: sturh (cpu, imm); return;
  case 5: ldurh32 (cpu, imm); return;
  case 6: ldursh64 (cpu, imm); return;
  case 7: ldursh32 (cpu, imm); return;
  case 8: stur32 (cpu, imm); return;
  case 9: ldur32 (cpu, imm); return;
  case 10: ldursw (cpu, imm); return;
  case 12: stur64 (cpu, imm); return;
  case 13: ldur64 (cpu, imm); return;
  /* FReg operations.  */
  case 2: fsturq (cpu, imm); return;
  case 3: fldurq (cpu, imm); return;
  case 8: fsturs (cpu, imm); return;
  case 9: fldurs (cpu, imm); return;
  case 12: fsturd (cpu, imm); return;
  case 13: fldurd (cpu, imm); return;
  case 0: /* STUR 8 bit FP.  */
  case 1: /* LDUR 8 bit FP.  */
  case 4: /* STUR 16 bit FP.  */
  case 5: /* LDUR 16 bit FP.  */
10380 /* N.B. A preliminary note regarding all the ldrs<x>32
10383 The signed value loaded by these instructions is cast to unsigned
10384 before being assigned to aarch64_get_reg_u64 (cpu, N) i.e. to the
10385 64 bit element of the GReg union. this performs a 32 bit sign extension
10386 (as required) but avoids 64 bit sign extension, thus ensuring that the
10387 top half of the register word is zero. this is what the spec demands
10388 when a 32 bit load occurs. */
/* 32 bit load sign-extended byte scaled unsigned 12 bit.  */
ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
  unsigned int rn = INSTR (9, 5);
  unsigned int rt = INSTR (4, 0);
  /* The target register may not be SP but the source may be
     there is no scaling required for a byte load.  */
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
  /* NOTE(review): the (int64_t) cast sign-extends to the full 64 bits,
     which contradicts the preliminary note above about avoiding 64 bit
     sign extension for 32 bit loads -- confirm intended.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       (int64_t) aarch64_get_mem_s8 (cpu, address));
/* 32 bit load sign-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
  unsigned int rm = INSTR (20, 16);
  unsigned int rn = INSTR (9, 5);
  unsigned int rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  /* Offset register is extended per the decoded extension kind.  */
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_u64
    (cpu, rt, NO_SP, (int64_t) aarch64_get_mem_s8 (cpu, address
10425 /* 32 bit load sign-extended byte unscaled signed 9 bit with
10426 pre- or post-writeback. */
10428 ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10431 unsigned int rn = INSTR (9, 5);
10432 unsigned int rt = INSTR (4, 0);
10434 if (rn == rt && wb != NoWriteBack)
10437 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10442 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10443 (int64_t) aarch64_get_mem_s8 (cpu, address));
10448 if (wb != NoWriteBack)
10449 aarch64_set_reg_u64 (cpu, rn, NO_SP, address);
10452 /* 8 bit store scaled. */
10454 fstrb_abs (sim_cpu *cpu, uint32_t offset)
/* st = FP/SIMD source register; rn = base register (SP allowed).  */
10456 unsigned st = INSTR (4, 0);
10457 unsigned rn = INSTR (9, 5);
10459 aarch64_set_mem_u8 (cpu,
10460 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
10461 aarch64_get_vec_u8 (cpu, st, 0));
10464 /* 8 bit store scaled or unscaled zero- or
10465 sign-extended 8-bit register offset. */
10467 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10469 unsigned rm = INSTR (20, 16);
10470 unsigned rn = INSTR (9, 5);
10471 unsigned st = INSTR (4, 0);
10473 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10474 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
/* NOTE(review): for Unscaled the register offset is dropped entirely here,
   yet for a byte access Scaled and Unscaled address forms should agree —
   confirm against the Arm ARM.  */
10476 uint64_t displacement = scaling == Scaled ? extended : 0;
10479 (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0));
10482 /* 16 bit store scaled. */
10484 fstrh_abs (sim_cpu *cpu, uint32_t offset)
10486 unsigned st = INSTR (4, 0);
10487 unsigned rn = INSTR (9, 5);
10489 aarch64_set_mem_u16
/* Immediate offset is scaled by the access size (2 bytes).  */
10491 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16),
10492 aarch64_get_vec_u16 (cpu, st, 0));
10495 /* 16 bit store scaled or unscaled zero-
10496 or sign-extended 16-bit register offset. */
10498 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10500 unsigned rm = INSTR (20, 16);
10501 unsigned rn = INSTR (9, 5);
10502 unsigned st = INSTR (4, 0);
10504 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10505 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10507 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
10509 aarch64_set_mem_u16
10510 (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0));
10513 /* 32 bit store scaled unsigned 12 bit. */
10515 fstrs_abs (sim_cpu *cpu, uint32_t offset)
10517 unsigned st = INSTR (4, 0);
10518 unsigned rn = INSTR (9, 5);
10520 aarch64_set_mem_u32
10522 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32),
10523 aarch64_get_vec_u32 (cpu, st, 0));
10526 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
10528 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10530 unsigned rn = INSTR (9, 5);
10531 unsigned st = INSTR (4, 0);
10533 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10538 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0));
/* Update the base register when pre/post indexing is selected.  */
10543 if (wb != NoWriteBack)
10544 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10547 /* 32 bit store scaled or unscaled zero-
10548 or sign-extended 32-bit register offset. */
10550 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10552 unsigned rm = INSTR (20, 16);
10553 unsigned rn = INSTR (9, 5);
10554 unsigned st = INSTR (4, 0);
10556 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10557 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10559 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
10561 aarch64_set_mem_u32
10562 (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0));
10565 /* 64 bit store scaled unsigned 12 bit. */
10567 fstrd_abs (sim_cpu *cpu, uint32_t offset)
10569 unsigned st = INSTR (4, 0);
10570 unsigned rn = INSTR (9, 5);
10572 aarch64_set_mem_u64
10574 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64),
10575 aarch64_get_vec_u64 (cpu, st, 0));
10578 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
10580 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10582 unsigned rn = INSTR (9, 5);
10583 unsigned st = INSTR (4, 0);
10585 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10590 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0));
10595 if (wb != NoWriteBack)
10596 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10599 /* 64 bit store scaled or unscaled zero-
10600 or sign-extended 32-bit register offset. */
10602 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10604 unsigned rm = INSTR (20, 16);
10605 unsigned rn = INSTR (9, 5);
10606 unsigned st = INSTR (4, 0);
10608 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10609 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10611 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
10613 aarch64_set_mem_u64
10614 (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0));
10617 /* 128 bit store scaled unsigned 12 bit. */
10619 fstrq_abs (sim_cpu *cpu, uint32_t offset)
10622 unsigned st = INSTR (4, 0);
10623 unsigned rn = INSTR (9, 5);
/* 128-bit value is moved via the long-double staging helpers.  */
10626 aarch64_get_FP_long_double (cpu, st, & a);
10628 addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
10629 aarch64_set_mem_long_double (cpu, addr, a);
10632 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */
10634 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10637 unsigned rn = INSTR (9, 5);
10638 unsigned st = INSTR (4, 0);
10639 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10644 aarch64_get_FP_long_double (cpu, st, & a);
10645 aarch64_set_mem_long_double (cpu, address, a);
10650 if (wb != NoWriteBack)
10651 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10654 /* 128 bit store scaled or unscaled zero-
10655 or sign-extended 32-bit register offset. */
10657 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10659 unsigned rm = INSTR (20, 16);
10660 unsigned rn = INSTR (9, 5);
10661 unsigned st = INSTR (4, 0);
10663 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10664 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10666 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
10670 aarch64_get_FP_long_double (cpu, st, & a);
10671 aarch64_set_mem_long_double (cpu, address + displacement, a);
10675 dexLoadImmediatePrePost (sim_cpu *cpu)
10677 /* instr[31,30] = size
10683 instr[20,12] = simm9
10684 instr[11] = wb : 0 ==> Post, 1 ==> Pre
10686 instr[9,5] = Rn may be SP.
/* Dispatch key is (size << 2) | opc; V selects GReg vs FReg forms.  */
10689 uint32_t V = INSTR (26, 26);
10690 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10691 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10692 WriteBack wb = INSTR (11, 11);
10696 /* GReg operations. */
10699 case 0: strb_wb (cpu, imm, wb); return;
10700 case 1: ldrb32_wb (cpu, imm, wb); return;
10701 case 2: ldrsb_wb (cpu, imm, wb); return;
10702 case 3: ldrsb32_wb (cpu, imm, wb); return;
10703 case 4: strh_wb (cpu, imm, wb); return;
10704 case 5: ldrh32_wb (cpu, imm, wb); return;
10705 case 6: ldrsh64_wb (cpu, imm, wb); return;
10706 case 7: ldrsh32_wb (cpu, imm, wb); return;
10707 case 8: str32_wb (cpu, imm, wb); return;
10708 case 9: ldr32_wb (cpu, imm, wb); return;
10709 case 10: ldrsw_wb (cpu, imm, wb); return;
10710 case 12: str_wb (cpu, imm, wb); return;
10711 case 13: ldr_wb (cpu, imm, wb); return;
10721 /* FReg operations. */
10724 case 2: fstrq_wb (cpu, imm, wb); return;
10725 case 3: fldrq_wb (cpu, imm, wb); return;
10726 case 8: fstrs_wb (cpu, imm, wb); return;
10727 case 9: fldrs_wb (cpu, imm, wb); return;
10728 case 12: fstrd_wb (cpu, imm, wb); return;
10729 case 13: fldrd_wb (cpu, imm, wb); return;
10731 case 0: /* STUR 8 bit FP. */
10732 case 1: /* LDUR 8 bit FP. */
10733 case 4: /* STUR 16 bit FP. */
10734 case 5: /* LDUR 16 bit FP. */
10749 dexLoadRegisterOffset (sim_cpu *cpu)
10751 /* instr[31,30] = size
10758 instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10759 110 ==> SXTW, 111 ==> SXTX,
10764 instr[4,0] = rt. */
10766 uint32_t V = INSTR (26, 26);
10767 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10768 Scaling scale = INSTR (12, 12);
10769 Extension extensionType = INSTR (15, 13);
10771 /* Check for illegal extension types. */
10772 if (uimm (extensionType, 1, 1) == 0)
/* UXTX/SXTX mean "no extension": use the 64-bit index value as-is.  */
10775 if (extensionType == UXTX || extensionType == SXTX)
10776 extensionType = NoExtension;
10780 /* GReg operations. */
10783 case 0: strb_scale_ext (cpu, scale, extensionType); return;
10784 case 1: ldrb32_scale_ext (cpu, scale, extensionType); return;
10785 case 2: ldrsb_scale_ext (cpu, scale, extensionType); return;
10786 case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return;
10787 case 4: strh_scale_ext (cpu, scale, extensionType); return;
10788 case 5: ldrh32_scale_ext (cpu, scale, extensionType); return;
10789 case 6: ldrsh_scale_ext (cpu, scale, extensionType); return;
10790 case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return;
10791 case 8: str32_scale_ext (cpu, scale, extensionType); return;
10792 case 9: ldr32_scale_ext (cpu, scale, extensionType); return;
10793 case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10794 case 12: str_scale_ext (cpu, scale, extensionType); return;
10795 case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10796 case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10805 /* FReg operations. */
10808 case 1: /* LDUR 8 bit FP. */
10810 case 3: fldrq_scale_ext (cpu, scale, extensionType); return;
10811 case 5: /* LDUR 16 bit FP. */
10813 case 9: fldrs_scale_ext (cpu, scale, extensionType); return;
10814 case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
10816 case 0: fstrb_scale_ext (cpu, scale, extensionType); return;
10817 case 2: fstrq_scale_ext (cpu, scale, extensionType); return;
10818 case 4: fstrh_scale_ext (cpu, scale, extensionType); return;
10819 case 8: fstrs_scale_ext (cpu, scale, extensionType); return;
10820 case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
10834 dexLoadUnsignedImmediate (sim_cpu *cpu)
10836 /* instr[29,24] == 111_01
10837 instr[31,30] = size
10840 instr[21,10] = uimm12 : unsigned immediate offset
10841 instr[9,5] = rn may be SP.
10842 instr[4,0] = rt. */
/* Dispatch key is (size << 2) | opc; V selects GReg vs FReg forms.  */
10844 uint32_t V = INSTR (26,26);
10845 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10846 uint32_t imm = INSTR (21, 10);
10850 /* GReg operations. */
10853 case 0: strb_abs (cpu, imm); return;
10854 case 1: ldrb32_abs (cpu, imm); return;
10855 case 2: ldrsb_abs (cpu, imm); return;
10856 case 3: ldrsb32_abs (cpu, imm); return;
10857 case 4: strh_abs (cpu, imm); return;
10858 case 5: ldrh32_abs (cpu, imm); return;
10859 case 6: ldrsh_abs (cpu, imm); return;
10860 case 7: ldrsh32_abs (cpu, imm); return;
10861 case 8: str32_abs (cpu, imm); return;
10862 case 9: ldr32_abs (cpu, imm); return;
10863 case 10: ldrsw_abs (cpu, imm); return;
10864 case 12: str_abs (cpu, imm); return;
10865 case 13: ldr_abs (cpu, imm); return;
10866 case 14: prfm_abs (cpu, imm); return;
10875 /* FReg operations. */
10878 case 0: fstrb_abs (cpu, imm); return;
10879 case 4: fstrh_abs (cpu, imm); return;
10880 case 8: fstrs_abs (cpu, imm); return;
10881 case 12: fstrd_abs (cpu, imm); return;
10882 case 2: fstrq_abs (cpu, imm); return;
10884 case 1: fldrb_abs (cpu, imm); return;
10885 case 5: fldrh_abs (cpu, imm); return;
10886 case 9: fldrs_abs (cpu, imm); return;
10887 case 13: fldrd_abs (cpu, imm); return;
10888 case 3: fldrq_abs (cpu, imm); return;
10902 dexLoadExclusive (sim_cpu *cpu)
10904 /* assert instr[29:24] = 001000;
10905 instr[31,30] = size
10906 instr[23] = 0 if exclusive
10907 instr[22] = L : 1 if load, 0 if store
10908 instr[21] = 1 if pair
10910 instr[15] = o0 : 1 if ordered
10913 instr[4.0] = Rt. */
/* Only the non-pair exclusive forms are handled: L=1 -> LDXR, L=0 -> STXR.  */
10915 switch (INSTR (22, 21))
10917 case 2: ldxr (cpu); return;
10918 case 0: stxr (cpu); return;
10924 dexLoadOther (sim_cpu *cpu)
10928 /* instr[29,25] = 111_0
10929 instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
10930 instr[21:11,10] is the secondary dispatch. */
10931 if (INSTR (24, 24))
10933 dexLoadUnsignedImmediate (cpu);
/* Secondary dispatch: bit 21 and bits [11,10] select the addressing form.  */
10937 dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10));
10940 case 0: dexLoadUnscaledImmediate (cpu); return;
10941 case 1: dexLoadImmediatePrePost (cpu); return;
10942 case 3: dexLoadImmediatePrePost (cpu); return;
10943 case 6: dexLoadRegisterOffset (cpu); return;
10955 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
/* Field naming follows the dispatch comment below: rn is the second data
   register (Rt2), rd is the base address register (Rn, SP allowed), and
   rm is the first data register (Rt).  */
10957 unsigned rn = INSTR (14, 10);
10958 unsigned rd = INSTR (9, 5);
10959 unsigned rm = INSTR (4, 0);
10960 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
/* Writeback with a data register equal to the base is unallocated.  */
10962 if ((rn == rd || rm == rd) && wb != NoWriteBack)
10963 HALT_UNALLOC; /* ??? */
10970 aarch64_set_mem_u32 (cpu, address,
10971 aarch64_get_reg_u32 (cpu, rm, NO_SP));
10972 aarch64_set_mem_u32 (cpu, address + 4,
10973 aarch64_get_reg_u32 (cpu, rn, NO_SP));
10978 if (wb != NoWriteBack)
10979 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10983 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10985 unsigned rn = INSTR (14, 10);
10986 unsigned rd = INSTR (9, 5);
10987 unsigned rm = INSTR (4, 0);
10988 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10990 if ((rn == rd || rm == rd) && wb != NoWriteBack)
10991 HALT_UNALLOC; /* ??? */
10998 aarch64_set_mem_u64 (cpu, address,
10999 aarch64_get_reg_u64 (cpu, rm, NO_SP));
11000 aarch64_set_mem_u64 (cpu, address + 8,
11001 aarch64_get_reg_u64 (cpu, rn, NO_SP));
11006 if (wb != NoWriteBack)
11007 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11011 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11013 unsigned rn = INSTR (14, 10);
11014 unsigned rd = INSTR (9, 5);
11015 unsigned rm = INSTR (4, 0);
11016 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11018 /* Treat this as unalloc to make sure we don't do it. */
/* NOTE(review): the load targets are written with SP_OK, unlike the
   single-register loads above which use NO_SP — confirm intended.  */
11027 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address));
11028 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4));
11033 if (wb != NoWriteBack)
11034 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11038 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11040 unsigned rn = INSTR (14, 10);
11041 unsigned rd = INSTR (9, 5);
11042 unsigned rm = INSTR (4, 0);
11043 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11045 /* Treat this as unalloc to make sure we don't do it. */
11054 aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address));
11055 aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4));
11060 if (wb != NoWriteBack)
11061 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11065 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11067 unsigned rn = INSTR (14, 10);
11068 unsigned rd = INSTR (9, 5);
11069 unsigned rm = INSTR (4, 0);
11070 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11072 /* Treat this as unalloc to make sure we don't do it. */
11081 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address));
11082 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8));
11087 if (wb != NoWriteBack)
11088 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11092 dex_load_store_pair_gr (sim_cpu *cpu)
11094 /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
11095 instr[29,25] = instruction encoding: 101_0
11096 instr[26] = V : 1 if fp 0 if gp
11097 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11098 instr[22] = load/store (1=> load)
11099 instr[21,15] = signed, scaled, offset
11102 instr[ 4, 0] = Rm. */
/* Dispatch key is (size << 3) | (addressing mode << 1) | L.  */
11104 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11105 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11109 case 2: store_pair_u32 (cpu, offset, Post); return;
11110 case 3: load_pair_u32 (cpu, offset, Post); return;
11111 case 4: store_pair_u32 (cpu, offset, NoWriteBack); return;
11112 case 5: load_pair_u32 (cpu, offset, NoWriteBack); return;
11113 case 6: store_pair_u32 (cpu, offset, Pre); return;
11114 case 7: load_pair_u32 (cpu, offset, Pre); return;
11116 case 11: load_pair_s32 (cpu, offset, Post); return;
11117 case 13: load_pair_s32 (cpu, offset, NoWriteBack); return;
11118 case 15: load_pair_s32 (cpu, offset, Pre); return;
11120 case 18: store_pair_u64 (cpu, offset, Post); return;
11121 case 19: load_pair_u64 (cpu, offset, Post); return;
11122 case 20: store_pair_u64 (cpu, offset, NoWriteBack); return;
11123 case 21: load_pair_u64 (cpu, offset, NoWriteBack); return;
11124 case 22: store_pair_u64 (cpu, offset, Pre); return;
11125 case 23: load_pair_u64 (cpu, offset, Pre); return;
11133 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
/* rn = second data register (Rt2), rd = base (Rn, SP allowed),
   rm = first data register (Rt); all data registers are FP/SIMD.  */
11135 unsigned rn = INSTR (14, 10);
11136 unsigned rd = INSTR (9, 5);
11137 unsigned rm = INSTR (4, 0);
11138 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11145 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0));
11146 aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0));
11151 if (wb != NoWriteBack)
11152 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11156 store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11158 unsigned rn = INSTR (14, 10);
11159 unsigned rd = INSTR (9, 5);
11160 unsigned rm = INSTR (4, 0);
11161 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11168 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0));
11169 aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0));
11174 if (wb != NoWriteBack)
11175 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11179 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11182 unsigned rn = INSTR (14, 10);
11183 unsigned rd = INSTR (9, 5);
11184 unsigned rm = INSTR (4, 0);
11185 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
/* 128-bit elements are transferred via the long-double staging helpers.  */
11192 aarch64_get_FP_long_double (cpu, rm, & a);
11193 aarch64_set_mem_long_double (cpu, address, a);
11194 aarch64_get_FP_long_double (cpu, rn, & a);
11195 aarch64_set_mem_long_double (cpu, address + 16, a);
11200 if (wb != NoWriteBack)
11201 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11205 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11207 unsigned rn = INSTR (14, 10);
11208 unsigned rd = INSTR (9, 5);
11209 unsigned rm = INSTR (4, 0);
11210 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11220 aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address));
11221 aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4));
11226 if (wb != NoWriteBack)
11227 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11231 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11233 unsigned rn = INSTR (14, 10);
11234 unsigned rd = INSTR (9, 5);
11235 unsigned rm = INSTR (4, 0);
11236 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11246 aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address));
11247 aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8));
11252 if (wb != NoWriteBack)
11253 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11257 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11260 unsigned rn = INSTR (14, 10);
11261 unsigned rd = INSTR (9, 5);
11262 unsigned rm = INSTR (4, 0);
11263 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11273 aarch64_get_mem_long_double (cpu, address, & a);
11274 aarch64_set_FP_long_double (cpu, rm, a);
11275 aarch64_get_mem_long_double (cpu, address + 16, & a);
11276 aarch64_set_FP_long_double (cpu, rn, a);
11281 if (wb != NoWriteBack)
11282 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11286 dex_load_store_pair_fp (sim_cpu *cpu)
11288 /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
11289 instr[29,25] = instruction encoding
11290 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11291 instr[22] = load/store (1=> load)
11292 instr[21,15] = signed, scaled, offset
11295 instr[ 4, 0] = Rm */
/* Dispatch key is (size << 3) | (addressing mode << 1) | L.  */
11297 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11298 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11302 case 2: store_pair_float (cpu, offset, Post); return;
11303 case 3: load_pair_float (cpu, offset, Post); return;
11304 case 4: store_pair_float (cpu, offset, NoWriteBack); return;
11305 case 5: load_pair_float (cpu, offset, NoWriteBack); return;
11306 case 6: store_pair_float (cpu, offset, Pre); return;
11307 case 7: load_pair_float (cpu, offset, Pre); return;
11309 case 10: store_pair_double (cpu, offset, Post); return;
11310 case 11: load_pair_double (cpu, offset, Post); return;
11311 case 12: store_pair_double (cpu, offset, NoWriteBack); return;
11312 case 13: load_pair_double (cpu, offset, NoWriteBack); return;
11313 case 14: store_pair_double (cpu, offset, Pre); return;
11314 case 15: load_pair_double (cpu, offset, Pre); return;
11316 case 18: store_pair_long_double (cpu, offset, Post); return;
11317 case 19: load_pair_long_double (cpu, offset, Post); return;
11318 case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
11319 case 21: load_pair_long_double (cpu, offset, NoWriteBack); return;
11320 case 22: store_pair_long_double (cpu, offset, Pre); return;
11321 case 23: load_pair_long_double (cpu, offset, Pre); return;
/* Return the vector register obtained by advancing register V by O
   places within a multi-register list.  AArch64 has exactly 32 SIMD
   registers (V0-V31) and register lists wrap around modulo 32, so the
   correct mask is 0x1F.  The previous mask (0x3F) could produce
   indices 32-63, past the end of the 32-entry vector register file.  */
static inline unsigned
vec_reg (unsigned v, unsigned o)
{
  return (v + o) & 0x1F;
}
11334 /* Load multiple N-element structures to N consecutive registers. */
11336 vec_load (sim_cpu *cpu, uint64_t address, unsigned N)
/* all = Q bit: 1 => full 128-bit registers, 0 => lower 64 bits only.
   NOTE(review): elements are assigned consecutively with no
   de-interleaving, so LD2/LD3/LD4 behave like LD1 over multiple
   registers — see the FIXMEs on LD1_2/LD1_3/LD1_4 below.  */
11338 int all = INSTR (30, 30);
11339 unsigned size = INSTR (11, 10);
11340 unsigned vd = INSTR (4, 0);
11345 case 0: /* 8-bit operations. */
11347 for (i = 0; i < (16 * N); i++)
11348 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15,
11349 aarch64_get_mem_u8 (cpu, address + i));
11351 for (i = 0; i < (8 * N); i++)
11352 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7,
11353 aarch64_get_mem_u8 (cpu, address + i));
11356 case 1: /* 16-bit operations. */
11358 for (i = 0; i < (8 * N); i++)
11359 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7,
11360 aarch64_get_mem_u16 (cpu, address + i * 2));
11362 for (i = 0; i < (4 * N); i++)
11363 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3,
11364 aarch64_get_mem_u16 (cpu, address + i * 2));
11367 case 2: /* 32-bit operations. */
11369 for (i = 0; i < (4 * N); i++)
11370 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3,
11371 aarch64_get_mem_u32 (cpu, address + i * 4));
11373 for (i = 0; i < (2 * N); i++)
11374 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1,
11375 aarch64_get_mem_u32 (cpu, address + i * 4));
11378 case 3: /* 64-bit operations. */
11380 for (i = 0; i < (2 * N); i++)
11381 aarch64_set_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1,
11382 aarch64_get_mem_u64 (cpu, address + i * 8));
11384 for (i = 0; i < N; i++)
11385 aarch64_set_vec_u64 (cpu, vec_reg (vd, i), 0,
11386 aarch64_get_mem_u64 (cpu, address + i * 8));
11391 /* LD4: load multiple 4-element to four consecutive registers. */
11393 LD4 (sim_cpu *cpu, uint64_t address)
11395 vec_load (cpu, address, 4);
11398 /* LD3: load multiple 3-element structures to three consecutive registers. */
11400 LD3 (sim_cpu *cpu, uint64_t address)
11402 vec_load (cpu, address, 3);
11405 /* LD2: load multiple 2-element structures to two consecutive registers. */
11407 LD2 (sim_cpu *cpu, uint64_t address)
11409 vec_load (cpu, address, 2);
11412 /* Load multiple 1-element structures into one register. */
11414 LD1_1 (sim_cpu *cpu, uint64_t address)
/* all = Q bit: 1 => full 128-bit register, 0 => lower 64 bits.  */
11416 int all = INSTR (30, 30);
11417 unsigned size = INSTR (11, 10);
11418 unsigned vd = INSTR (4, 0);
11424 /* LD1 {Vd.16b}, addr, #16 */
11425 /* LD1 {Vd.8b}, addr, #8 */
11426 for (i = 0; i < (all ? 16 : 8); i++)
11427 aarch64_set_vec_u8 (cpu, vd, i,
11428 aarch64_get_mem_u8 (cpu, address + i));
11432 /* LD1 {Vd.8h}, addr, #16 */
11433 /* LD1 {Vd.4h}, addr, #8 */
11434 for (i = 0; i < (all ? 8 : 4); i++)
11435 aarch64_set_vec_u16 (cpu, vd, i,
11436 aarch64_get_mem_u16 (cpu, address + i * 2));
11440 /* LD1 {Vd.4s}, addr, #16 */
11441 /* LD1 {Vd.2s}, addr, #8 */
11442 for (i = 0; i < (all ? 4 : 2); i++)
11443 aarch64_set_vec_u32 (cpu, vd, i,
11444 aarch64_get_mem_u32 (cpu, address + i * 4));
11448 /* LD1 {Vd.2d}, addr, #16 */
11449 /* LD1 {Vd.1d}, addr, #8 */
11450 for (i = 0; i < (all ? 2 : 1); i++)
11451 aarch64_set_vec_u64 (cpu, vd, i,
11452 aarch64_get_mem_u64 (cpu, address + i * 8));
11457 /* Load multiple 1-element structures into two registers. */
11459 LD1_2 (sim_cpu *cpu, uint64_t address)
11461 /* FIXME: This algorithm is *exactly* the same as the LD2 version.
11462 So why have two different instructions ? There must be something
11463 wrong somewhere. */
11464 vec_load (cpu, address, 2);
11467 /* Load multiple 1-element structures into three registers. */
11469 LD1_3 (sim_cpu *cpu, uint64_t address)
11471 /* FIXME: This algorithm is *exactly* the same as the LD3 version.
11472 So why have two different instructions ? There must be something
11473 wrong somewhere. */
11474 vec_load (cpu, address, 3);
11477 /* Load multiple 1-element structures into four registers. */
11479 LD1_4 (sim_cpu *cpu, uint64_t address)
11481 /* FIXME: This algorithm is *exactly* the same as the LD4 version.
11482 So why have two different instructions ? There must be something
11483 wrong somewhere. */
11484 vec_load (cpu, address, 4);
11487 /* Store multiple N-element structures to N consecutive registers. */
11489 vec_store (sim_cpu *cpu, uint64_t address, unsigned N)
/* all = Q bit: 1 => full 128-bit registers, 0 => lower 64 bits only.
   Mirror image of vec_load: registers -> memory, same element order.  */
11491 int all = INSTR (30, 30);
11492 unsigned size = INSTR (11, 10);
11493 unsigned vd = INSTR (4, 0);
11498 case 0: /* 8-bit operations. */
11500 for (i = 0; i < (16 * N); i++)
11503 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15));
11505 for (i = 0; i < (8 * N); i++)
11508 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7));
11511 case 1: /* 16-bit operations. */
11513 for (i = 0; i < (8 * N); i++)
11514 aarch64_set_mem_u16
11515 (cpu, address + i * 2,
11516 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7));
11518 for (i = 0; i < (4 * N); i++)
11519 aarch64_set_mem_u16
11520 (cpu, address + i * 2,
11521 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3));
11524 case 2: /* 32-bit operations. */
11526 for (i = 0; i < (4 * N); i++)
11527 aarch64_set_mem_u32
11528 (cpu, address + i * 4,
11529 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3));
11531 for (i = 0; i < (2 * N); i++)
11532 aarch64_set_mem_u32
11533 (cpu, address + i * 4,
11534 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1));
11537 case 3: /* 64-bit operations. */
11539 for (i = 0; i < (2 * N); i++)
11540 aarch64_set_mem_u64
11541 (cpu, address + i * 8,
11542 aarch64_get_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1));
11544 for (i = 0; i < N; i++)
11545 aarch64_set_mem_u64
11546 (cpu, address + i * 8,
11547 aarch64_get_vec_u64 (cpu, vec_reg (vd, i), 0));
11552 /* Store multiple 4-element structure to four consecutive registers. */
11554 ST4 (sim_cpu *cpu, uint64_t address)
11556 vec_store (cpu, address, 4);
11559 /* Store multiple 3-element structures to three consecutive registers. */
11561 ST3 (sim_cpu *cpu, uint64_t address)
11563 vec_store (cpu, address, 3);
11566 /* Store multiple 2-element structures to two consecutive registers. */
11568 ST2 (sim_cpu *cpu, uint64_t address)
11570 vec_store (cpu, address, 2);
11573 /* Store multiple 1-element structures into one register. */
11575 ST1_1 (sim_cpu *cpu, uint64_t address)
/* all = Q bit: 1 => full 128-bit register, 0 => lower 64 bits.  */
11577 int all = INSTR (30, 30);
11578 unsigned size = INSTR (11, 10);
11579 unsigned vd = INSTR (4, 0);
11585 for (i = 0; i < (all ? 16 : 8); i++)
11586 aarch64_set_mem_u8 (cpu, address + i,
11587 aarch64_get_vec_u8 (cpu, vd, i));
11591 for (i = 0; i < (all ? 8 : 4); i++)
11592 aarch64_set_mem_u16 (cpu, address + i * 2,
11593 aarch64_get_vec_u16 (cpu, vd, i));
11597 for (i = 0; i < (all ? 4 : 2); i++)
11598 aarch64_set_mem_u32 (cpu, address + i * 4,
11599 aarch64_get_vec_u32 (cpu, vd, i));
11603 for (i = 0; i < (all ? 2 : 1); i++)
11604 aarch64_set_mem_u64 (cpu, address + i * 8,
11605 aarch64_get_vec_u64 (cpu, vd, i));
11610 /* Store multiple 1-element structures into two registers. */
11612 ST1_2 (sim_cpu *cpu, uint64_t address)
11614 /* FIXME: This algorithm is *exactly* the same as the ST2 version.
11615 So why have two different instructions ? There must be
11616 something wrong somewhere. */
11617 vec_store (cpu, address, 2);
11620 /* Store multiple 1-element structures into three registers. */
11622 ST1_3 (sim_cpu *cpu, uint64_t address)
11624 /* FIXME: This algorithm is *exactly* the same as the ST3 version.
11625 So why have two different instructions ? There must be
11626 something wrong somewhere. */
11627 vec_store (cpu, address, 3);
11630 /* Store multiple 1-element structures into four registers. */
11632 ST1_4 (sim_cpu *cpu, uint64_t address)
11634 /* FIXME: This algorithm is *exactly* the same as the ST4 version.
11635 So why have two different instructions ? There must be
11636 something wrong somewhere. */
11637 vec_store (cpu, address, 4);
/* Decode the lane index (and adjust SIZE) for the LDn/STn single-structure
   forms from instr[15,14] (opcode high bits), the S bit, the size field
   and the Q bit (FULL).  Expects `full', `s', `size' and `lane' to be in
   scope at the expansion site; note the macro evaluates INSTR and may
   fall through to the unallocated-instruction handler for invalid
   combinations.  */
11640 #define LDn_STn_SINGLE_LANE_AND_SIZE() \
11643     switch (INSTR (15, 14)) \
11646 	lane = (full << 3) | (s << 2) | size; \
11651 	if ((size & 1) == 1) \
11653 	    lane = (full << 2) | (s << 1) | (size >> 1); \
11658 	if ((size & 2) == 2) \
11661 	    if ((size & 1) == 0) \
11663 		lane = (full << 1) | s; \
11681 /* Load single structure into one lane of N registers. */
11683 do_vec_LDn_single (sim_cpu *cpu, uint64_t address)
11686 instr[30] = element selector 0=>half, 1=>all elements
11687 instr[29,24] = 00 1101
11688 instr[23] = 0=>simple, 1=>post
11690 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
11691 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11692 11111 (immediate post inc)
11693 instr[15,13] = opcode
11694 instr[12] = S, used for lane number
11695 instr[11,10] = size, also used for lane number
11696 instr[9,5] = address
11699 unsigned full = INSTR (30, 30);
11700 unsigned vd = INSTR (4, 0);
11701 unsigned size = INSTR (11, 10);
11702 unsigned s = INSTR (12, 12);
/* Number of registers: bit 13 and bit 21 together select LD1..LD4.  */
11703 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11707 NYI_assert (29, 24, 0x0D);
11708 NYI_assert (22, 22, 1);
11710 /* Compute the lane number first (using size), and then compute size. */
11711 LDn_STn_SINGLE_LANE_AND_SIZE ();
/* NOTE(review): `vd + i' does not wrap modulo 32 (cf. vec_reg);
   a register list starting near V31 would index past V31 — confirm.  */
11713 for (i = 0; i < nregs; i++)
11718 uint8_t val = aarch64_get_mem_u8 (cpu, address + i);
11719 aarch64_set_vec_u8 (cpu, vd + i, lane, val);
11725 uint16_t val = aarch64_get_mem_u16 (cpu, address + (i * 2));
11726 aarch64_set_vec_u16 (cpu, vd + i, lane, val);
11732 uint32_t val = aarch64_get_mem_u32 (cpu, address + (i * 4));
11733 aarch64_set_vec_u32 (cpu, vd + i, lane, val);
11739 uint64_t val = aarch64_get_mem_u64 (cpu, address + (i * 8));
11740 aarch64_set_vec_u64 (cpu, vd + i, lane, val);
11746 /* Store single structure from one lane from N registers. */
11748 do_vec_STn_single (sim_cpu *cpu, uint64_t address)
11751 instr[30] = element selector 0=>half, 1=>all elements
11752 instr[29,24] = 00 1101
11753 instr[23] = 0=>simple, 1=>post
11755 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
11756 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11757 11111 (immediate post inc)
11758 instr[15,13] = opcode
11759 instr[12] = S, used for lane number
11760 instr[11,10] = size, also used for lane number
11761 instr[9,5] = address
11764 unsigned full = INSTR (30, 30);
11765 unsigned vd = INSTR (4, 0);
11766 unsigned size = INSTR (11, 10);
11767 unsigned s = INSTR (12, 12);
/* Number of registers: bit 13 and bit 21 together select ST1..ST4.  */
11768 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11772 NYI_assert (29, 24, 0x0D);
11773 NYI_assert (22, 22, 0);
11775 /* Compute the lane number first (using size), and then compute size. */
11776 LDn_STn_SINGLE_LANE_AND_SIZE ();
/* NOTE(review): `vd + i' does not wrap modulo 32 (cf. vec_reg) — confirm.  */
11778 for (i = 0; i < nregs; i++)
11783 uint8_t val = aarch64_get_vec_u8 (cpu, vd + i, lane);
11784 aarch64_set_mem_u8 (cpu, address + i, val);
11790 uint16_t val = aarch64_get_vec_u16 (cpu, vd + i, lane);
11791 aarch64_set_mem_u16 (cpu, address + (i * 2), val);
11797 uint32_t val = aarch64_get_vec_u32 (cpu, vd + i, lane);
11798 aarch64_set_mem_u32 (cpu, address + (i * 4), val);
11804 uint64_t val = aarch64_get_vec_u64 (cpu, vd + i, lane);
11805 aarch64_set_mem_u64 (cpu, address + (i * 8), val);
11811 /* Load single structure into all lanes of N registers. */
11813 do_vec_LDnR (sim_cpu *cpu, uint64_t address)
11816 instr[30] = element selector 0=>half, 1=>all elements
11817 instr[29,24] = 00 1101
11818 instr[23] = 0=>simple, 1=>post
11820 instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
11821 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11822 11111 (immediate post inc)
11824 instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
11826 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11827 10=> word(s), 11=> double(d)
11828 instr[9,5] = address
11831 unsigned full = INSTR (30, 30);
11832 unsigned vd = INSTR (4, 0);
11833 unsigned size = INSTR (11, 10);
11834 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11837 NYI_assert (29, 24, 0x0D);
11838 NYI_assert (22, 22, 1);
11839 NYI_assert (15, 14, 3);
11840 NYI_assert (12, 12, 0);
11842 for (n = 0; n < nregs; n++)
11847 uint8_t val = aarch64_get_mem_u8 (cpu, address + n);
11848 for (i = 0; i < (full ? 16 : 8); i++)
11849 aarch64_set_vec_u8 (cpu, vd + n, i, val);
11855 uint16_t val = aarch64_get_mem_u16 (cpu, address + (n * 2));
11856 for (i = 0; i < (full ? 8 : 4); i++)
11857 aarch64_set_vec_u16 (cpu, vd + n, i, val);
11863 uint32_t val = aarch64_get_mem_u32 (cpu, address + (n * 4));
11864 for (i = 0; i < (full ? 4 : 2); i++)
11865 aarch64_set_vec_u32 (cpu, vd + n, i, val);
11871 uint64_t val = aarch64_get_mem_u64 (cpu, address + (n * 8));
11872 for (i = 0; i < (full ? 2 : 1); i++)
11873 aarch64_set_vec_u64 (cpu, vd + n, i, val);
11883 do_vec_load_store (sim_cpu *cpu)
11885 /* {LD|ST}<N> {Vd..Vd+N}, vaddr
11888 instr[30] = element selector 0=>half, 1=>all elements
11889 instr[29,25] = 00110
11890 instr[24] = 0=>multiple struct, 1=>single struct
11891 instr[23] = 0=>simple, 1=>post
11892 instr[22] = 0=>store, 1=>load
11893 instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR)
11894 instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
11895 11111 (immediate post inc)
11896 instr[15,12] = elements and destinations. eg for load:
11897 0000=>LD4 => load multiple 4-element to
11898 four consecutive registers
11899 0100=>LD3 => load multiple 3-element to
11900 three consecutive registers
11901 1000=>LD2 => load multiple 2-element to
11902 two consecutive registers
11903 0010=>LD1 => load multiple 1-element to
11904 four consecutive registers
11905 0110=>LD1 => load multiple 1-element to
11906 three consecutive registers
11907 1010=>LD1 => load multiple 1-element to
11908 two consecutive registers
11909 0111=>LD1 => load multiple 1-element to
11913 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11914 10=> word(s), 11=> double(d)
11915 instr[9,5] = Vn, can be SP
11925 if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
11928 single = INSTR (24, 24);
11929 post = INSTR (23, 23);
11930 load = INSTR (22, 22);
11931 type = INSTR (15, 12);
11933 address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
11935 if (! single && INSTR (21, 21) != 0)
11940 unsigned vm = INSTR (20, 16);
11944 unsigned sizeof_operation;
11948 if ((type >= 0) && (type <= 11))
11950 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11951 switch (INSTR (15, 14))
11954 sizeof_operation = nregs * 1;
11957 sizeof_operation = nregs * 2;
11960 if (INSTR (10, 10) == 0)
11961 sizeof_operation = nregs * 4;
11963 sizeof_operation = nregs * 8;
11969 else if (type == 0xC)
11971 sizeof_operation = INSTR (21, 21) ? 2 : 1;
11972 sizeof_operation <<= INSTR (11, 10);
11974 else if (type == 0xE)
11976 sizeof_operation = INSTR (21, 21) ? 4 : 3;
11977 sizeof_operation <<= INSTR (11, 10);
11986 case 0: sizeof_operation = 32; break;
11987 case 4: sizeof_operation = 24; break;
11988 case 8: sizeof_operation = 16; break;
11991 /* One register, immediate offset variant. */
11992 sizeof_operation = 8;
11996 /* Two registers, immediate offset variant. */
11997 sizeof_operation = 16;
12001 /* Three registers, immediate offset variant. */
12002 sizeof_operation = 24;
12006 /* Four registers, immediate offset variant. */
12007 sizeof_operation = 32;
12014 if (INSTR (30, 30))
12015 sizeof_operation *= 2;
12018 aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
12021 aarch64_set_reg_u64 (cpu, vn, SP_OK,
12022 address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
12026 NYI_assert (20, 16, 0);
12033 if ((type >= 0) && (type <= 11))
12034 do_vec_LDn_single (cpu, address);
12035 else if ((type == 0xC) || (type == 0xE))
12036 do_vec_LDnR (cpu, address);
12043 if ((type >= 0) && (type <= 11))
12045 do_vec_STn_single (cpu, address);
12056 case 0: LD4 (cpu, address); return;
12057 case 4: LD3 (cpu, address); return;
12058 case 8: LD2 (cpu, address); return;
12059 case 2: LD1_4 (cpu, address); return;
12060 case 6: LD1_3 (cpu, address); return;
12061 case 10: LD1_2 (cpu, address); return;
12062 case 7: LD1_1 (cpu, address); return;
12072 case 0: ST4 (cpu, address); return;
12073 case 4: ST3 (cpu, address); return;
12074 case 8: ST2 (cpu, address); return;
12075 case 2: ST1_4 (cpu, address); return;
12076 case 6: ST1_3 (cpu, address); return;
12077 case 10: ST1_2 (cpu, address); return;
12078 case 7: ST1_1 (cpu, address); return;
12085 dexLdSt (sim_cpu *cpu)
12087 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
12088 assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
12089 group == GROUP_LDST_1100 || group == GROUP_LDST_1110
12090 bits [29,28:26] of a LS are the secondary dispatch vector. */
12091 uint32_t group2 = dispatchLS (aarch64_get_instr (cpu));
12096 dexLoadExclusive (cpu); return;
12100 dexLoadLiteral (cpu); return;
12104 dexLoadOther (cpu); return;
12106 case LS_ADVSIMD_001:
12107 do_vec_load_store (cpu); return;
12110 dex_load_store_pair_gr (cpu); return;
12113 dex_load_store_pair_fp (cpu); return;
12116 /* Should never reach here. */
12121 /* Specific decode and execute for group Data Processing Register. */
12124 dexLogicalShiftedRegister (sim_cpu *cpu)
12126 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12128 instr[28:24] = 01010
12129 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
12132 instr[15,10] = count : must be 0xxxxx for 32 bit
12136 uint32_t size = INSTR (31, 31);
12137 Shift shiftType = INSTR (23, 22);
12138 uint32_t count = INSTR (15, 10);
12140 /* 32 bit operations must have count[5] = 0.
12141 or else we have an UNALLOC. */
12142 if (size == 0 && uimm (count, 5, 5))
12145 /* Dispatch on size:op:N. */
12146 switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
12148 case 0: and32_shift (cpu, shiftType, count); return;
12149 case 1: bic32_shift (cpu, shiftType, count); return;
12150 case 2: orr32_shift (cpu, shiftType, count); return;
12151 case 3: orn32_shift (cpu, shiftType, count); return;
12152 case 4: eor32_shift (cpu, shiftType, count); return;
12153 case 5: eon32_shift (cpu, shiftType, count); return;
12154 case 6: ands32_shift (cpu, shiftType, count); return;
12155 case 7: bics32_shift (cpu, shiftType, count); return;
12156 case 8: and64_shift (cpu, shiftType, count); return;
12157 case 9: bic64_shift (cpu, shiftType, count); return;
12158 case 10:orr64_shift (cpu, shiftType, count); return;
12159 case 11:orn64_shift (cpu, shiftType, count); return;
12160 case 12:eor64_shift (cpu, shiftType, count); return;
12161 case 13:eon64_shift (cpu, shiftType, count); return;
12162 case 14:ands64_shift (cpu, shiftType, count); return;
12163 case 15:bics64_shift (cpu, shiftType, count); return;
12167 /* 32 bit conditional select. */
12169 csel32 (sim_cpu *cpu, CondCode cc)
12171 unsigned rm = INSTR (20, 16);
12172 unsigned rn = INSTR (9, 5);
12173 unsigned rd = INSTR (4, 0);
12175 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12176 testConditionCode (cpu, cc)
12177 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12178 : aarch64_get_reg_u32 (cpu, rm, NO_SP));
12181 /* 64 bit conditional select. */
12183 csel64 (sim_cpu *cpu, CondCode cc)
12185 unsigned rm = INSTR (20, 16);
12186 unsigned rn = INSTR (9, 5);
12187 unsigned rd = INSTR (4, 0);
12189 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12190 testConditionCode (cpu, cc)
12191 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12192 : aarch64_get_reg_u64 (cpu, rm, NO_SP));
12195 /* 32 bit conditional increment. */
12197 csinc32 (sim_cpu *cpu, CondCode cc)
12199 unsigned rm = INSTR (20, 16);
12200 unsigned rn = INSTR (9, 5);
12201 unsigned rd = INSTR (4, 0);
12203 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12204 testConditionCode (cpu, cc)
12205 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12206 : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
12209 /* 64 bit conditional increment. */
12211 csinc64 (sim_cpu *cpu, CondCode cc)
12213 unsigned rm = INSTR (20, 16);
12214 unsigned rn = INSTR (9, 5);
12215 unsigned rd = INSTR (4, 0);
12217 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12218 testConditionCode (cpu, cc)
12219 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12220 : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
12223 /* 32 bit conditional invert. */
12225 csinv32 (sim_cpu *cpu, CondCode cc)
12227 unsigned rm = INSTR (20, 16);
12228 unsigned rn = INSTR (9, 5);
12229 unsigned rd = INSTR (4, 0);
12231 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12232 testConditionCode (cpu, cc)
12233 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12234 : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
12237 /* 64 bit conditional invert. */
12239 csinv64 (sim_cpu *cpu, CondCode cc)
12241 unsigned rm = INSTR (20, 16);
12242 unsigned rn = INSTR (9, 5);
12243 unsigned rd = INSTR (4, 0);
12245 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12246 testConditionCode (cpu, cc)
12247 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12248 : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
12251 /* 32 bit conditional negate. */
12253 csneg32 (sim_cpu *cpu, CondCode cc)
12255 unsigned rm = INSTR (20, 16);
12256 unsigned rn = INSTR (9, 5);
12257 unsigned rd = INSTR (4, 0);
12259 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12260 testConditionCode (cpu, cc)
12261 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12262 : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
12265 /* 64 bit conditional negate. */
12267 csneg64 (sim_cpu *cpu, CondCode cc)
12269 unsigned rm = INSTR (20, 16);
12270 unsigned rn = INSTR (9, 5);
12271 unsigned rd = INSTR (4, 0);
12273 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12274 testConditionCode (cpu, cc)
12275 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12276 : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
12280 dexCondSelect (sim_cpu *cpu)
12282 /* instr[28,21] = 11011011
12283 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12284 instr[30:11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
12285 100 ==> CSINV, 101 ==> CSNEG,
12287 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12288 instr[15,12] = cond
12289 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC */
12291 CondCode cc = INSTR (15, 12);
12292 uint32_t S = INSTR (29, 29);
12293 uint32_t op2 = INSTR (11, 10);
12301 switch ((INSTR (31, 30) << 1) | op2)
12303 case 0: csel32 (cpu, cc); return;
12304 case 1: csinc32 (cpu, cc); return;
12305 case 2: csinv32 (cpu, cc); return;
12306 case 3: csneg32 (cpu, cc); return;
12307 case 4: csel64 (cpu, cc); return;
12308 case 5: csinc64 (cpu, cc); return;
12309 case 6: csinv64 (cpu, cc); return;
12310 case 7: csneg64 (cpu, cc); return;
12314 /* Some helpers for counting leading 1 or 0 bits. */
/* Counts the number of leading bits which are the same
   in a 32 bit value in the range 1 to 32.  */
static uint32_t
leading32 (uint32_t value)
{
  uint32_t top = (value >> 31) & 1;	/* The leading (sign) bit.  */
  uint32_t count = 1;			/* The leading bit always counts.  */
  int i;

  /* Scan down from bit 30, stopping at the first bit which
     differs from the leading bit.  */
  for (i = 30; i >= 0; i--)
    {
      if (((value >> i) & 1) != top)
	break;
      count++;
    }

  return count;
}
/* Counts the number of leading bits which are the same
   in a 64 bit value in the range 1 to 64.  */
static uint64_t
leading64 (uint64_t value)
{
  uint64_t top = (value >> 63) & 1;	/* The leading (sign) bit.  */
  uint64_t count = 1;			/* The leading bit always counts.  */
  int i;

  /* Scan down from bit 62, stopping at the first bit which
     differs from the leading bit.  */
  for (i = 62; i >= 0; i--)
    {
      if (((value >> i) & 1) != top)
	break;
      count++;
    }

  return count;
}
12404 /* Bit operations. */
12405 /* N.B register args may not be SP. */
12407 /* 32 bit count leading sign bits. */
12409 cls32 (sim_cpu *cpu)
12411 unsigned rn = INSTR (9, 5);
12412 unsigned rd = INSTR (4, 0);
12414 /* N.B. the result needs to exclude the leading bit. */
12415 aarch64_set_reg_u64
12416 (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
12419 /* 64 bit count leading sign bits. */
12421 cls64 (sim_cpu *cpu)
12423 unsigned rn = INSTR (9, 5);
12424 unsigned rd = INSTR (4, 0);
12426 /* N.B. the result needs to exclude the leading bit. */
12427 aarch64_set_reg_u64
12428 (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
12431 /* 32 bit count leading zero bits. */
12433 clz32 (sim_cpu *cpu)
12435 unsigned rn = INSTR (9, 5);
12436 unsigned rd = INSTR (4, 0);
12437 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12439 /* if the sign (top) bit is set then the count is 0. */
12440 if (pick32 (value, 31, 31))
12441 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12443 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
12446 /* 64 bit count leading zero bits. */
12448 clz64 (sim_cpu *cpu)
12450 unsigned rn = INSTR (9, 5);
12451 unsigned rd = INSTR (4, 0);
12452 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12454 /* if the sign (top) bit is set then the count is 0. */
12455 if (pick64 (value, 63, 63))
12456 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12458 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
12461 /* 32 bit reverse bits. */
12463 rbit32 (sim_cpu *cpu)
12465 unsigned rn = INSTR (9, 5);
12466 unsigned rd = INSTR (4, 0);
12467 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12468 uint32_t result = 0;
12471 for (i = 0; i < 32; i++)
12474 result |= (value & 1);
12477 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12480 /* 64 bit reverse bits. */
12482 rbit64 (sim_cpu *cpu)
12484 unsigned rn = INSTR (9, 5);
12485 unsigned rd = INSTR (4, 0);
12486 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12487 uint64_t result = 0;
12490 for (i = 0; i < 64; i++)
12493 result |= (value & 1UL);
12496 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12499 /* 32 bit reverse bytes. */
12501 rev32 (sim_cpu *cpu)
12503 unsigned rn = INSTR (9, 5);
12504 unsigned rd = INSTR (4, 0);
12505 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12506 uint32_t result = 0;
12509 for (i = 0; i < 4; i++)
12512 result |= (value & 0xff);
12515 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12518 /* 64 bit reverse bytes. */
12520 rev64 (sim_cpu *cpu)
12522 unsigned rn = INSTR (9, 5);
12523 unsigned rd = INSTR (4, 0);
12524 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12525 uint64_t result = 0;
12528 for (i = 0; i < 8; i++)
12531 result |= (value & 0xffULL);
12534 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12537 /* 32 bit reverse shorts. */
12538 /* N.B.this reverses the order of the bytes in each half word. */
12540 revh32 (sim_cpu *cpu)
12542 unsigned rn = INSTR (9, 5);
12543 unsigned rd = INSTR (4, 0);
12544 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12545 uint32_t result = 0;
12548 for (i = 0; i < 2; i++)
12551 result |= (value & 0x00ff00ff);
12554 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12557 /* 64 bit reverse shorts. */
12558 /* N.B.this reverses the order of the bytes in each half word. */
12560 revh64 (sim_cpu *cpu)
12562 unsigned rn = INSTR (9, 5);
12563 unsigned rd = INSTR (4, 0);
12564 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12565 uint64_t result = 0;
12568 for (i = 0; i < 2; i++)
12571 result |= (value & 0x00ff00ff00ff00ffULL);
12574 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12578 dexDataProc1Source (sim_cpu *cpu)
12581 instr[28,21] = 111010110
12582 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12583 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12584 instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
12585 instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
12586 000010 ==> REV, 000011 ==> UNALLOC
12587 000100 ==> CLZ, 000101 ==> CLS
12589 instr[9,5] = rn : may not be SP
12590 instr[4,0] = rd : may not be SP. */
12592 uint32_t S = INSTR (29, 29);
12593 uint32_t opcode2 = INSTR (20, 16);
12594 uint32_t opcode = INSTR (15, 10);
12595 uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
12608 case 0: rbit32 (cpu); return;
12609 case 1: revh32 (cpu); return;
12610 case 2: rev32 (cpu); return;
12611 case 4: clz32 (cpu); return;
12612 case 5: cls32 (cpu); return;
12613 case 8: rbit64 (cpu); return;
12614 case 9: revh64 (cpu); return;
12615 case 10:rev32 (cpu); return;
12616 case 11:rev64 (cpu); return;
12617 case 12:clz64 (cpu); return;
12618 case 13:cls64 (cpu); return;
12619 default: HALT_UNALLOC;
12624 Shifts by count supplied in register.
12625 N.B register args may not be SP.
12626 These all use the shifted auxiliary function for
12627 simplicity and clarity. Writing the actual shift
12628 inline would avoid a branch and so be faster but
12629 would also necessitate getting signs right. */
12631 /* 32 bit arithmetic shift right. */
12633 asrv32 (sim_cpu *cpu)
12635 unsigned rm = INSTR (20, 16);
12636 unsigned rn = INSTR (9, 5);
12637 unsigned rd = INSTR (4, 0);
12639 aarch64_set_reg_u64
12641 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
12642 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12645 /* 64 bit arithmetic shift right. */
12647 asrv64 (sim_cpu *cpu)
12649 unsigned rm = INSTR (20, 16);
12650 unsigned rn = INSTR (9, 5);
12651 unsigned rd = INSTR (4, 0);
12653 aarch64_set_reg_u64
12655 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
12656 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12659 /* 32 bit logical shift left. */
12661 lslv32 (sim_cpu *cpu)
12663 unsigned rm = INSTR (20, 16);
12664 unsigned rn = INSTR (9, 5);
12665 unsigned rd = INSTR (4, 0);
12667 aarch64_set_reg_u64
12669 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
12670 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12673 /* 64 bit arithmetic shift left. */
12675 lslv64 (sim_cpu *cpu)
12677 unsigned rm = INSTR (20, 16);
12678 unsigned rn = INSTR (9, 5);
12679 unsigned rd = INSTR (4, 0);
12681 aarch64_set_reg_u64
12683 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
12684 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12687 /* 32 bit logical shift right. */
12689 lsrv32 (sim_cpu *cpu)
12691 unsigned rm = INSTR (20, 16);
12692 unsigned rn = INSTR (9, 5);
12693 unsigned rd = INSTR (4, 0);
12695 aarch64_set_reg_u64
12697 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
12698 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12701 /* 64 bit logical shift right. */
12703 lsrv64 (sim_cpu *cpu)
12705 unsigned rm = INSTR (20, 16);
12706 unsigned rn = INSTR (9, 5);
12707 unsigned rd = INSTR (4, 0);
12709 aarch64_set_reg_u64
12711 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
12712 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12715 /* 32 bit rotate right. */
12717 rorv32 (sim_cpu *cpu)
12719 unsigned rm = INSTR (20, 16);
12720 unsigned rn = INSTR (9, 5);
12721 unsigned rd = INSTR (4, 0);
12723 aarch64_set_reg_u64
12725 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
12726 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12729 /* 64 bit rotate right. */
12731 rorv64 (sim_cpu *cpu)
12733 unsigned rm = INSTR (20, 16);
12734 unsigned rn = INSTR (9, 5);
12735 unsigned rd = INSTR (4, 0);
12737 aarch64_set_reg_u64
12739 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
12740 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12746 /* 32 bit signed divide. */
12748 cpuiv32 (sim_cpu *cpu)
12750 unsigned rm = INSTR (20, 16);
12751 unsigned rn = INSTR (9, 5);
12752 unsigned rd = INSTR (4, 0);
12753 /* N.B. the pseudo-code does the divide using 64 bit data. */
12754 /* TODO : check that this rounds towards zero as required. */
12755 int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
12756 int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);
12758 aarch64_set_reg_s64 (cpu, rd, NO_SP,
12759 divisor ? ((int32_t) (dividend / divisor)) : 0);
12762 /* 64 bit signed divide. */
12764 cpuiv64 (sim_cpu *cpu)
12766 unsigned rm = INSTR (20, 16);
12767 unsigned rn = INSTR (9, 5);
12768 unsigned rd = INSTR (4, 0);
12770 /* TODO : check that this rounds towards zero as required. */
12771 int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
12773 aarch64_set_reg_s64
12775 divisor ? (aarch64_get_reg_s64 (cpu, rn, NO_SP) / divisor) : 0);
12778 /* 32 bit unsigned divide. */
12780 udiv32 (sim_cpu *cpu)
12782 unsigned rm = INSTR (20, 16);
12783 unsigned rn = INSTR (9, 5);
12784 unsigned rd = INSTR (4, 0);
12786 /* N.B. the pseudo-code does the divide using 64 bit data. */
12787 uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12788 uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);
12790 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12791 divisor ? (uint32_t) (dividend / divisor) : 0);
12794 /* 64 bit unsigned divide. */
12796 udiv64 (sim_cpu *cpu)
12798 unsigned rm = INSTR (20, 16);
12799 unsigned rn = INSTR (9, 5);
12800 unsigned rd = INSTR (4, 0);
12802 /* TODO : check that this rounds towards zero as required. */
12803 uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12805 aarch64_set_reg_u64
12807 divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
12811 dexDataProc2Source (sim_cpu *cpu)
12813 /* assert instr[30] == 0
12814 instr[28,21] == 11010110
12815 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12816 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12817 instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> CPUIV,
12818 001000 ==> LSLV, 001001 ==> LSRV
12819 001010 ==> ASRV, 001011 ==> RORV
12823 uint32_t S = INSTR (29, 29);
12824 uint32_t opcode = INSTR (15, 10);
12832 dispatch = ( (INSTR (31, 31) << 3)
12833 | (uimm (opcode, 3, 3) << 2)
12834 | uimm (opcode, 1, 0));
12837 case 2: udiv32 (cpu); return;
12838 case 3: cpuiv32 (cpu); return;
12839 case 4: lslv32 (cpu); return;
12840 case 5: lsrv32 (cpu); return;
12841 case 6: asrv32 (cpu); return;
12842 case 7: rorv32 (cpu); return;
12843 case 10: udiv64 (cpu); return;
12844 case 11: cpuiv64 (cpu); return;
12845 case 12: lslv64 (cpu); return;
12846 case 13: lsrv64 (cpu); return;
12847 case 14: asrv64 (cpu); return;
12848 case 15: rorv64 (cpu); return;
12849 default: HALT_UNALLOC;
12856 /* 32 bit multiply and add. */
12858 madd32 (sim_cpu *cpu)
12860 unsigned rm = INSTR (20, 16);
12861 unsigned ra = INSTR (14, 10);
12862 unsigned rn = INSTR (9, 5);
12863 unsigned rd = INSTR (4, 0);
12865 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12866 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12867 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12868 + aarch64_get_reg_u32 (cpu, rn, NO_SP)
12869 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12872 /* 64 bit multiply and add. */
12874 madd64 (sim_cpu *cpu)
12876 unsigned rm = INSTR (20, 16);
12877 unsigned ra = INSTR (14, 10);
12878 unsigned rn = INSTR (9, 5);
12879 unsigned rd = INSTR (4, 0);
12881 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12882 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12883 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12884 + (aarch64_get_reg_u64 (cpu, rn, NO_SP)
12885 * aarch64_get_reg_u64 (cpu, rm, NO_SP)));
12888 /* 32 bit multiply and sub. */
12890 msub32 (sim_cpu *cpu)
12892 unsigned rm = INSTR (20, 16);
12893 unsigned ra = INSTR (14, 10);
12894 unsigned rn = INSTR (9, 5);
12895 unsigned rd = INSTR (4, 0);
12897 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12898 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12899 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12900 - aarch64_get_reg_u32 (cpu, rn, NO_SP)
12901 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12904 /* 64 bit multiply and sub. */
12906 msub64 (sim_cpu *cpu)
12908 unsigned rm = INSTR (20, 16);
12909 unsigned ra = INSTR (14, 10);
12910 unsigned rn = INSTR (9, 5);
12911 unsigned rd = INSTR (4, 0);
12913 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12914 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12915 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12916 - aarch64_get_reg_u64 (cpu, rn, NO_SP)
12917 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
12920 /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit. */
12922 smaddl (sim_cpu *cpu)
12924 unsigned rm = INSTR (20, 16);
12925 unsigned ra = INSTR (14, 10);
12926 unsigned rn = INSTR (9, 5);
12927 unsigned rd = INSTR (4, 0);
12929 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12930 obtain a 64 bit product. */
12931 aarch64_set_reg_s64
12933 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12934 + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12935 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12938 /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
12940 smsubl (sim_cpu *cpu)
12942 unsigned rm = INSTR (20, 16);
12943 unsigned ra = INSTR (14, 10);
12944 unsigned rn = INSTR (9, 5);
12945 unsigned rd = INSTR (4, 0);
12947 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12948 obtain a 64 bit product. */
12949 aarch64_set_reg_s64
12951 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12952 - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12953 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
/* Integer Multiply/Divide.  */

/* First some macros and a helper function.  */
/* Macros to test or access elements of 64 bit words.  */

/* Mask used to access lo 32 bits of 64 bit unsigned int.  */
#define LOW_WORD_MASK ((1ULL << 32) - 1)
/* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int.  */
#define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
/* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int.  */
#define highWordToU64(_value_u64) ((_value_u64) >> 32)

/* Offset of sign bit in 64 bit signed integer.  */
#define SIGN_SHIFT_U64 63
/* The sign bit itself -- also identifies the minimum negative int value.
   N.B. use ULL so the shift is performed on a type at least 64 bits wide
   even on hosts where unsigned long is only 32 bits.  */
#define SIGN_BIT_U64 (1ULL << SIGN_SHIFT_U64)
/* Return true if a 64 bit signed int presented as an unsigned int is the
   most negative value.  */
#define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
/* Return true (non-zero) if a 64 bit signed int presented as an unsigned
   int has its sign bit set to false.  */
#define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
/* Return 1L or -1L according to whether a 64 bit signed int presented as
   an unsigned int has its sign bit set or not.
   N.B. previously this expanded "value_u64", which is not the macro
   parameter, and lacked a closing parenthesis, so any use failed to
   compile.  */
#define signOfU64(_value_u64) (1L + (((_value_u64) >> SIGN_SHIFT_U64) * -2L))
/* Clear the sign bit of a 64 bit signed int presented as an unsigned int.  */
#define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
/* Multiply two 64 bit ints and return
   the hi 64 bits of the 128 bit product.  */

static uint64_t
mul64hi (uint64_t value1, uint64_t value2)
{
  /* Split each operand into 32 bit halves.  */
  uint64_t a_lo = value1 & 0xffffffffULL;
  uint64_t a_hi = value1 >> 32;
  uint64_t b_lo = value2 & 0xffffffffULL;
  uint64_t b_hi = value2 >> 32;

  /* The four cross-products.  */
  uint64_t lo_lo = a_lo * b_lo;
  uint64_t lo_hi = a_lo * b_hi;
  uint64_t hi_lo = a_hi * b_lo;
  uint64_t hi_hi = a_hi * b_hi;

  /* Accumulate the middle terms together with the top half of the low
     product, tracking overflow out of the 64 bit accumulator.  */
  uint64_t carry = 0;
  uint64_t mid = (lo_lo >> 32) + lo_hi;

  if (mid < lo_hi)
    carry++;

  mid += hi_lo;
  if (mid < hi_lo)
    carry++;

  /* High word of the product: the high cross-product plus the top half
     of the middle accumulator plus any carry shifted into place.  */
  return hi_hi + (mid >> 32) + (carry << 32);
}
13032 /* Signed multiply high, source, source2 :
13033 64 bit, dest <-- high 64-bit of result. */
13035 smulh (sim_cpu *cpu)
13039 unsigned rm = INSTR (20, 16);
13040 unsigned rn = INSTR (9, 5);
13041 unsigned rd = INSTR (4, 0);
13042 GReg ra = INSTR (14, 10);
13043 int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
13044 int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
13052 /* Convert to unsigned and use the unsigned mul64hi routine
13053 the fix the sign up afterwards. */
13074 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13076 uresult = mul64hi (uvalue1, uvalue2);
13081 /* Multiply 128-bit result by -1, which means highpart gets inverted,
13082 and has carry in added only if low part is 0. */
13084 if ((uvalue1 * uvalue2) == 0)
13088 aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
13091 /* Unsigned multiply add long -- source, source2 :
13092 32 bit, source3 : 64 bit. */
13094 umaddl (sim_cpu *cpu)
13096 unsigned rm = INSTR (20, 16);
13097 unsigned ra = INSTR (14, 10);
13098 unsigned rn = INSTR (9, 5);
13099 unsigned rd = INSTR (4, 0);
13101 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13102 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
13103 obtain a 64 bit product. */
13104 aarch64_set_reg_u64
13106 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13107 + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13108 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13111 /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
13113 umsubl (sim_cpu *cpu)
13115 unsigned rm = INSTR (20, 16);
13116 unsigned ra = INSTR (14, 10);
13117 unsigned rn = INSTR (9, 5);
13118 unsigned rd = INSTR (4, 0);
13120 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13121 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
13122 obtain a 64 bit product. */
13123 aarch64_set_reg_u64
13125 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13126 - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13127 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13130 /* Unsigned multiply high, source, source2 :
13131 64 bit, dest <-- high 64-bit of result. */
13133 umulh (sim_cpu *cpu)
13135 unsigned rm = INSTR (20, 16);
13136 unsigned rn = INSTR (9, 5);
13137 unsigned rd = INSTR (4, 0);
13138 GReg ra = INSTR (14, 10);
13143 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13144 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13145 mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
13146 aarch64_get_reg_u64 (cpu, rm, NO_SP)));
/* Secondary decode for the data-processing 3-source group:
   dispatch on op31:o0 to the individual multiply/multiply-accumulate
   emulation routines.  */
13150 dexDataProc3Source (sim_cpu *cpu)
13152 /* assert instr[28,24] == 11011. */
13153 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
13154 instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
13155 instr[23,21] = op31 : 111 ==> UNALLOC, o2 ==> ok
13156 instr[15] = o0 : 0/1 ==> ok
13157 instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB, (32/64 bit)
13158 0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
13159 0100 ==> SMULH, (64 bit only)
13160 1010 ==> UMADDL, 1011 ==> UMSUBL, (64 bit only)
13161 1100 ==> UMULH (64 bit only)
13165 uint32_t size = INSTR (31, 31);
13166 uint32_t op54 = INSTR (30, 29);
13167 uint32_t op31 = INSTR (23, 21);
13168 uint32_t o0 = INSTR (15, 15);
13185 dispatch = (op31 << 1) | o0;
13189 case 0: madd64 (cpu); return;
13190 case 1: msub64 (cpu); return;
13191 case 2: smaddl (cpu); return;
13192 case 3: smsubl (cpu); return;
13193 case 4: smulh (cpu); return;
13194 case 10: umaddl (cpu); return;
13195 case 11: umsubl (cpu); return;
13196 case 12: umulh (cpu); return;
13197 default: HALT_UNALLOC;
/* Secondary decode for the Data Processing (register) group.  */
13202 dexDPReg (sim_cpu *cpu)
13204 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13205 assert group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
13206 bits [28:24:21] of a DPReg are the secondary dispatch vector. */
13207 uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
13211 case DPREG_LOG_000:
13212 case DPREG_LOG_001:
13213 dexLogicalShiftedRegister (cpu); return;
13215 case DPREG_ADDSHF_010:
13216 dexAddSubtractShiftedRegister (cpu); return;
13218 case DPREG_ADDEXT_011:
13219 dexAddSubtractExtendedRegister (cpu); return;
13221 case DPREG_ADDCOND_100:
13223 /* This set bundles a variety of different operations. */
/* NOTE: mask1..mask4 below are all the same value; only the compared
   val<n> distinguishes the sub-groups.  */
13225 /* 1) add/sub w carry. */
13226 uint32_t mask1 = 0x1FE00000U;
13227 uint32_t val1 = 0x1A000000U;
13228 /* 2) cond compare register/immediate. */
13229 uint32_t mask2 = 0x1FE00000U;
13230 uint32_t val2 = 0x1A400000U;
13231 /* 3) cond select. */
13232 uint32_t mask3 = 0x1FE00000U;
13233 uint32_t val3 = 0x1A800000U;
13234 /* 4) data proc 1/2 source. */
13235 uint32_t mask4 = 0x1FE00000U;
13236 uint32_t val4 = 0x1AC00000U;
13238 if ((aarch64_get_instr (cpu) & mask1) == val1)
13239 dexAddSubtractWithCarry (cpu);
13241 else if ((aarch64_get_instr (cpu) & mask2) == val2)
13244 else if ((aarch64_get_instr (cpu) & mask3) == val3)
13245 dexCondSelect (cpu);
13247 else if ((aarch64_get_instr (cpu) & mask4) == val4)
13249 /* Bit 30 is clear for data proc 2 source
13250 and set for data proc 1 source. */
13251 if (aarch64_get_instr (cpu) & (1U << 30))
13252 dexDataProc1Source (cpu);
13254 dexDataProc2Source (cpu);
13258 /* Should not reach here. */
13264 case DPREG_3SRC_110:
13265 dexDataProc3Source (cpu); return;
13267 case DPREG_UNALLOC_101:
13270 case DPREG_3SRC_111:
13271 dexDataProc3Source (cpu); return;
13274 /* Should never reach here. */
13279 /* Unconditional Branch immediate.
13280 Offset is a PC-relative byte offset in the range +/- 128MiB.
13281 The offset is assumed to be raw from the decode i.e. the
13282 simulator is expected to scale them from word offsets to byte. */
13284 /* Unconditional branch. */
/* B: adjust the next PC by OFFSET; no link register update.  */
13286 buc (sim_cpu *cpu, int32_t offset)
13288 aarch64_set_next_PC_by_offset (cpu, offset);
/* Call-nesting depth used only to indent the branch trace output.  */
13291 static unsigned stack_depth = 0;
13293 /* Unconditional branch and link -- writes return PC to LR. */
13295 bl (sim_cpu *cpu, int32_t offset)
13297 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13298 aarch64_save_LR (cpu);
13299 aarch64_set_next_PC_by_offset (cpu, offset);
/* Trace the call target plus the first three integer argument regs.  */
13301 if (TRACE_BRANCH_P (cpu))
13305 " %*scall %" PRIx64 " [%s]"
13306 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13307 stack_depth, " ", aarch64_get_next_PC (cpu),
13308 aarch64_get_func (CPU_STATE (cpu),
13309 aarch64_get_next_PC (cpu)),
13310 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13311 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13312 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13317 /* Unconditional Branch register.
13318 Branch/return address is in source register. */
13320 /* Unconditional branch. */
/* BR: branch to the address held in Xn.  */
13324 unsigned rn = INSTR (9, 5);
13325 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13326 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13329 /* Unconditional branch and link -- writes return PC to LR. */
/* BLR: branch to the address in Xn, saving the return address in LR.  */
13333 unsigned rn = INSTR (9, 5);
13335 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13336 /* The pseudo code in the spec says we update LR before fetching.
13337 the value from the rn. */
13338 aarch64_save_LR (cpu);
13339 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
/* Trace the call target plus the first three integer argument regs.  */
13341 if (TRACE_BRANCH_P (cpu))
13345 " %*scall %" PRIx64 " [%s]"
13346 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13347 stack_depth, " ", aarch64_get_next_PC (cpu),
13348 aarch64_get_func (CPU_STATE (cpu),
13349 aarch64_get_next_PC (cpu)),
13350 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13351 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13352 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13357 /* Return -- assembler will default source to LR this is functionally
13358 equivalent to br but, presumably, unlike br it side effects the
13359 branch predictor. */
13363 unsigned rn = INSTR (9, 5);
13364 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13366 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* Trace the return along with the value in the result register X0.  */
13367 if (TRACE_BRANCH_P (cpu))
13370 " %*sreturn [result: %" PRIx64 "]",
13371 stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
13376 /* NOP -- we implement this and call it from the decode in case we
13377 want to intercept it later. */
13382 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13385 /* Data synchronization barrier. */
/* Only the decode trace is visible here; presumably a no-op in this
   single-threaded simulator -- TODO confirm against the full source.  */
13390 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13393 /* Data memory barrier. */
/* Only the decode trace is visible here; presumably a no-op in this
   single-threaded simulator -- TODO confirm against the full source.  */
13398 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13401 /* Instruction synchronization barrier. */
/* Only the decode trace is visible here; presumably a no-op in this
   simulator -- TODO confirm against the full source.  */
13406 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* Decode B/BL: scale the word offset to bytes and dispatch on bit 31.  */
13410 dexBranchImmediate (sim_cpu *cpu)
13412 /* assert instr[30,26] == 00101
13413 instr[31] ==> 0 == B, 1 == BL
13414 instr[25,0] == imm26 branch offset counted in words. */
13416 uint32_t top = INSTR (31, 31);
13417 /* We have a 26 bit signed word offset which we need to pass to the
13418 execute routine as a signed byte offset. */
13419 int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
13427 /* Control Flow. */
13429 /* Conditional branch
13431 Offset is a PC-relative byte offset in the range +/- 1MiB pos is
13432 a bit position in the range 0 .. 63
13434 cc is a CondCode enum value as pulled out of the decode
13436 N.B. any offset register (source) can only be Xn or Wn. */
13439 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
13441 /* The test returns TRUE if CC is met. */
13442 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13443 if (testConditionCode (cpu, cc))
13444 aarch64_set_next_PC_by_offset (cpu, offset);
13447 /* 32 bit branch on register non-zero. */
13449 cbnz32 (sim_cpu *cpu, int32_t offset)
13451 unsigned rt = INSTR (4, 0);
13453 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13454 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
13455 aarch64_set_next_PC_by_offset (cpu, offset);
13458 /* 64 bit branch on register non-zero. */
13460 cbnz (sim_cpu *cpu, int32_t offset)
13462 unsigned rt = INSTR (4, 0);
13464 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13465 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
13466 aarch64_set_next_PC_by_offset (cpu, offset);
13469 /* 32 bit branch on register zero. */
13471 cbz32 (sim_cpu *cpu, int32_t offset)
13473 unsigned rt = INSTR (4, 0);
13475 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13476 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
13477 aarch64_set_next_PC_by_offset (cpu, offset);
13480 /* 64 bit branch on register zero. */
13482 cbz (sim_cpu *cpu, int32_t offset)
13484 unsigned rt = INSTR (4, 0);
13486 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13487 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
13488 aarch64_set_next_PC_by_offset (cpu, offset);
13491 /* Branch on register bit test non-zero -- one size fits all. */
13493 tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13495 unsigned rt = INSTR (4, 0);
13497 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13498 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
13499 aarch64_set_next_PC_by_offset (cpu, offset);
13502 /* Branch on register bit test zero -- one size fits all. */
13504 tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13506 unsigned rt = INSTR (4, 0);
13508 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13509 if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
13510 aarch64_set_next_PC_by_offset (cpu, offset);
/* Decode CBZ/CBNZ and dispatch on size (bit 31) and op (bit 24).
   NOTE(review): the 64-bit cbz dispatch arm is elided from this view.  */
13514 dexCompareBranchImmediate (sim_cpu *cpu)
13516 /* instr[30,25] = 01 1010
13517 instr[31] = size : 0 ==> 32, 1 ==> 64
13518 instr[24] = op : 0 ==> CBZ, 1 ==> CBNZ
13519 instr[23,5] = simm19 branch offset counted in words
13522 uint32_t size = INSTR (31, 31);
13523 uint32_t op = INSTR (24, 24);
13524 int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13529 cbz32 (cpu, offset);
13531 cbnz32 (cpu, offset);
13538 cbnz (cpu, offset);
/* Decode TBZ/TBNZ: assemble the 6-bit test-bit index from b5:b40 and
   dispatch on the op bit.  */
13543 dexTestBranchImmediate (sim_cpu *cpu)
13545 /* instr[31] = b5 : bit 5 of test bit idx
13546 instr[30,25] = 01 1011
13547 instr[24] = op : 0 ==> TBZ, 1 == TBNZ
13548 instr[23,19] = b40 : bits 4 to 0 of test bit idx
13549 instr[18,5] = simm14 : signed offset counted in words
13550 instr[4,0] = uimm5 */
13552 uint32_t pos = ((INSTR (31, 31) << 5) | INSTR (23, 19));
13553 int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
13555 NYI_assert (30, 25, 0x1b);
13557 if (INSTR (24, 24) == 0)
13558 tbz (cpu, pos, offset);
13560 tbnz (cpu, pos, offset);
/* Decode B.cond: op must be 0; offset scaled from words to bytes.  */
13564 dexCondBranchImmediate (sim_cpu *cpu)
13566 /* instr[31,25] = 010 1010
13567 instr[24] = op1; op => 00 ==> B.cond
13568 instr[23,5] = simm19 : signed offset counted in words
13570 instr[3,0] = cond */
13573 uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
13575 NYI_assert (31, 25, 0x2a);
13580 offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13582 bcc (cpu, offset, INSTR (3, 0));
/* Decode BR/BLR/RET and the (unimplemented) ERET/DRPS forms.  */
13586 dexBranchRegister (sim_cpu *cpu)
13588 /* instr[31,25] = 110 1011
13589 instr[24,21] = op : 0 ==> BR, 1 => BLR, 2 => RET, 3 => ERET, 4 => DRPS
13590 instr[20,16] = op2 : must be 11111
13591 instr[15,10] = op3 : must be 000000
13592 instr[4,0] = op4 : must be 00000. */
13594 uint32_t op = INSTR (24, 21);
13595 uint32_t op2 = INSTR (20, 16);
13596 uint32_t op3 = INSTR (15, 10);
13597 uint32_t op4 = INSTR (4, 0);
13599 NYI_assert (31, 25, 0x6b);
13601 if (op2 != 0x1F || op3 != 0 || op4 != 0)
13615 /* ERET and DRPS accept 0b11111 for rn = instr [4,0]. */
13616 /* anything else is unallocated. */
13617 uint32_t rn = INSTR (4, 0);
13622 if (op == 4 || op == 5)
/* Angel (ARM semihosting) operation numbers, passed in w0 with HLT.  */
13629 /* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
13630 but this may not be available. So instead we define the values we need
13632 #define AngelSVC_Reason_Open 0x01
13633 #define AngelSVC_Reason_Close 0x02
13634 #define AngelSVC_Reason_Write 0x05
13635 #define AngelSVC_Reason_Read 0x06
13636 #define AngelSVC_Reason_IsTTY 0x09
13637 #define AngelSVC_Reason_Seek 0x0A
13638 #define AngelSVC_Reason_FLen 0x0C
13639 #define AngelSVC_Reason_Remove 0x0E
13640 #define AngelSVC_Reason_Rename 0x0F
13641 #define AngelSVC_Reason_Clock 0x10
13642 #define AngelSVC_Reason_Time 0x11
13643 #define AngelSVC_Reason_System 0x12
13644 #define AngelSVC_Reason_Errno 0x13
13645 #define AngelSVC_Reason_GetCmdLine 0x15
13646 #define AngelSVC_Reason_HeapInfo 0x16
13647 #define AngelSVC_Reason_ReportException 0x18
13648 #define AngelSVC_Reason_Elapsed 0x30
/* Process an HLT instruction: either report a trap to the debugger or,
   for the semihosting form, emulate the Angel SVC selected by w0.
   NOTE(review): the dispatch on VAL (which HLT immediates are treated
   as Angel calls) is elided from this view -- confirm in full source.
   On successful Angel calls RESULT is written back to x0 at the end.  */
13652 handle_halt (sim_cpu *cpu, uint32_t val)
13654 uint64_t result = 0;
13656 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13659 TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
13660 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13661 sim_stopped, SIM_SIGTRAP);
13664 /* We have encountered an Angel SVC call. See if we can process it. */
13665 switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
13667 case AngelSVC_Reason_HeapInfo:
13669 /* Get the values. */
13670 uint64_t stack_top = aarch64_get_stack_start (cpu);
13671 uint64_t heap_base = aarch64_get_heap_start (cpu);
13673 /* Get the pointer */
13674 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13675 ptr = aarch64_get_mem_u64 (cpu, ptr);
13677 /* Fill in the memory block. */
13678 /* Start addr of heap. */
13679 aarch64_set_mem_u64 (cpu, ptr + 0, heap_base);
13680 /* End addr of heap. */
13681 aarch64_set_mem_u64 (cpu, ptr + 8, stack_top);
13682 /* Lowest stack addr. */
13683 aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
13684 /* Initial stack addr. */
13685 aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);
13687 TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
13691 case AngelSVC_Reason_Open:
13693 /* Get the pointer */
13694 /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);. */
13695 /* FIXME: For now we just assume that we will only be asked
13696 to open the standard file descriptors. */
13700 TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
13704 case AngelSVC_Reason_Close:
13706 uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13707 TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
13712 case AngelSVC_Reason_Errno:
13714 TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
13717 case AngelSVC_Reason_Clock:
/* Report elapsed time in centiseconds, whichever direction the
   scaling from CLOCKS_PER_SEC must go.  */
13719 #ifdef CLOCKS_PER_SEC
13720 (CLOCKS_PER_SEC >= 100)
13721 ? (clock () / (CLOCKS_PER_SEC / 100))
13722 : ((clock () * 100) / CLOCKS_PER_SEC)
13724 /* Presume unix... clock() returns microseconds. */
13728 TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
13731 case AngelSVC_Reason_GetCmdLine:
13733 /* Get the pointer */
13734 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13735 ptr = aarch64_get_mem_u64 (cpu, ptr);
13737 /* FIXME: No command line for now. */
13738 aarch64_set_mem_u64 (cpu, ptr, 0);
13739 TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
13743 case AngelSVC_Reason_IsTTY:
13745 TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
13748 case AngelSVC_Reason_Write:
13750 /* Get the pointer */
13751 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13752 /* Get the write control block. */
13753 uint64_t fd = aarch64_get_mem_u64 (cpu, ptr);
13754 uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
13755 uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);
13757 TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
13758 PRIx64 " on descriptor %" PRIx64,
13763 TRACE_SYSCALL (cpu,
13764 " AngelSVC: Write: Suspiciously long write: %ld",
13766 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13767 sim_stopped, SIM_SIGBUS);
13771 printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
13775 TRACE (cpu, 0, "\n");
13776 sim_io_eprintf (CPU_STATE (cpu), "%.*s",
13777 (int) len, aarch64_get_mem_ptr (cpu, buf));
13778 TRACE (cpu, 0, "\n");
13782 TRACE_SYSCALL (cpu,
13783 " AngelSVC: Write: Unexpected file handle: %d",
13785 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13786 sim_stopped, SIM_SIGABRT);
13791 case AngelSVC_Reason_ReportException:
13793 /* Get the pointer */
13794 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13795 /*ptr = aarch64_get_mem_u64 (cpu, ptr);. */
13796 uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
13797 uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);
13799 TRACE_SYSCALL (cpu,
13800 "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
13803 if (type == 0x20026)
13804 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13805 sim_exited, state);
13807 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13808 sim_stopped, SIM_SIGINT);
13812 case AngelSVC_Reason_Read:
13813 case AngelSVC_Reason_FLen:
13814 case AngelSVC_Reason_Seek:
13815 case AngelSVC_Reason_Remove:
13816 case AngelSVC_Reason_Time:
13817 case AngelSVC_Reason_System:
13818 case AngelSVC_Reason_Rename:
13819 case AngelSVC_Reason_Elapsed:
13821 TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
13822 aarch64_get_reg_u32 (cpu, 0, NO_SP));
13823 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13824 sim_stopped, SIM_SIGTRAP);
13827 aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
/* Decode the exception-generation group.  Only BRK (treated as program
   exit with the value in x0) and HLT are implemented.  */
13831 dexExcpnGen (sim_cpu *cpu)
13833 /* instr[31:24] = 11010100
13834 instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
13835 010 ==> HLT, 101 ==> DBG GEN EXCPN
13836 instr[20,5] = imm16
13837 instr[4,2] = opc2 000 ==> OK, ow ==> UNALLOC
13838 instr[1,0] = LL : discriminates opc */
13840 uint32_t opc = INSTR (23, 21);
13841 uint32_t imm16 = INSTR (20, 5);
13842 uint32_t opc2 = INSTR (4, 2);
13845 NYI_assert (31, 24, 0xd4);
13852 /* We only implement HLT and BRK for now. */
13853 if (opc == 1 && LL == 0)
13855 TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
13856 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13857 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13860 if (opc == 2 && LL == 0)
13861 handle_halt (cpu, imm16);
13863 else if (opc == 0 || opc == 5)
13870 /* Stub for accessing system registers. */
/* Return the value of the system register selected by op0..op2/crn/crm.
   Only the handful of registers the C library needs are modelled.  */
13873 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13874 unsigned crm, unsigned op2)
13876 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
13877 /* DCZID_EL0 - the Data Cache Zero ID register.
13878 We do not support DC ZVA at the moment, so
13879 we return a value with the disable bit set.
13880 We implement support for the DCZID register since
13881 it is used by the C library's memset function. */
13882 return ((uint64_t) 1) << 4;
13884 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
13885 /* Cache Type Register. */
13886 return 0x80008000UL;
13888 if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
13889 /* TPIDR_EL0 - thread pointer id. */
13890 return aarch64_get_thread_id (cpu);
/* FPCR - floating point control register.  */
13892 if (op1 == 3 && crm == 4 && op2 == 0)
13893 return aarch64_get_FPCR (cpu);
/* FPSR - floating point status register.  */
13895 if (op1 == 3 && crm == 4 && op2 == 1)
13896 return aarch64_get_FPSR (cpu);
/* NZCV flags, read via the simulator's CPSR model.  */
13898 else if (op1 == 3 && crm == 2 && op2 == 0)
13899 return aarch64_get_CPSR (cpu);
/* Write VAL to the system register selected by op0..op2/crn/crm.
   Mirrors system_get: only FPCR, FPSR and the NZCV flags are modelled.  */
13905 system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13906 unsigned crm, unsigned op2, uint64_t val)
13908 if (op1 == 3 && crm == 4 && op2 == 0)
13909 aarch64_set_FPCR (cpu, val);
13911 else if (op1 == 3 && crm == 4 && op2 == 1)
13912 aarch64_set_FPSR (cpu, val);
13914 else if (op1 == 3 && crm == 2 && op2 == 0)
13915 aarch64_set_CPSR (cpu, val);
/* MRS: read a system register into Xt via system_get.  */
13922 do_mrs (sim_cpu *cpu)
13924 /* instr[31:20] = 1101 0101 0001 1
13931 unsigned sys_op0 = INSTR (19, 19) + 2;
13932 unsigned sys_op1 = INSTR (18, 16);
13933 unsigned sys_crn = INSTR (15, 12);
13934 unsigned sys_crm = INSTR (11, 8);
13935 unsigned sys_op2 = INSTR (7, 5);
13936 unsigned rt = INSTR (4, 0);
13938 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13939 aarch64_set_reg_u64 (cpu, rt, NO_SP,
13940 system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2));
/* MSR (immediate) -- decoded but all PSTATE-field writes are still NYI.  */
13944 do_MSR_immediate (sim_cpu *cpu)
13946 /* instr[31:19] = 1101 0101 0000 0
13948 instr[15,12] = 0100
13951 instr[4,0] = 1 1111 */
13953 unsigned op1 = INSTR (18, 16);
13954 /*unsigned crm = INSTR (11, 8);*/
13955 unsigned op2 = INSTR (7, 5);
13957 NYI_assert (31, 19, 0x1AA0);
13958 NYI_assert (15, 12, 0x4);
13959 NYI_assert (4, 0, 0x1F);
13964 HALT_NYI; /* set SPSel. */
13971 HALT_NYI; /* set DAIFset. */
13973 HALT_NYI; /* set DAIFclr. */
/* MSR (register): write Xt to a system register via system_set.  */
13982 do_MSR_reg (sim_cpu *cpu)
13984 /* instr[31:20] = 1101 0101 0001
13992 unsigned sys_op0 = INSTR (19, 19) + 2;
13993 unsigned sys_op1 = INSTR (18, 16);
13994 unsigned sys_crn = INSTR (15, 12);
13995 unsigned sys_crm = INSTR (11, 8);
13996 unsigned sys_op2 = INSTR (7, 5);
13997 unsigned rt = INSTR (4, 0);
13999 NYI_assert (31, 20, 0xD51);
14001 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
14002 system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
14003 aarch64_get_reg_u64 (cpu, rt, NO_SP));
/* SYS instruction (cache maintenance etc.) -- accepted but ignored.  */
14007 do_SYS (sim_cpu *cpu)
14009 /* instr[31,19] = 1101 0101 0000 1
14015 NYI_assert (31, 19, 0x1AA1);
14017 /* FIXME: For now we just silently accept system ops. */
/* Decode the System instruction class: HINT (NOP), barriers,
   SYS/DC aliases, and MRS/MSR accesses.  */
14021 dexSystem (sim_cpu *cpu)
14023 /* instr[31:22] = 1101 01010 0
14030 instr[4,0] = uimm5 */
14032 /* We are interested in HINT, DSB, DMB and ISB
14034 Hint #0 encodes NOOP (this is the only hint we care about)
14035 L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111,
14036 CRm op2 != 0000 000 OR CRm op2 == 0000 000 || CRm op > 0000 101
14038 DSB, DMB, ISB are data synchronization barrier, data memory barrier
14039 and instruction synchronization barrier, respectively, where
14041 L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
14042 op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
14043 CRm<3:2> ==> domain, CRm<1:0> ==> types,
14044 domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
14045 10 ==> InnerShareable, 11 ==> FullSystem
14046 types : 01 ==> Reads, 10 ==> Writes,
14047 11 ==> All, 00 ==> All (domain == FullSystem). */
14049 unsigned rt = INSTR (4, 0);
14051 NYI_assert (31, 22, 0x354);
14053 switch (INSTR (21, 12))
14058 /* NOP has CRm != 0000 OR. */
14059 /* (CRm == 0000 AND (op2 == 000 OR op2 > 101)). */
14060 uint32_t crm = INSTR (11, 8);
14061 uint32_t op2 = INSTR (7, 5);
14063 if (crm != 0 || (op2 == 0 || op2 > 5))
14065 /* Actually call nop method so we can reimplement it later. */
14074 uint32_t op2 = INSTR (7, 5);
14079 case 4: dsb (cpu); return;
14080 case 5: dmb (cpu); return;
14081 case 6: isb (cpu); return;
14082 default: HALT_UNALLOC;
14093 do_SYS (cpu); /* DC is an alias of SYS. */
14097 if (INSTR (21, 20) == 0x1)
14099 else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
14100 do_MSR_immediate (cpu);
/* Secondary decode for the Branch / Exception / System group.  */
14108 dexBr (sim_cpu *cpu)
14110 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
14111 assert group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
14112 bits [31,29] of a BrExSys are the secondary dispatch vector. */
14113 uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
14118 return dexBranchImmediate (cpu);
14120 case BR_IMMCMP_001:
14121 /* Compare has bit 25 clear while test has it set. */
14122 if (!INSTR (25, 25))
14123 dexCompareBranchImmediate (cpu);
14125 dexTestBranchImmediate (cpu);
14128 case BR_IMMCOND_010:
14129 /* This is a conditional branch if bit 25 is clear otherwise
14131 if (!INSTR (25, 25))
14132 dexCondBranchImmediate (cpu);
14137 case BR_UNALLOC_011:
14141 dexBranchImmediate (cpu);
14144 case BR_IMMCMP_101:
14145 /* Compare has bit 25 clear while test has it set. */
14146 if (!INSTR (25, 25))
14147 dexCompareBranchImmediate (cpu);
14149 dexTestBranchImmediate (cpu);
14153 /* Unconditional branch reg has bit 25 set. */
14154 if (INSTR (25, 25))
14155 dexBranchRegister (cpu);
14157 /* This includes both Excpn Gen, System and unalloc operations.
14158 We need to decode the Excpn Gen operation BRK so we can plant
14159 debugger entry points.
14160 Excpn Gen operations have instr [24] = 0.
14161 we need to decode at least one of the System operations NOP
14162 which is an alias for HINT #0.
14163 System operations have instr [24,22] = 100. */
14164 else if (INSTR (24, 24) == 0)
14167 else if (INSTR (24, 22) == 4)
14175 case BR_UNALLOC_111:
14179 /* Should never reach here. */
/* Top-level decode: dispatch on the instruction's primary group bits
   to the per-group secondary decoders.  */
14185 aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
14187 /* We need to check if gdb wants an in here. */
14188 /* checkBreak (cpu);. */
14190 uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
14194 case GROUP_PSEUDO_0000: dexPseudo (cpu); break;
14195 case GROUP_LDST_0100: dexLdSt (cpu); break;
14196 case GROUP_DPREG_0101: dexDPReg (cpu); break;
14197 case GROUP_LDST_0110: dexLdSt (cpu); break;
14198 case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break;
14199 case GROUP_DPIMM_1000: dexDPImm (cpu); break;
14200 case GROUP_DPIMM_1001: dexDPImm (cpu); break;
14201 case GROUP_BREXSYS_1010: dexBr (cpu); break;
14202 case GROUP_BREXSYS_1011: dexBr (cpu); break;
14203 case GROUP_LDST_1100: dexLdSt (cpu); break;
14204 case GROUP_DPREG_1101: dexDPReg (cpu); break;
14205 case GROUP_LDST_1110: dexLdSt (cpu); break;
14206 case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break;
14208 case GROUP_UNALLOC_0001:
14209 case GROUP_UNALLOC_0010:
14210 case GROUP_UNALLOC_0011:
14214 /* Should never reach here. */
/* Fetch, trace and execute one instruction at the current PC.
   NOTE(review): the early-out when PC equals the top-level return
   sentinel is elided here -- presumably it stops stepping; confirm.  */
14220 aarch64_step (sim_cpu *cpu)
14222 uint64_t pc = aarch64_get_PC (cpu);
14224 if (pc == TOP_LEVEL_RETURN_PC)
/* Default fall-through next-PC; branches overwrite this.  */
14227 aarch64_set_next_PC (cpu, pc + 4);
14229 /* Code is always little-endian. */
14230 sim_core_read_buffer (CPU_STATE (cpu), cpu, read_map,
14231 & aarch64_get_instr (cpu), pc, 4);
14232 aarch64_get_instr (cpu) = endian_le2h_4 (aarch64_get_instr (cpu));
14234 TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
14235 aarch64_get_instr (cpu));
14236 TRACE_DISASM (cpu, pc);
14238 aarch64_decode_and_execute (cpu, pc);
/* Main simulation loop: step until the top level returns, servicing
   pending simulator events between instructions, then report exit
   with the program's result value from x0.  */
14244 aarch64_run (SIM_DESC sd)
14246 sim_cpu *cpu = STATE_CPU (sd, 0);
14248 while (aarch64_step (cpu))
14250 aarch64_update_PC (cpu);
14252 if (sim_events_tick (sd))
14253 sim_events_process (sd);
14256 sim_engine_halt (sd, cpu, NULL, aarch64_get_PC (cpu),
14257 sim_exited, aarch64_get_reg_s32 (cpu, R0, NO_SP));
/* Prepare the CPU for execution starting at PC: set up SP/FP at the
   top of the stack, seed LR with the magic top-level-return address,
   and initialise the logical-immediate decode table.  */
14261 aarch64_init (sim_cpu *cpu, uint64_t pc)
14263 uint64_t sp = aarch64_get_stack_start (cpu);
14265 /* Install SP, FP and PC and set LR to -20
14266 so we can detect a top-level return. */
14267 aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
14268 aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
14269 aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
14270 aarch64_set_next_PC (cpu, pc);
14271 aarch64_update_PC (cpu);
14272 aarch64_init_LIT_table ();