1 /* simulator.c -- Interface for the AArch64 simulator.
3 Copyright (C) 2015-2016 Free Software Foundation, Inc.
5 Contributed by Red Hat.
7 This file is part of GDB.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>. */
26 #include <sys/types.h>
31 #include "simulator.h"
/* Condition-flag helpers: test a CPSR bit of the current cpu.  */
#define TST(_flag)   (aarch64_test_CPSR_bit (cpu, _flag))
#define IS_SET(_X)   (TST (( _X )) ? 1 : 0)
#define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)

/* Space saver macro.  */
#define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))
/* Report an unallocated (illegal) encoding and stop the simulated
   program with SIGILL.  Wrapped in do/while so it behaves as a
   single statement.  */
#define HALT_UNALLOC							\
  do									\
    {									\
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));				\
      TRACE_INSN (cpu,							\
		  "Unallocated instruction detected at sim line %d,"	\
		  " exe addr %" PRIx64,					\
		  __LINE__, aarch64_get_PC (cpu));			\
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
		       sim_stopped, SIM_SIGILL);			\
    }									\
  while (0)
/* Report an instruction the simulator does not yet implement and
   stop with SIGABRT.  When tracing is off, also print a one-line
   disassembly to stderr so the user sees what was hit.  */
#define HALT_NYI							\
  do									\
    {									\
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));				\
      TRACE_INSN (cpu,							\
		  "Unimplemented instruction detected at sim line %d,"	\
		  " exe addr %" PRIx64,					\
		  __LINE__, aarch64_get_PC (cpu));			\
      if (! TRACE_ANY_P (cpu))						\
	{								\
	  sim_io_eprintf (CPU_STATE (cpu), "SIM Error: Unimplemented instruction: "); \
	  trace_disasm (CPU_STATE (cpu), cpu, aarch64_get_PC (cpu));	\
	}								\
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
		       sim_stopped, SIM_SIGABRT);			\
    }									\
  while (0)
/* Halt (not-yet-implemented) unless instruction bits [HI:LO] hold
   EXPECTED.  Used to document and enforce decoder assumptions.  */
#define NYI_assert(HI, LO, EXPECTED)					\
  do									\
    {									\
      if (INSTR ((HI), (LO)) != (EXPECTED))				\
	HALT_NYI;							\
    }									\
  while (0)
/* Helper functions used by expandLogicalImmediate.  */

/* for i = 1, ... N result<i-1> = 1 other bits are zero.
   Returns a mask with the low N bits set (N in 0..64).

   Fix: the old code computed "(1UL << N)" and "(uint64_t)-1UL".
   On an ILP32 host "unsigned long" is 32 bits, so the shift is
   undefined for N >= 32 and the mask is truncated.  Do the whole
   computation in uint64_t.  */
static inline uint64_t
ones (int N)
{
  return (N == 64 ? (uint64_t) -1 : (((uint64_t) 1 << N) - 1));
}
/* result<0> = val<N>: extract the single bit N of VAL into bit 0.
   Delegates to pickbits64 with an empty (hi == lo) range.  */
static inline uint64_t
pickbit (uint64_t val, int N)
{
  return pickbits64 (val, N, N);
}
/* Decode the (N, immr, imms) logical-immediate triple into its
   64 bit value.  Returns 0 for encodings that are unallocated
   (0 itself is never a valid logical immediate).  */
static uint64_t
expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
{
  uint64_t imm;
  uint64_t mask;
  unsigned simd_size;
  unsigned width;

  /* The immediate value is S+1 bits to 1, left rotated by SIMDsize - R
     (in other words, right rotated by R), then replicated.  */
  if (N != 0)
    {
      simd_size = 64;
      mask = 0xffffffffffffffffull;
    }
  else
    {
      /* The element size is encoded by the position of the first
	 zero in the high bits of S:
	   0xxxxx => 32, 10xxxx => 16, 110xxx => 8,
	   1110xx => 4,  11110x => 2, anything else is unallocated.  */
      if ((S & 0x20) == 0)
	simd_size = 32;
      else if ((S & 0x10) == 0)
	{
	  simd_size = 16;
	  S &= 0xf;
	}
      else if ((S & 0x08) == 0)
	{
	  simd_size = 8;
	  S &= 0x7;
	}
      else if ((S & 0x04) == 0)
	{
	  simd_size = 4;
	  S &= 0x3;
	}
      else if ((S & 0x02) == 0)
	{
	  simd_size = 2;
	  S &= 0x1;
	}
      else
	return 0;

      mask = (1ull << simd_size) - 1;
      /* Top bits are IGNORED.  */
      R &= simd_size - 1;
    }

  /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected.  */
  if (S == simd_size - 1)
    return 0;

  /* S+1 consecutive bits to 1.  */
  /* NOTE: S can't be 63 due to detection above.  */
  imm = (1ull << (S + 1)) - 1;

  /* Rotate to the left by simd_size - R.  */
  if (R != 0)
    imm = ((imm << (simd_size - R)) & mask) | (imm >> R);

  /* Replicate the element until all 64 bits are populated.  */
  for (width = simd_size; width < 64; width *= 2)
    imm |= (imm << width);

  return imm;
}
/* Instr[22,10] encodes N immr and imms.  we want a lookup table
   for each possible combination i.e. 13 bits worth of int entries.  */
#define LI_TABLE_SIZE  (1 << 13)
static uint64_t LITable[LI_TABLE_SIZE];
163 aarch64_init_LIT_table (void)
167 for (index = 0; index < LI_TABLE_SIZE; index++)
169 uint32_t N = uimm (index, 12, 12);
170 uint32_t immr = uimm (index, 11, 6);
171 uint32_t imms = uimm (index, 5, 0);
173 LITable [index] = expand_logical_immediate (imms, immr, N);
178 dexNotify (sim_cpu *cpu)
180 /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
181 2 ==> exit Java, 3 ==> start next bytecode. */
182 uint32_t type = INSTR (14, 0);
184 TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);
189 /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
190 aarch64_get_reg_u64 (cpu, R22, 0)); */
193 /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
194 aarch64_get_reg_u64 (cpu, R22, 0)); */
197 /* aarch64_notifyMethodExit (); */
200 /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
201 aarch64_get_reg_u64 (cpu, R22, 0)); */
206 /* secondary decode within top level groups */
209 dexPseudo (sim_cpu *cpu)
211 /* assert instr[28,27] = 00
213 We provide 2 pseudo instructions:
215 HALT stops execution of the simulator causing an immediate
216 return to the x86 code which entered it.
218 CALLOUT initiates recursive entry into x86 code. A register
219 argument holds the address of the x86 routine. Immediate
220 values in the instruction identify the number of general
221 purpose and floating point register arguments to be passed
222 and the type of any value to be returned. */
224 uint32_t PSEUDO_HALT = 0xE0000000U;
225 uint32_t PSEUDO_CALLOUT = 0x00018000U;
226 uint32_t PSEUDO_CALLOUTR = 0x00018001U;
227 uint32_t PSEUDO_NOTIFY = 0x00014000U;
230 if (aarch64_get_instr (cpu) == PSEUDO_HALT)
232 TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
233 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
234 sim_stopped, SIM_SIGTRAP);
237 dispatch = INSTR (31, 15);
239 /* We do not handle callouts at the moment. */
240 if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
242 TRACE_EVENTS (cpu, " Callout");
243 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
244 sim_stopped, SIM_SIGABRT);
247 else if (dispatch == PSEUDO_NOTIFY)
254 /* Load-store single register (unscaled offset)
255 These instructions employ a base register plus an unscaled signed
258 N.B. the base register (source) can be Xn or SP. all other
259 registers may not be SP. */
261 /* 32 bit load 32 bit unscaled signed 9 bit. */
263 ldur32 (sim_cpu *cpu, int32_t offset)
265 unsigned rn = INSTR (9, 5);
266 unsigned rt = INSTR (4, 0);
268 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
269 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
273 /* 64 bit load 64 bit unscaled signed 9 bit. */
275 ldur64 (sim_cpu *cpu, int32_t offset)
277 unsigned rn = INSTR (9, 5);
278 unsigned rt = INSTR (4, 0);
280 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
281 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
285 /* 32 bit load zero-extended byte unscaled signed 9 bit. */
287 ldurb32 (sim_cpu *cpu, int32_t offset)
289 unsigned rn = INSTR (9, 5);
290 unsigned rt = INSTR (4, 0);
292 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
293 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
297 /* 32 bit load sign-extended byte unscaled signed 9 bit. */
299 ldursb32 (sim_cpu *cpu, int32_t offset)
301 unsigned rn = INSTR (9, 5);
302 unsigned rt = INSTR (4, 0);
304 aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
305 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
309 /* 64 bit load sign-extended byte unscaled signed 9 bit. */
311 ldursb64 (sim_cpu *cpu, int32_t offset)
313 unsigned rn = INSTR (9, 5);
314 unsigned rt = INSTR (4, 0);
316 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
317 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
321 /* 32 bit load zero-extended short unscaled signed 9 bit */
323 ldurh32 (sim_cpu *cpu, int32_t offset)
325 unsigned rn = INSTR (9, 5);
326 unsigned rd = INSTR (4, 0);
328 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
329 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
333 /* 32 bit load sign-extended short unscaled signed 9 bit */
335 ldursh32 (sim_cpu *cpu, int32_t offset)
337 unsigned rn = INSTR (9, 5);
338 unsigned rd = INSTR (4, 0);
340 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
341 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
345 /* 64 bit load sign-extended short unscaled signed 9 bit */
347 ldursh64 (sim_cpu *cpu, int32_t offset)
349 unsigned rn = INSTR (9, 5);
350 unsigned rt = INSTR (4, 0);
352 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
353 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
357 /* 64 bit load sign-extended word unscaled signed 9 bit */
359 ldursw (sim_cpu *cpu, int32_t offset)
361 unsigned rn = INSTR (9, 5);
362 unsigned rd = INSTR (4, 0);
364 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s32
365 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
369 /* N.B. with stores the value in source is written to the address
370 identified by source2 modified by offset. */
372 /* 32 bit store 32 bit unscaled signed 9 bit. */
374 stur32 (sim_cpu *cpu, int32_t offset)
376 unsigned rn = INSTR (9, 5);
377 unsigned rd = INSTR (4, 0);
379 aarch64_set_mem_u32 (cpu,
380 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
381 aarch64_get_reg_u32 (cpu, rd, NO_SP));
384 /* 64 bit store 64 bit unscaled signed 9 bit */
386 stur64 (sim_cpu *cpu, int32_t offset)
388 unsigned rn = INSTR (9, 5);
389 unsigned rd = INSTR (4, 0);
391 aarch64_set_mem_u64 (cpu,
392 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
393 aarch64_get_reg_u64 (cpu, rd, NO_SP));
396 /* 32 bit store byte unscaled signed 9 bit */
398 sturb (sim_cpu *cpu, int32_t offset)
400 unsigned rn = INSTR (9, 5);
401 unsigned rd = INSTR (4, 0);
403 aarch64_set_mem_u8 (cpu,
404 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
405 aarch64_get_reg_u8 (cpu, rd, NO_SP));
408 /* 32 bit store short unscaled signed 9 bit */
410 sturh (sim_cpu *cpu, int32_t offset)
412 unsigned rn = INSTR (9, 5);
413 unsigned rd = INSTR (4, 0);
415 aarch64_set_mem_u16 (cpu,
416 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
417 aarch64_get_reg_u16 (cpu, rd, NO_SP));
420 /* Load single register pc-relative label
421 Offset is a signed 19 bit immediate count in words
424 /* 32 bit pc-relative load */
426 ldr32_pcrel (sim_cpu *cpu, int32_t offset)
428 unsigned rd = INSTR (4, 0);
430 aarch64_set_reg_u64 (cpu, rd, NO_SP,
432 (cpu, aarch64_get_PC (cpu) + offset * 4));
435 /* 64 bit pc-relative load */
437 ldr_pcrel (sim_cpu *cpu, int32_t offset)
439 unsigned rd = INSTR (4, 0);
441 aarch64_set_reg_u64 (cpu, rd, NO_SP,
443 (cpu, aarch64_get_PC (cpu) + offset * 4));
446 /* sign extended 32 bit pc-relative load */
448 ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
450 unsigned rd = INSTR (4, 0);
452 aarch64_set_reg_u64 (cpu, rd, NO_SP,
454 (cpu, aarch64_get_PC (cpu) + offset * 4));
457 /* float pc-relative load */
459 fldrs_pcrel (sim_cpu *cpu, int32_t offset)
461 unsigned int rd = INSTR (4, 0);
463 aarch64_set_vec_u32 (cpu, rd, 0,
465 (cpu, aarch64_get_PC (cpu) + offset * 4));
468 /* double pc-relative load */
470 fldrd_pcrel (sim_cpu *cpu, int32_t offset)
472 unsigned int st = INSTR (4, 0);
474 aarch64_set_vec_u64 (cpu, st, 0,
476 (cpu, aarch64_get_PC (cpu) + offset * 4));
479 /* long double pc-relative load. */
481 fldrq_pcrel (sim_cpu *cpu, int32_t offset)
483 unsigned int st = INSTR (4, 0);
484 uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
487 aarch64_get_mem_long_double (cpu, addr, & a);
488 aarch64_set_FP_long_double (cpu, st, a);
/* This can be used to scale an offset by applying
   the requisite shift.  the second argument is either
   16, 32, 64 or 128 (the element size in bits).  */
#define SCALE(_offset, _elementSize) \
    ((_offset) << ScaleShift ## _elementSize)
/* This can be used to optionally scale a register derived offset
   by applying the requisite shift as indicated by the Scaling
   argument.  The second argument is either Byte, Short, Word
   or Long.  The third argument is either Scaled or Unscaled.
   N.B. when _Scaling is Scaled the shift gets ANDed with
   all 1s while when it is Unscaled it gets ANDed with 0.  */
#define OPT_SCALE(_offset, _elementType, _Scaling) \
  ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))
508 /* This can be used to zero or sign extend a 32 bit register derived
509 value to a 64 bit value. the first argument must be the value as
510 a uint32_t and the second must be either UXTW or SXTW. The result
511 is returned as an int64_t. */
513 static inline int64_t
514 extend (uint32_t value, Extension extension)
522 /* A branchless variant of this ought to be possible. */
523 if (extension == UXTW || extension == NoExtension)
530 /* Scalar Floating Point
532 FP load/store single register (4 addressing modes)
534 N.B. the base register (source) can be the stack pointer.
535 The secondary source register (source2) can only be an Xn register. */
537 /* Load 32 bit unscaled signed 9 bit with pre- or post-writeback. */
539 fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
541 unsigned rn = INSTR (9, 5);
542 unsigned st = INSTR (4, 0);
543 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
548 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));
552 if (wb != NoWriteBack)
553 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
556 /* Load 8 bit with unsigned 12 bit offset. */
558 fldrb_abs (sim_cpu *cpu, uint32_t offset)
560 unsigned rd = INSTR (4, 0);
561 unsigned rn = INSTR (9, 5);
562 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
564 aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
567 /* Load 16 bit scaled unsigned 12 bit. */
569 fldrh_abs (sim_cpu *cpu, uint32_t offset)
571 unsigned rd = INSTR (4, 0);
572 unsigned rn = INSTR (9, 5);
573 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);
575 aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
578 /* Load 32 bit scaled unsigned 12 bit. */
580 fldrs_abs (sim_cpu *cpu, uint32_t offset)
582 unsigned rd = INSTR (4, 0);
583 unsigned rn = INSTR (9, 5);
584 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);
586 aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
589 /* Load 64 bit scaled unsigned 12 bit. */
591 fldrd_abs (sim_cpu *cpu, uint32_t offset)
593 unsigned rd = INSTR (4, 0);
594 unsigned rn = INSTR (9, 5);
595 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);
597 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
600 /* Load 128 bit scaled unsigned 12 bit. */
602 fldrq_abs (sim_cpu *cpu, uint32_t offset)
604 unsigned rd = INSTR (4, 0);
605 unsigned rn = INSTR (9, 5);
606 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
608 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
609 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
612 /* Load 32 bit scaled or unscaled zero- or sign-extended
613 32-bit register offset. */
615 fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
617 unsigned rm = INSTR (20, 16);
618 unsigned rn = INSTR (9, 5);
619 unsigned st = INSTR (4, 0);
620 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
621 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
622 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
624 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
625 (cpu, address + displacement));
628 /* Load 64 bit unscaled signed 9 bit with pre- or post-writeback. */
630 fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
632 unsigned rn = INSTR (9, 5);
633 unsigned st = INSTR (4, 0);
634 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
639 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));
644 if (wb != NoWriteBack)
645 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
648 /* Load 64 bit scaled or unscaled zero- or sign-extended 32-bit register offset. */
650 fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
652 unsigned rm = INSTR (20, 16);
653 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
654 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
656 fldrd_wb (cpu, displacement, NoWriteBack);
659 /* Load 128 bit unscaled signed 9 bit with pre- or post-writeback. */
661 fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
664 unsigned rn = INSTR (9, 5);
665 unsigned st = INSTR (4, 0);
666 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
671 aarch64_get_mem_long_double (cpu, address, & a);
672 aarch64_set_FP_long_double (cpu, st, a);
677 if (wb != NoWriteBack)
678 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
681 /* Load 128 bit scaled or unscaled zero- or sign-extended 32-bit register offset */
683 fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
685 unsigned rm = INSTR (20, 16);
686 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
687 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
689 fldrq_wb (cpu, displacement, NoWriteBack);
694 load-store single register
695 There are four addressing modes available here which all employ a
696 64 bit source (base) register.
698 N.B. the base register (source) can be the stack pointer.
699 The secondary source register (source2)can only be an Xn register.
701 Scaled, 12-bit, unsigned immediate offset, without pre- and
703 Unscaled, 9-bit, signed immediate offset with pre- or post-index
705 scaled or unscaled 64-bit register offset.
706 scaled or unscaled 32-bit extended register offset.
708 All offsets are assumed to be raw from the decode i.e. the
709 simulator is expected to adjust scaled offsets based on the
710 accessed data size with register or extended register offset
711 versions the same applies except that in the latter case the
712 operation may also require a sign extend.
714 A separate method is provided for each possible addressing mode. */
716 /* 32 bit load 32 bit scaled unsigned 12 bit */
718 ldr32_abs (sim_cpu *cpu, uint32_t offset)
720 unsigned rn = INSTR (9, 5);
721 unsigned rt = INSTR (4, 0);
723 /* The target register may not be SP but the source may be. */
724 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
725 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
726 + SCALE (offset, 32)));
729 /* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback. */
731 ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
733 unsigned rn = INSTR (9, 5);
734 unsigned rt = INSTR (4, 0);
737 if (rn == rt && wb != NoWriteBack)
740 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
745 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
750 if (wb != NoWriteBack)
751 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
754 /* 32 bit load 32 bit scaled or unscaled
755 zero- or sign-extended 32-bit register offset */
757 ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
759 unsigned rm = INSTR (20, 16);
760 unsigned rn = INSTR (9, 5);
761 unsigned rt = INSTR (4, 0);
762 /* rn may reference SP, rm and rt must reference ZR */
764 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
765 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
766 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
768 aarch64_set_reg_u64 (cpu, rt, NO_SP,
769 aarch64_get_mem_u32 (cpu, address + displacement));
772 /* 64 bit load 64 bit scaled unsigned 12 bit */
774 ldr_abs (sim_cpu *cpu, uint32_t offset)
776 unsigned rn = INSTR (9, 5);
777 unsigned rt = INSTR (4, 0);
779 /* The target register may not be SP but the source may be. */
780 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
781 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
782 + SCALE (offset, 64)));
785 /* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback. */
787 ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
789 unsigned rn = INSTR (9, 5);
790 unsigned rt = INSTR (4, 0);
793 if (rn == rt && wb != NoWriteBack)
796 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
801 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
806 if (wb != NoWriteBack)
807 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
810 /* 64 bit load 64 bit scaled or unscaled zero-
811 or sign-extended 32-bit register offset. */
813 ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
815 unsigned rm = INSTR (20, 16);
816 unsigned rn = INSTR (9, 5);
817 unsigned rt = INSTR (4, 0);
818 /* rn may reference SP, rm and rt must reference ZR */
820 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
821 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
822 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
824 aarch64_set_reg_u64 (cpu, rt, NO_SP,
825 aarch64_get_mem_u64 (cpu, address + displacement));
828 /* 32 bit load zero-extended byte scaled unsigned 12 bit. */
830 ldrb32_abs (sim_cpu *cpu, uint32_t offset)
832 unsigned rn = INSTR (9, 5);
833 unsigned rt = INSTR (4, 0);
835 /* The target register may not be SP but the source may be
836 there is no scaling required for a byte load. */
837 aarch64_set_reg_u64 (cpu, rt, NO_SP,
839 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
842 /* 32 bit load zero-extended byte unscaled signed 9 bit with pre- or post-writeback. */
844 ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
846 unsigned rn = INSTR (9, 5);
847 unsigned rt = INSTR (4, 0);
850 if (rn == rt && wb != NoWriteBack)
853 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
858 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
863 if (wb != NoWriteBack)
864 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
867 /* 32 bit load zero-extended byte scaled or unscaled zero-
868 or sign-extended 32-bit register offset. */
870 ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
872 unsigned rm = INSTR (20, 16);
873 unsigned rn = INSTR (9, 5);
874 unsigned rt = INSTR (4, 0);
875 /* rn may reference SP, rm and rt must reference ZR */
877 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
878 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
881 /* There is no scaling required for a byte load. */
882 aarch64_set_reg_u64 (cpu, rt, NO_SP,
883 aarch64_get_mem_u8 (cpu, address + displacement));
886 /* 64 bit load sign-extended byte unscaled signed 9 bit
887 with pre- or post-writeback. */
889 ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
891 unsigned rn = INSTR (9, 5);
892 unsigned rt = INSTR (4, 0);
896 if (rn == rt && wb != NoWriteBack)
899 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
904 val = aarch64_get_mem_s8 (cpu, address);
905 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
910 if (wb != NoWriteBack)
911 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
914 /* 64 bit load sign-extended byte scaled unsigned 12 bit. */
916 ldrsb_abs (sim_cpu *cpu, uint32_t offset)
918 ldrsb_wb (cpu, offset, NoWriteBack);
921 /* 64 bit load sign-extended byte scaled or unscaled zero-
922 or sign-extended 32-bit register offset. */
924 ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
926 unsigned rm = INSTR (20, 16);
927 unsigned rn = INSTR (9, 5);
928 unsigned rt = INSTR (4, 0);
929 /* rn may reference SP, rm and rt must reference ZR */
931 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
932 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
934 /* There is no scaling required for a byte load. */
935 aarch64_set_reg_s64 (cpu, rt, NO_SP,
936 aarch64_get_mem_s8 (cpu, address + displacement));
939 /* 32 bit load zero-extended short scaled unsigned 12 bit. */
941 ldrh32_abs (sim_cpu *cpu, uint32_t offset)
943 unsigned rn = INSTR (9, 5);
944 unsigned rt = INSTR (4, 0);
947 /* The target register may not be SP but the source may be. */
948 val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
949 + SCALE (offset, 16));
950 aarch64_set_reg_u32 (cpu, rt, NO_SP, val);
953 /* 32 bit load zero-extended short unscaled signed 9 bit
954 with pre- or post-writeback. */
956 ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
958 unsigned rn = INSTR (9, 5);
959 unsigned rt = INSTR (4, 0);
962 if (rn == rt && wb != NoWriteBack)
965 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
970 aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
975 if (wb != NoWriteBack)
976 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
979 /* 32 bit load zero-extended short scaled or unscaled zero-
980 or sign-extended 32-bit register offset. */
982 ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
984 unsigned rm = INSTR (20, 16);
985 unsigned rn = INSTR (9, 5);
986 unsigned rt = INSTR (4, 0);
987 /* rn may reference SP, rm and rt must reference ZR */
989 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
990 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
991 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
993 aarch64_set_reg_u32 (cpu, rt, NO_SP,
994 aarch64_get_mem_u16 (cpu, address + displacement));
997 /* 32 bit load sign-extended short scaled unsigned 12 bit. */
999 ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
1001 unsigned rn = INSTR (9, 5);
1002 unsigned rt = INSTR (4, 0);
1005 /* The target register may not be SP but the source may be. */
1006 val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1007 + SCALE (offset, 16));
1008 aarch64_set_reg_s32 (cpu, rt, NO_SP, val);
1011 /* 32 bit load sign-extended short unscaled signed 9 bit
1012 with pre- or post-writeback. */
1014 ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1016 unsigned rn = INSTR (9, 5);
1017 unsigned rt = INSTR (4, 0);
1020 if (rn == rt && wb != NoWriteBack)
1023 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1028 aarch64_set_reg_s32 (cpu, rt, NO_SP,
1029 (int32_t) aarch64_get_mem_s16 (cpu, address));
1034 if (wb != NoWriteBack)
1035 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1038 /* 32 bit load sign-extended short scaled or unscaled zero-
1039 or sign-extended 32-bit register offset. */
1041 ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1043 unsigned rm = INSTR (20, 16);
1044 unsigned rn = INSTR (9, 5);
1045 unsigned rt = INSTR (4, 0);
1046 /* rn may reference SP, rm and rt must reference ZR */
1048 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1049 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1050 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1052 aarch64_set_reg_s32 (cpu, rt, NO_SP,
1053 (int32_t) aarch64_get_mem_s16
1054 (cpu, address + displacement));
1057 /* 64 bit load sign-extended short scaled unsigned 12 bit. */
1059 ldrsh_abs (sim_cpu *cpu, uint32_t offset)
1061 unsigned rn = INSTR (9, 5);
1062 unsigned rt = INSTR (4, 0);
1065 /* The target register may not be SP but the source may be. */
1066 val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1067 + SCALE (offset, 16));
1068 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1071 /* 64 bit load sign-extended short unscaled signed 9 bit
1072 with pre- or post-writeback. */
1074 ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1076 unsigned rn = INSTR (9, 5);
1077 unsigned rt = INSTR (4, 0);
1081 if (rn == rt && wb != NoWriteBack)
1084 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1089 val = aarch64_get_mem_s16 (cpu, address);
1090 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1095 if (wb != NoWriteBack)
1096 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1099 /* 64 bit load sign-extended short scaled or unscaled zero-
1100 or sign-extended 32-bit register offset. */
1102 ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1104 unsigned rm = INSTR (20, 16);
1105 unsigned rn = INSTR (9, 5);
1106 unsigned rt = INSTR (4, 0);
1108 /* rn may reference SP, rm and rt must reference ZR */
1110 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1111 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1112 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1115 val = aarch64_get_mem_s16 (cpu, address + displacement);
1116 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1119 /* 64 bit load sign-extended 32 bit scaled unsigned 12 bit. */
1121 ldrsw_abs (sim_cpu *cpu, uint32_t offset)
1123 unsigned rn = INSTR (9, 5);
1124 unsigned rt = INSTR (4, 0);
1127 val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1128 + SCALE (offset, 32));
1129 /* The target register may not be SP but the source may be. */
1130 return aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1133 /* 64 bit load sign-extended 32 bit unscaled signed 9 bit
1134 with pre- or post-writeback. */
1136 ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1138 unsigned rn = INSTR (9, 5);
1139 unsigned rt = INSTR (4, 0);
1142 if (rn == rt && wb != NoWriteBack)
1145 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1150 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));
1155 if (wb != NoWriteBack)
1156 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1159 /* 64 bit load sign-extended 32 bit scaled or unscaled zero-
1160 or sign-extended 32-bit register offset. */
1162 ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1164 unsigned rm = INSTR (20, 16);
1165 unsigned rn = INSTR (9, 5);
1166 unsigned rt = INSTR (4, 0);
1167 /* rn may reference SP, rm and rt must reference ZR */
1169 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1170 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1171 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
1173 aarch64_set_reg_s64 (cpu, rt, NO_SP,
1174 aarch64_get_mem_s32 (cpu, address + displacement));
1177 /* N.B. with stores the value in source is written to the
1178 address identified by source2 modified by source3/offset. */
1180 /* 32 bit store scaled unsigned 12 bit. */
1182 str32_abs (sim_cpu *cpu, uint32_t offset)
1184 unsigned rn = INSTR (9, 5);
1185 unsigned rt = INSTR (4, 0);
1187 /* The target register may not be SP but the source may be. */
1188 aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
1189 + SCALE (offset, 32)),
1190 aarch64_get_reg_u32 (cpu, rt, NO_SP));
1193 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
1195 str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1197 unsigned rn = INSTR (9, 5);
1198 unsigned rt = INSTR (4, 0);
1201 if (rn == rt && wb != NoWriteBack)
1204 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1208 aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));
1213 if (wb != NoWriteBack)
1214 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1217 /* 32 bit store scaled or unscaled zero- or
1218 sign-extended 32-bit register offset. */
1220 str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1222 unsigned rm = INSTR (20, 16);
1223 unsigned rn = INSTR (9, 5);
1224 unsigned rt = INSTR (4, 0);
1226 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1227 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1228 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
1230 aarch64_set_mem_u32 (cpu, address + displacement,
1231 aarch64_get_reg_u64 (cpu, rt, NO_SP));
1234 /* 64 bit store scaled unsigned 12 bit. */
1236 str_abs (sim_cpu *cpu, uint32_t offset)
1238 unsigned rn = INSTR (9, 5);
1239 unsigned rt = INSTR (4, 0);
1241 aarch64_set_mem_u64 (cpu,
1242 aarch64_get_reg_u64 (cpu, rn, SP_OK)
1243 + SCALE (offset, 64),
1244 aarch64_get_reg_u64 (cpu, rt, NO_SP));
1247 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
/* NOTE(review): some lines of this unit are missing from this extract
   (the HALT on rn == rt and the Pre/Post offset adjustment).  */
1249 str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1251 unsigned rn = INSTR (9, 5);
1252 unsigned rt = INSTR (4, 0);
/* Writeback with rn == rt is architecturally unallocated.  */
1255 if (rn == rt && wb != NoWriteBack)
1258 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1263 aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));
/* Commit the updated address for the pre/post indexed forms.  */
1268 if (wb != NoWriteBack)
1269 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1272 /* 64 bit store scaled or unscaled zero-
1273 or sign-extended 32-bit register offset. */
/* NOTE(review): the second argument of the extend () call below is on
   an elided line in this extract.  */
1275 str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1277 unsigned rm = INSTR (20, 16);
1278 unsigned rn = INSTR (9, 5);
1279 unsigned rt = INSTR (4, 0);
1280 /* rn may reference SP, rm and rt must reference ZR */
1282 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1283 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
/* Scale by 8 only when the scaling operand requests it.  */
1285 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
1287 aarch64_set_mem_u64 (cpu, address + displacement,
1288 aarch64_get_reg_u64 (cpu, rt, NO_SP));
1291 /* 32 bit store byte scaled unsigned 12 bit. */
1293 strb_abs (sim_cpu *cpu, uint32_t offset)
1295 unsigned rn = INSTR (9, 5);
1296 unsigned rt = INSTR (4, 0);
1298 /* The target register may not be SP but the source may be.
1299 There is no scaling required for a byte load. */
1300 aarch64_set_mem_u8 (cpu,
1301 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
1302 aarch64_get_reg_u8 (cpu, rt, NO_SP));
1305 /* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback. */
/* NOTE(review): some lines of this unit are missing from this extract
   (the HALT on rn == rt and the Pre/Post offset adjustment).  */
1307 strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1309 unsigned rn = INSTR (9, 5);
1310 unsigned rt = INSTR (4, 0);
/* Writeback with rn == rt is architecturally unallocated.  */
1313 if (rn == rt && wb != NoWriteBack)
1316 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1321 aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));
/* Commit the updated address for the pre/post indexed forms.  */
1326 if (wb != NoWriteBack)
1327 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1330 /* 32 bit store byte scaled or unscaled zero-
1331 or sign-extended 32-bit register offset. */
/* NOTE(review): the closing of the extend () call below is on an
   elided line in this extract.  */
1333 strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1335 unsigned rm = INSTR (20, 16);
1336 unsigned rn = INSTR (9, 5);
1337 unsigned rt = INSTR (4, 0);
1338 /* rn may reference SP, rm and rt must reference ZR */
1340 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1341 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1344 /* There is no scaling required for a byte load. */
1345 aarch64_set_mem_u8 (cpu, address + displacement,
1346 aarch64_get_reg_u8 (cpu, rt, NO_SP));
1349 /* 32 bit store short scaled unsigned 12 bit. */
1351 strh_abs (sim_cpu *cpu, uint32_t offset)
1353 unsigned rn = INSTR (9, 5);
1354 unsigned rt = INSTR (4, 0);
1356 /* The target register may not be SP but the source may be. */
1357 aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1358 + SCALE (offset, 16),
1359 aarch64_get_reg_u16 (cpu, rt, NO_SP));
1362 /* 32 bit store short unscaled signed 9 bit with pre- or post-writeback. */
/* NOTE(review): some lines of this unit are missing from this extract
   (the HALT on rn == rt and the Pre/Post offset adjustment).  */
1364 strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1366 unsigned rn = INSTR (9, 5);
1367 unsigned rt = INSTR (4, 0);
/* Writeback with rn == rt is architecturally unallocated.  */
1370 if (rn == rt && wb != NoWriteBack)
1373 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1378 aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));
/* Commit the updated address for the pre/post indexed forms.  */
1383 if (wb != NoWriteBack)
1384 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1387 /* 32 bit store short scaled or unscaled zero-
1388 or sign-extended 32-bit register offset. */
1390 strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1392 unsigned rm = INSTR (20, 16);
1393 unsigned rn = INSTR (9, 5);
1394 unsigned rt = INSTR (4, 0);
1395 /* rn may reference SP, rm and rt must reference ZR */
1397 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1398 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1399 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1401 aarch64_set_mem_u16 (cpu, address + displacement,
1402 aarch64_get_reg_u16 (cpu, rt, NO_SP));
1405 /* Prefetch unsigned 12 bit. */
/* Prefetch is a performance hint only; this simulator decodes the
   operands (in the comment below) but performs no memory access.  */
1407 prfm_abs (sim_cpu *cpu, uint32_t offset)
1409 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1410 00010 ==> PLDL2KEEP, 00001 ==> PLDL2STRM,
1411 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1412 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1413 10010 ==> PSTL2KEEP, 10001 ==> PSTL2STRM,
1414 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1416 PrfOp prfop = prfop (instr, 4, 0);
1417 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
1418 + SCALE (offset, 64). */
1420 /* TODO : implement prefetch of address. */
1423 /* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset. */
/* Same hint semantics as prfm_abs; register-offset form, no-op here.  */
1425 prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1427 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1428 00010 ==> PLDL2KEEP, 00001 ==> PLDL2STRM,
1429 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1430 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1431 10010 ==> PSTL2KEEP, 10001 ==> PSTL2STRM,
1432 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1434 rn may reference SP, rm may only reference ZR
1435 PrfOp prfop = prfop (instr, 4, 0);
1436 uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1437 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1439 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
1440 uint64_t address = base + displacement. */
1442 /* TODO : implement prefetch of address */
1445 /* 64 bit pc-relative prefetch. */
/* PC-relative prefetch hint; also a no-op in this simulator.  */
1447 prfm_pcrel (sim_cpu *cpu, int32_t offset)
1449 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1450 00010 ==> PLDL2KEEP, 00001 ==> PLDL2STRM,
1451 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1452 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1453 10010 ==> PSTL2KEEP, 10001 ==> PSTL2STRM,
1454 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1456 PrfOp prfop = prfop (instr, 4, 0);
1457 uint64_t address = aarch64_get_PC (cpu) + offset. */
1459 /* TODO : implement this */
1462 /* Load-store exclusive. */
/* Load-exclusive: loads size-selected data from [Rn] into Rt,
   zero-extended to 64 bits.  The exclusive monitor itself is not
   modelled (see the commented-out ordered/exclusive decodes).
   NOTE(review): the function's signature line and the switch (size)
   scaffolding are on elided lines in this extract.  */
1467 unsigned rn = INSTR (9, 5);
1468 unsigned rt = INSTR (4, 0);
1469 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
/* size: 0 ==> byte, 1 ==> half, 2 ==> word, 3 ==> doubleword.  */
1470 int size = INSTR (31, 30);
1471 /* int ordered = INSTR (15, 15); */
1472 /* int exclusive = ! INSTR (23, 23); */
1477 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
1480 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
1483 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
1486 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
/* Store-exclusive: stores size-selected data from Rt at [Rn] and
   writes the status (always 0 = success here, since the monitor is
   not modelled) into Rs.  NOTE(review): signature and switch
   scaffolding elided in this extract.  */
1494 unsigned rn = INSTR (9, 5);
1495 unsigned rt = INSTR (4, 0);
1496 unsigned rs = INSTR (20, 16);
1497 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1498 int size = INSTR (31, 30);
1499 uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);
1503 case 0: aarch64_set_mem_u8 (cpu, address, data); break;
1504 case 1: aarch64_set_mem_u16 (cpu, address, data); break;
1505 case 2: aarch64_set_mem_u32 (cpu, address, data); break;
1506 case 3: aarch64_set_mem_u64 (cpu, address, data); break;
1509 aarch64_set_reg_u64 (cpu, rs, NO_SP, 0); /* Always exclusive... */
/* Decode and dispatch a PC-relative load-literal instruction.  */
1513 dexLoadLiteral (sim_cpu *cpu)
1515 /* instr[29,27] == 011
1517 instr[31,30:26] = opc: 000 ==> LDRW, 001 ==> FLDRS
1518 010 ==> LDRX, 011 ==> FLDRD
1519 100 ==> LDRSW, 101 ==> FLDRQ
1520 110 ==> PRFM, 111 ==> UNALLOC
1521 instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
1522 instr[23, 5] == simm19 */
1524 /* unsigned rt = INSTR (4, 0); */
/* dispatch = opc:V, giving the 8 cases listed above.  */
1525 uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26);
1526 int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);
1530 case 0: ldr32_pcrel (cpu, imm); break;
1531 case 1: fldrs_pcrel (cpu, imm); break;
1532 case 2: ldr_pcrel (cpu, imm); break;
1533 case 3: fldrd_pcrel (cpu, imm); break;
1534 case 4: ldrsw_pcrel (cpu, imm); break;
1535 case 5: fldrq_pcrel (cpu, imm); break;
1536 case 6: prfm_pcrel (cpu, imm); break;
1543 /* Immediate arithmetic
1544 The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
1545 value left shifted by 12 bits (done at decode).
1547 N.B. the register args (dest, source) can normally be Xn or SP.
1548 the exception occurs for flag setting instructions which may
1549 only use Xn for the output (dest). */
1551 /* 32 bit add immediate. */
1553 add32 (sim_cpu *cpu, uint32_t aimm)
1555 unsigned rn = INSTR (9, 5);
1556 unsigned rd = INSTR (4, 0);
1558 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1559 aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
1562 /* 64 bit add immediate. */
1564 add64 (sim_cpu *cpu, uint32_t aimm)
1566 unsigned rn = INSTR (9, 5);
1567 unsigned rd = INSTR (4, 0);
1569 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1570 aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
/* Compute N, Z, C and V for a 32-bit addition by redoing the sum at
   64-bit width (unsigned for carry, signed for overflow).
   NOTE(review): the `flags' declaration and the `flags |= ...'
   assignments are on elided lines in this extract.  */
1574 set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
1576 int32_t result = value1 + value2;
1577 int64_t sresult = (int64_t) value1 + (int64_t) value2;
1578 uint64_t uresult = (uint64_t)(uint32_t) value1
1579 + (uint64_t)(uint32_t) value2;
/* N: sign bit of the 32-bit result.  */
1585 if (result & (1 << 31))
/* C: the widened unsigned sum does not fit in 32 bits.  */
1588 if (uresult != result)
/* V: the widened signed sum does not fit in 32 bits.  */
1591 if (sresult != result)
1594 aarch64_set_CPSR (cpu, flags);
/* 64-bit variant: no wider type is available, so carry and overflow
   are derived from sign-case analysis of the operands and result.  */
1598 set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1600 int64_t sval1 = value1;
1601 int64_t sval2 = value2;
1602 uint64_t result = value1 + value2;
1603 int64_t sresult = sval1 + sval2;
/* N: sign bit of the 64-bit result.  */
1609 if (result & (1ULL << 63))
1616 /* Negative plus a negative. Overflow happens if
1617 the result is greater than either of the operands. */
1618 if (sresult > sval1 || sresult > sval2)
1621 /* else Negative plus a positive. Overflow cannot happen. */
1623 else /* value1 is +ve. */
1627 /* Overflow can only occur if we computed "0 - MININT". */
1628 if (sval1 == 0 && sval2 == (1LL << 63))
1633 /* Postive plus positive - overflow has happened if the
1634 result is smaller than either of the operands. */
1635 if (result < value1 || result < value2)
1640 aarch64_set_CPSR (cpu, flags);
/* Sign-test helpers for the subtraction flag routines below; `signbit'
   is a local of each routine, so these must stay textual macros.  */
1643 #define NEG(a) (((a) & signbit) == signbit)
1644 #define POS(a) (((a) & signbit) == 0)
/* Compute N, Z, C and V for a 32-bit subtraction (value1 - value2)
   by sign-case analysis.  NOTE(review): flag assignments elided.  */
1647 set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
1649 uint32_t result = value1 - value2;
1651 uint32_t signbit = 1U << 31;
/* C (no-borrow) cases.  */
1659 if ( (NEG (value1) && POS (value2))
1660 || (NEG (value1) && POS (result))
1661 || (POS (value2) && POS (result)))
/* V: operands of opposite sign and result sign matching value2.  */
1664 if ( (NEG (value1) && POS (value2) && POS (result))
1665 || (POS (value1) && NEG (value2) && NEG (result)))
1668 aarch64_set_CPSR (cpu, flags);
/* 64-bit analogue of set_flags_for_sub32.  */
1672 set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1674 uint64_t result = value1 - value2;
1676 uint64_t signbit = 1ULL << 63;
1684 if ( (NEG (value1) && POS (value2))
1685 || (NEG (value1) && POS (result))
1686 || (POS (value2) && POS (result)))
1689 if ( (NEG (value1) && POS (value2) && POS (result))
1690 || (POS (value1) && NEG (value2) && NEG (result)))
1693 aarch64_set_CPSR (cpu, flags);
/* Logical (bitwise) ops only set N and Z; C and V are cleared.  */
1697 set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
1706 if (result & (1 << 31))
1711 aarch64_set_CPSR (cpu, flags);
/* 64-bit analogue of set_flags_for_binop32.  */
1715 set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
1724 if (result & (1ULL << 63))
1729 aarch64_set_CPSR (cpu, flags);
1732 /* 32 bit add immediate set flags. */
1734 adds32 (sim_cpu *cpu, uint32_t aimm)
1736 unsigned rn = INSTR (9, 5);
1737 unsigned rd = INSTR (4, 0);
1738 /* TODO : do we need to worry about signs here? */
1739 int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);
1741 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
1742 set_flags_for_add32 (cpu, value1, aimm);
1745 /* 64 bit add immediate set flags. */
1747 adds64 (sim_cpu *cpu, uint32_t aimm)
1749 unsigned rn = INSTR (9, 5);
1750 unsigned rd = INSTR (4, 0);
1751 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1752 uint64_t value2 = aimm;
1754 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1755 set_flags_for_add64 (cpu, value1, value2);
1758 /* 32 bit sub immediate. */
1760 sub32 (sim_cpu *cpu, uint32_t aimm)
1762 unsigned rn = INSTR (9, 5);
1763 unsigned rd = INSTR (4, 0);
1765 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1766 aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
1769 /* 64 bit sub immediate. */
1771 sub64 (sim_cpu *cpu, uint32_t aimm)
1773 unsigned rn = INSTR (9, 5);
1774 unsigned rd = INSTR (4, 0);
1776 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1777 aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
1780 /* 32 bit sub immediate set flags. */
1782 subs32 (sim_cpu *cpu, uint32_t aimm)
1784 unsigned rn = INSTR (9, 5);
1785 unsigned rd = INSTR (4, 0);
1786 uint32_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1787 uint32_t value2 = aimm;
1789 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1790 set_flags_for_sub32 (cpu, value1, value2);
1793 /* 64 bit sub immediate set flags. */
1795 subs64 (sim_cpu *cpu, uint32_t aimm)
1797 unsigned rn = INSTR (9, 5);
1798 unsigned rd = INSTR (4, 0);
1799 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1800 uint32_t value2 = aimm;
1802 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1803 set_flags_for_sub64 (cpu, value1, value2);
1806 /* Data Processing Register. */
1808 /* First two helpers to perform the shift operations. */
/* Apply LSL/LSR/ASR/ROR to a 32-bit value.  NOTE(review): the
   switch (shift) scaffolding and case labels are on elided lines in
   this extract; the bodies below appear in LSL, LSR, ASR, ROR order.  */
1810 static inline uint32_t
1811 shifted32 (uint32_t value, Shift shift, uint32_t count)
1817 return (value << count);
1819 return (value >> count);
/* ASR: go via a signed type to get an arithmetic shift.  */
1822 int32_t svalue = value;
1823 return (svalue >> count);
/* ROR: rotate right by count.  */
1827 uint32_t top = value >> count;
1828 uint32_t bottom = value << (32 - count);
1829 return (bottom | top);
/* 64-bit analogue of shifted32; same elision caveat applies.  */
1834 static inline uint64_t
1835 shifted64 (uint64_t value, Shift shift, uint32_t count)
1841 return (value << count);
1843 return (value >> count);
1846 int64_t svalue = value;
1847 return (svalue >> count);
1851 uint64_t top = value >> count;
1852 uint64_t bottom = value << (64 - count);
1853 return (bottom | top);
1858 /* Arithmetic shifted register.
1859 These allow an optional LSL, ASR or LSR to the second source
1860 register with a count up to the register bit count.
1862 N.B register args may not be SP. */
1864 /* 32 bit ADD shifted register. */
/* NOTE(review): in each routine below, the trailing `shift, count));'
   arguments of the shifted32/shifted64 call are on elided lines.  */
1866 add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1868 unsigned rm = INSTR (20, 16);
1869 unsigned rn = INSTR (9, 5);
1870 unsigned rd = INSTR (4, 0);
1872 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1873 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1874 + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1878 /* 64 bit ADD shifted register. */
1880 add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1882 unsigned rm = INSTR (20, 16);
1883 unsigned rn = INSTR (9, 5);
1884 unsigned rd = INSTR (4, 0);
1886 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1887 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1888 + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1892 /* 32 bit ADD shifted register setting flags. */
1894 adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1896 unsigned rm = INSTR (20, 16);
1897 unsigned rn = INSTR (9, 5);
1898 unsigned rd = INSTR (4, 0);
1900 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1901 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1904 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1905 set_flags_for_add32 (cpu, value1, value2);
1908 /* 64 bit ADD shifted register setting flags. */
1910 adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1912 unsigned rm = INSTR (20, 16);
1913 unsigned rn = INSTR (9, 5);
1914 unsigned rd = INSTR (4, 0);
1916 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1917 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1920 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1921 set_flags_for_add64 (cpu, value1, value2);
1924 /* 32 bit SUB shifted register. */
1926 sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1928 unsigned rm = INSTR (20, 16);
1929 unsigned rn = INSTR (9, 5);
1930 unsigned rd = INSTR (4, 0);
1932 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1933 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1934 - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1938 /* 64 bit SUB shifted register. */
1940 sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1942 unsigned rm = INSTR (20, 16);
1943 unsigned rn = INSTR (9, 5);
1944 unsigned rd = INSTR (4, 0);
1946 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1947 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1948 - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1952 /* 32 bit SUB shifted register setting flags. */
1954 subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1956 unsigned rm = INSTR (20, 16);
1957 unsigned rn = INSTR (9, 5);
1958 unsigned rd = INSTR (4, 0);
1960 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1961 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1964 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1965 set_flags_for_sub32 (cpu, value1, value2);
1968 /* 64 bit SUB shifted register setting flags. */
1970 subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1972 unsigned rm = INSTR (20, 16);
1973 unsigned rn = INSTR (9, 5);
1974 unsigned rd = INSTR (4, 0);
1976 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1977 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1980 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1981 set_flags_for_sub64 (cpu, value1, value2);
/* extreg32/extreg64: fetch a source register element zero- or
   sign-extended per the Extension operand (UXTB..SXTX), at 32- and
   64-bit result width respectively.  NOTE(review): in this mangled
   extract the comment opened on the next line is never closed before
   the code, and the switch (extension) scaffolding is elided; no
   further comments are inserted below to avoid changing where the
   stray comment terminates.  */
1984 /* First a couple more helpers to fetch the
1985 relevant source register element either
1986 sign or zero extended as required by the
1990 extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
1994 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
1995 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
1996 case UXTW: /* Fall through. */
1997 case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
1998 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
1999 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2000 case SXTW: /* Fall through. */
2001 case SXTX: /* Fall through. */
2002 default: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2007 extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
2011 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2012 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2013 case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2014 case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
2015 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2016 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2017 case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2019 default: return aarch64_get_reg_s64 (cpu, lo, NO_SP);
2023 /* Arithmetic extending register
2024 These allow an optional sign extension of some portion of the
2025 second source register followed by an optional left shift of
2026 between 1 and 4 bits (i.e. a shift of 0-4 bits???)
2028 N.B output (dest) and first input arg (source) may normally be Xn
2029 or SP. However, for flag setting operations dest can only be
2030 Xn. Second input registers are always Xn. */
2032 /* 32 bit ADD extending register. */
2034 add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2036 unsigned rm = INSTR (20, 16);
2037 unsigned rn = INSTR (9, 5);
2038 unsigned rd = INSTR (4, 0);
2040 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2041 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2042 + (extreg32 (cpu, rm, extension) << shift));
2045 /* 64 bit ADD extending register.
2046 N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2048 add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2050 unsigned rm = INSTR (20, 16);
2051 unsigned rn = INSTR (9, 5);
2052 unsigned rd = INSTR (4, 0);
2054 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2055 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2056 + (extreg64 (cpu, rm, extension) << shift));
2059 /* 32 bit ADD extending register setting flags. */
2061 adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2063 unsigned rm = INSTR (20, 16);
2064 unsigned rn = INSTR (9, 5);
2065 unsigned rd = INSTR (4, 0);
2067 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2068 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2070 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2071 set_flags_for_add32 (cpu, value1, value2);
2074 /* 64 bit ADD extending register setting flags */
2075 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2077 adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2079 unsigned rm = INSTR (20, 16);
2080 unsigned rn = INSTR (9, 5);
2081 unsigned rd = INSTR (4, 0);
2083 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2084 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2086 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2087 set_flags_for_add64 (cpu, value1, value2);
2090 /* 32 bit SUB extending register. */
2092 sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2094 unsigned rm = INSTR (20, 16);
2095 unsigned rn = INSTR (9, 5);
2096 unsigned rd = INSTR (4, 0);
2098 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2099 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2100 - (extreg32 (cpu, rm, extension) << shift));
2103 /* 64 bit SUB extending register. */
2104 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2106 sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2108 unsigned rm = INSTR (20, 16);
2109 unsigned rn = INSTR (9, 5);
2110 unsigned rd = INSTR (4, 0);
2112 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2113 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2114 - (extreg64 (cpu, rm, extension) << shift));
2117 /* 32 bit SUB extending register setting flags. */
2119 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2121 unsigned rm = INSTR (20, 16);
2122 unsigned rn = INSTR (9, 5);
2123 unsigned rd = INSTR (4, 0);
2125 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2126 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2128 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2129 set_flags_for_sub32 (cpu, value1, value2);
2132 /* 64 bit SUB extending register setting flags */
2133 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2135 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2137 unsigned rm = INSTR (20, 16);
2138 unsigned rn = INSTR (9, 5);
2139 unsigned rd = INSTR (4, 0);
2141 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2142 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2144 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2145 set_flags_for_sub64 (cpu, value1, value2);
/* Decode and dispatch an add/subtract-immediate instruction on
   size:op:set (instr[31,29]).  NOTE(review): the block comment below
   loses its terminator in this extract, and the shift-application and
   switch scaffolding are on elided lines.  */
2149 dexAddSubtractImmediate (sim_cpu *cpu)
2151 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2152 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2153 instr[29] = set : 0 ==> no flags, 1 ==> set flags
2154 instr[28,24] = 10001
2155 instr[23,22] = shift : 00 == LSL#0, 01 = LSL#12 1x = UNALLOC
2156 instr[21,10] = uimm12
2160 /* N.B. the shift is applied at decode before calling the add/sub routine. */
2161 uint32_t shift = INSTR (23, 22);
2162 uint32_t imm = INSTR (21, 10);
2163 uint32_t dispatch = INSTR (31, 29);
2165 NYI_assert (28, 24, 0x11);
2175 case 0: add32 (cpu, imm); break;
2176 case 1: adds32 (cpu, imm); break;
2177 case 2: sub32 (cpu, imm); break;
2178 case 3: subs32 (cpu, imm); break;
2179 case 4: add64 (cpu, imm); break;
2180 case 5: adds64 (cpu, imm); break;
2181 case 6: sub64 (cpu, imm); break;
2182 case 7: subs64 (cpu, imm); break;
2187 dexAddSubtractShiftedRegister (sim_cpu *cpu)
2189 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2190 instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
2191 instr[28,24] = 01011
2192 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
2195 instr[15,10] = count : must be 0xxxxx for 32 bit
2199 uint32_t size = INSTR (31, 31);
2200 uint32_t count = INSTR (15, 10);
2201 Shift shiftType = INSTR (23, 22);
2203 NYI_assert (28, 24, 0x0B);
2204 NYI_assert (21, 21, 0);
2206 /* Shift encoded as ROR is unallocated. */
2207 if (shiftType == ROR)
2210 /* 32 bit operations must have count[5] = 0
2211 or else we have an UNALLOC. */
2212 if (size == 0 && uimm (count, 5, 5))
2215 /* Dispatch on size:op i.e instr [31,29]. */
2216 switch (INSTR (31, 29))
2218 case 0: add32_shift (cpu, shiftType, count); break;
2219 case 1: adds32_shift (cpu, shiftType, count); break;
2220 case 2: sub32_shift (cpu, shiftType, count); break;
2221 case 3: subs32_shift (cpu, shiftType, count); break;
2222 case 4: add64_shift (cpu, shiftType, count); break;
2223 case 5: adds64_shift (cpu, shiftType, count); break;
2224 case 6: sub64_shift (cpu, shiftType, count); break;
2225 case 7: subs64_shift (cpu, shiftType, count); break;
2230 dexAddSubtractExtendedRegister (sim_cpu *cpu)
2232 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2233 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2234 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2235 instr[28,24] = 01011
2236 instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
2239 instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
2240 000 ==> LSL|UXTW, 001 ==> UXTZ,
2241 000 ==> SXTB, 001 ==> SXTH,
2242 000 ==> SXTW, 001 ==> SXTX,
2243 instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
2247 Extension extensionType = INSTR (15, 13);
2248 uint32_t shift = INSTR (12, 10);
2250 NYI_assert (28, 24, 0x0B);
2251 NYI_assert (21, 21, 1);
2253 /* Shift may not exceed 4. */
2257 /* Dispatch on size:op:set?. */
2258 switch (INSTR (31, 29))
2260 case 0: add32_ext (cpu, extensionType, shift); break;
2261 case 1: adds32_ext (cpu, extensionType, shift); break;
2262 case 2: sub32_ext (cpu, extensionType, shift); break;
2263 case 3: subs32_ext (cpu, extensionType, shift); break;
2264 case 4: add64_ext (cpu, extensionType, shift); break;
2265 case 5: adds64_ext (cpu, extensionType, shift); break;
2266 case 6: sub64_ext (cpu, extensionType, shift); break;
2267 case 7: subs64_ext (cpu, extensionType, shift); break;
2271 /* Conditional data processing
2272 Condition register is implicit 3rd source. */
2274 /* 32 bit add with carry. */
2275 /* N.B register args may not be SP. */
/* ADC: Rd = Rn + Rm + C.  NOTE(review): the trailing `+ IS_SET (C));'
   term of each sum is on an elided line in this extract.  */
2278 adc32 (sim_cpu *cpu)
2280 unsigned rm = INSTR (20, 16);
2281 unsigned rn = INSTR (9, 5);
2282 unsigned rd = INSTR (4, 0);
2284 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2285 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2286 + aarch64_get_reg_u32 (cpu, rm, NO_SP)
2290 /* 64 bit add with carry */
2292 adc64 (sim_cpu *cpu)
2294 unsigned rm = INSTR (20, 16);
2295 unsigned rn = INSTR (9, 5);
2296 unsigned rd = INSTR (4, 0);
2298 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2299 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2300 + aarch64_get_reg_u64 (cpu, rm, NO_SP)
2304 /* 32 bit add with carry setting flags. */
2306 adcs32 (sim_cpu *cpu)
2308 unsigned rm = INSTR (20, 16);
2309 unsigned rn = INSTR (9, 5);
2310 unsigned rd = INSTR (4, 0);
2312 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2313 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2314 uint32_t carry = IS_SET (C);
2316 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2317 set_flags_for_add32 (cpu, value1, value2 + carry);
2320 /* 64 bit add with carry setting flags. */
2322 adcs64 (sim_cpu *cpu)
2324 unsigned rm = INSTR (20, 16);
2325 unsigned rn = INSTR (9, 5);
2326 unsigned rd = INSTR (4, 0);
2328 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2329 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2330 uint64_t carry = IS_SET (C);
2332 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2333 set_flags_for_add64 (cpu, value1, value2 + carry);
2336 /* 32 bit sub with carry. */
/* SBC: Rd = Rn - Rm - 1 + C.  NOTE(review): the trailing
   `- 1 + IS_SET (C));' term of each difference is on an elided line
   in this extract.  */
2338 sbc32 (sim_cpu *cpu)
2340 unsigned rm = INSTR (20, 16);
2341 unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. */
2342 unsigned rd = INSTR (4, 0);
2344 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2345 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2346 - aarch64_get_reg_u32 (cpu, rm, NO_SP)
2350 /* 64 bit sub with carry */
2352 sbc64 (sim_cpu *cpu)
2354 unsigned rm = INSTR (20, 16);
2355 unsigned rn = INSTR (9, 5);
2356 unsigned rd = INSTR (4, 0);
2358 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2359 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2360 - aarch64_get_reg_u64 (cpu, rm, NO_SP)
2364 /* 32 bit sub with carry setting flags */
2366 sbcs32 (sim_cpu *cpu)
2368 unsigned rm = INSTR (20, 16);
2369 unsigned rn = INSTR (9, 5);
2370 unsigned rd = INSTR (4, 0);
2372 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2373 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2374 uint32_t carry = IS_SET (C);
2375 uint32_t result = value1 - value2 + 1 - carry;
2377 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2378 set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
2381 /* 64 bit sub with carry setting flags */
2383 sbcs64 (sim_cpu *cpu)
2385 unsigned rm = INSTR (20, 16);
2386 unsigned rn = INSTR (9, 5);
2387 unsigned rd = INSTR (4, 0);
2389 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2390 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2391 uint64_t carry = IS_SET (C);
2392 uint64_t result = value1 - value2 + 1 - carry;
2394 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2395 set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
/* Decode and dispatch an add/subtract-with-carry instruction on
   size:op:set (instr[31,29]).  NOTE(review): the comment below loses
   its terminator in this extract, and the op2 != 0 UNALLOC check and
   switch scaffolding are elided.  */
2399 dexAddSubtractWithCarry (sim_cpu *cpu)
2401 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2402 instr[30] = op : 0 ==> ADC, 1 ==> SBC
2403 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2404 instr[28,21] = 1 1010 000
2406 instr[15,10] = op2 : 00000 ==> ok, ow ==> UNALLOC
2410 uint32_t op2 = INSTR (15, 10);
2412 NYI_assert (28, 21, 0xD0);
2417 /* Dispatch on size:op:set?. */
2418 switch (INSTR (31, 29))
2420 case 0: adc32 (cpu); break;
2421 case 1: adcs32 (cpu); break;
2422 case 2: sbc32 (cpu); break;
2423 case 3: sbcs32 (cpu); break;
2424 case 4: adc64 (cpu); break;
2425 case 5: adcs64 (cpu); break;
2426 case 6: sbc64 (cpu); break;
2427 case 7: sbcs64 (cpu); break;
/* Evaluate an A64 condition code against the current NZCV flags;
   returns nonzero when the condition holds.  */
2432 testConditionCode (sim_cpu *cpu, CondCode cc)
2434 /* This should be reduceable to branchless logic
2435 by some careful testing of bits in CC followed
2436 by the requisite masking and combining of bits
2437 from the flag register.
2439 For now we do it with a switch. */
2444 case EQ: res = IS_SET (Z); break;
2445 case NE: res = IS_CLEAR (Z); break;
2446 case CS: res = IS_SET (C); break;
2447 case CC: res = IS_CLEAR (C); break;
2448 case MI: res = IS_SET (N); break;
2449 case PL: res = IS_CLEAR (N); break;
2450 case VS: res = IS_SET (V); break;
2451 case VC: res = IS_CLEAR (V); break;
2452 case HI: res = IS_SET (C) && IS_CLEAR (Z); break;
2453 case LS: res = IS_CLEAR (C) || IS_SET (Z); break;
2454 case GE: res = IS_SET (N) == IS_SET (V); break;
2455 case LT: res = IS_SET (N) != IS_SET (V); break;
2456 case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break;
2457 case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break;
/* CCMP/CCMN: if the condition holds, compare Rn against Rm (or the
   5-bit constant) and set the flags from the subtraction; otherwise
   load the immediate NZCV value from instr[3,0].  NOTE(review): the
   comment below loses its terminator here, and several declarations
   and branch lines are elided.  */
2468 CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */
2470 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2471 instr[30] = compare with positive (1) or negative value (0)
2472 instr[29,21] = 1 1101 0010
2473 instr[20,16] = Rm or const
2475 instr[11] = compare reg (0) or const (1)
2479 instr[3,0] = value for CPSR bits if the comparison does not take place. */
2484 NYI_assert (29, 21, 0x1d2);
2485 NYI_assert (10, 10, 0);
2486 NYI_assert (4, 4, 0);
2488 if (! testConditionCode (cpu, INSTR (15, 12)))
2490 aarch64_set_CPSR (cpu, INSTR (3, 0));
2494 negate = INSTR (30, 30) ? 1 : -1;
2495 rm = INSTR (20, 16);
2501 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2502 negate * (uint64_t) rm);
2504 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2505 negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
2510 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2513 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2514 negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
/* Vector MOV (whole register): alias of ORR with Vn == Vm.  Copies
   the low 64 bits and, for the full (128-bit) form, the high 64 bits
   too.  NOTE(review): this extract leaves the comment opened on the
   next line unterminated until well below, and elides the HALT_NYI
   and if (full) scaffolding; no comments are inserted inside that
   span to avoid changing where the comment terminates.  */
2519 do_vec_MOV_whole_vector (sim_cpu *cpu)
2521 /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)
2524 instr[30] = half(0)/full(1)
2525 instr[29,21] = 001110101
2527 instr[15,10] = 000111
2531 unsigned vs = INSTR (9, 5);
2532 unsigned vd = INSTR (4, 0);
2534 NYI_assert (29, 21, 0x075);
2535 NYI_assert (15, 10, 0x07);
2537 if (INSTR (20, 16) != vs)
2541 aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));
2543 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
2547 do_vec_MOV_into_scalar (sim_cpu *cpu)
2550 instr[30] = word(0)/long(1)
2551 instr[29,21] = 00 1110 000
2552 instr[20,18] = element size and index
2553 instr[17,10] = 00 0011 11
2554 instr[9,5] = V source
2555 instr[4,0] = R dest */
/* Moves the selected vector element into a general register,
   dispatching on the element-size/index field instr[20,18].  */
2557 unsigned vs = INSTR (9, 5);
2558 unsigned rd = INSTR (4, 0);
2560 NYI_assert (29, 21, 0x070);
2561 NYI_assert (17, 10, 0x0F);
2563 switch (INSTR (20, 18))
2566 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 0));
2570 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 1));
2577 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u32
2578 (cpu, vs, INSTR (20, 19)));
/* INS: insert a general register value into a selected vector
   element.  The lowest set bit of instr[20,16] selects the element
   width (byte/half/word/doubleword), the bits above it the index.
   NOTE(review): the leading `if (INSTR (16, 16))' of the chain below
   is on an elided line in this extract.  */
2587 do_vec_INS (sim_cpu *cpu)
2589 /* instr[31,21] = 01001110000
2590 instr[20,16] = element size and index
2591 instr[15,10] = 000111
2592 instr[9,5] = W source
2593 instr[4,0] = V dest */
2596 unsigned rs = INSTR (9, 5);
2597 unsigned vd = INSTR (4, 0);
2599 NYI_assert (31, 21, 0x270);
2600 NYI_assert (15, 10, 0x07);
2604 index = INSTR (20, 17);
2605 aarch64_set_vec_u8 (cpu, vd, index,
2606 aarch64_get_reg_u8 (cpu, rs, NO_SP));
2608 else if (INSTR (17, 17))
2610 index = INSTR (20, 18);
2611 aarch64_set_vec_u16 (cpu, vd, index,
2612 aarch64_get_reg_u16 (cpu, rs, NO_SP));
2614 else if (INSTR (18, 18))
2616 index = INSTR (20, 19);
2617 aarch64_set_vec_u32 (cpu, vd, index,
2618 aarch64_get_reg_u32 (cpu, rs, NO_SP));
2620 else if (INSTR (19, 19))
2622 index = INSTR (20, 20);
2623 aarch64_set_vec_u64 (cpu, vd, index,
2624 aarch64_get_reg_u64 (cpu, rs, NO_SP));
/* DUP (element): replicate one vector element across all lanes of
   the destination, lane width selected as in do_vec_INS above.
   NOTE(review): the opening of the comment below is on an elided
   line, leaving a stray terminator in the text.  */
2631 do_vec_DUP_vector_into_vector (sim_cpu *cpu)
2634 instr[30] = half(0)/full(1)
2635 instr[29,21] = 00 1110 000
2636 instr[20,16] = element size and index
2637 instr[15,10] = 0000 01
2638 instr[9,5] = V source
2639 instr[4,0] = V dest. */
2641 unsigned full = INSTR (30, 30);
2642 unsigned vs = INSTR (9, 5);
2643 unsigned vd = INSTR (4, 0);
2646 NYI_assert (29, 21, 0x070);
2647 NYI_assert (15, 10, 0x01);
2651 index = INSTR (20, 17);
2653 for (i = 0; i < (full ? 16 : 8); i++)
2654 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index));
2656 else if (INSTR (17, 17))
2658 index = INSTR (20, 18);
2660 for (i = 0; i < (full ? 8 : 4); i++)
2661 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index));
2663 else if (INSTR (18, 18))
2665 index = INSTR (20, 19);
2667 for (i = 0; i < (full ? 4 : 2); i++)
2668 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index));
2672 if (INSTR (19, 19) == 0)
2678 index = INSTR (20, 20);
2680 for (i = 0; i < 2; i++)
2681 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index));
2686 do_vec_TBL (sim_cpu *cpu)
2689 instr[30] = half(0)/full(1)
2690 instr[29,21] = 00 1110 000
2693 instr[14,13] = vec length
2695 instr[9,5] = V start
2696 instr[4,0] = V dest */
2698 int full = INSTR (30, 30);
2699 int len = INSTR (14, 13) + 1;
2700 unsigned vm = INSTR (20, 16);
2701 unsigned vn = INSTR (9, 5);
2702 unsigned vd = INSTR (4, 0);
2705 NYI_assert (29, 21, 0x070);
2706 NYI_assert (12, 10, 0);
2708 for (i = 0; i < (full ? 16 : 8); i++)
2710 unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i);
2714 val = aarch64_get_vec_u8 (cpu, vn, selector);
2715 else if (selector < 32)
2716 val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16);
2717 else if (selector < 48)
2718 val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32);
2719 else if (selector < 64)
2720 val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48);
2724 aarch64_set_vec_u8 (cpu, vd, i, val);
2729 do_vec_TRN (sim_cpu *cpu)
2732 instr[30] = half(0)/full(1)
2733 instr[29,24] = 00 1110
2738 instr[14] = TRN1 (0) / TRN2 (1)
2740 instr[9,5] = V source
2741 instr[4,0] = V dest. */
2743 int full = INSTR (30, 30);
2744 int second = INSTR (14, 14);
2745 unsigned vm = INSTR (20, 16);
2746 unsigned vn = INSTR (9, 5);
2747 unsigned vd = INSTR (4, 0);
2750 NYI_assert (29, 24, 0x0E);
2751 NYI_assert (13, 10, 0xA);
2753 switch (INSTR (23, 22))
2756 for (i = 0; i < (full ? 8 : 4); i++)
2760 aarch64_get_vec_u8 (cpu, second ? vm : vn, i * 2));
2762 (cpu, vd, 1 * 2 + 1,
2763 aarch64_get_vec_u8 (cpu, second ? vn : vm, i * 2 + 1));
2768 for (i = 0; i < (full ? 4 : 2); i++)
2772 aarch64_get_vec_u16 (cpu, second ? vm : vn, i * 2));
2774 (cpu, vd, 1 * 2 + 1,
2775 aarch64_get_vec_u16 (cpu, second ? vn : vm, i * 2 + 1));
2781 (cpu, vd, 0, aarch64_get_vec_u32 (cpu, second ? vm : vn, 0));
2783 (cpu, vd, 1, aarch64_get_vec_u32 (cpu, second ? vn : vm, 1));
2785 (cpu, vd, 2, aarch64_get_vec_u32 (cpu, second ? vm : vn, 2));
2787 (cpu, vd, 3, aarch64_get_vec_u32 (cpu, second ? vn : vm, 3));
2794 aarch64_set_vec_u64 (cpu, vd, 0,
2795 aarch64_get_vec_u64 (cpu, second ? vm : vn, 0));
2796 aarch64_set_vec_u64 (cpu, vd, 1,
2797 aarch64_get_vec_u64 (cpu, second ? vn : vm, 1));
2803 do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
2806 instr[30] = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
2807 [must be 1 for 64-bit xfer]
2808 instr[29,20] = 00 1110 0000
2809 instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
2810 0100=> 32-bits. 1000=>64-bits
2811 instr[15,10] = 0000 11
2812 instr[9,5] = W source
2813 instr[4,0] = V dest. */
2816 unsigned Vd = INSTR (4, 0);
2817 unsigned Rs = INSTR (9, 5);
2818 int both = INSTR (30, 30);
2820 NYI_assert (29, 20, 0x0E0);
2821 NYI_assert (15, 10, 0x03);
2823 switch (INSTR (19, 16))
2826 for (i = 0; i < (both ? 16 : 8); i++)
2827 aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
2831 for (i = 0; i < (both ? 8 : 4); i++)
2832 aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
2836 for (i = 0; i < (both ? 4 : 2); i++)
2837 aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
2843 aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2844 aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2853 do_vec_UZP (sim_cpu *cpu)
2856 instr[30] = half(0)/full(1)
2857 instr[29,24] = 00 1110
2858 instr[23,22] = size: byte(00), half(01), word (10), long (11)
2862 instr[14] = lower (0) / upper (1)
2867 int full = INSTR (30, 30);
2868 int upper = INSTR (14, 14);
2870 unsigned vm = INSTR (20, 16);
2871 unsigned vn = INSTR (9, 5);
2872 unsigned vd = INSTR (4, 0);
2874 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
2875 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
2876 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
2877 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
2882 uint64_t input1 = upper ? val_n1 : val_m1;
2883 uint64_t input2 = upper ? val_n2 : val_m2;
2886 NYI_assert (29, 24, 0x0E);
2887 NYI_assert (21, 21, 0);
2888 NYI_assert (15, 15, 0);
2889 NYI_assert (13, 10, 6);
2891 switch (INSTR (23, 23))
2894 for (i = 0; i < 8; i++)
2896 val1 |= (input1 >> (i * 8)) & (0xFFULL << (i * 8));
2897 val2 |= (input2 >> (i * 8)) & (0xFFULL << (i * 8));
2902 for (i = 0; i < 4; i++)
2904 val1 |= (input1 >> (i * 16)) & (0xFFFFULL << (i * 16));
2905 val2 |= (input2 >> (i * 16)) & (0xFFFFULL << (i * 16));
2910 val1 = ((input1 & 0xFFFFFFFF) | ((input1 >> 32) & 0xFFFFFFFF00000000ULL));
2911 val2 = ((input2 & 0xFFFFFFFF) | ((input2 >> 32) & 0xFFFFFFFF00000000ULL));
2919 aarch64_set_vec_u64 (cpu, vd, 0, val1);
2921 aarch64_set_vec_u64 (cpu, vd, 1, val2);
2925 do_vec_ZIP (sim_cpu *cpu)
2928 instr[30] = half(0)/full(1)
2929 instr[29,24] = 00 1110
2930 instr[23,22] = size: byte(00), hald(01), word (10), long (11)
2934 instr[14] = lower (0) / upper (1)
2939 int full = INSTR (30, 30);
2940 int upper = INSTR (14, 14);
2942 unsigned vm = INSTR (20, 16);
2943 unsigned vn = INSTR (9, 5);
2944 unsigned vd = INSTR (4, 0);
2946 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
2947 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
2948 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
2949 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
2954 uint64_t input1 = upper ? val_n1 : val_m1;
2955 uint64_t input2 = upper ? val_n2 : val_m2;
2957 NYI_assert (29, 24, 0x0E);
2958 NYI_assert (21, 21, 0);
2959 NYI_assert (15, 15, 0);
2960 NYI_assert (13, 10, 0xE);
2962 switch (INSTR (23, 23))
2966 ((input1 << 0) & (0xFF << 0))
2967 | ((input2 << 8) & (0xFF << 8))
2968 | ((input1 << 8) & (0xFF << 16))
2969 | ((input2 << 16) & (0xFF << 24))
2970 | ((input1 << 16) & (0xFFULL << 32))
2971 | ((input2 << 24) & (0xFFULL << 40))
2972 | ((input1 << 24) & (0xFFULL << 48))
2973 | ((input2 << 32) & (0xFFULL << 56));
2976 ((input1 >> 32) & (0xFF << 0))
2977 | ((input2 >> 24) & (0xFF << 8))
2978 | ((input1 >> 24) & (0xFF << 16))
2979 | ((input2 >> 16) & (0xFF << 24))
2980 | ((input1 >> 16) & (0xFFULL << 32))
2981 | ((input2 >> 8) & (0xFFULL << 40))
2982 | ((input1 >> 8) & (0xFFULL << 48))
2983 | ((input2 >> 0) & (0xFFULL << 56));
2988 ((input1 << 0) & (0xFFFF << 0))
2989 | ((input2 << 16) & (0xFFFF << 16))
2990 | ((input1 << 16) & (0xFFFFULL << 32))
2991 | ((input2 << 32) & (0xFFFFULL << 48));
2994 ((input1 >> 32) & (0xFFFF << 0))
2995 | ((input2 >> 16) & (0xFFFF << 16))
2996 | ((input1 >> 16) & (0xFFFFULL << 32))
2997 | ((input2 >> 0) & (0xFFFFULL << 48));
3001 val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
3002 val2 = (input2 & 0xFFFFFFFFULL) | (input1 << 32);
3011 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3013 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3016 /* Floating point immediates are encoded in 8 bits.
3017 fpimm[7] = sign bit.
3018 fpimm[6:4] = signed exponent.
3019 fpimm[3:0] = fraction (assuming leading 1).
3020 i.e. F = s * 1.f * 2^(e - b). */
3023 fp_immediate_for_encoding_32 (uint32_t imm8)
3026 uint32_t s, e, f, i;
3028 s = (imm8 >> 7) & 0x1;
3029 e = (imm8 >> 4) & 0x7;
3032 /* The fp value is s * n/16 * 2r where n is 16+e. */
3033 u = (16.0 + f) / 16.0;
3035 /* N.B. exponent is signed. */
3040 for (i = 0; i <= epos; i++)
3047 for (i = 0; i < eneg; i++)
3058 fp_immediate_for_encoding_64 (uint32_t imm8)
3061 uint32_t s, e, f, i;
3063 s = (imm8 >> 7) & 0x1;
3064 e = (imm8 >> 4) & 0x7;
3067 /* The fp value is s * n/16 * 2r where n is 16+e. */
3068 u = (16.0 + f) / 16.0;
3070 /* N.B. exponent is signed. */
3075 for (i = 0; i <= epos; i++)
3082 for (i = 0; i < eneg; i++)
3093 do_vec_MOV_immediate (sim_cpu *cpu)
3096 instr[30] = full/half selector
3097 instr[29,19] = 00111100000
3098 instr[18,16] = high 3 bits of uimm8
3099 instr[15,12] = size & shift:
3101 0010 => 32-bit + LSL#8
3102 0100 => 32-bit + LSL#16
3103 0110 => 32-bit + LSL#24
3104 1010 => 16-bit + LSL#8
3106 1101 => 32-bit + MSL#16
3107 1100 => 32-bit + MSL#8
3111 instr[9,5] = low 5-bits of uimm8
3114 int full = INSTR (30, 30);
3115 unsigned vd = INSTR (4, 0);
3116 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3119 NYI_assert (29, 19, 0x1E0);
3120 NYI_assert (11, 10, 1);
3122 switch (INSTR (15, 12))
3124 case 0x0: /* 32-bit, no shift. */
3125 case 0x2: /* 32-bit, shift by 8. */
3126 case 0x4: /* 32-bit, shift by 16. */
3127 case 0x6: /* 32-bit, shift by 24. */
3128 val <<= (8 * INSTR (14, 13));
3129 for (i = 0; i < (full ? 4 : 2); i++)
3130 aarch64_set_vec_u32 (cpu, vd, i, val);
3133 case 0xa: /* 16-bit, shift by 8. */
3136 case 0x8: /* 16-bit, no shift. */
3137 for (i = 0; i < (full ? 8 : 4); i++)
3138 aarch64_set_vec_u16 (cpu, vd, i, val);
3140 case 0xd: /* 32-bit, mask shift by 16. */
3144 case 0xc: /* 32-bit, mask shift by 8. */
3147 for (i = 0; i < (full ? 4 : 2); i++)
3148 aarch64_set_vec_u32 (cpu, vd, i, val);
3151 case 0xe: /* 8-bit, no shift. */
3152 for (i = 0; i < (full ? 16 : 8); i++)
3153 aarch64_set_vec_u8 (cpu, vd, i, val);
3156 case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */
3158 float u = fp_immediate_for_encoding_32 (val);
3159 for (i = 0; i < (full ? 4 : 2); i++)
3160 aarch64_set_vec_float (cpu, vd, i, u);
3170 do_vec_MVNI (sim_cpu *cpu)
3173 instr[30] = full/half selector
3174 instr[29,19] = 10111100000
3175 instr[18,16] = high 3 bits of uimm8
3176 instr[15,12] = selector
3178 instr[9,5] = low 5-bits of uimm8
3181 int full = INSTR (30, 30);
3182 unsigned vd = INSTR (4, 0);
3183 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3186 NYI_assert (29, 19, 0x5E0);
3187 NYI_assert (11, 10, 1);
3189 switch (INSTR (15, 12))
3191 case 0x0: /* 32-bit, no shift. */
3192 case 0x2: /* 32-bit, shift by 8. */
3193 case 0x4: /* 32-bit, shift by 16. */
3194 case 0x6: /* 32-bit, shift by 24. */
3195 val <<= (8 * INSTR (14, 13));
3197 for (i = 0; i < (full ? 4 : 2); i++)
3198 aarch64_set_vec_u32 (cpu, vd, i, val);
3201 case 0xa: /* 16-bit, 8 bit shift. */
3203 case 0x8: /* 16-bit, no shift. */
3205 for (i = 0; i < (full ? 8 : 4); i++)
3206 aarch64_set_vec_u16 (cpu, vd, i, val);
3209 case 0xd: /* 32-bit, mask shift by 16. */
3212 case 0xc: /* 32-bit, mask shift by 8. */
3216 for (i = 0; i < (full ? 4 : 2); i++)
3217 aarch64_set_vec_u32 (cpu, vd, i, val);
3220 case 0xE: /* MOVI Dn, #mask64 */
3224 for (i = 0; i < 8; i++)
3226 mask |= (0xFFUL << (i * 8));
3227 aarch64_set_vec_u64 (cpu, vd, 0, mask);
3228 aarch64_set_vec_u64 (cpu, vd, 1, mask);
3232 case 0xf: /* FMOV Vd.2D, #fpimm. */
3234 double u = fp_immediate_for_encoding_64 (val);
3239 aarch64_set_vec_double (cpu, vd, 0, u);
3240 aarch64_set_vec_double (cpu, vd, 1, u);
3249 #define ABS(A) ((A) < 0 ? - (A) : (A))
3252 do_vec_ABS (sim_cpu *cpu)
3255 instr[30] = half(0)/full(1)
3256 instr[29,24] = 00 1110
3257 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3258 instr[21,10] = 10 0000 1011 10
3262 unsigned vn = INSTR (9, 5);
3263 unsigned vd = INSTR (4, 0);
3264 unsigned full = INSTR (30, 30);
3267 NYI_assert (29, 24, 0x0E);
3268 NYI_assert (21, 10, 0x82E);
3270 switch (INSTR (23, 22))
3273 for (i = 0; i < (full ? 16 : 8); i++)
3274 aarch64_set_vec_s8 (cpu, vd, i,
3275 ABS (aarch64_get_vec_s8 (cpu, vn, i)));
3279 for (i = 0; i < (full ? 8 : 4); i++)
3280 aarch64_set_vec_s16 (cpu, vd, i,
3281 ABS (aarch64_get_vec_s16 (cpu, vn, i)));
3285 for (i = 0; i < (full ? 4 : 2); i++)
3286 aarch64_set_vec_s32 (cpu, vd, i,
3287 ABS (aarch64_get_vec_s32 (cpu, vn, i)));
3293 for (i = 0; i < 2; i++)
3294 aarch64_set_vec_s64 (cpu, vd, i,
3295 ABS (aarch64_get_vec_s64 (cpu, vn, i)));
3301 do_vec_ADDV (sim_cpu *cpu)
3304 instr[30] = full/half selector
3305 instr[29,24] = 00 1110
3306 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3307 instr[21,10] = 11 0001 1011 10
3311 unsigned vm = INSTR (9, 5);
3312 unsigned rd = INSTR (4, 0);
3315 int full = INSTR (30, 30);
3317 NYI_assert (29, 24, 0x0E);
3318 NYI_assert (21, 10, 0xC6E);
3320 switch (INSTR (23, 22))
3323 for (i = 0; i < (full ? 16 : 8); i++)
3324 val += aarch64_get_vec_u8 (cpu, vm, i);
3325 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3329 for (i = 0; i < (full ? 8 : 4); i++)
3330 val += aarch64_get_vec_u16 (cpu, vm, i);
3331 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3335 for (i = 0; i < (full ? 4 : 2); i++)
3336 val += aarch64_get_vec_u32 (cpu, vm, i);
3337 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3343 val = aarch64_get_vec_u64 (cpu, vm, 0);
3344 val += aarch64_get_vec_u64 (cpu, vm, 1);
3345 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3351 do_vec_ins_2 (sim_cpu *cpu)
3353 /* instr[31,21] = 01001110000
3354 instr[20,18] = size & element selector
3356 instr[13] = direction: to vec(0), from vec (1)
3362 unsigned vm = INSTR (9, 5);
3363 unsigned vd = INSTR (4, 0);
3365 NYI_assert (31, 21, 0x270);
3366 NYI_assert (17, 14, 0);
3367 NYI_assert (12, 10, 7);
3369 if (INSTR (13, 13) == 1)
3371 if (INSTR (18, 18) == 1)
3374 elem = INSTR (20, 19);
3375 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3376 aarch64_get_vec_u32 (cpu, vm, elem));
3381 if (INSTR (19, 19) != 1)
3384 elem = INSTR (20, 20);
3385 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3386 aarch64_get_vec_u64 (cpu, vm, elem));
3391 if (INSTR (18, 18) == 1)
3394 elem = INSTR (20, 19);
3395 aarch64_set_vec_u32 (cpu, vd, elem,
3396 aarch64_get_reg_u32 (cpu, vm, NO_SP));
3401 if (INSTR (19, 19) != 1)
3404 elem = INSTR (20, 20);
3405 aarch64_set_vec_u64 (cpu, vd, elem,
3406 aarch64_get_reg_u64 (cpu, vm, NO_SP));
/* Element-wise multiply helper: read N READ_TYPE elements from Vn and
   Vm (starting at element `bias'), then store the N products into Vd
   as WRITE_TYPE elements.  Both sources are read in full before any
   result is written, so Vd may alias Vn or Vm.  Expects `cpu', `vn',
   `vm', `vd' and `bias' to be in scope at the expansion site.  */
#define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE)	  \
  do								  \
    {								  \
      DST_TYPE a[N], b[N];					  \
      unsigned i;						  \
								  \
      for (i = 0; i < (N); i++)					  \
	{							  \
	  a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias); \
	  b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias); \
	}							  \
      for (i = 0; i < (N); i++)					  \
	aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]);	  \
    }								  \
  while (0)
3427 do_vec_mull (sim_cpu *cpu)
3430 instr[30] = lower(0)/upper(1) selector
3431 instr[29] = signed(0)/unsigned(1)
3432 instr[28,24] = 0 1110
3433 instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
3436 instr[15,10] = 11 0000
3440 int unsign = INSTR (29, 29);
3441 int bias = INSTR (30, 30);
3442 unsigned vm = INSTR (20, 16);
3443 unsigned vn = INSTR ( 9, 5);
3444 unsigned vd = INSTR ( 4, 0);
3447 NYI_assert (28, 24, 0x0E);
3448 NYI_assert (15, 10, 0x30);
3450 /* NB: Read source values before writing results, in case
3451 the source and destination vectors are the same. */
3452 switch (INSTR (23, 22))
3458 DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16);
3460 DO_VEC_WIDENING_MUL (8, int16_t, s8, s16);
3467 DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32);
3469 DO_VEC_WIDENING_MUL (4, int32_t, s16, s32);
3476 DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64);
3478 DO_VEC_WIDENING_MUL (2, int64_t, s32, s64);
3487 do_vec_fadd (sim_cpu *cpu)
3490 instr[30] = half(0)/full(1)
3491 instr[29,24] = 001110
3492 instr[23] = FADD(0)/FSUB(1)
3493 instr[22] = float (0)/double(1)
3496 instr[15,10] = 110101
3500 unsigned vm = INSTR (20, 16);
3501 unsigned vn = INSTR (9, 5);
3502 unsigned vd = INSTR (4, 0);
3504 int full = INSTR (30, 30);
3506 NYI_assert (29, 24, 0x0E);
3507 NYI_assert (21, 21, 1);
3508 NYI_assert (15, 10, 0x35);
3517 for (i = 0; i < 2; i++)
3518 aarch64_set_vec_double (cpu, vd, i,
3519 aarch64_get_vec_double (cpu, vn, i)
3520 - aarch64_get_vec_double (cpu, vm, i));
3524 for (i = 0; i < (full ? 4 : 2); i++)
3525 aarch64_set_vec_float (cpu, vd, i,
3526 aarch64_get_vec_float (cpu, vn, i)
3527 - aarch64_get_vec_float (cpu, vm, i));
3537 for (i = 0; i < 2; i++)
3538 aarch64_set_vec_double (cpu, vd, i,
3539 aarch64_get_vec_double (cpu, vm, i)
3540 + aarch64_get_vec_double (cpu, vn, i));
3544 for (i = 0; i < (full ? 4 : 2); i++)
3545 aarch64_set_vec_float (cpu, vd, i,
3546 aarch64_get_vec_float (cpu, vm, i)
3547 + aarch64_get_vec_float (cpu, vn, i));
3553 do_vec_add (sim_cpu *cpu)
3556 instr[30] = full/half selector
3557 instr[29,24] = 001110
3558 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3561 instr[15,10] = 100001
3565 unsigned vm = INSTR (20, 16);
3566 unsigned vn = INSTR (9, 5);
3567 unsigned vd = INSTR (4, 0);
3569 int full = INSTR (30, 30);
3571 NYI_assert (29, 24, 0x0E);
3572 NYI_assert (21, 21, 1);
3573 NYI_assert (15, 10, 0x21);
3575 switch (INSTR (23, 22))
3578 for (i = 0; i < (full ? 16 : 8); i++)
3579 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
3580 + aarch64_get_vec_u8 (cpu, vm, i));
3584 for (i = 0; i < (full ? 8 : 4); i++)
3585 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
3586 + aarch64_get_vec_u16 (cpu, vm, i));
3590 for (i = 0; i < (full ? 4 : 2); i++)
3591 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
3592 + aarch64_get_vec_u32 (cpu, vm, i));
3598 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0)
3599 + aarch64_get_vec_u64 (cpu, vm, 0));
3600 aarch64_set_vec_u64 (cpu, vd, 1,
3601 aarch64_get_vec_u64 (cpu, vn, 1)
3602 + aarch64_get_vec_u64 (cpu, vm, 1));
3608 do_vec_mul (sim_cpu *cpu)
3611 instr[30] = full/half selector
3612 instr[29,24] = 00 1110
3613 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3616 instr[15,10] = 10 0111
3620 unsigned vm = INSTR (20, 16);
3621 unsigned vn = INSTR (9, 5);
3622 unsigned vd = INSTR (4, 0);
3624 int full = INSTR (30, 30);
3627 NYI_assert (29, 24, 0x0E);
3628 NYI_assert (21, 21, 1);
3629 NYI_assert (15, 10, 0x27);
3631 switch (INSTR (23, 22))
3634 DO_VEC_WIDENING_MUL (full ? 16 : 8, uint16_t, u8, u16);
3638 DO_VEC_WIDENING_MUL (full ? 8 : 4, uint32_t, u16, u32);
3642 DO_VEC_WIDENING_MUL (full ? 4 : 2, uint64_t, u32, u64);
3651 do_vec_MLA (sim_cpu *cpu)
3654 instr[30] = full/half selector
3655 instr[29,24] = 00 1110
3656 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3659 instr[15,10] = 1001 01
3663 unsigned vm = INSTR (20, 16);
3664 unsigned vn = INSTR (9, 5);
3665 unsigned vd = INSTR (4, 0);
3667 int full = INSTR (30, 30);
3669 NYI_assert (29, 24, 0x0E);
3670 NYI_assert (21, 21, 1);
3671 NYI_assert (15, 10, 0x25);
3673 switch (INSTR (23, 22))
3677 uint16_t a[16], b[16];
3679 for (i = 0; i < (full ? 16 : 8); i++)
3681 a[i] = aarch64_get_vec_u8 (cpu, vn, i);
3682 b[i] = aarch64_get_vec_u8 (cpu, vm, i);
3685 for (i = 0; i < (full ? 16 : 8); i++)
3687 uint16_t v = aarch64_get_vec_u8 (cpu, vd, i);
3689 aarch64_set_vec_u16 (cpu, vd, i, v + (a[i] * b[i]));
3696 uint32_t a[8], b[8];
3698 for (i = 0; i < (full ? 8 : 4); i++)
3700 a[i] = aarch64_get_vec_u16 (cpu, vn, i);
3701 b[i] = aarch64_get_vec_u16 (cpu, vm, i);
3704 for (i = 0; i < (full ? 8 : 4); i++)
3706 uint32_t v = aarch64_get_vec_u16 (cpu, vd, i);
3708 aarch64_set_vec_u32 (cpu, vd, i, v + (a[i] * b[i]));
3715 uint64_t a[4], b[4];
3717 for (i = 0; i < (full ? 4 : 2); i++)
3719 a[i] = aarch64_get_vec_u32 (cpu, vn, i);
3720 b[i] = aarch64_get_vec_u32 (cpu, vm, i);
3723 for (i = 0; i < (full ? 4 : 2); i++)
3725 uint64_t v = aarch64_get_vec_u32 (cpu, vd, i);
3727 aarch64_set_vec_u64 (cpu, vd, i, v + (a[i] * b[i]));
static float
fmaxnm (float a, float b)
{
  /* Implements the IEEE-754 maxNum operation used by FMAXNM: a NaN
     operand is ignored in favour of the numeric one; the maximum is
     returned only when both operands are numeric.

     The old code tested `fpclassify (x) == FP_NORMAL', which also
     rejects zeros, subnormals and infinities, so e.g.
     fmaxnm (-1.0, 0.0) wrongly returned -1.0.  Only NaNs are special
     here, so test with isnan.  */
  if (! isnan (a))
    {
      if (! isnan (b))
	return a > b ? a : b;
      return a;
    }
  else if (! isnan (b))
    return b;
  return a;
}
static float
fminnm (float a, float b)
{
  /* IEEE-754 minNum for FMINNM: NaN operands are ignored in favour of
     numeric ones.  Uses isnan rather than the old
     `fpclassify (x) == FP_NORMAL' test, which wrongly treated zeros,
     subnormals and infinities as unordered (fminnm (0.0, 1.0)
     returned 1.0).  */
  if (! isnan (a))
    {
      if (! isnan (b))
	return a < b ? a : b;
      return a;
    }
  else if (! isnan (b))
    return b;
  return a;
}
static double
dmaxnm (double a, double b)
{
  /* Double-precision maxNum (FMAXNM): NaN operands are ignored in
     favour of numeric ones.  isnan replaces the old
     `fpclassify (x) == FP_NORMAL' test, which mishandled zeros,
     subnormals and infinities.  */
  if (! isnan (a))
    {
      if (! isnan (b))
	return a > b ? a : b;
      return a;
    }
  else if (! isnan (b))
    return b;
  return a;
}
static double
dminnm (double a, double b)
{
  /* Double-precision minNum (FMINNM): NaN operands are ignored in
     favour of numeric ones.  isnan replaces the old
     `fpclassify (x) == FP_NORMAL' test, which mishandled zeros,
     subnormals and infinities.  */
  if (! isnan (a))
    {
      if (! isnan (b))
	return a < b ? a : b;
      return a;
    }
  else if (! isnan (b))
    return b;
  return a;
}
3794 do_vec_FminmaxNMP (sim_cpu *cpu)
3797 instr [30] = half (0)/full (1)
3798 instr [29,24] = 10 1110
3799 instr [23] = max(0)/min(1)
3800 instr [22] = float (0)/double (1)
3803 instr [15,10] = 1100 01
3805 instr [4.0] = Vd. */
3807 unsigned vm = INSTR (20, 16);
3808 unsigned vn = INSTR (9, 5);
3809 unsigned vd = INSTR (4, 0);
3810 int full = INSTR (30, 30);
3812 NYI_assert (29, 24, 0x2E);
3813 NYI_assert (21, 21, 1);
3814 NYI_assert (15, 10, 0x31);
3818 double (* fn)(double, double) = INSTR (23, 23)
3823 aarch64_set_vec_double (cpu, vd, 0,
3824 fn (aarch64_get_vec_double (cpu, vn, 0),
3825 aarch64_get_vec_double (cpu, vn, 1)));
3826 aarch64_set_vec_double (cpu, vd, 0,
3827 fn (aarch64_get_vec_double (cpu, vm, 0),
3828 aarch64_get_vec_double (cpu, vm, 1)));
3832 float (* fn)(float, float) = INSTR (23, 23)
3835 aarch64_set_vec_float (cpu, vd, 0,
3836 fn (aarch64_get_vec_float (cpu, vn, 0),
3837 aarch64_get_vec_float (cpu, vn, 1)));
3839 aarch64_set_vec_float (cpu, vd, 1,
3840 fn (aarch64_get_vec_float (cpu, vn, 2),
3841 aarch64_get_vec_float (cpu, vn, 3)));
3843 aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
3844 fn (aarch64_get_vec_float (cpu, vm, 0),
3845 aarch64_get_vec_float (cpu, vm, 1)));
3847 aarch64_set_vec_float (cpu, vd, 3,
3848 fn (aarch64_get_vec_float (cpu, vm, 2),
3849 aarch64_get_vec_float (cpu, vm, 3)));
3854 do_vec_AND (sim_cpu *cpu)
3857 instr[30] = half (0)/full (1)
3858 instr[29,21] = 001110001
3860 instr[15,10] = 000111
3864 unsigned vm = INSTR (20, 16);
3865 unsigned vn = INSTR (9, 5);
3866 unsigned vd = INSTR (4, 0);
3868 int full = INSTR (30, 30);
3870 NYI_assert (29, 21, 0x071);
3871 NYI_assert (15, 10, 0x07);
3873 for (i = 0; i < (full ? 4 : 2); i++)
3874 aarch64_set_vec_u32 (cpu, vd, i,
3875 aarch64_get_vec_u32 (cpu, vn, i)
3876 & aarch64_get_vec_u32 (cpu, vm, i));
3880 do_vec_BSL (sim_cpu *cpu)
3883 instr[30] = half (0)/full (1)
3884 instr[29,21] = 101110011
3886 instr[15,10] = 000111
3890 unsigned vm = INSTR (20, 16);
3891 unsigned vn = INSTR (9, 5);
3892 unsigned vd = INSTR (4, 0);
3894 int full = INSTR (30, 30);
3896 NYI_assert (29, 21, 0x173);
3897 NYI_assert (15, 10, 0x07);
3899 for (i = 0; i < (full ? 16 : 8); i++)
3900 aarch64_set_vec_u8 (cpu, vd, i,
3901 ( aarch64_get_vec_u8 (cpu, vd, i)
3902 & aarch64_get_vec_u8 (cpu, vn, i))
3903 | ((~ aarch64_get_vec_u8 (cpu, vd, i))
3904 & aarch64_get_vec_u8 (cpu, vm, i)));
3908 do_vec_EOR (sim_cpu *cpu)
3911 instr[30] = half (0)/full (1)
3912 instr[29,21] = 10 1110 001
3914 instr[15,10] = 000111
3918 unsigned vm = INSTR (20, 16);
3919 unsigned vn = INSTR (9, 5);
3920 unsigned vd = INSTR (4, 0);
3922 int full = INSTR (30, 30);
3924 NYI_assert (29, 21, 0x171);
3925 NYI_assert (15, 10, 0x07);
3927 for (i = 0; i < (full ? 4 : 2); i++)
3928 aarch64_set_vec_u32 (cpu, vd, i,
3929 aarch64_get_vec_u32 (cpu, vn, i)
3930 ^ aarch64_get_vec_u32 (cpu, vm, i));
3934 do_vec_bit (sim_cpu *cpu)
3937 instr[30] = half (0)/full (1)
3938 instr[29,23] = 10 1110 1
3939 instr[22] = BIT (0) / BIF (1)
3942 instr[15,10] = 0001 11
3946 unsigned vm = INSTR (20, 16);
3947 unsigned vn = INSTR (9, 5);
3948 unsigned vd = INSTR (4, 0);
3949 unsigned full = INSTR (30, 30);
3950 unsigned test_false = INSTR (22, 22);
3953 NYI_assert (29, 23, 0x5D);
3954 NYI_assert (21, 21, 1);
3955 NYI_assert (15, 10, 0x07);
3959 for (i = 0; i < (full ? 16 : 8); i++)
3960 if (aarch64_get_vec_u32 (cpu, vn, i) == 0)
3961 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vm, i));
3965 for (i = 0; i < (full ? 16 : 8); i++)
3966 if (aarch64_get_vec_u32 (cpu, vn, i) != 0)
3967 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vm, i));
3972 do_vec_ORN (sim_cpu *cpu)
3975 instr[30] = half (0)/full (1)
3976 instr[29,21] = 00 1110 111
3978 instr[15,10] = 00 0111
3982 unsigned vm = INSTR (20, 16);
3983 unsigned vn = INSTR (9, 5);
3984 unsigned vd = INSTR (4, 0);
3986 int full = INSTR (30, 30);
3988 NYI_assert (29, 21, 0x077);
3989 NYI_assert (15, 10, 0x07);
3991 for (i = 0; i < (full ? 16 : 8); i++)
3992 aarch64_set_vec_u8 (cpu, vd, i,
3993 aarch64_get_vec_u8 (cpu, vn, i)
3994 | ~ aarch64_get_vec_u8 (cpu, vm, i));
3998 do_vec_ORR (sim_cpu *cpu)
4001 instr[30] = half (0)/full (1)
4002 instr[29,21] = 00 1110 101
4004 instr[15,10] = 0001 11
4008 unsigned vm = INSTR (20, 16);
4009 unsigned vn = INSTR (9, 5);
4010 unsigned vd = INSTR (4, 0);
4012 int full = INSTR (30, 30);
4014 NYI_assert (29, 21, 0x075);
4015 NYI_assert (15, 10, 0x07);
4017 for (i = 0; i < (full ? 16 : 8); i++)
4018 aarch64_set_vec_u8 (cpu, vd, i,
4019 aarch64_get_vec_u8 (cpu, vn, i)
4020 | aarch64_get_vec_u8 (cpu, vm, i));
4024 do_vec_BIC (sim_cpu *cpu)
4027 instr[30] = half (0)/full (1)
4028 instr[29,21] = 00 1110 011
4030 instr[15,10] = 00 0111
4034 unsigned vm = INSTR (20, 16);
4035 unsigned vn = INSTR (9, 5);
4036 unsigned vd = INSTR (4, 0);
4038 int full = INSTR (30, 30);
4040 NYI_assert (29, 21, 0x073);
4041 NYI_assert (15, 10, 0x07);
4043 for (i = 0; i < (full ? 16 : 8); i++)
4044 aarch64_set_vec_u8 (cpu, vd, i,
4045 aarch64_get_vec_u8 (cpu, vn, i)
4046 & ~ aarch64_get_vec_u8 (cpu, vm, i));
4050 do_vec_XTN (sim_cpu *cpu)
4053 instr[30] = first part (0)/ second part (1)
4054 instr[29,24] = 00 1110
4055 instr[23,22] = size: byte(00), half(01), word (10)
4056 instr[21,10] = 1000 0100 1010
4060 unsigned vs = INSTR (9, 5);
4061 unsigned vd = INSTR (4, 0);
4062 unsigned bias = INSTR (30, 30);
4065 NYI_assert (29, 24, 0x0E);
4066 NYI_assert (21, 10, 0x84A);
4068 switch (INSTR (23, 22))
4072 for (i = 0; i < 8; i++)
4073 aarch64_set_vec_u8 (cpu, vd, i + 8,
4074 aarch64_get_vec_u16 (cpu, vs, i) >> 8);
4076 for (i = 0; i < 8; i++)
4077 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, i));
4082 for (i = 0; i < 4; i++)
4083 aarch64_set_vec_u16 (cpu, vd, i + 4,
4084 aarch64_get_vec_u32 (cpu, vs, i) >> 16);
4086 for (i = 0; i < 4; i++)
4087 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, i));
4092 for (i = 0; i < 2; i++)
4093 aarch64_set_vec_u32 (cpu, vd, i + 4,
4094 aarch64_get_vec_u64 (cpu, vs, i) >> 32);
4096 for (i = 0; i < 2; i++)
4097 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, i));
4103 do_vec_maxv (sim_cpu *cpu)
4106 instr[30] = half(0)/full(1)
4107 instr[29] = signed (0)/unsigned(1)
4108 instr[28,24] = 0 1110
4109 instr[23,22] = size: byte(00), half(01), word (10)
4111 instr[20,17] = 1 000
4112 instr[16] = max(0)/min(1)
4113 instr[15,10] = 1010 10
4114 instr[9,5] = V source
4115 instr[4.0] = R dest. */
4117 unsigned vs = INSTR (9, 5);
4118 unsigned rd = INSTR (4, 0);
4119 unsigned full = INSTR (30, 30);
4122 NYI_assert (28, 24, 0x0E);
4123 NYI_assert (21, 21, 1);
4124 NYI_assert (20, 17, 8);
4125 NYI_assert (15, 10, 0x2A);
4127 switch ((INSTR (29, 29) << 1) | INSTR (16, 16))
4129 case 0: /* SMAXV. */
4132 switch (INSTR (23, 22))
4135 smax = aarch64_get_vec_s8 (cpu, vs, 0);
4136 for (i = 1; i < (full ? 16 : 8); i++)
4137 smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i));
4140 smax = aarch64_get_vec_s16 (cpu, vs, 0);
4141 for (i = 1; i < (full ? 8 : 4); i++)
4142 smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i));
4145 smax = aarch64_get_vec_s32 (cpu, vs, 0);
4146 for (i = 1; i < (full ? 4 : 2); i++)
4147 smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i));
4152 aarch64_set_reg_s64 (cpu, rd, NO_SP, smax);
4156 case 1: /* SMINV. */
4159 switch (INSTR (23, 22))
4162 smin = aarch64_get_vec_s8 (cpu, vs, 0);
4163 for (i = 1; i < (full ? 16 : 8); i++)
4164 smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i));
4167 smin = aarch64_get_vec_s16 (cpu, vs, 0);
4168 for (i = 1; i < (full ? 8 : 4); i++)
4169 smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i));
4172 smin = aarch64_get_vec_s32 (cpu, vs, 0);
4173 for (i = 1; i < (full ? 4 : 2); i++)
4174 smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i));
4180 aarch64_set_reg_s64 (cpu, rd, NO_SP, smin);
4184 case 2: /* UMAXV. */
4187 switch (INSTR (23, 22))
4190 umax = aarch64_get_vec_u8 (cpu, vs, 0);
4191 for (i = 1; i < (full ? 16 : 8); i++)
4192 umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i));
4195 umax = aarch64_get_vec_u16 (cpu, vs, 0);
4196 for (i = 1; i < (full ? 8 : 4); i++)
4197 umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i));
4200 umax = aarch64_get_vec_u32 (cpu, vs, 0);
4201 for (i = 1; i < (full ? 4 : 2); i++)
4202 umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i));
4208 aarch64_set_reg_u64 (cpu, rd, NO_SP, umax);
4212 case 3: /* UMINV. */
4215 switch (INSTR (23, 22))
4218 umin = aarch64_get_vec_u8 (cpu, vs, 0);
4219 for (i = 1; i < (full ? 16 : 8); i++)
4220 umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i));
4223 umin = aarch64_get_vec_u16 (cpu, vs, 0);
4224 for (i = 1; i < (full ? 8 : 4); i++)
4225 umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i));
4228 umin = aarch64_get_vec_u32 (cpu, vs, 0);
4229 for (i = 1; i < (full ? 4 : 2); i++)
4230 umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i));
4236 aarch64_set_reg_u64 (cpu, rd, NO_SP, umin);
4243 do_vec_fminmaxV (sim_cpu *cpu)
4245 /* instr[31,24] = 0110 1110
4246 instr[23] = max(0)/min(1)
4247 instr[22,14] = 011 0000 11
4248 instr[13,12] = nm(00)/normal(11)
4250 instr[9,5] = V source
4251 instr[4.0] = R dest. */
4253 unsigned vs = INSTR (9, 5);
4254 unsigned rd = INSTR (4, 0);
4256 float res = aarch64_get_vec_float (cpu, vs, 0);
4258 NYI_assert (31, 24, 0x6E);
4259 NYI_assert (22, 14, 0x0C3);
4260 NYI_assert (11, 10, 2);
4264 switch (INSTR (13, 12))
4266 case 0: /* FMNINNMV. */
4267 for (i = 1; i < 4; i++)
4268 res = fminnm (res, aarch64_get_vec_float (cpu, vs, i));
4271 case 3: /* FMINV. */
4272 for (i = 1; i < 4; i++)
4273 res = min (res, aarch64_get_vec_float (cpu, vs, i));
4282 switch (INSTR (13, 12))
4284 case 0: /* FMNAXNMV. */
4285 for (i = 1; i < 4; i++)
4286 res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
4289 case 3: /* FMAXV. */
4290 for (i = 1; i < 4; i++)
4291 res = max (res, aarch64_get_vec_float (cpu, vs, i));
4299 aarch64_set_FP_float (cpu, rd, res);
/* Element-wise vector FP min/max (FMIN/FMAX/FMINNM/FMAXNM):
   Vd[i] = op (Vn[i], Vm[i]).  Bit 23 selects min vs max, bit 22 selects
   float vs double lanes, bits [13,12] select the NaN-propagating "nm"
   variant (dminnm/dmaxnm/fminnm/fmaxnm) vs the plain C library fmin/fmax
   family.  Double lanes always process 2 elements; float lanes process
   4 or 2 depending on the full/half bit.  */
4303 do_vec_Fminmax (sim_cpu *cpu)
4306 instr[30] = half(0)/full(1)
4307 instr[29,24] = 00 1110
4308 instr[23] = max(0)/min(1)
4309 instr[22] = float(0)/double(1)
4313 instr[13,12] = nm(00)/normal(11)
4318 unsigned vm = INSTR (20, 16);
4319 unsigned vn = INSTR (9, 5);
4320 unsigned vd = INSTR (4, 0);
4321 unsigned full = INSTR (30, 30);
4322 unsigned min = INSTR (23, 23);
4325 NYI_assert (29, 24, 0x0E);
4326 NYI_assert (21, 21, 1);
4327 NYI_assert (15, 14, 3);
4328 NYI_assert (11, 10, 1);
/* Double-precision path: pick the double-valued comparator.  */
4332 double (* func)(double, double);
4337 if (INSTR (13, 12) == 0)
4338 func = min ? dminnm : dmaxnm;
4339 else if (INSTR (13, 12) == 3)
4340 func = min ? fmin : fmax;
4344 for (i = 0; i < 2; i++)
4345 aarch64_set_vec_double (cpu, vd, i,
4346 func (aarch64_get_vec_double (cpu, vn, i),
4347 aarch64_get_vec_double (cpu, vm, i)));
/* Single-precision path: float-valued comparator, 4 or 2 lanes.  */
4351 float (* func)(float, float);
4353 if (INSTR (13, 12) == 0)
4354 func = min ? fminnm : fmaxnm;
4355 else if (INSTR (13, 12) == 3)
4356 func = min ? fminf : fmaxf;
4360 for (i = 0; i < (full ? 4 : 2); i++)
4361 aarch64_set_vec_float (cpu, vd, i,
4362 func (aarch64_get_vec_float (cpu, vn, i),
4363 aarch64_get_vec_float (cpu, vm, i)));
/* Vector integer-to-FP convert (decoded as SCVTF): converts each lane of
   Vn to floating point in Vd.  Bit 22 selects 64-bit-to-double vs
   32-bit-to-float lanes.
   NOTE(review): SCVTF is the *signed* convert, yet the code reads lanes
   with aarch64_get_vec_u64/u32 (unsigned) before casting — negative lane
   values would convert incorrectly.  Verify against the decode path
   before changing; the selecting branch is elided here.  */
4368 do_vec_SCVTF (sim_cpu *cpu)
4372 instr[29,23] = 00 1110 0
4373 instr[22] = float(0)/double(1)
4374 instr[21,10] = 10 0001 1101 10
4378 unsigned vn = INSTR (9, 5);
4379 unsigned vd = INSTR (4, 0);
4380 unsigned full = INSTR (30, 30);
4381 unsigned size = INSTR (22, 22);
4384 NYI_assert (29, 23, 0x1C);
4385 NYI_assert (21, 10, 0x876);
4392 for (i = 0; i < 2; i++)
4394 double val = (double) aarch64_get_vec_u64 (cpu, vn, i);
4395 aarch64_set_vec_double (cpu, vd, i, val);
4400 for (i = 0; i < (full ? 4 : 2); i++)
4402 float val = (float) aarch64_get_vec_u32 (cpu, vn, i);
4403 aarch64_set_vec_float (cpu, vd, i, val);
/* Element-wise two-register integer compare helper used by
   do_vec_compare: writes all-ones (-1) into each destination lane where
   Vn[i] CMP Vm[i] holds, else zero.  SOURCE is token-pasted (s/u) to pick
   signed or unsigned lane accessors; the elided switch dispatches on lane
   size (8/16/32/64 bits), with lane counts scaled by the full/half bit.
   (Comments may not be inserted inside the macro body: the dropped
   backslash continuations must stay contiguous.)  */
4408 #define VEC_CMP(SOURCE, CMP) \
4414 for (i = 0; i < (full ? 16 : 8); i++) \
4415 aarch64_set_vec_u8 (cpu, vd, i, \
4416 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4418 aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \
4422 for (i = 0; i < (full ? 8 : 4); i++) \
4423 aarch64_set_vec_u16 (cpu, vd, i, \
4424 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4426 aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \
4430 for (i = 0; i < (full ? 4 : 2); i++) \
4431 aarch64_set_vec_u32 (cpu, vd, i, \
4432 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4434 aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \
4440 for (i = 0; i < 2; i++) \
4441 aarch64_set_vec_u64 (cpu, vd, i, \
4442 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4444 aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \
/* Element-wise integer compare-against-zero helper used by
   do_vec_compare: sets each destination lane to all-ones where
   Vn[i] CMP 0 holds, else zero.  Same SOURCE/size-dispatch structure as
   VEC_CMP but with an implicit zero second operand (no Vm).  */
4451 #define VEC_CMP0(SOURCE, CMP) \
4457 for (i = 0; i < (full ? 16 : 8); i++) \
4458 aarch64_set_vec_u8 (cpu, vd, i, \
4459 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4463 for (i = 0; i < (full ? 8 : 4); i++) \
4464 aarch64_set_vec_u16 (cpu, vd, i, \
4465 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4469 for (i = 0; i < (full ? 4 : 2); i++) \
4470 aarch64_set_vec_u32 (cpu, vd, i, \
4471 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4477 for (i = 0; i < 2; i++) \
4478 aarch64_set_vec_u64 (cpu, vd, i, \
4479 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4480 CMP 0 ? -1ULL : 0); \
/* Floating-point compare-against-zero helper for do_vec_compare: writes
   an all-ones mask into each lane of Vd where Vn[i] CMP 0.0 holds, else
   zero.  Bit 22 selects double lanes (2) vs float lanes (4 or 2 by the
   full/half bit).  Masks are written through the u64/u32 setters so the
   result is a lane-sized bitmask, matching AArch64 FCM* semantics.  */
4486 #define VEC_FCMP0(CMP) \
4491 if (INSTR (22, 22)) \
4495 for (i = 0; i < 2; i++) \
4496 aarch64_set_vec_u64 (cpu, vd, i, \
4497 aarch64_get_vec_double (cpu, vn, i) \
4498 CMP 0.0 ? -1 : 0); \
4502 for (i = 0; i < (full ? 4 : 2); i++) \
4503 aarch64_set_vec_u32 (cpu, vd, i, \
4504 aarch64_get_vec_float (cpu, vn, i) \
4505 CMP 0.0 ? -1 : 0); \
/* Two-register floating-point compare helper for do_vec_compare: writes
   an all-ones mask into each lane of Vd where Vn[i] CMP Vm[i] holds,
   else zero.  Same size selection as VEC_FCMP0 (bit 22: double vs
   float; full/half bit scales the float lane count).  */
4511 #define VEC_FCMP(CMP) \
4514 if (INSTR (22, 22)) \
4518 for (i = 0; i < 2; i++) \
4519 aarch64_set_vec_u64 (cpu, vd, i, \
4520 aarch64_get_vec_double (cpu, vn, i) \
4522 aarch64_get_vec_double (cpu, vm, i) \
4527 for (i = 0; i < (full ? 4 : 2); i++) \
4528 aarch64_set_vec_u32 (cpu, vd, i, \
4529 aarch64_get_vec_float (cpu, vn, i) \
4531 aarch64_get_vec_float (cpu, vm, i) \
/* Decode and execute the vector compare family (integer CMEQ/CMGT/CMGE/
   CMHI/CMHS and their #0 forms; FP FCMEQ/FCMGT/FCMGE and #0 forms) by
   building a small decode key and expanding one of the VEC_CMP /
   VEC_CMP0 / VEC_FCMP / VEC_FCMP0 macros.  Also forwards several
   same-encoding-space instructions (fminmaxV, SCVTF, ...) that share
   this decode region.  Each macro expansion returns, so control does
   not fall through cases.  */
4539 do_vec_compare (sim_cpu *cpu)
4542 instr[30] = half(0)/full(1)
4543 instr[29] = part-of-comparison-type
4544 instr[28,24] = 0 1110
4545 instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
4546 type of float compares: single (-0) / double (-1)
4548 instr[20,16] = Vm or 00000 (compare vs 0)
4549 instr[15,10] = part-of-comparison-type
4553 int full = INSTR (30, 30);
4554 int size = INSTR (23, 22);
4555 unsigned vm = INSTR (20, 16);
4556 unsigned vn = INSTR (9, 5);
4557 unsigned vd = INSTR (4, 0);
4560 NYI_assert (28, 24, 0x0E);
4561 NYI_assert (21, 21, 1);
4565 || ((INSTR (11, 11) == 0
4566 && INSTR (10, 10) == 0)))
4568 /* A compare vs 0. */
/* Instructions that alias into this decode space are dispatched away
   before the FP-compare decode below.  */
4571 if (INSTR (15, 10) == 0x2A)
4573 else if (INSTR (15, 10) == 0x32
4574 || INSTR (15, 10) == 0x3E)
4575 do_vec_fminmaxV (cpu);
4576 else if (INSTR (29, 23) == 0x1C
4577 && INSTR (21, 10) == 0x876)
4587 /* A floating point compare. */
/* decode = bit29 : bit23 : bits[15,11] approximately — built from the
   discriminating bits of the FP compare encodings.  */
4588 unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4)
4591 NYI_assert (15, 15, 1);
4595 case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
4596 case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
4597 case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
4598 case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
4599 case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
4600 case /* 0b111001: GT */ 0x39: VEC_FCMP (>);
4601 case /* 0b101001: GE */ 0x29: VEC_FCMP (>=);
4602 case /* 0b001001: EQ */ 0x09: VEC_FCMP (==);
/* Integer compares: decode = bit29 (signed/unsigned half of the space)
   concatenated with opcode bits [15,10].  */
4610 unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10);
4614 case 0x0D: /* 0001101 GT */ VEC_CMP (s, > );
4615 case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= );
4616 case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > );
4617 case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == );
4618 case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < );
4619 case 0x4D: /* 1001101 HI */ VEC_CMP (u, > );
4620 case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= );
4621 case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= );
4622 case 0x63: /* 1100011 EQ */ VEC_CMP (u, == );
4623 case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= );
/* Vector signed shift-left by register (SSHL): each lane of Vn is
   shifted by the signed byte taken from the corresponding position of
   Vm (byte 0 of each lane: index i, i*2, i*4, i*8 by lane size).
   The elided per-lane branch presumably shifts left for a positive
   count and right for a negative one — TODO confirm against the
   dropped lines.  Lane size comes from bits [23,22]; full/half bit
   halves the lane count.  */
4633 do_vec_SSHL (sim_cpu *cpu)
4636 instr[30] = first part (0)/ second part (1)
4637 instr[29,24] = 00 1110
4638 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4641 instr[15,10] = 0100 01
4645 unsigned full = INSTR (30, 30);
4646 unsigned vm = INSTR (20, 16);
4647 unsigned vn = INSTR (9, 5);
4648 unsigned vd = INSTR (4, 0);
4652 NYI_assert (29, 24, 0x0E);
4653 NYI_assert (21, 21, 1);
4654 NYI_assert (15, 10, 0x11);
4656 /* FIXME: What is a signed shift left in this context ?. */
4658 switch (INSTR (23, 22))
4661 for (i = 0; i < (full ? 16 : 8); i++)
4663 shift = aarch64_get_vec_s8 (cpu, vm, i);
4665 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4668 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4674 for (i = 0; i < (full ? 8 : 4); i++)
4676 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4678 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4681 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4687 for (i = 0; i < (full ? 4 : 2); i++)
4689 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4691 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4694 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4702 for (i = 0; i < 2; i++)
4704 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4706 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4709 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
/* Vector unsigned shift-left by register (USHL): mirror of do_vec_SSHL
   but reading/writing unsigned lanes; the per-lane shift count is still
   the signed byte from the matching byte position of Vm (negative counts
   presumably shift right — elided branch, confirm).  */
4717 do_vec_USHL (sim_cpu *cpu)
4720 instr[30] = first part (0)/ second part (1)
4721 instr[29,24] = 10 1110
4722 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4725 instr[15,10] = 0100 01
4729 unsigned full = INSTR (30, 30);
4730 unsigned vm = INSTR (20, 16);
4731 unsigned vn = INSTR (9, 5);
4732 unsigned vd = INSTR (4, 0);
4736 NYI_assert (29, 24, 0x2E);
4737 NYI_assert (15, 10, 0x11);
4739 switch (INSTR (23, 22))
4742 for (i = 0; i < (full ? 16 : 8); i++)
4744 shift = aarch64_get_vec_s8 (cpu, vm, i);
4746 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4749 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4755 for (i = 0; i < (full ? 8 : 4); i++)
4757 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4759 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4762 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4768 for (i = 0; i < (full ? 4 : 2); i++)
4770 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4772 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4775 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4783 for (i = 0; i < 2; i++)
4785 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4787 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4790 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
/* Vector fused multiply-add accumulate (FMLA):
   Vd[i] = Vn[i] * Vm[i] + Vd[i].  Bit 22 selects double lanes (2)
   vs float lanes (4 or 2 by the full/half bit).
   NOTE(review): implemented as separate multiply then add, i.e. with
   intermediate rounding — a true fused operation (fma/fmaf) would round
   once; kept as-is since the simulator models it this way.  */
4798 do_vec_FMLA (sim_cpu *cpu)
4801 instr[30] = full/half selector
4802 instr[29,23] = 0011100
4803 instr[22] = size: 0=>float, 1=>double
4806 instr[15,10] = 1100 11
4810 unsigned vm = INSTR (20, 16);
4811 unsigned vn = INSTR (9, 5);
4812 unsigned vd = INSTR (4, 0);
4814 int full = INSTR (30, 30);
4816 NYI_assert (29, 23, 0x1C);
4817 NYI_assert (21, 21, 1);
4818 NYI_assert (15, 10, 0x33);
4824 for (i = 0; i < 2; i++)
4825 aarch64_set_vec_double (cpu, vd, i,
4826 aarch64_get_vec_double (cpu, vn, i) *
4827 aarch64_get_vec_double (cpu, vm, i) +
4828 aarch64_get_vec_double (cpu, vd, i));
4832 for (i = 0; i < (full ? 4 : 2); i++)
4833 aarch64_set_vec_float (cpu, vd, i,
4834 aarch64_get_vec_float (cpu, vn, i) *
4835 aarch64_get_vec_float (cpu, vm, i) +
4836 aarch64_get_vec_float (cpu, vd, i));
/* Vector element-wise maximum (SMAX/UMAX):
   Vd[i] = max (Vn[i], Vm[i]).  Bit 29 selects unsigned (UMAX) vs
   signed (SMAX); bits [23,22] select 8/16/32-bit lanes (no 64-bit
   form); the full/half bit halves the lane count.  The two switches
   below are the unsigned and signed halves respectively.  */
4841 do_vec_max (sim_cpu *cpu)
4844 instr[30] = full/half selector
4845 instr[29] = SMAX (0) / UMAX (1)
4846 instr[28,24] = 0 1110
4847 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
4850 instr[15,10] = 0110 01
4854 unsigned vm = INSTR (20, 16);
4855 unsigned vn = INSTR (9, 5);
4856 unsigned vd = INSTR (4, 0);
4858 int full = INSTR (30, 30);
4860 NYI_assert (28, 24, 0x0E);
4861 NYI_assert (21, 21, 1);
4862 NYI_assert (15, 10, 0x19);
/* UMAX: unsigned lane comparisons.  */
4866 switch (INSTR (23, 22))
4869 for (i = 0; i < (full ? 16 : 8); i++)
4870 aarch64_set_vec_u8 (cpu, vd, i,
4871 aarch64_get_vec_u8 (cpu, vn, i)
4872 > aarch64_get_vec_u8 (cpu, vm, i)
4873 ? aarch64_get_vec_u8 (cpu, vn, i)
4874 : aarch64_get_vec_u8 (cpu, vm, i));
4878 for (i = 0; i < (full ? 8 : 4); i++)
4879 aarch64_set_vec_u16 (cpu, vd, i,
4880 aarch64_get_vec_u16 (cpu, vn, i)
4881 > aarch64_get_vec_u16 (cpu, vm, i)
4882 ? aarch64_get_vec_u16 (cpu, vn, i)
4883 : aarch64_get_vec_u16 (cpu, vm, i));
4887 for (i = 0; i < (full ? 4 : 2); i++)
4888 aarch64_set_vec_u32 (cpu, vd, i,
4889 aarch64_get_vec_u32 (cpu, vn, i)
4890 > aarch64_get_vec_u32 (cpu, vm, i)
4891 ? aarch64_get_vec_u32 (cpu, vn, i)
4892 : aarch64_get_vec_u32 (cpu, vm, i));
/* SMAX: signed lane comparisons.  */
4901 switch (INSTR (23, 22))
4904 for (i = 0; i < (full ? 16 : 8); i++)
4905 aarch64_set_vec_s8 (cpu, vd, i,
4906 aarch64_get_vec_s8 (cpu, vn, i)
4907 > aarch64_get_vec_s8 (cpu, vm, i)
4908 ? aarch64_get_vec_s8 (cpu, vn, i)
4909 : aarch64_get_vec_s8 (cpu, vm, i));
4913 for (i = 0; i < (full ? 8 : 4); i++)
4914 aarch64_set_vec_s16 (cpu, vd, i,
4915 aarch64_get_vec_s16 (cpu, vn, i)
4916 > aarch64_get_vec_s16 (cpu, vm, i)
4917 ? aarch64_get_vec_s16 (cpu, vn, i)
4918 : aarch64_get_vec_s16 (cpu, vm, i));
4922 for (i = 0; i < (full ? 4 : 2); i++)
4923 aarch64_set_vec_s32 (cpu, vd, i,
4924 aarch64_get_vec_s32 (cpu, vn, i)
4925 > aarch64_get_vec_s32 (cpu, vm, i)
4926 ? aarch64_get_vec_s32 (cpu, vn, i)
4927 : aarch64_get_vec_s32 (cpu, vm, i));
/* Vector element-wise minimum (SMIN/UMIN):
   Vd[i] = min (Vn[i], Vm[i]).  Structure mirrors do_vec_max with the
   comparison direction reversed: bit 29 selects unsigned (UMIN) vs
   signed (SMIN); bits [23,22] select 8/16/32-bit lanes.  */
4937 do_vec_min (sim_cpu *cpu)
4940 instr[30] = full/half selector
4941 instr[29] = SMIN (0) / UMIN (1)
4942 instr[28,24] = 0 1110
4943 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
4946 instr[15,10] = 0110 11
4950 unsigned vm = INSTR (20, 16);
4951 unsigned vn = INSTR (9, 5);
4952 unsigned vd = INSTR (4, 0);
4954 int full = INSTR (30, 30);
4956 NYI_assert (28, 24, 0x0E);
4957 NYI_assert (21, 21, 1);
4958 NYI_assert (15, 10, 0x1B);
/* UMIN: unsigned lane comparisons.  */
4962 switch (INSTR (23, 22))
4965 for (i = 0; i < (full ? 16 : 8); i++)
4966 aarch64_set_vec_u8 (cpu, vd, i,
4967 aarch64_get_vec_u8 (cpu, vn, i)
4968 < aarch64_get_vec_u8 (cpu, vm, i)
4969 ? aarch64_get_vec_u8 (cpu, vn, i)
4970 : aarch64_get_vec_u8 (cpu, vm, i));
4974 for (i = 0; i < (full ? 8 : 4); i++)
4975 aarch64_set_vec_u16 (cpu, vd, i,
4976 aarch64_get_vec_u16 (cpu, vn, i)
4977 < aarch64_get_vec_u16 (cpu, vm, i)
4978 ? aarch64_get_vec_u16 (cpu, vn, i)
4979 : aarch64_get_vec_u16 (cpu, vm, i));
4983 for (i = 0; i < (full ? 4 : 2); i++)
4984 aarch64_set_vec_u32 (cpu, vd, i,
4985 aarch64_get_vec_u32 (cpu, vn, i)
4986 < aarch64_get_vec_u32 (cpu, vm, i)
4987 ? aarch64_get_vec_u32 (cpu, vn, i)
4988 : aarch64_get_vec_u32 (cpu, vm, i));
/* SMIN: signed lane comparisons.  */
4997 switch (INSTR (23, 22))
5000 for (i = 0; i < (full ? 16 : 8); i++)
5001 aarch64_set_vec_s8 (cpu, vd, i,
5002 aarch64_get_vec_s8 (cpu, vn, i)
5003 < aarch64_get_vec_s8 (cpu, vm, i)
5004 ? aarch64_get_vec_s8 (cpu, vn, i)
5005 : aarch64_get_vec_s8 (cpu, vm, i));
5009 for (i = 0; i < (full ? 8 : 4); i++)
5010 aarch64_set_vec_s16 (cpu, vd, i,
5011 aarch64_get_vec_s16 (cpu, vn, i)
5012 < aarch64_get_vec_s16 (cpu, vm, i)
5013 ? aarch64_get_vec_s16 (cpu, vn, i)
5014 : aarch64_get_vec_s16 (cpu, vm, i));
5018 for (i = 0; i < (full ? 4 : 2); i++)
5019 aarch64_set_vec_s32 (cpu, vd, i,
5020 aarch64_get_vec_s32 (cpu, vn, i)
5021 < aarch64_get_vec_s32 (cpu, vm, i)
5022 ? aarch64_get_vec_s32 (cpu, vn, i)
5023 : aarch64_get_vec_s32 (cpu, vm, i));
/* Widening vector subtract (SSUBL/SSUBL2/USUBL/USUBL2):
   Vd[i] (double-width) = widen (Vn[i+bias]) - widen (Vm[i+bias]).
   Bits [30,29] combine the upper/lower-half selector with signedness;
   the "2" variants read the upper half of the sources (bias set in the
   elided fall-through before each case body).  Lane size (src) comes
   from bits [23,22]; destination lanes are twice as wide.  */
5033 do_vec_sub_long (sim_cpu *cpu)
5036 instr[30] = lower (0) / upper (1)
5037 instr[29] = signed (0) / unsigned (1)
5038 instr[28,24] = 0 1110
5039 instr[23,22] = size: bytes (00), half (01), word (10)
5042 instr[15,10] = 0010 00
5044 instr[4,0] = V dest. */
5046 unsigned size = INSTR (23, 22);
5047 unsigned vm = INSTR (20, 16);
5048 unsigned vn = INSTR (9, 5);
5049 unsigned vd = INSTR (4, 0);
5053 NYI_assert (28, 24, 0x0E);
5054 NYI_assert (21, 21, 1);
5055 NYI_assert (15, 10, 0x08);
5060 switch (INSTR (30, 29))
/* Case 2 sets the upper-half bias then falls through to case 0.  */
5062 case 2: /* SSUBL2. */
5064 case 0: /* SSUBL. */
5069 for (i = 0; i < 8; i++)
5070 aarch64_set_vec_s16 (cpu, vd, i,
5071 aarch64_get_vec_s8 (cpu, vn, i + bias)
5072 - aarch64_get_vec_s8 (cpu, vm, i + bias));
5077 for (i = 0; i < 4; i++)
5078 aarch64_set_vec_s32 (cpu, vd, i,
5079 aarch64_get_vec_s16 (cpu, vn, i + bias)
5080 - aarch64_get_vec_s16 (cpu, vm, i + bias));
5084 for (i = 0; i < 2; i++)
5085 aarch64_set_vec_s64 (cpu, vd, i,
5086 aarch64_get_vec_s32 (cpu, vn, i + bias)
5087 - aarch64_get_vec_s32 (cpu, vm, i + bias));
5095 case 3: /* USUBL2. */
5097 case 1: /* USUBL. */
5102 for (i = 0; i < 8; i++)
5103 aarch64_set_vec_u16 (cpu, vd, i,
5104 aarch64_get_vec_u8 (cpu, vn, i + bias)
5105 - aarch64_get_vec_u8 (cpu, vm, i + bias))
;
5110 for (i = 0; i < 4; i++)
5111 aarch64_set_vec_u32 (cpu, vd, i,
5112 aarch64_get_vec_u16 (cpu, vn, i + bias)
5113 - aarch64_get_vec_u16 (cpu, vm, i + bias));
5117 for (i = 0; i < 2; i++)
5118 aarch64_set_vec_u64 (cpu, vd, i,
5119 aarch64_get_vec_u32 (cpu, vn, i + bias)
5120 - aarch64_get_vec_u32 (cpu, vm, i + bias));
/* Vector pairwise add (ADDP): the low half of Vd receives pairwise sums
   of adjacent elements of Vn, the high half pairwise sums from Vm.
   Source registers are copied up front so the result is correct even
   when Vd aliases Vn or Vm (the copies are FRegister snapshots read via
   the .b/.h/.w/.v lane arrays).  64-bit lanes require the full-width
   form (one pair per source).  */
5131 do_vec_ADDP (sim_cpu *cpu)
5134 instr[30] = half(0)/full(1)
5135 instr[29,24] = 00 1110
5136 instr[23,22] = size: bytes (00), half (01), word (10), long (11)
5139 instr[15,10] = 1011 11
5141 instr[4,0] = V dest. */
5145 unsigned full = INSTR (30, 30);
5146 unsigned size = INSTR (23, 22);
5147 unsigned vm = INSTR (20, 16);
5148 unsigned vn = INSTR (9, 5);
5149 unsigned vd = INSTR (4, 0);
5152 NYI_assert (29, 24, 0x0E);
5153 NYI_assert (21, 21, 1);
5154 NYI_assert (15, 10, 0x2F);
5156 /* Make copies of the source registers in case vd == vn/vm. */
5157 copy_vn = cpu->fr[vn];
5158 copy_vm = cpu->fr[vm];
5163 range = full ? 8 : 4;
5164 for (i = 0; i < range; i++)
5166 aarch64_set_vec_u8 (cpu, vd, i,
5167 copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
5168 aarch64_set_vec_u8 (cpu, vd, i + range,
5169 copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);
5174 range = full ? 4 : 2;
5175 for (i = 0; i < range; i++)
5177 aarch64_set_vec_u16 (cpu, vd, i,
5178 copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
5179 aarch64_set_vec_u16 (cpu, vd, i + range,
5180 copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);
5185 range = full ? 2 : 1;
5186 for (i = 0; i < range; i++)
5188 aarch64_set_vec_u32 (cpu, vd, i,
5189 copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
5190 aarch64_set_vec_u32 (cpu, vd, i + range,
5191 copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);
/* 64-bit lanes: exactly one pair from each source.  */
5198 aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
5199 aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
/* UMOV: move one vector element to a general register, zero-extended.
   The element size is found by locating the lowest set bit of the
   imm5 field (bits [20,16]): bit 16 set => byte, bit 17 => halfword,
   bit 18 => word, otherwise doubleword; the remaining high bits of
   imm5 form the lane index.  The 64-bit (doubleword) transfer requires
   bit 30 set — the elided branch presumably halts otherwise.  */
5205 do_vec_UMOV (sim_cpu *cpu)
5208 instr[30] = 32-bit(0)/64-bit(1)
5209 instr[29,21] = 00 1110 000
5210 insrt[20,16] = size & index
5211 instr[15,10] = 0011 11
5212 instr[9,5] = V source
5213 instr[4,0] = R dest. */
5215 unsigned vs = INSTR (9, 5);
5216 unsigned rd = INSTR (4, 0);
5219 NYI_assert (29, 21, 0x070);
5220 NYI_assert (15, 10, 0x0F);
5224 /* Byte transfer. */
5225 index = INSTR (20, 17);
5226 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5227 aarch64_get_vec_u8 (cpu, vs, index));
5229 else if (INSTR (17, 17))
5231 index = INSTR (20, 18);
5232 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5233 aarch64_get_vec_u16 (cpu, vs, index));
5235 else if (INSTR (18, 18))
5237 index = INSTR (20, 19);
5238 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5239 aarch64_get_vec_u32 (cpu, vs, index));
5243 if (INSTR (30, 30) != 1)
5246 index = INSTR (20, 20);
5247 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5248 aarch64_get_vec_u64 (cpu, vs, index));
/* Vector floating-point absolute value (FABS):
   Vd[i] = |Vn[i]|.  Bit 22 selects double lanes (always 2) vs float
   lanes (4 or 2 by the full/half bit); uses libm fabs/fabsf.  */
5253 do_vec_FABS (sim_cpu *cpu)
5256 instr[30] = half(0)/full(1)
5257 instr[29,23] = 00 1110 1
5258 instr[22] = float(0)/double(1)
5259 instr[21,16] = 10 0000
5260 instr[15,10] = 1111 10
5264 unsigned vn = INSTR (9, 5);
5265 unsigned vd = INSTR (4, 0);
5266 unsigned full = INSTR (30, 30);
5269 NYI_assert (29, 23, 0x1D);
5270 NYI_assert (21, 10, 0x83E);
5277 for (i = 0; i < 2; i++)
5278 aarch64_set_vec_double (cpu, vd, i,
5279 fabs (aarch64_get_vec_double (cpu, vn, i)));
5283 for (i = 0; i < (full ? 4 : 2); i++)
5284 aarch64_set_vec_float (cpu, vd, i,
5285 fabsf (aarch64_get_vec_float (cpu, vn, i)));
/* Vector FP-to-signed-integer convert, round toward zero (FCVTZS):
   each FP lane of Vn is truncated by C cast (which rounds toward zero,
   matching FCVTZS) into the same-width signed integer lane of Vd.
   Bit 22 selects double->s64 vs float->s32.
   NOTE(review): out-of-range/NaN inputs are UB for the C cast, whereas
   the hardware saturates — not handled here.  */
5290 do_vec_FCVTZS (sim_cpu *cpu)
5293 instr[30] = half (0) / all (1)
5294 instr[29,23] = 00 1110 1
5295 instr[22] = single (0) / double (1)
5296 instr[21,10] = 10 0001 1011 10
5300 unsigned rn = INSTR (9, 5);
5301 unsigned rd = INSTR (4, 0);
5302 unsigned full = INSTR (30, 30);
5305 NYI_assert (31, 31, 0);
5306 NYI_assert (29, 23, 0x1D);
5307 NYI_assert (21, 10, 0x86E);
5314 for (i = 0; i < 2; i++)
5315 aarch64_set_vec_s64 (cpu, rd, i,
5316 (int64_t) aarch64_get_vec_double (cpu, rn, i));
5319 for (i = 0; i < (full ? 4 : 2); i++)
5320 aarch64_set_vec_s32 (cpu, rd, i,
5321 (int32_t) aarch64_get_vec_float (cpu, rn, i));
/* REV64: reverse the order of elements within each 64-bit doubleword of
   the vector.  Implemented by XOR-ing the lane index (i ^ 7 for bytes,
   i ^ 3 for halfwords, i ^ 1 for words), which swaps positions inside
   each 64-bit group; the result is staged in a local FRegister `val`
   and then written back (upper doubleword only for the full form).  */
5325 do_vec_REV64 (sim_cpu *cpu)
5328 instr[30] = full/half
5329 instr[29,24] = 00 1110
5331 instr[21,10] = 10 0000 0000 10
5335 unsigned rn = INSTR (9, 5);
5336 unsigned rd = INSTR (4, 0);
5337 unsigned size = INSTR (23, 22);
5338 unsigned full = INSTR (30, 30);
5342 NYI_assert (29, 24, 0x0E);
5343 NYI_assert (21, 10, 0x802);
5348 for (i = 0; i < (full ? 16 : 8); i++)
5349 val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i);
5353 for (i = 0; i < (full ? 8 : 4); i++)
5354 val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i);
5358 for (i = 0; i < (full ? 4 : 2); i++)
5359 val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i);
5366 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5368 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
/* REV16: reverse the bytes within each 16-bit halfword of the vector
   (lane index XOR 1 swaps byte pairs).  Only the byte-element size is
   handled here; staging and write-back mirror do_vec_REV64.  */
5372 do_vec_REV16 (sim_cpu *cpu)
5375 instr[30] = full/half
5376 instr[29,24] = 00 1110
5378 instr[21,10] = 10 0000 0001 10
5382 unsigned rn = INSTR (9, 5);
5383 unsigned rd = INSTR (4, 0);
5384 unsigned size = INSTR (23, 22);
5385 unsigned full = INSTR (30, 30);
5389 NYI_assert (29, 24, 0x0E);
5390 NYI_assert (21, 10, 0x806);
5395 for (i = 0; i < (full ? 16 : 8); i++)
5396 val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i);
5403 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5405 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
/* First-level decoder for the "AdvSIMD three same / two-reg misc"
   0x0E encoding space: inspects discriminating bit fields and forwards
   to the matching do_vec_* handler.  Order of the tests matters — the
   bit-21==0 half covers the copy/permute group (DUP/INS/TRN/UZP/ZIP/
   TBL/UMOV/REV*) and the logical group (AND/BIC/ORR/ORN), while the
   bit-21==1 half dispatches on opcode bits [15,10].  Elided lines hide
   some intermediate branches; code kept byte-identical.  */
5409 do_vec_op1 (sim_cpu *cpu)
5412 instr[30] = half/full
5413 instr[29,24] = 00 1110
5416 instr[15,10] = sub-opcode
5419 NYI_assert (29, 24, 0x0E);
5421 if (INSTR (21, 21) == 0)
5423 if (INSTR (23, 22) == 0)
5425 if (INSTR (30, 30) == 1
5426 && INSTR (17, 14) == 0
5427 && INSTR (12, 10) == 7)
5428 return do_vec_ins_2 (cpu);
5430 switch (INSTR (15, 10))
5432 case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
5433 case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
5434 case 0x07: do_vec_INS (cpu); return;
5435 case 0x0A: do_vec_TRN (cpu); return;
5438 if (INSTR (17, 16) == 0)
5440 do_vec_MOV_into_scalar (cpu);
5449 do_vec_TBL (cpu); return;
5453 do_vec_UZP (cpu); return;
5457 do_vec_ZIP (cpu); return;
5464 switch (INSTR (13, 10))
5466 case 0x6: do_vec_UZP (cpu); return;
5467 case 0xE: do_vec_ZIP (cpu); return;
5468 case 0xA: do_vec_TRN (cpu); return;
5469 case 0xF: do_vec_UMOV (cpu); return;
5474 switch (INSTR (15, 10))
5476 case 0x02: do_vec_REV64 (cpu); return;
5477 case 0x06: do_vec_REV16 (cpu); return;
/* Bitwise logical ops are distinguished by bits [23,21].  */
5480 switch (INSTR (23, 21))
5482 case 1: do_vec_AND (cpu); return;
5483 case 3: do_vec_BIC (cpu); return;
5484 case 5: do_vec_ORR (cpu); return;
5485 case 7: do_vec_ORN (cpu); return;
/* bit 21 == 1: three-same arithmetic group, keyed on [15,10].  */
5489 case 0x08: do_vec_sub_long (cpu); return;
5490 case 0x0a: do_vec_XTN (cpu); return;
5491 case 0x11: do_vec_SSHL (cpu); return;
5492 case 0x19: do_vec_max (cpu); return;
5493 case 0x1B: do_vec_min (cpu); return;
5494 case 0x21: do_vec_add (cpu); return;
5495 case 0x25: do_vec_MLA (cpu); return;
5496 case 0x27: do_vec_mul (cpu); return;
5497 case 0x2F: do_vec_ADDP (cpu); return;
5498 case 0x30: do_vec_mull (cpu); return;
5499 case 0x33: do_vec_FMLA (cpu); return;
5500 case 0x35: do_vec_fadd (cpu); return;
5503 switch (INSTR (20, 16))
5505 case 0x00: do_vec_ABS (cpu); return;
5506 case 0x01: do_vec_FCVTZS (cpu); return;
5507 case 0x11: do_vec_ADDV (cpu); return;
5513 do_vec_Fminmax (cpu); return;
5525 do_vec_compare (cpu); return;
5528 do_vec_FABS (cpu); return;
/* Widening shift-left / extend-long family (SXTL/UXTL and SSHLL/USHLL,
   plus their "2" upper-half variants): each source lane is widened to
   double width and shifted left by the immediate encoded in the size
   field (lowest set bit of bits [21,16] determines the lane size, the
   bits below it the shift).  XTL is the shift==0 special case.  Values
   are read into temporaries first so Vd may alias Vs.
   NOTE(review): the 32->64 signed path shifts the s32 value *before*
   widening (`aarch64_get_vec_s32 (...) << shift`); a large shift could
   lose/overflow bits in 32-bit arithmetic — verify whether the cast to
   the (elided) int64_t temporaries happens before or after the shift.  */
5536 do_vec_xtl (sim_cpu *cpu)
5539 instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
5540 instr[28,22] = 0 1111 00
5541 instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
5542 instr[15,10] = 1010 01
5543 instr[9,5] = V source
5544 instr[4,0] = V dest. */
5546 unsigned vs = INSTR (9, 5);
5547 unsigned vd = INSTR (4, 0);
5548 unsigned i, shift, bias = 0;
5550 NYI_assert (28, 22, 0x3C);
5551 NYI_assert (15, 10, 0x29);
5553 switch (INSTR (30, 29))
/* "2" variants set bias to read the upper source half, then fall
   through to the base case.  */
5555 case 2: /* SXTL2, SSHLL2. */
5557 case 0: /* SXTL, SSHLL. */
5562 shift = INSTR (20, 16);
5563 /* Get the source values before setting the destination values
5564 in case the source and destination are the same. */
5565 val1 = aarch64_get_vec_s32 (cpu, vs, bias) << shift;
5566 val2 = aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift;
5567 aarch64_set_vec_s64 (cpu, vd, 0, val1);
5568 aarch64_set_vec_s64 (cpu, vd, 1, val2);
5570 else if (INSTR (20, 20))
5573 int32_t v1,v2,v3,v4;
5575 shift = INSTR (19, 16);
5577 for (i = 0; i < 4; i++)
5578 v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift;
5579 for (i = 0; i < 4; i++)
5580 aarch64_set_vec_s32 (cpu, vd, i, v[i]);
5585 NYI_assert (19, 19, 1);
5587 shift = INSTR (18, 16);
5589 for (i = 0; i < 8; i++)
5590 v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
5591 for (i = 0; i < 8; i++)
5592 aarch64_set_vec_s16 (cpu, vd, i, v[i]);
5596 case 3: /* UXTL2, USHLL2. */
5598 case 1: /* UXTL, USHLL. */
5602 shift = INSTR (20, 16);
5603 v1 = aarch64_get_vec_u32 (cpu, vs, bias) << shift;
5604 v2 = aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift;
5605 aarch64_set_vec_u64 (cpu, vd, 0, v1);
5606 aarch64_set_vec_u64 (cpu, vd, 1, v2);
5608 else if (INSTR (20, 20))
5611 shift = INSTR (19, 16);
5613 for (i = 0; i < 4; i++)
5614 v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift;
5615 for (i = 0; i < 4; i++)
5616 aarch64_set_vec_u32 (cpu, vd, i, v[i]);
5621 NYI_assert (19, 19, 1);
5623 shift = INSTR (18, 16);
5625 for (i = 0; i < 8; i++)
5626 v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
5627 for (i = 0; i < 8; i++)
5628 aarch64_set_vec_u16 (cpu, vd, i, v[i]);
/* Vector shift-left by immediate (SHL): the immh:immb field (bits
   [22,16]) encodes both the lane size (position of the highest set immh
   bit) and the shift amount (the bits below it).  The checks descend
   from 64-bit (bit 22 set) to 8-bit lanes; the shift amount is the
   field minus the lane-size base — the subtraction lines are elided.  */
5635 do_vec_SHL (sim_cpu *cpu)
5638 instr [30] = half(0)/full(1)
5639 instr [29,23] = 001 1110
5640 instr [22,16] = size and shift amount
5641 instr [15,10] = 01 0101
5643 instr [4, 0] = Vd. */
5646 int full = INSTR (30, 30);
5647 unsigned vs = INSTR (9, 5);
5648 unsigned vd = INSTR (4, 0);
5651 NYI_assert (29, 23, 0x1E);
5652 NYI_assert (15, 10, 0x15);
/* 64-bit lanes (bit 22 set); requires the full-width form.  */
5656 shift = INSTR (21, 16);
5661 for (i = 0; i < 2; i++)
5663 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5664 aarch64_set_vec_u64 (cpu, vd, i, val << shift);
5672 shift = INSTR (20, 16);
5674 for (i = 0; i < (full ? 4 : 2); i++)
5676 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5677 aarch64_set_vec_u32 (cpu, vd, i, val << shift);
5685 shift = INSTR (19, 16);
5687 for (i = 0; i < (full ? 8 : 4); i++)
5689 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5690 aarch64_set_vec_u16 (cpu, vd, i, val << shift);
5696 if (INSTR (19, 19) == 0)
5699 shift = INSTR (18, 16);
5701 for (i = 0; i < (full ? 16 : 8); i++)
5703 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5704 aarch64_set_vec_u8 (cpu, vd, i, val << shift);
/* Vector shift-right by immediate (SSHR/USHR): bit 29 selects unsigned
   (USHR) vs signed arithmetic (SSHR) right shift; the immh:immb field
   encodes lane size and shift, with the effective shift computed as
   (2 * lane bits) - field (the 128 - shift line below is the 64-bit
   case; the per-size adjustments are elided).
   NOTE(review): `val >> shift` on a negative int relies on the
   implementation-defined arithmetic right shift of GCC/Clang.  */
5709 do_vec_SSHR_USHR (sim_cpu *cpu)
5712 instr [30] = half(0)/full(1)
5713 instr [29] = signed(0)/unsigned(1)
5714 instr [28,23] = 0 1111 0
5715 instr [22,16] = size and shift amount
5716 instr [15,10] = 0000 01
5718 instr [4, 0] = Vd. */
5720 int full = INSTR (30, 30);
5721 int sign = ! INSTR (29, 29);
5722 unsigned shift = INSTR (22, 16);
5723 unsigned vs = INSTR (9, 5);
5724 unsigned vd = INSTR (4, 0);
5727 NYI_assert (28, 23, 0x1E);
5728 NYI_assert (15, 10, 0x01);
5732 shift = 128 - shift;
5738 for (i = 0; i < 2; i++)
5740 int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
5741 aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
5744 for (i = 0; i < 2; i++)
5746 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5747 aarch64_set_vec_u64 (cpu, vd, i, val >> shift);
5758 for (i = 0; i < (full ? 4 : 2); i++)
5760 int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
5761 aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
5764 for (i = 0; i < (full ? 4 : 2); i++)
5766 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5767 aarch64_set_vec_u32 (cpu, vd, i, val >> shift);
5778 for (i = 0; i < (full ? 8 : 4); i++)
5780 int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
5781 aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
5784 for (i = 0; i < (full ? 8 : 4); i++)
5786 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5787 aarch64_set_vec_u16 (cpu, vd, i, val >> shift);
5793 if (INSTR (19, 19) == 0)
5799 for (i = 0; i < (full ? 16 : 8); i++)
5801 int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
5802 aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
5805 for (i = 0; i < (full ? 16 : 8); i++)
5807 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5808 aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
/* MUL (by element): multiplies every lane of Vn by a single broadcast
   element of Vm selected by the H:L(:M) index bits.  16-bit lanes use a
   3-bit index (H:L:M) and a 4-bit Vm field (only V0-V15 addressable);
   32-bit lanes use a 2-bit index (H:L) and the full 5-bit Vm field.  */
5813 do_vec_MUL_by_element (sim_cpu *cpu)
5816 instr[30] = half/full
5817 instr[29,24] = 00 1111
5828 unsigned full = INSTR (30, 30);
5829 unsigned L = INSTR (21, 21);
5830 unsigned H = INSTR (11, 11);
5831 unsigned vn = INSTR (9, 5);
5832 unsigned vd = INSTR (4, 0);
5833 unsigned size = INSTR (23, 22);
5838 NYI_assert (29, 24, 0x0F);
5839 NYI_assert (15, 12, 0x8);
5840 NYI_assert (10, 10, 0);
5846 /* 16 bit products. */
5851 index = (H << 2) | (L << 1) | INSTR (20, 20);
5852 vm = INSTR (19, 16);
5853 element2 = aarch64_get_vec_u16 (cpu, vm, index);
5855 for (e = 0; e < (full ? 8 : 4); e ++)
5857 element1 = aarch64_get_vec_u16 (cpu, vn, e);
5858 product = element1 * element2;
5859 aarch64_set_vec_u16 (cpu, vd, e, product);
5866 /* 32 bit products. */
5871 index = (H << 1) | L;
5872 vm = INSTR (20, 16);
5873 element2 = aarch64_get_vec_u32 (cpu, vm, index);
5875 for (e = 0; e < (full ? 4 : 2); e ++)
5877 element1 = aarch64_get_vec_u32 (cpu, vn, e);
5878 product = element1 * element2;
5879 aarch64_set_vec_u32 (cpu, vd, e, product);
/* First-level decoder for the 0x0F "AdvSIMD shift by immediate /
   vector x indexed element" space: bit 23 distinguishes the indexed-
   element group from the shift-immediate group, then sub-opcode bits
   [15,10] select the handler.  */
5890 do_vec_op2 (sim_cpu *cpu)
5893 instr[30] = half/full
5894 instr[29,24] = 00 1111
5896 instr[22,16] = element size & index
5897 instr[15,10] = sub-opcode
5901 NYI_assert (29, 24, 0x0F);
5903 if (INSTR (23, 23) != 0)
5905 switch (INSTR (15, 10))
5908 case 0x22: do_vec_MUL_by_element (cpu); return;
5914 switch (INSTR (15, 10))
5916 case 0x01: do_vec_SSHR_USHR (cpu); return;
5917 case 0x15: do_vec_SHL (cpu); return;
5919 case 0x22: do_vec_MUL_by_element (cpu); return;
5920 case 0x29: do_vec_xtl (cpu); return;
/* Vector negate (NEG): Vd[i] = -Vn[i] for each signed lane; lane size
   from bits [23,22], lane count halved by the half-width form (64-bit
   lanes always process 2 and presumably require the full form).  */
5927 do_vec_neg (sim_cpu *cpu)
5930 instr[30] = full(1)/half(0)
5931 instr[29,24] = 10 1110
5932 instr[23,22] = size: byte(00), half (01), word (10), long (11)
5933 instr[21,10] = 1000 0010 1110
5937 int full = INSTR (30, 30);
5938 unsigned vs = INSTR (9, 5);
5939 unsigned vd = INSTR (4, 0);
5942 NYI_assert (29, 24, 0x2E);
5943 NYI_assert (21, 10, 0x82E);
5945 switch (INSTR (23, 22))
5948 for (i = 0; i < (full ? 16 : 8); i++)
5949 aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));
5953 for (i = 0; i < (full ? 8 : 4); i++)
5954 aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));
5958 for (i = 0; i < (full ? 4 : 2); i++)
5959 aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));
5965 for (i = 0; i < 2; i++)
5966 aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
/* Vector floating-point square root (FSQRT): Vd[i] = sqrt (Vn[i]).
   Bit 22 selects float lanes (4 or 2 by the full/half bit, via sqrtf)
   vs double lanes (always 2, via sqrt).  */
5972 do_vec_sqrt (sim_cpu *cpu)
5975 instr[30] = full(1)/half(0)
5976 instr[29,23] = 101 1101
5977 instr[22] = single(0)/double(1)
5978 instr[21,10] = 1000 0111 1110
5982 int full = INSTR (30, 30);
5983 unsigned vs = INSTR (9, 5);
5984 unsigned vd = INSTR (4, 0);
5987 NYI_assert (29, 23, 0x5B);
5988 NYI_assert (21, 10, 0x87E);
5990 if (INSTR (22, 22) == 0)
5991 for (i = 0; i < (full ? 4 : 2); i++)
5992 aarch64_set_vec_float (cpu, vd, i,
5993 sqrtf (aarch64_get_vec_float (cpu, vs, i)));
5995 for (i = 0; i < 2; i++)
5996 aarch64_set_vec_double (cpu, vd, i,
5997 sqrt (aarch64_get_vec_double (cpu, vs, i)));
/* MLS (by element): Vd[i] = Vd[i] - Vs[i] * Vm[elem], where the single
   multiplier element of Vm is selected by the index bits (H:L:M for
   16-bit elements, H:L for 32-bit).
   NOTE(review): the 16-bit case reads u16 elements but accumulates into
   u32 destination lanes over (full ? 8 : 4) iterations — lane widths
   look inconsistent with the element size; elided lines may explain,
   verify before relying on this path.  */
6001 do_vec_mls_indexed (sim_cpu *cpu)
6004 instr[30] = half(0)/full(1)
6005 instr[29,24] = 10 1111
6006 instr[23,22] = 16-bit(01)/32-bit(10)
6007 instr[21,20+11] = index (if 16-bit)
6008 instr[21+11] = index (if 32-bit)
6011 instr[11] = part of index
6016 int full = INSTR (30, 30);
6017 unsigned vs = INSTR (9, 5);
6018 unsigned vd = INSTR (4, 0);
6019 unsigned vm = INSTR (20, 16);
6022 NYI_assert (15, 12, 4);
6023 NYI_assert (10, 10, 0);
6025 switch (INSTR (23, 22))
6035 elem = (INSTR (21, 20) << 1) | INSTR (11, 11);
6036 val = aarch64_get_vec_u16 (cpu, vm, elem);
6038 for (i = 0; i < (full ? 8 : 4); i++)
6039 aarch64_set_vec_u32 (cpu, vd, i,
6040 aarch64_get_vec_u32 (cpu, vd, i) -
6041 (aarch64_get_vec_u32 (cpu, vs, i) * val));
6047 unsigned elem = (INSTR (21, 21) << 1) | INSTR (11, 11);
6048 uint64_t val = aarch64_get_vec_u32 (cpu, vm, elem);
6050 for (i = 0; i < (full ? 4 : 2); i++)
6051 aarch64_set_vec_u64 (cpu, vd, i,
6052 aarch64_get_vec_u64 (cpu, vd, i) -
6053 (aarch64_get_vec_u64 (cpu, vs, i) * val));
/* Vector subtract (SUB): Vd[i] = Vn[i] - Vm[i].  Lane size from bits
   [23,22]; lane count halved by the half-width form; 64-bit lanes
   always process 2.  Two's-complement subtraction is sign-agnostic,
   so signed accessors serve both SUB encodings.  */
6065 do_vec_SUB (sim_cpu *cpu)
6068 instr [30] = half(0)/full(1)
6069 instr [29,24] = 10 1110
6070 instr [23,22] = size: byte(00, half(01), word (10), long (11)
6073 instr [15,10] = 10 0001
6075 instr [4, 0] = Vd. */
6077 unsigned full = INSTR (30, 30);
6078 unsigned vm = INSTR (20, 16);
6079 unsigned vn = INSTR (9, 5);
6080 unsigned vd = INSTR (4, 0);
6083 NYI_assert (29, 24, 0x2E);
6084 NYI_assert (21, 21, 1);
6085 NYI_assert (15, 10, 0x21);
6087 switch (INSTR (23, 22))
6090 for (i = 0; i < (full ? 16 : 8); i++)
6091 aarch64_set_vec_s8 (cpu, vd, i,
6092 aarch64_get_vec_s8 (cpu, vn, i)
6093 - aarch64_get_vec_s8 (cpu, vm, i));
6097 for (i = 0; i < (full ? 8 : 4); i++)
6098 aarch64_set_vec_s16 (cpu, vd, i,
6099 aarch64_get_vec_s16 (cpu, vn, i)
6100 - aarch64_get_vec_s16 (cpu, vm, i));
6104 for (i = 0; i < (full ? 4 : 2); i++)
6105 aarch64_set_vec_s32 (cpu, vd, i,
6106 aarch64_get_vec_s32 (cpu, vn, i)
6107 - aarch64_get_vec_s32 (cpu, vm, i));
6114 for (i = 0; i < 2; i++)
6115 aarch64_set_vec_s64 (cpu, vd, i,
6116 aarch64_get_vec_s64 (cpu, vn, i)
6117 - aarch64_get_vec_s64 (cpu, vm, i));
6123 do_vec_MLS (sim_cpu *cpu)
6126 instr [30] = half(0)/full(1)
6127 instr [29,24] = 10 1110
6128 instr [23,22] = size: byte(00, half(01), word (10)
6131 instr [15,10] = 10 0101
6133 instr [4, 0] = Vd. */
6135 unsigned full = INSTR (30, 30);
6136 unsigned vm = INSTR (20, 16);
6137 unsigned vn = INSTR (9, 5);
6138 unsigned vd = INSTR (4, 0);
6141 NYI_assert (29, 24, 0x2E);
6142 NYI_assert (21, 21, 1);
6143 NYI_assert (15, 10, 0x25);
6145 switch (INSTR (23, 22))
6148 for (i = 0; i < (full ? 16 : 8); i++)
6149 aarch64_set_vec_u8 (cpu, vd, i,
6150 (aarch64_get_vec_u8 (cpu, vn, i)
6151 * aarch64_get_vec_u8 (cpu, vm, i))
6152 - aarch64_get_vec_u8 (cpu, vd, i));
6156 for (i = 0; i < (full ? 8 : 4); i++)
6157 aarch64_set_vec_u16 (cpu, vd, i,
6158 (aarch64_get_vec_u16 (cpu, vn, i)
6159 * aarch64_get_vec_u16 (cpu, vm, i))
6160 - aarch64_get_vec_u16 (cpu, vd, i));
6164 for (i = 0; i < (full ? 4 : 2); i++)
6165 aarch64_set_vec_u32 (cpu, vd, i,
6166 (aarch64_get_vec_u32 (cpu, vn, i)
6167 * aarch64_get_vec_u32 (cpu, vm, i))
6168 - aarch64_get_vec_u32 (cpu, vd, i));
6177 do_vec_FDIV (sim_cpu *cpu)
6180 instr [30] = half(0)/full(1)
6181 instr [29,23] = 10 1110 0
6182 instr [22] = float()/double(1)
6185 instr [15,10] = 1111 11
6187 instr [4, 0] = Vd. */
6189 unsigned full = INSTR (30, 30);
6190 unsigned vm = INSTR (20, 16);
6191 unsigned vn = INSTR (9, 5);
6192 unsigned vd = INSTR (4, 0);
6195 NYI_assert (29, 23, 0x5C);
6196 NYI_assert (21, 21, 1);
6197 NYI_assert (15, 10, 0x3F);
6204 for (i = 0; i < 2; i++)
6205 aarch64_set_vec_double (cpu, vd, i,
6206 aarch64_get_vec_double (cpu, vn, i)
6207 / aarch64_get_vec_double (cpu, vm, i));
6210 for (i = 0; i < (full ? 4 : 2); i++)
6211 aarch64_set_vec_float (cpu, vd, i,
6212 aarch64_get_vec_float (cpu, vn, i)
6213 / aarch64_get_vec_float (cpu, vm, i));
6217 do_vec_FMUL (sim_cpu *cpu)
6220 instr [30] = half(0)/full(1)
6221 instr [29,23] = 10 1110 0
6222 instr [22] = float(0)/double(1)
6225 instr [15,10] = 1101 11
6227 instr [4, 0] = Vd. */
6229 unsigned full = INSTR (30, 30);
6230 unsigned vm = INSTR (20, 16);
6231 unsigned vn = INSTR (9, 5);
6232 unsigned vd = INSTR (4, 0);
6235 NYI_assert (29, 23, 0x5C);
6236 NYI_assert (21, 21, 1);
6237 NYI_assert (15, 10, 0x37);
6244 for (i = 0; i < 2; i++)
6245 aarch64_set_vec_double (cpu, vd, i,
6246 aarch64_get_vec_double (cpu, vn, i)
6247 * aarch64_get_vec_double (cpu, vm, i));
6250 for (i = 0; i < (full ? 4 : 2); i++)
6251 aarch64_set_vec_float (cpu, vd, i,
6252 aarch64_get_vec_float (cpu, vn, i)
6253 * aarch64_get_vec_float (cpu, vm, i));
6257 do_vec_FADDP (sim_cpu *cpu)
6260 instr [30] = half(0)/full(1)
6261 instr [29,23] = 10 1110 0
6262 instr [22] = float(0)/double(1)
6265 instr [15,10] = 1101 01
6267 instr [4, 0] = Vd. */
6269 unsigned full = INSTR (30, 30);
6270 unsigned vm = INSTR (20, 16);
6271 unsigned vn = INSTR (9, 5);
6272 unsigned vd = INSTR (4, 0);
6274 NYI_assert (29, 23, 0x5C);
6275 NYI_assert (21, 21, 1);
6276 NYI_assert (15, 10, 0x35);
6280 /* Extract values before adding them incase vd == vn/vm. */
6281 double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
6282 double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
6283 double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
6284 double tmp4 = aarch64_get_vec_double (cpu, vm, 1);
6289 aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
6290 aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
6294 /* Extract values before adding them incase vd == vn/vm. */
6295 float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
6296 float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
6297 float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
6298 float tmp6 = aarch64_get_vec_float (cpu, vm, 1);
6302 float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
6303 float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
6304 float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
6305 float tmp8 = aarch64_get_vec_float (cpu, vm, 3);
6307 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6308 aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
6309 aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
6310 aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
6314 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6315 aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
6321 do_vec_FSQRT (sim_cpu *cpu)
6324 instr[30] = half(0)/full(1)
6325 instr[29,23] = 10 1110 1
6326 instr[22] = single(0)/double(1)
6327 instr[21,10] = 10 0001 1111 10
6329 instr[4,0] = Vdest. */
6331 unsigned vn = INSTR (9, 5);
6332 unsigned vd = INSTR (4, 0);
6333 unsigned full = INSTR (30, 30);
6336 NYI_assert (29, 23, 0x5D);
6337 NYI_assert (21, 10, 0x87E);
6344 for (i = 0; i < 2; i++)
6345 aarch64_set_vec_double (cpu, vd, i,
6346 sqrt (aarch64_get_vec_double (cpu, vn, i)));
6350 for (i = 0; i < (full ? 4 : 2); i++)
6351 aarch64_set_vec_float (cpu, vd, i,
6352 sqrtf (aarch64_get_vec_float (cpu, vn, i)));
6357 do_vec_FNEG (sim_cpu *cpu)
6360 instr[30] = half (0)/full (1)
6361 instr[29,23] = 10 1110 1
6362 instr[22] = single (0)/double (1)
6363 instr[21,10] = 10 0000 1111 10
6365 instr[4,0] = Vdest. */
6367 unsigned vn = INSTR (9, 5);
6368 unsigned vd = INSTR (4, 0);
6369 unsigned full = INSTR (30, 30);
6372 NYI_assert (29, 23, 0x5D);
6373 NYI_assert (21, 10, 0x83E);
6380 for (i = 0; i < 2; i++)
6381 aarch64_set_vec_double (cpu, vd, i,
6382 - aarch64_get_vec_double (cpu, vn, i));
6386 for (i = 0; i < (full ? 4 : 2); i++)
6387 aarch64_set_vec_float (cpu, vd, i,
6388 - aarch64_get_vec_float (cpu, vn, i));
6393 do_vec_NOT (sim_cpu *cpu)
6396 instr[30] = half (0)/full (1)
6397 instr[29,10] = 10 1110 0010 0000 0101 10
6401 unsigned vn = INSTR (9, 5);
6402 unsigned vd = INSTR (4, 0);
6404 int full = INSTR (30, 30);
6406 NYI_assert (29, 10, 0xB8816);
6408 for (i = 0; i < (full ? 16 : 8); i++)
6409 aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
/* Count leading zero bits in VAL viewed as a SIZE-bit quantity.
   NOTE(review): most of this helper's body is missing from this
   fragment; only the initial mask positioning survives.  */
6413 clz (uint64_t val, unsigned size)
6418 mask <<= (size - 1);
/* Vector count-leading-zeros: apply the local clz helper to each lane,
   dispatching on the size field (bits 23,22).  NOTE(review): the case
   labels and 64-bit full-width check are missing from this fragment.  */
6433 do_vec_CLZ (sim_cpu *cpu)
6436 instr[30] = half (0)/full (1)
6437 instr[29,24] = 10 1110
6439 instr[21,10] = 10 0000 0100 10
6443 unsigned vn = INSTR (9, 5);
6444 unsigned vd = INSTR (4, 0);
6446 int full = INSTR (30,30);
6448 NYI_assert (29, 24, 0x2E);
6449 NYI_assert (21, 10, 0x812);
6451 switch (INSTR (23, 22))
/* Byte lanes: 16 (full) or 8 (half).  */
6454 for (i = 0; i < (full ? 16 : 8); i++)
6455 aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8));
/* Half-word lanes.  */
6458 for (i = 0; i < (full ? 8 : 4); i++)
6459 aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16));
/* Word lanes.  */
6462 for (i = 0; i < (full ? 4 : 2); i++)
6463 aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32));
/* Double-word lanes: both written unconditionally here.  */
6468 aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
6469 aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
/* INS (element): copy one lane of Vs into one lane of Vd.  The lane
   size is encoded by the position of the lowest set bit in the
   size/index field (bits 20,16); tests on bits 16..19 select byte,
   half, word or double-word moves.  NOTE(review): the leading
   "if (INSTR (16, 16))" test for the byte case is missing here.  */
6475 do_vec_MOV_element (sim_cpu *cpu)
6477 /* instr[31,21] = 0110 1110 000
6478 instr[20,16] = size & dest index
6480 instr[14,11] = source index
6485 unsigned vs = INSTR (9, 5);
6486 unsigned vd = INSTR (4, 0);
6490 NYI_assert (31, 21, 0x370);
6491 NYI_assert (15, 15, 0);
6492 NYI_assert (10, 10, 1);
/* Byte-sized move: 4-bit source index, dest index in bits 20,17.  */
6497 src_index = INSTR (14, 11);
6498 dst_index = INSTR (20, 17);
6499 aarch64_set_vec_u8 (cpu, vd, dst_index,
6500 aarch64_get_vec_u8 (cpu, vs, src_index));
6502 else if (INSTR (17, 17))
/* Half-word move.  */
6505 NYI_assert (11, 11, 0);
6506 src_index = INSTR (14, 12);
6507 dst_index = INSTR (20, 18);
6508 aarch64_set_vec_u16 (cpu, vd, dst_index,
6509 aarch64_get_vec_u16 (cpu, vs, src_index));
6511 else if (INSTR (18, 18))
/* Word move.  */
6514 NYI_assert (12, 11, 0);
6515 src_index = INSTR (14, 13);
6516 dst_index = INSTR (20, 19);
6517 aarch64_set_vec_u32 (cpu, vd, dst_index,
6518 aarch64_get_vec_u32 (cpu, vs, src_index));
/* Double-word move: one-bit indices.  */
6522 NYI_assert (19, 19, 1);
6523 NYI_assert (13, 11, 0);
6524 src_index = INSTR (14, 14);
6525 dst_index = INSTR (20, 20);
6526 aarch64_set_vec_u64 (cpu, vd, dst_index,
6527 aarch64_get_vec_u64 (cpu, vs, src_index));
/* REV32: reverse byte/half-word elements within each 32-bit word.
   Builds the result in a local union (declaration lost in this
   fragment) and writes it back, so rd may alias rn safely.  */
6532 do_vec_REV32 (sim_cpu *cpu)
6535 instr[30] = full/half
6536 instr[29,24] = 10 1110
6538 instr[21,10] = 10 0000 0000 10
6542 unsigned rn = INSTR (9, 5);
6543 unsigned rd = INSTR (4, 0);
6544 unsigned size = INSTR (23, 22);
6545 unsigned full = INSTR (30, 30);
6549 NYI_assert (29, 24, 0x2E);
6550 NYI_assert (21, 10, 0x802);
/* Byte elements: swap within each 4-byte group (index XOR 3).  */
6555 for (i = 0; i < (full ? 16 : 8); i++)
6556 val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i);
/* Half-word elements: swap pairs (index XOR 1).  */
6560 for (i = 0; i < (full ? 8 : 4); i++)
6561 val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i);
/* Commit the staged value; upper half only for the full form.  */
6568 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
6570 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
/* EXT: extract a vector from a pair of vectors — bytes of Vn from
   src_index upward, followed by the low bytes of Vm.  Staged through
   a local union (declaration and the index j init lost here) so vd
   may alias vn/vm.  */
6574 do_vec_EXT (sim_cpu *cpu)
6577 instr[30] = full/half
6578 instr[29,21] = 10 1110 000
6581 instr[14,11] = source index
6586 unsigned vm = INSTR (20, 16);
6587 unsigned vn = INSTR (9, 5);
6588 unsigned vd = INSTR (4, 0);
6589 unsigned src_index = INSTR (14, 11);
6590 unsigned full = INSTR (30, 30);
6595 NYI_assert (31, 21, 0x370);
6596 NYI_assert (15, 15, 0);
6597 NYI_assert (10, 10, 0);
/* In the half-width form an index >= 8 is unallocated.  */
6599 if (!full && (src_index & 0x8))
6604 for (i = src_index; i < (full ? 16 : 8); i++)
6605 val.b[j ++] = aarch64_get_vec_u8 (cpu, vn, i);
6606 for (i = 0; i < src_index; i++)
6607 val.b[j ++] = aarch64_get_vec_u8 (cpu, vm, i);
6609 aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]);
6611 aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]);
/* Top-level decoder for the Advanced SIMD group (instr[28,25] = 0111).
   Dispatches on progressively finer bit fields to the do_vec_* handlers.
   NOTE(review): many structural lines (braces, returns, some case
   labels and HALT_NYI fallbacks) are missing from this fragment;
   the code is left untouched.  */
6615 dexAdvSIMD0 (sim_cpu *cpu)
6617 /* instr [28,25] = 0 111. */
6618 if ( INSTR (15, 10) == 0x07
/* ORR/MOV (register) — whole-vector move when Vn == Vm.  */
6622 if (INSTR (31, 21) == 0x075
6623 || INSTR (31, 21) == 0x275)
6625 do_vec_MOV_whole_vector (cpu);
6630 if (INSTR (29, 19) == 0x1E0)
6632 do_vec_MOV_immediate (cpu);
6636 if (INSTR (29, 19) == 0x5E0)
6642 if (INSTR (29, 19) == 0x1C0
6643 || INSTR (29, 19) == 0x1C1)
6645 if (INSTR (15, 10) == 0x03)
6647 do_vec_DUP_scalar_into_vector (cpu);
6652 switch (INSTR (29, 24))
6654 case 0x0E: do_vec_op1 (cpu); return;
6655 case 0x0F: do_vec_op2 (cpu); return;
6658 if (INSTR (21, 21) == 1)
6660 switch (INSTR (15, 10))
/* Opcode 0x07 with bit 21 set: logical ops selected by size field.  */
6667 switch (INSTR (23, 22))
6669 case 0: do_vec_EOR (cpu); return;
6670 case 1: do_vec_BSL (cpu); return;
6672 case 3: do_vec_bit (cpu); return;
6676 case 0x08: do_vec_sub_long (cpu); return;
6677 case 0x11: do_vec_USHL (cpu); return;
6678 case 0x12: do_vec_CLZ (cpu); return;
6679 case 0x16: do_vec_NOT (cpu); return;
6680 case 0x19: do_vec_max (cpu); return;
6681 case 0x1B: do_vec_min (cpu); return;
6682 case 0x21: do_vec_SUB (cpu); return;
6683 case 0x25: do_vec_MLS (cpu); return;
6684 case 0x31: do_vec_FminmaxNMP (cpu); return;
6685 case 0x35: do_vec_FADDP (cpu); return;
6686 case 0x37: do_vec_FMUL (cpu); return;
6687 case 0x3F: do_vec_FDIV (cpu); return;
6690 switch (INSTR (20, 16))
6692 case 0x00: do_vec_FNEG (cpu); return;
6693 case 0x01: do_vec_FSQRT (cpu); return;
6707 do_vec_compare (cpu); return;
6714 if (INSTR (31, 21) == 0x370)
6717 do_vec_MOV_element (cpu);
6723 switch (INSTR (21, 10))
6725 case 0x82E: do_vec_neg (cpu); return;
6726 case 0x87E: do_vec_sqrt (cpu); return;
6728 if (INSTR (15, 10) == 0x30)
6738 switch (INSTR (15, 10))
6740 case 0x01: do_vec_SSHR_USHR (cpu); return;
6742 case 0x12: do_vec_mls_indexed (cpu); return;
6743 case 0x29: do_vec_xtl (cpu); return;
6757 /* Float multiply add. */
6759 fmadds (sim_cpu *cpu)
6761 unsigned sa = INSTR (14, 10);
6762 unsigned sm = INSTR (20, 16);
6763 unsigned sn = INSTR ( 9, 5);
6764 unsigned sd = INSTR ( 4, 0);
6766 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
6767 + aarch64_get_FP_float (cpu, sn)
6768 * aarch64_get_FP_float (cpu, sm));
6771 /* Double multiply add. */
6773 fmaddd (sim_cpu *cpu)
6775 unsigned sa = INSTR (14, 10);
6776 unsigned sm = INSTR (20, 16);
6777 unsigned sn = INSTR ( 9, 5);
6778 unsigned sd = INSTR ( 4, 0);
6780 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
6781 + aarch64_get_FP_double (cpu, sn)
6782 * aarch64_get_FP_double (cpu, sm));
6785 /* Float multiply subtract. */
6787 fmsubs (sim_cpu *cpu)
6789 unsigned sa = INSTR (14, 10);
6790 unsigned sm = INSTR (20, 16);
6791 unsigned sn = INSTR ( 9, 5);
6792 unsigned sd = INSTR ( 4, 0);
6794 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
6795 - aarch64_get_FP_float (cpu, sn)
6796 * aarch64_get_FP_float (cpu, sm));
6799 /* Double multiply subtract. */
6801 fmsubd (sim_cpu *cpu)
6803 unsigned sa = INSTR (14, 10);
6804 unsigned sm = INSTR (20, 16);
6805 unsigned sn = INSTR ( 9, 5);
6806 unsigned sd = INSTR ( 4, 0);
6808 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
6809 - aarch64_get_FP_double (cpu, sn)
6810 * aarch64_get_FP_double (cpu, sm));
6813 /* Float negative multiply add. */
6815 fnmadds (sim_cpu *cpu)
6817 unsigned sa = INSTR (14, 10);
6818 unsigned sm = INSTR (20, 16);
6819 unsigned sn = INSTR ( 9, 5);
6820 unsigned sd = INSTR ( 4, 0);
6822 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
6823 + (- aarch64_get_FP_float (cpu, sn))
6824 * aarch64_get_FP_float (cpu, sm));
6827 /* Double negative multiply add. */
6829 fnmaddd (sim_cpu *cpu)
6831 unsigned sa = INSTR (14, 10);
6832 unsigned sm = INSTR (20, 16);
6833 unsigned sn = INSTR ( 9, 5);
6834 unsigned sd = INSTR ( 4, 0);
6836 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
6837 + (- aarch64_get_FP_double (cpu, sn))
6838 * aarch64_get_FP_double (cpu, sm));
6841 /* Float negative multiply subtract. */
6843 fnmsubs (sim_cpu *cpu)
6845 unsigned sa = INSTR (14, 10);
6846 unsigned sm = INSTR (20, 16);
6847 unsigned sn = INSTR ( 9, 5);
6848 unsigned sd = INSTR ( 4, 0);
6850 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
6851 + aarch64_get_FP_float (cpu, sn)
6852 * aarch64_get_FP_float (cpu, sm));
6855 /* Double negative multiply subtract. */
6857 fnmsubd (sim_cpu *cpu)
6859 unsigned sa = INSTR (14, 10);
6860 unsigned sm = INSTR (20, 16);
6861 unsigned sn = INSTR ( 9, 5);
6862 unsigned sd = INSTR ( 4, 0);
6864 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
6865 + aarch64_get_FP_double (cpu, sn)
6866 * aarch64_get_FP_double (cpu, sm));
/* Decode FP data-processing (3 source): dispatch on type:o1:o2 to the
   fmadd/fmsub/fnmadd/fnmsub single/double handlers.  NOTE(review): the
   M_S unallocated check and switch scaffolding are missing from this
   fragment.  */
6870 dexSimpleFPDataProc3Source (sim_cpu *cpu)
6872 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
6874 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
6877 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
6878 instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated
6879 instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */
6881 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
6882 /* dispatch on combined type:o1:o2. */
6883 uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15);
/* Cases 0-3: single precision; 4-7: double precision.  */
6890 case 0: fmadds (cpu); return;
6891 case 1: fmsubs (cpu); return;
6892 case 2: fnmadds (cpu); return;
6893 case 3: fnmsubs (cpu); return;
6894 case 4: fmaddd (cpu); return;
6895 case 5: fmsubd (cpu); return;
6896 case 6: fnmaddd (cpu); return;
6897 case 7: fnmsubd (cpu); return;
6899 /* type > 1 is currently unallocated. */
/* FP fixed-point convert group.  NOTE(review): the body of this
   function is not visible in this fragment.  */
6905 dexSimpleFPFixedConvert (sim_cpu *cpu)
/* FCCMP: if the condition holds, compare Sn/Dn with Sm/Dm and set
   NZCV from the result; otherwise load NZCV from the immediate nzcv
   field.  NOTE(review): the equality test line ("if (val1 == val2)")
   and the single/double selection branch appear to be missing from
   this fragment — confirm against the full source.  */
6911 dexSimpleFPCondCompare (sim_cpu *cpu)
6913 /* instr [31,23] = 0001 1110 0
6917 instr [15,12] = condition
6921 instr [3,0] = nzcv */
6923 unsigned rm = INSTR (20, 16);
6924 unsigned rn = INSTR (9, 5);
6926 NYI_assert (31, 23, 0x3C);
6927 NYI_assert (11, 10, 0x1);
6928 NYI_assert (4, 4, 0);
/* Condition fails: NZCV comes straight from the instruction.  */
6930 if (! testConditionCode (cpu, INSTR (15, 12)))
6932 aarch64_set_CPSR (cpu, INSTR (3, 0));
6938 /* Double precision. */
6939 double val1 = aarch64_get_vec_double (cpu, rn, 0);
6940 double val2 = aarch64_get_vec_double (cpu, rm, 0);
6942 /* FIXME: Check for NaNs. */
6944 aarch64_set_CPSR (cpu, (Z | C));
6945 else if (val1 < val2)
6946 aarch64_set_CPSR (cpu, N);
6947 else /* val1 > val2 */
6948 aarch64_set_CPSR (cpu, C);
6952 /* Single precision. */
6953 float val1 = aarch64_get_vec_float (cpu, rn, 0);
6954 float val2 = aarch64_get_vec_float (cpu, rm, 0);
6956 /* FIXME: Check for NaNs. */
6958 aarch64_set_CPSR (cpu, (Z | C));
6959 else if (val1 < val2)
6960 aarch64_set_CPSR (cpu, N);
6961 else /* val1 > val2 */
6962 aarch64_set_CPSR (cpu, C);
6970 fadds (sim_cpu *cpu)
6972 unsigned sm = INSTR (20, 16);
6973 unsigned sn = INSTR ( 9, 5);
6974 unsigned sd = INSTR ( 4, 0);
6976 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
6977 + aarch64_get_FP_float (cpu, sm));
6982 faddd (sim_cpu *cpu)
6984 unsigned sm = INSTR (20, 16);
6985 unsigned sn = INSTR ( 9, 5);
6986 unsigned sd = INSTR ( 4, 0);
6988 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
6989 + aarch64_get_FP_double (cpu, sm));
6994 fdivs (sim_cpu *cpu)
6996 unsigned sm = INSTR (20, 16);
6997 unsigned sn = INSTR ( 9, 5);
6998 unsigned sd = INSTR ( 4, 0);
7000 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7001 / aarch64_get_FP_float (cpu, sm));
7004 /* Double divide. */
7006 fdivd (sim_cpu *cpu)
7008 unsigned sm = INSTR (20, 16);
7009 unsigned sn = INSTR ( 9, 5);
7010 unsigned sd = INSTR ( 4, 0);
7012 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7013 / aarch64_get_FP_double (cpu, sm));
7016 /* Float multiply. */
7018 fmuls (sim_cpu *cpu)
7020 unsigned sm = INSTR (20, 16);
7021 unsigned sn = INSTR ( 9, 5);
7022 unsigned sd = INSTR ( 4, 0);
7024 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7025 * aarch64_get_FP_float (cpu, sm));
7028 /* Double multiply. */
7030 fmuld (sim_cpu *cpu)
7032 unsigned sm = INSTR (20, 16);
7033 unsigned sn = INSTR ( 9, 5);
7034 unsigned sd = INSTR ( 4, 0);
7036 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7037 * aarch64_get_FP_double (cpu, sm));
7040 /* Float negate and multiply. */
7042 fnmuls (sim_cpu *cpu)
7044 unsigned sm = INSTR (20, 16);
7045 unsigned sn = INSTR ( 9, 5);
7046 unsigned sd = INSTR ( 4, 0);
7048 aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
7049 * aarch64_get_FP_float (cpu, sm)));
7052 /* Double negate and multiply. */
7054 fnmuld (sim_cpu *cpu)
7056 unsigned sm = INSTR (20, 16);
7057 unsigned sn = INSTR ( 9, 5);
7058 unsigned sd = INSTR ( 4, 0);
7060 aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
7061 * aarch64_get_FP_double (cpu, sm)));
7064 /* Float subtract. */
7066 fsubs (sim_cpu *cpu)
7068 unsigned sm = INSTR (20, 16);
7069 unsigned sn = INSTR ( 9, 5);
7070 unsigned sd = INSTR ( 4, 0);
7072 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7073 - aarch64_get_FP_float (cpu, sm));
7076 /* Double subtract. */
7078 fsubd (sim_cpu *cpu)
7080 unsigned sm = INSTR (20, 16);
7081 unsigned sn = INSTR ( 9, 5);
7082 unsigned sd = INSTR ( 4, 0);
7084 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7085 - aarch64_get_FP_double (cpu, sm));
7089 do_FMINNM (sim_cpu *cpu)
7091 /* instr[31,23] = 0 0011 1100
7092 instr[22] = float(0)/double(1)
7095 instr[15,10] = 01 1110
7099 unsigned sm = INSTR (20, 16);
7100 unsigned sn = INSTR ( 9, 5);
7101 unsigned sd = INSTR ( 4, 0);
7103 NYI_assert (31, 23, 0x03C);
7104 NYI_assert (15, 10, 0x1E);
7107 aarch64_set_FP_double (cpu, sd,
7108 dminnm (aarch64_get_FP_double (cpu, sn),
7109 aarch64_get_FP_double (cpu, sm)));
7111 aarch64_set_FP_float (cpu, sd,
7112 fminnm (aarch64_get_FP_float (cpu, sn),
7113 aarch64_get_FP_float (cpu, sm)));
7117 do_FMAXNM (sim_cpu *cpu)
7119 /* instr[31,23] = 0 0011 1100
7120 instr[22] = float(0)/double(1)
7123 instr[15,10] = 01 1010
7127 unsigned sm = INSTR (20, 16);
7128 unsigned sn = INSTR ( 9, 5);
7129 unsigned sd = INSTR ( 4, 0);
7131 NYI_assert (31, 23, 0x03C);
7132 NYI_assert (15, 10, 0x1A);
7135 aarch64_set_FP_double (cpu, sd,
7136 dmaxnm (aarch64_get_FP_double (cpu, sn),
7137 aarch64_get_FP_double (cpu, sm)));
7139 aarch64_set_FP_float (cpu, sd,
7140 fmaxnm (aarch64_get_FP_float (cpu, sn),
7141 aarch64_get_FP_float (cpu, sm)));
/* Decode FP data-processing (2 source): dispatch on the opcode field
   to the single/double arithmetic helpers.  NOTE(review): the M_S and
   type unallocated checks plus switch scaffolding are missing from
   this fragment; FMAX/FMIN (opcodes 4,5) are not implemented.  */
7145 dexSimpleFPDataProc2Source (sim_cpu *cpu)
7147 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7149 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7152 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
7155 instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
7156 0010 ==> FADD, 0011 ==> FSUB,
7157 0100 ==> FMAX, 0101 ==> FMIN
7158 0110 ==> FMAXNM, 0111 ==> FMINNM
7159 1000 ==> FNMUL, ow ==> UNALLOC
7164 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7165 uint32_t type = INSTR (23, 22);
7166 /* Dispatch on opcode. */
7167 uint32_t dispatch = INSTR (15, 12);
/* Double-precision dispatch.  */
7178 case 0: fmuld (cpu); return;
7179 case 1: fdivd (cpu); return;
7180 case 2: faddd (cpu); return;
7181 case 3: fsubd (cpu); return;
7182 case 6: do_FMAXNM (cpu); return;
7183 case 7: do_FMINNM (cpu); return;
7184 case 8: fnmuld (cpu); return;
7186 /* Have not yet implemented fmax and fmin. */
7194 else /* type == 0 => floats. */
7197 case 0: fmuls (cpu); return;
7198 case 1: fdivs (cpu); return;
7199 case 2: fadds (cpu); return;
7200 case 3: fsubs (cpu); return;
7201 case 6: do_FMAXNM (cpu); return;
7202 case 7: do_FMINNM (cpu); return;
7203 case 8: fnmuls (cpu); return;
7215 dexSimpleFPCondSelect (sim_cpu *cpu)
7218 instr[31,23] = 0 0011 1100
7219 instr[22] = 0=>single 1=>double
7226 unsigned sm = INSTR (20, 16);
7227 unsigned sn = INSTR ( 9, 5);
7228 unsigned sd = INSTR ( 4, 0);
7229 uint32_t set = testConditionCode (cpu, INSTR (15, 12));
7231 NYI_assert (31, 23, 0x03C);
7232 NYI_assert (11, 10, 0x3);
7235 aarch64_set_FP_double (cpu, sd, set ? sn : sm);
7237 aarch64_set_FP_float (cpu, sd, set ? sn : sm);
7240 /* Store 32 bit unscaled signed 9 bit. */
7242 fsturs (sim_cpu *cpu, int32_t offset)
7244 unsigned int rn = INSTR (9, 5);
7245 unsigned int st = INSTR (4, 0);
7247 aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, st, 1) + offset,
7248 aarch64_get_vec_u32 (cpu, rn, 0));
7251 /* Store 64 bit unscaled signed 9 bit. */
7253 fsturd (sim_cpu *cpu, int32_t offset)
7255 unsigned int rn = INSTR (9, 5);
7256 unsigned int st = INSTR (4, 0);
7258 aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, st, 1) + offset,
7259 aarch64_get_vec_u64 (cpu, rn, 0));
7262 /* Store 128 bit unscaled signed 9 bit. */
7264 fsturq (sim_cpu *cpu, int32_t offset)
7266 unsigned int rn = INSTR (9, 5);
7267 unsigned int st = INSTR (4, 0);
7270 aarch64_get_FP_long_double (cpu, rn, & a);
7271 aarch64_set_mem_long_double (cpu,
7272 aarch64_get_reg_u64 (cpu, st, 1)
7276 /* TODO FP move register. */
7278 /* 32 bit fp to fp move register. */
7280 ffmovs (sim_cpu *cpu)
7282 unsigned int rn = INSTR (9, 5);
7283 unsigned int st = INSTR (4, 0);
7285 aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
7288 /* 64 bit fp to fp move register. */
7290 ffmovd (sim_cpu *cpu)
7292 unsigned int rn = INSTR (9, 5);
7293 unsigned int st = INSTR (4, 0);
7295 aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
7298 /* 32 bit GReg to Vec move register. */
7300 fgmovs (sim_cpu *cpu)
7302 unsigned int rn = INSTR (9, 5);
7303 unsigned int st = INSTR (4, 0);
7305 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
7308 /* 64 bit g to fp move register. */
7310 fgmovd (sim_cpu *cpu)
7312 unsigned int rn = INSTR (9, 5);
7313 unsigned int st = INSTR (4, 0);
7315 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7318 /* 32 bit fp to g move register. */
7320 gfmovs (sim_cpu *cpu)
7322 unsigned int rn = INSTR (9, 5);
7323 unsigned int st = INSTR (4, 0);
7325 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
7328 /* 64 bit fp to g move register. */
7330 gfmovd (sim_cpu *cpu)
7332 unsigned int rn = INSTR (9, 5);
7333 unsigned int st = INSTR (4, 0);
7335 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
7338 /* FP move immediate
7340 These install an immediate 8 bit value in the target register
7341 where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
bit exponent.  */
7345 fmovs (sim_cpu *cpu)
7347 unsigned int sd = INSTR (4, 0);
7348 uint32_t imm = INSTR (20, 13);
7349 float f = fp_immediate_for_encoding_32 (imm);
7351 aarch64_set_FP_float (cpu, sd, f);
7355 fmovd (sim_cpu *cpu)
7357 unsigned int sd = INSTR (4, 0);
7358 uint32_t imm = INSTR (20, 13);
7359 double d = fp_immediate_for_encoding_64 (imm);
7361 aarch64_set_FP_double (cpu, sd, d);
/* Decode FP immediate group: validate imm5 then dispatch to
   fmovs/fmovd on the type bit.  NOTE(review): the imm5 != 0 halt and
   the type-bit dispatch lines are missing from this fragment.  */
7365 dexSimpleFPImmediate (sim_cpu *cpu)
7367 /* instr[31,23] == 00111100
7368 instr[22] == type : single(0)/double(1)
7370 instr[20,13] == imm8
7372 instr[9,5] == imm5 : 00000 ==> PK, ow ==> UNALLOC
7374 uint32_t imm5 = INSTR (9, 5);
7376 NYI_assert (31, 23, 0x3C);
7387 /* TODO specific decode and execute for group Load Store. */
7389 /* TODO FP load/store single register (unscaled offset). */
7391 /* TODO load 8 bit unscaled signed 9 bit. */
7392 /* TODO load 16 bit unscaled signed 9 bit. */
7394 /* Load 32 bit unscaled signed 9 bit. */
7396 fldurs (sim_cpu *cpu, int32_t offset)
7398 unsigned int rn = INSTR (9, 5);
7399 unsigned int st = INSTR (4, 0);
7401 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
7402 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7405 /* Load 64 bit unscaled signed 9 bit. */
7407 fldurd (sim_cpu *cpu, int32_t offset)
7409 unsigned int rn = INSTR (9, 5);
7410 unsigned int st = INSTR (4, 0);
7412 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
7413 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7416 /* Load 128 bit unscaled signed 9 bit. */
7418 fldurq (sim_cpu *cpu, int32_t offset)
7420 unsigned int rn = INSTR (9, 5);
7421 unsigned int st = INSTR (4, 0);
7423 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
7425 aarch64_get_mem_long_double (cpu, addr, & a);
7426 aarch64_set_FP_long_double (cpu, st, a);
7429 /* TODO store 8 bit unscaled signed 9 bit. */
7430 /* TODO store 16 bit unscaled signed 9 bit. */
7435 /* Float absolute value. */
7437 fabss (sim_cpu *cpu)
7439 unsigned sn = INSTR (9, 5);
7440 unsigned sd = INSTR (4, 0);
7441 float value = aarch64_get_FP_float (cpu, sn);
7443 aarch64_set_FP_float (cpu, sd, fabsf (value));
7446 /* Double absolute value. */
7448 fabcpu (sim_cpu *cpu)
7450 unsigned sn = INSTR (9, 5);
7451 unsigned sd = INSTR (4, 0);
7452 double value = aarch64_get_FP_double (cpu, sn);
7454 aarch64_set_FP_double (cpu, sd, fabs (value));
7457 /* Float negative value. */
7459 fnegs (sim_cpu *cpu)
7461 unsigned sn = INSTR (9, 5);
7462 unsigned sd = INSTR (4, 0);
7464 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
7467 /* Double negative value. */
7469 fnegd (sim_cpu *cpu)
7471 unsigned sn = INSTR (9, 5);
7472 unsigned sd = INSTR (4, 0);
7474 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
7477 /* Float square root. */
7479 fsqrts (sim_cpu *cpu)
7481 unsigned sn = INSTR (9, 5);
7482 unsigned sd = INSTR (4, 0);
7484 aarch64_set_FP_float (cpu, sd, sqrt (aarch64_get_FP_float (cpu, sn)));
7487 /* Double square root. */
7489 fsqrtd (sim_cpu *cpu)
7491 unsigned sn = INSTR (9, 5);
7492 unsigned sd = INSTR (4, 0);
7494 aarch64_set_FP_double (cpu, sd,
7495 sqrt (aarch64_get_FP_double (cpu, sn)));
7498 /* Convert double to float. */
7500 fcvtds (sim_cpu *cpu)
7502 unsigned sn = INSTR (9, 5);
7503 unsigned sd = INSTR (4, 0);
7505 aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
7508 /* Convert float to double. */
7510 fcvtcpu (sim_cpu *cpu)
7512 unsigned sn = INSTR (9, 5);
7513 unsigned sd = INSTR (4, 0);
7515 aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
/* FRINT*: round Sn/Dn to an integral FP value per the rounding mode
   in bits 17,15 (modes 6/7 fall back to the FPCR rounding field).
   NOTE(review): the switch scaffolding, HALT lines and several
   sign-test conditions are missing from this fragment.  The
   round-half-to-even adjustment only tests "val - rval == 0.5",
   apparently missing the -0.5 tie case — confirm against full
   source.  */
7519 do_FRINT (sim_cpu *cpu)
7521 /* instr[31,23] = 0001 1110 0
7522 instr[22] = single(0)/double(1)
7524 instr[17,15] = rounding mode
7525 instr[14,10] = 10000
7527 instr[4,0] = dest */
7530 unsigned rs = INSTR (9, 5);
7531 unsigned rd = INSTR (4, 0);
7532 unsigned int rmode = INSTR (17, 15);
7534 NYI_assert (31, 23, 0x03C);
7535 NYI_assert (21, 18, 0x9);
7536 NYI_assert (14, 10, 0x10);
7538 if (rmode == 6 || rmode == 7)
7539 /* FIXME: Add support for rmode == 6 exactness check. */
7540 rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
/* Double-precision path.  */
7544 double val = aarch64_get_FP_double (cpu, rs);
7548 case 0: /* mode N: nearest or even. */
7550 double rval = round (val);
7552 if (val - rval == 0.5)
7554 if (((rval / 2.0) * 2.0) != rval)
7558 aarch64_set_FP_double (cpu, rd, round (val));
7562 case 1: /* mode P: towards +inf. */
7564 aarch64_set_FP_double (cpu, rd, trunc (val));
7566 aarch64_set_FP_double (cpu, rd, round (val));
7569 case 2: /* mode M: towards -inf. */
7571 aarch64_set_FP_double (cpu, rd, round (val));
7573 aarch64_set_FP_double (cpu, rd, trunc (val));
7576 case 3: /* mode Z: towards 0. */
7577 aarch64_set_FP_double (cpu, rd, trunc (val));
7580 case 4: /* mode A: away from 0. */
7581 aarch64_set_FP_double (cpu, rd, round (val));
7584 case 6: /* mode X: use FPCR with exactness check. */
7585 case 7: /* mode I: use FPCR mode. */
/* Single-precision path.  */
7593 val = aarch64_get_FP_float (cpu, rs);
7597 case 0: /* mode N: nearest or even. */
7599 float rval = roundf (val);
7601 if (val - rval == 0.5)
7603 if (((rval / 2.0) * 2.0) != rval)
7607 aarch64_set_FP_float (cpu, rd, rval);
7611 case 1: /* mode P: towards +inf. */
7613 aarch64_set_FP_float (cpu, rd, truncf (val));
7615 aarch64_set_FP_float (cpu, rd, roundf (val));
7618 case 2: /* mode M: towards -inf. */
7620 aarch64_set_FP_float (cpu, rd, truncf (val));
7622 aarch64_set_FP_float (cpu, rd, roundf (val));
7625 case 3: /* mode Z: towards 0. */
7626 aarch64_set_FP_float (cpu, rd, truncf (val));
7629 case 4: /* mode A: away from 0. */
7630 aarch64_set_FP_float (cpu, rd, roundf (val));
7633 case 6: /* mode X: use FPCR with exactness check. */
7634 case 7: /* mode I: use FPCR mode. */
7642 /* Convert half to float. */
7644 do_FCVT_half_to_single (sim_cpu *cpu)
7646 unsigned rn = INSTR (9, 5);
7647 unsigned rd = INSTR (4, 0);
7649 NYI_assert (31, 10, 0x7B890);
7651 aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn));
7654 /* Convert half to double. */
7656 do_FCVT_half_to_double (sim_cpu *cpu)
7658 unsigned rn = INSTR (9, 5);
7659 unsigned rd = INSTR (4, 0);
7661 NYI_assert (31, 10, 0x7B8B0);
7663 aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn));
7667 do_FCVT_single_to_half (sim_cpu *cpu)
7669 unsigned rn = INSTR (9, 5);
7670 unsigned rd = INSTR (4, 0);
7672 NYI_assert (31, 10, 0x788F0);
7674 aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn));
7677 /* Convert double to half. */
7679 do_FCVT_double_to_half (sim_cpu *cpu)
7681 unsigned rn = INSTR (9, 5);
7682 unsigned rd = INSTR (4, 0);
7684 NYI_assert (31, 10, 0x798F0);
7686 aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn));
/* Decode FP data-processing (1 source): FMOV/FABS/FNEG/FSQRT/FCVT and
   the FRINT family, on the type and opcode fields.  NOTE(review): the
   bulk of the dispatch switch (lines 7719-7804) is missing from this
   fragment; only the half-precision FCVT arms and a couple of labels
   survive.  */
7690 dexSimpleFPDataProc1Source (sim_cpu *cpu)
7692 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7694 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7697 instr[23,22] ==> type : 00 ==> source is single,
7698 01 ==> source is double
7700 11 ==> UNALLOC or source is half
7702 instr[20,15] ==> opcode : with type 00 or 01
7703 000000 ==> FMOV, 000001 ==> FABS,
7704 000010 ==> FNEG, 000011 ==> FSQRT,
7705 000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double)
7706 000110 ==> UNALLOC, 000111 ==> FCVT (to half)
7707 001000 ==> FRINTN, 001001 ==> FRINTP,
7708 001010 ==> FRINTM, 001011 ==> FRINTZ,
7709 001100 ==> FRINTA, 001101 ==> UNALLOC
7710 001110 ==> FRINTX, 001111 ==> FRINTI
7712 000100 ==> FCVT (half-to-single)
7713 000101 ==> FCVT (half-to-double)
7714 instr[14,10] = 10000. */
7716 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7717 uint32_t type = INSTR (23, 22);
7718 uint32_t opcode = INSTR (20, 15);
/* Half-precision source (type == 3).  */
7726 do_FCVT_half_to_single (cpu);
7727 else if (opcode == 5)
7728 do_FCVT_half_to_double (cpu);
7780 case 8: /* FRINTN etc. */
/* FCVT to half, selected by the source type.  */
7792 do_FCVT_double_to_half (cpu);
7794 do_FCVT_single_to_half (cpu);
7805 /* 32 bit signed int to float. */
7807 scvtf32 (sim_cpu *cpu)
7809 unsigned rn = INSTR (9, 5);
7810 unsigned sd = INSTR (4, 0);
7812 aarch64_set_FP_float
7813 (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
7816 /* signed int to float. */
7818 scvtf (sim_cpu *cpu)
7820 unsigned rn = INSTR (9, 5);
7821 unsigned sd = INSTR (4, 0);
7823 aarch64_set_FP_float
7824 (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
7827 /* 32 bit signed int to double. */
7829 scvtd32 (sim_cpu *cpu)
7831 unsigned rn = INSTR (9, 5);
7832 unsigned sd = INSTR (4, 0);
7834 aarch64_set_FP_double
7835 (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
7838 /* signed int to double. */
7840 scvtd (sim_cpu *cpu)
7842 unsigned rn = INSTR (9, 5);
7843 unsigned sd = INSTR (4, 0);
7845 aarch64_set_FP_double
7846 (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
/* Saturation bounds used when converting FP values to 32/64-bit
   signed integers; consumed by RAISE_EXCEPTIONS via token pasting
   (FTYPE##_##ITYPE##_MAX/MIN).  */
7849 static const float FLOAT_INT_MAX = (float) INT_MAX;
7850 static const float FLOAT_INT_MIN = (float) INT_MIN;
7851 static const double DOUBLE_INT_MAX = (double) INT_MAX;
7852 static const double DOUBLE_INT_MIN = (double) INT_MIN;
7853 static const float FLOAT_LONG_MAX = (float) LONG_MAX;
7854 static const float FLOAT_LONG_MIN = (float) LONG_MIN;
7855 static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
7856 static const double DOUBLE_LONG_MIN = (double) LONG_MIN;
7858 /* Check for FP exception conditions:
7861 Out of Range raises IO and IX and saturates value
7862 Denormal raises ID and IX and sets to zero. */
/* NOTE(review): several interior lines of this macro (case labels,
   braces, the normal-number sign test) are missing from this
   fragment; continuation backslashes retained as-is.  */
7863 #define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE) \
7866 switch (fpclassify (F)) \
7870 aarch64_set_FPSR (cpu, IO); \
7872 VALUE = ITYPE##_MAX; \
7874 VALUE = ITYPE##_MIN; \
7878 if (F >= FTYPE##_##ITYPE##_MAX) \
7880 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
7881 VALUE = ITYPE##_MAX; \
7883 else if (F <= FTYPE##_##ITYPE##_MIN) \
7885 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
7886 VALUE = ITYPE##_MIN; \
7890 case FP_SUBNORMAL: \
7891 aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID); \
/* FCVTZS family: floating-point to signed integer, round toward zero.
   Source FP register in instr[9,5], integer destination in instr[4,0].
   Each variant casts and then applies RAISE_EXCEPTIONS to saturate and
   set FPSR flags on NaN/overflow/denormal.  */
7903 /* 32 bit convert float to signed int truncate towards zero. */
7905 fcvtszs32 (sim_cpu *cpu)
7907 unsigned sn = INSTR (9, 5);
7908 unsigned rd = INSTR (4, 0);
7909 /* TODO : check that this rounds toward zero. */
7910 float f = aarch64_get_FP_float (cpu, sn);
7911 int32_t value = (int32_t) f;
7913 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
7915 /* Avoid sign extension to 64 bit. */
7916 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
/* FCVTZS Xd, Sn.  */
7919 /* 64 bit convert float to signed int truncate towards zero. */
7921 fcvtszs (sim_cpu *cpu)
7923 unsigned sn = INSTR (9, 5);
7924 unsigned rd = INSTR (4, 0);
7925 float f = aarch64_get_FP_float (cpu, sn);
7926 int64_t value = (int64_t) f;
7928 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
7930 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
/* FCVTZS Wd, Dn.  */
7933 /* 32 bit convert double to signed int truncate towards zero. */
7935 fcvtszd32 (sim_cpu *cpu)
7937 unsigned sn = INSTR (9, 5);
7938 unsigned rd = INSTR (4, 0);
7939 /* TODO : check that this rounds toward zero. */
7940 double d = aarch64_get_FP_double (cpu, sn);
7941 int32_t value = (int32_t) d;
7943 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
7945 /* Avoid sign extension to 64 bit. */
7946 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
/* FCVTZS Xd, Dn.  */
7949 /* 64 bit convert double to signed int truncate towards zero. */
7951 fcvtszd (sim_cpu *cpu)
7953 unsigned sn = INSTR (9, 5);
7954 unsigned rd = INSTR (4, 0);
7955 /* TODO : check that this rounds toward zero. */
7956 double d = aarch64_get_FP_double (cpu, sn);
7959 value = (int64_t) d;
7961 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
7963 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
/* FCVTZU: floating-point to unsigned integer, round toward zero.
   Dispatches on size (instr[31]) and type (instr[22]); the fixed-point
   form (instr[21] == 0) is not handled on this path.
   NOTE(review): `1UL << 63' is undefined behaviour on hosts where
   unsigned long is 32 bits -- presumably wants a 64-bit constant
   (UINT64_C) -- confirm.  */
7967 do_fcvtzu (sim_cpu *cpu)
7969 /* instr[31] = size: 32-bit (0), 64-bit (1)
7970 instr[30,23] = 00111100
7971 instr[22] = type: single (0)/ double (1)
7972 instr[21] = enable (0)/disable(1) precision
7973 instr[20,16] = 11001
7974 instr[15,10] = precision
7978 unsigned rs = INSTR (9, 5);
7979 unsigned rd = INSTR (4, 0);
7981 NYI_assert (30, 23, 0x3C);
7982 NYI_assert (20, 16, 0x19);
7984 if (INSTR (21, 21) != 1)
7985 /* Convert to fixed point. */
7990 /* Convert to unsigned 64-bit integer. */
7993 double d = aarch64_get_FP_double (cpu, rs);
7994 uint64_t value = (uint64_t) d;
7996 /* Do not raise an exception if we have reached ULONG_MAX. */
7997 if (value != (1UL << 63))
7998 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
8000 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
/* Single-precision source, 64-bit result.  */
8004 float f = aarch64_get_FP_float (cpu, rs);
8005 uint64_t value = (uint64_t) f;
8007 /* Do not raise an exception if we have reached ULONG_MAX. */
8008 if (value != (1UL << 63))
8009 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
8011 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8018 /* Convert to unsigned 32-bit integer. */
8021 double d = aarch64_get_FP_double (cpu, rs);
8023 value = (uint32_t) d;
8024 /* Do not raise an exception if we have reached UINT_MAX. */
8025 if (value != (1UL << 31))
8026 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
/* Single-precision source, 32-bit result.  */
8030 float f = aarch64_get_FP_float (cpu, rs);
8032 value = (uint32_t) f;
8033 /* Do not raise an exception if we have reached UINT_MAX. */
8034 if (value != (1UL << 31))
8035 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
8038 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
/* UCVTF: unsigned integer to floating-point.
   Dispatches on size (instr[31]) and type (instr[22]); plain C cast,
   no FPSR exception flags are raised (see FIXME below).  */
8043 do_UCVTF (sim_cpu *cpu)
8045 /* instr[31] = size: 32-bit (0), 64-bit (1)
8046 instr[30,23] = 001 1110 0
8047 instr[22] = type: single (0)/ double (1)
8048 instr[21] = enable (0)/disable(1) precision
8049 instr[20,16] = 0 0011
8050 instr[15,10] = precision
8054 unsigned rs = INSTR (9, 5);
8055 unsigned rd = INSTR (4, 0);
8057 NYI_assert (30, 23, 0x3C);
8058 NYI_assert (20, 16, 0x03);
8060 if (INSTR (21, 21) != 1)
8063 /* FIXME: Add exception raising. */
/* 64-bit source.  */
8066 uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);
8069 aarch64_set_FP_double (cpu, rd, (double) value);
8071 aarch64_set_FP_float (cpu, rd, (float) value);
/* 32-bit source.  */
8075 uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP);
8078 aarch64_set_FP_double (cpu, rd, (double) value);
8080 aarch64_set_FP_float (cpu, rd, (float) value);
/* FMOV between the top half (element 1) of a vector register and a
   general register.  Direction is selected by instr[16].  */
8085 float_vector_move (sim_cpu *cpu)
8087 /* instr[31,17] == 100 1111 0101 0111
8088 instr[16] ==> direction 0=> to GR, 1=> from GR
8090 instr[9,5] ==> source
8091 instr[4,0] ==> dest. */
8093 unsigned rn = INSTR (9, 5);
8094 unsigned rd = INSTR (4, 0);
8096 NYI_assert (31, 17, 0x4F57);
8098 if (INSTR (15, 10) != 0)
/* instr[16] set: GR -> vector high half; clear: vector high -> GR.  */
8102 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
8104 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
/* Decode/dispatch for FP <-> integer conversion instructions.
   Builds rmode:opcode from instr[20,16] and a combined size/type key,
   then dispatches to the scvtf/fmov/fcvtz/ucvtf handlers above.  */
8108 dexSimpleFPIntegerConvert (sim_cpu *cpu)
8110 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
8112 instr[29] = S : 0 ==> OK, 1 ==> UNALLOC
8115 instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8117 instr[20,19] = rmode
8118 instr[18,16] = opcode
8119 instr[15,10] = 10 0000 */
8121 uint32_t rmode_opcode;
/* The FMOV Vd.D[1] <-> Xn form is encoded in this space; peel it
   off before the generic decode.  */
8127 if (INSTR (31, 17) == 0x4F57)
8129 float_vector_move (cpu);
8133 size = INSTR (31, 31);
8138 type = INSTR (23, 22);
8142 rmode_opcode = INSTR (20, 16);
8143 size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */
8145 switch (rmode_opcode)
8147 case 2: /* SCVTF. */
8150 case 0: scvtf32 (cpu); return;
8151 case 1: scvtd32 (cpu); return;
8152 case 2: scvtf (cpu); return;
8153 case 3: scvtd (cpu); return;
8156 case 6: /* FMOV GR, Vec. */
8159 case 0: gfmovs (cpu); return;
8160 case 3: gfmovd (cpu); return;
8161 default: HALT_UNALLOC;
8164 case 7: /* FMOV vec, GR. */
8167 case 0: fgmovs (cpu); return;
8168 case 3: fgmovd (cpu); return;
8169 default: HALT_UNALLOC;
8172 case 24: /* FCVTZS. */
8175 case 0: fcvtszs32 (cpu); return;
8176 case 1: fcvtszd32 (cpu); return;
8177 case 2: fcvtszs (cpu); return;
8178 case 3: fcvtszd (cpu); return;
8181 case 25: do_fcvtzu (cpu); return;
8182 case 3: do_UCVTF (cpu); return;
/* Remaining rounding-mode conversions are not implemented.  */
8184 case 0: /* FCVTNS. */
8185 case 1: /* FCVTNU. */
8186 case 4: /* FCVTAS. */
8187 case 5: /* FCVTAU. */
8188 case 8: /* FCVPTS. */
8189 case 9: /* FCVTPU. */
8190 case 16: /* FCVTMS. */
8191 case 17: /* FCVTMU. */
/* Set CPSR NZCV from a single-precision compare: NaN operands give the
   unordered result, otherwise flags are derived from the sign of the
   difference.  */
8198 set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
8202 if (isnan (fvalue1) || isnan (fvalue2))
8206 float result = fvalue1 - fvalue2;
8210 else if (result < 0)
8212 else /* (result > 0). */
8216 aarch64_set_CPSR (cpu, flags);
/* FCMP Sn, Sm -- Invalid Operation exception only on signaling NaNs.  */
8220 fcmps (sim_cpu *cpu)
8222 unsigned sm = INSTR (20, 16);
8223 unsigned sn = INSTR ( 9, 5);
8225 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8226 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8228 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8231 /* Float compare to zero -- Invalid Operation exception
8232 only on signaling NaNs. */
8234 fcmpzs (sim_cpu *cpu)
8236 unsigned sn = INSTR ( 9, 5);
8237 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8239 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8242 /* Float compare -- Invalid Operation exception on all NaNs. */
8244 fcmpes (sim_cpu *cpu)
8246 unsigned sm = INSTR (20, 16);
8247 unsigned sn = INSTR ( 9, 5);
8249 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8250 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8252 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8255 /* Float compare to zero -- Invalid Operation exception on all NaNs. */
8257 fcmpzes (sim_cpu *cpu)
8259 unsigned sn = INSTR ( 9, 5);
8260 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8262 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
/* Double-precision analogue of set_flags_for_float_compare: NZCV from
   NaN check, then sign of the difference.
   NOTE(review): the FCMPE variants below currently behave identically
   to FCMP in this simulator -- signaling-NaN distinction is not made
   here; confirm whether that is deliberate.  */
8266 set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
8270 if (isnan (dval1) || isnan (dval2))
8274 double result = dval1 - dval2;
8278 else if (result < 0)
8280 else /* (result > 0). */
8284 aarch64_set_CPSR (cpu, flags);
8287 /* Double compare -- Invalid Operation exception only on signaling NaNs. */
8289 fcmpd (sim_cpu *cpu)
8291 unsigned sm = INSTR (20, 16);
8292 unsigned sn = INSTR ( 9, 5);
8294 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8295 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8297 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8300 /* Double compare to zero -- Invalid Operation exception
8301 only on signaling NaNs. */
8303 fcmpzd (sim_cpu *cpu)
8305 unsigned sn = INSTR ( 9, 5);
8306 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8308 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8311 /* Double compare -- Invalid Operation exception on all NaNs. */
8313 fcmped (sim_cpu *cpu)
8315 unsigned sm = INSTR (20, 16);
8316 unsigned sn = INSTR ( 9, 5);
8318 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8319 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8321 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8324 /* Double compare to zero -- Invalid Operation exception on all NaNs. */
8326 fcmpzed (sim_cpu *cpu)
8328 unsigned sn = INSTR ( 9, 5);
8329 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8331 set_flags_for_double_compare (cpu, dvalue1, 0.0);
/* Decode/dispatch for FCMP/FCMPE (and zero variants).  The dispatch
   key combines the type field with the top two bits of opcode2, giving
   the eight handlers above.  */
8335 dexSimpleFPCompare (sim_cpu *cpu)
8337 /* assert instr[28,25] == 1111
8338 instr[30:24:21:13,10] = 0011000
8339 instr[31] = M : 0 ==> OK, 1 ==> UNALLOC
8340 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8341 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
8342 instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC
8343 instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
8344 01000 ==> FCMPZ, 11000 ==> FCMPEZ,
8347 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8348 uint32_t type = INSTR (23, 22);
8349 uint32_t op = INSTR (15, 14);
8350 uint32_t op2_2_0 = INSTR (2, 0);
8364 /* dispatch on type and top 2 bits of opcode. */
8365 dispatch = (type << 2) | INSTR (4, 3);
8369 case 0: fcmps (cpu); return;
8370 case 1: fcmpzs (cpu); return;
8371 case 2: fcmpes (cpu); return;
8372 case 3: fcmpzes (cpu); return;
8373 case 4: fcmpd (cpu); return;
8374 case 5: fcmpzd (cpu); return;
8375 case 6: fcmped (cpu); return;
8376 case 7: fcmpzed (cpu); return;
/* FADDP (scalar): add the two elements of the source vector and write
   the sum to the scalar FP destination.  instr[22] selects single or
   double precision.  */
8381 do_scalar_FADDP (sim_cpu *cpu)
8383 /* instr [31,23] = 0111 1110 0
8384 instr [22] = single(0)/double(1)
8385 instr [21,10] = 11 0000 1101 10
8387 instr [4,0] = Fd. */
8389 unsigned Fn = INSTR (9, 5);
8390 unsigned Fd = INSTR (4, 0);
8392 NYI_assert (31, 23, 0x0FC);
8393 NYI_assert (21, 10, 0xC36);
/* Double-precision pair.  */
8397 double val1 = aarch64_get_vec_double (cpu, Fn, 0);
8398 double val2 = aarch64_get_vec_double (cpu, Fn, 1);
8400 aarch64_set_FP_double (cpu, Fd, val1 + val2);
/* Single-precision pair.  */
8404 float val1 = aarch64_get_vec_float (cpu, Fn, 0);
8405 float val2 = aarch64_get_vec_float (cpu, Fn, 1);
8407 aarch64_set_FP_float (cpu, Fd, val1 + val2);
8411 /* Floating point absolute difference. */
/* FABD (scalar): |Rn - Rm|, single or double per instr[22].  */
8414 do_scalar_FABD (sim_cpu *cpu)
8416 /* instr [31,23] = 0111 1110 1
8417 instr [22] = float(0)/double(1)
8420 instr [15,10] = 1101 01
8422 instr [4, 0] = Rd. */
8424 unsigned rm = INSTR (20, 16);
8425 unsigned rn = INSTR (9, 5);
8426 unsigned rd = INSTR (4, 0);
8428 NYI_assert (31, 23, 0x0FD);
8429 NYI_assert (21, 21, 1);
8430 NYI_assert (15, 10, 0x35);
8433 aarch64_set_FP_double (cpu, rd,
8434 fabs (aarch64_get_FP_double (cpu, rn)
8435 - aarch64_get_FP_double (cpu, rm)));
8437 aarch64_set_FP_float (cpu, rd,
8438 fabsf (aarch64_get_FP_float (cpu, rn)
8439 - aarch64_get_FP_float (cpu, rm)));
/* CMGT (scalar, 64-bit): all-ones if element 0 of Rn > Rm, else zero.
   NOTE(review): the comparison uses aarch64_get_vec_u64, i.e. unsigned,
   although CMGT is a signed compare -- confirm against the Arm ARM.  */
8443 do_scalar_CMGT (sim_cpu *cpu)
8445 /* instr [31,21] = 0101 1110 111
8447 instr [15,10] = 00 1101
8449 instr [4, 0] = Rd. */
8451 unsigned rm = INSTR (20, 16);
8452 unsigned rn = INSTR (9, 5);
8453 unsigned rd = INSTR (4, 0);
8455 NYI_assert (31, 21, 0x2F7);
8456 NYI_assert (15, 10, 0x0D);
8458 aarch64_set_vec_u64 (cpu, rd, 0,
8459 aarch64_get_vec_u64 (cpu, rn, 0) >
8460 aarch64_get_vec_u64 (cpu, rm, 0) ? -1L : 0L);
/* USHR (scalar): logical right shift of the 64-bit element; the shift
   amount is encoded as 128 - immh:immb.  */
8464 do_scalar_USHR (sim_cpu *cpu)
8466 /* instr [31,23] = 0111 1111 0
8467 instr [22,16] = shift amount
8468 instr [15,10] = 0000 01
8470 instr [4, 0] = Rd. */
8472 unsigned amount = 128 - INSTR (22, 16);
8473 unsigned rn = INSTR (9, 5);
8474 unsigned rd = INSTR (4, 0);
8476 NYI_assert (31, 23, 0x0FE);
8477 NYI_assert (15, 10, 0x01);
8479 aarch64_set_vec_u64 (cpu, rd, 0,
8480 aarch64_get_vec_u64 (cpu, rn, 0) >> amount);
/* SSHL (scalar): signed shift of Rn by the signed byte in Rm; a
   negative shift count means shift right.  */
8484 do_scalar_SSHL (sim_cpu *cpu)
8486 /* instr [31,21] = 0101 1110 111
8488 instr [15,10] = 0100 01
8490 instr [4, 0] = Rd. */
8492 unsigned rm = INSTR (20, 16);
8493 unsigned rn = INSTR (9, 5);
8494 unsigned rd = INSTR (4, 0);
8495 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8497 NYI_assert (31, 21, 0x2F7);
8498 NYI_assert (15, 10, 0x11);
8501 aarch64_set_vec_s64 (cpu, rd, 0,
8502 aarch64_get_vec_s64 (cpu, rn, 0) << shift);
8504 aarch64_set_vec_s64 (cpu, rd, 0,
8505 aarch64_get_vec_s64 (cpu, rn, 0) >> - shift);
/* Scalar immediate shifts: SSHR (arithmetic right, amount encoded as
   128 - immh:immb) and SHL (left, amount encoded as immh:immb - 64).
   instr[22] must be set for the 64-bit element forms handled here.  */
8509 do_scalar_shift (sim_cpu *cpu)
8511 /* instr [31,23] = 0101 1111 0
8512 instr [22,16] = shift amount
8513 instr [15,10] = 0101 01 [SHL]
8514 instr [15,10] = 0000 01 [SSHR]
8516 instr [4, 0] = Rd. */
8518 unsigned rn = INSTR (9, 5);
8519 unsigned rd = INSTR (4, 0);
8522 NYI_assert (31, 23, 0x0BE);
8524 if (INSTR (22, 22) == 0)
8527 switch (INSTR (15, 10))
8529 case 0x01: /* SSHR */
8530 amount = 128 - INSTR (22, 16);
8531 aarch64_set_vec_s64 (cpu, rd, 0,
8532 aarch64_get_vec_s64 (cpu, rn, 0) >> amount);
8534 case 0x15: /* SHL */
8535 amount = INSTR (22, 16) - 64;
8536 aarch64_set_vec_u64 (cpu, rd, 0,
8537 aarch64_get_vec_u64 (cpu, rn, 0) << amount);
8544 /* FCMEQ FCMGT FCMGE. */
/* Scalar FP compares writing an all-ones/all-zero mask to Rd.
   EUac packs instr[23] (E), instr[29] (U) and instr[11] (ac) into a
   3-bit selector for ==, >= and > (with abs() applied for the
   FAC* forms).  instr[22] selects single vs double precision.  */
8546 do_scalar_FCM (sim_cpu *cpu)
8548 /* instr [31,30] = 01
8550 instr [28,24] = 1 1110
8555 instr [15,12] = 1110
8559 instr [4, 0] = Rd. */
8561 unsigned rm = INSTR (20, 16);
8562 unsigned rn = INSTR (9, 5);
8563 unsigned rd = INSTR (4, 0);
8564 unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11);
8569 NYI_assert (31, 30, 1);
8570 NYI_assert (28, 24, 0x1E);
8571 NYI_assert (21, 21, 1);
8572 NYI_assert (15, 12, 0xE);
8573 NYI_assert (10, 10, 1);
/* Double-precision operands.  */
8577 double val1 = aarch64_get_FP_double (cpu, rn);
8578 double val2 = aarch64_get_FP_double (cpu, rm);
8583 result = val1 == val2;
8591 result = val1 >= val2;
8599 result = val1 > val2;
/* NOTE(review): writes only a 32-bit mask element even for the
   double-precision case -- confirm against the Arm ARM.  */
8606 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
/* Single-precision operands.  */
8610 val1 = aarch64_get_FP_float (cpu, rn);
8611 val2 = aarch64_get_FP_float (cpu, rm);
8616 result = val1 == val2;
8620 val1 = fabsf (val1);
8621 val2 = fabsf (val2);
8624 result = val1 >= val2;
8628 val1 = fabsf (val1);
8629 val2 = fabsf (val2);
8632 result = val1 > val2;
8639 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
8642 /* An alias of DUP. */
/* Scalar MOV (element): copy one element of Vn to element 0 of Vd.
   imm5's lowest set bit selects element size (byte/half/word/dword)
   and the bits above it give the element index.  */
8644 do_scalar_MOV (sim_cpu *cpu)
8646 /* instr [31,21] = 0101 1110 000
8647 instr [20,16] = imm5
8648 instr [15,10] = 0000 01
8650 instr [4, 0] = Rd. */
8652 unsigned rn = INSTR (9, 5);
8653 unsigned rd = INSTR (4, 0);
8656 NYI_assert (31, 21, 0x2F0);
8657 NYI_assert (15, 10, 0x01);
/* imm5 == x xxx1: byte element, index in instr[20,17].  */
8662 index = INSTR (20, 17);
8664 (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index));
8666 else if (INSTR (17, 17))
/* imm5 == x xx10: halfword element, index in instr[20,18].  */
8669 index = INSTR (20, 18);
8671 (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index));
8673 else if (INSTR (18, 18))
/* imm5 == x x100: word element, index in instr[20,19].  */
8676 index = INSTR (20, 19);
8678 (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index));
8680 else if (INSTR (19, 19))
/* imm5 == x 1000: doubleword element, index in instr[20].  */
8683 index = INSTR (20, 20);
8685 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index));
/* NEG (scalar, 64-bit): two's-complement negate of element 0.  */
8692 do_scalar_NEG (sim_cpu *cpu)
8694 /* instr [31,10] = 0111 1110 1110 0000 1011 10
8696 instr [4, 0] = Rd. */
8698 unsigned rn = INSTR (9, 5);
8699 unsigned rd = INSTR (4, 0);
8701 NYI_assert (31, 10, 0x1FB82E);
8703 aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0));
/* USHL (scalar): unsigned shift of Rn by the signed byte in Rm;
   a negative count shifts right.  */
8707 do_scalar_USHL (sim_cpu *cpu)
8709 /* instr [31,21] = 0111 1110 111
8711 instr [15,10] = 0100 01
8713 instr [4, 0] = Rd. */
8715 unsigned rm = INSTR (20, 16);
8716 unsigned rn = INSTR (9, 5);
8717 unsigned rd = INSTR (4, 0);
8718 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8720 NYI_assert (31, 21, 0x3F7);
8721 NYI_assert (15, 10, 0x11);
8724 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift);
8726 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift);
/* FADD (scalar, double): Fd = Fm + Fn.  */
8730 do_double_add (sim_cpu *cpu)
8732 /* instr [31,21] = 0101 1110 111
8734 instr [15,10] = 1000 01
8736 instr [4,0] = Fd. */
8743 NYI_assert (31, 21, 0x2F7);
8744 NYI_assert (15, 10, 0x21);
8748 Fn = INSTR (20, 16);
8750 val1 = aarch64_get_FP_double (cpu, Fm);
8751 val2 = aarch64_get_FP_double (cpu, Fn);
8753 aarch64_set_FP_double (cpu, Fd, val1 + val2);
/* UCVTF (scalar, vector form): unsigned element 0 of Vn converted to
   FP in element 0 of Vd; instr[22] selects single vs double.  */
8757 do_scalar_UCVTF (sim_cpu *cpu)
8759 /* instr [31,23] = 0111 1110 0
8760 instr [22] = single(0)/double(1)
8761 instr [21,10] = 10 0001 1101 10
8763 instr [4,0] = rd. */
8765 unsigned rn = INSTR (9, 5);
8766 unsigned rd = INSTR (4, 0);
8768 NYI_assert (31, 23, 0x0FC);
8769 NYI_assert (21, 10, 0x876);
8773 uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0);
8775 aarch64_set_vec_double (cpu, rd, 0, (double) val);
8779 uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0);
8781 aarch64_set_vec_float (cpu, rd, 0, (float) val);
/* Decode/dispatch for scalar AdvSIMD instructions: switch on
   instr[31,23], then on instr[15,10] (and instr[21,16] for the
   0x876 UCVTF/FADDP group).  */
8786 do_scalar_vec (sim_cpu *cpu)
8788 /* instr [30] = 1. */
8789 /* instr [28,25] = 1111. */
8790 switch (INSTR (31, 23))
8793 switch (INSTR (15, 10))
8795 case 0x01: do_scalar_MOV (cpu); return;
8796 case 0x39: do_scalar_FCM (cpu); return;
8797 case 0x3B: do_scalar_FCM (cpu); return;
8801 case 0xBE: do_scalar_shift (cpu); return;
8804 switch (INSTR (15, 10))
8807 switch (INSTR (21, 16))
8809 case 0x30: do_scalar_FADDP (cpu); return;
8810 case 0x21: do_scalar_UCVTF (cpu); return;
8813 case 0x39: do_scalar_FCM (cpu); return;
8814 case 0x3B: do_scalar_FCM (cpu); return;
8819 switch (INSTR (15, 10))
8821 case 0x0D: do_scalar_CMGT (cpu); return;
8822 case 0x11: do_scalar_USHL (cpu); return;
8823 case 0x2E: do_scalar_NEG (cpu); return;
8824 case 0x35: do_scalar_FABD (cpu); return;
8825 case 0x39: do_scalar_FCM (cpu); return;
8826 case 0x3B: do_scalar_FCM (cpu); return;
8831 case 0xFE: do_scalar_USHR (cpu); return;
8834 switch (INSTR (15, 10))
8836 case 0x21: do_double_add (cpu); return;
8837 case 0x11: do_scalar_SSHL (cpu); return;
/* Top-level decode for the AdvSIMD/FP space (instr[28,25] == 1111):
   peels off scalar-vector ops, then FP 3-source, fixed-point
   conversions, cond compare/select and 2-source, then an ordered
   cascade over instr[12..15] for immediate/compare/1-source/convert.  */
8848 dexAdvSIMD1 (sim_cpu *cpu)
8850 /* instr [28,25] = 1 111. */
8852 /* We are currently only interested in the basic
8853 scalar fp routines which all have bit 30 = 0. */
8855 do_scalar_vec (cpu);
8857 /* instr[24] is set for FP data processing 3-source and clear for
8858 all other basic scalar fp instruction groups. */
8859 else if (INSTR (24, 24))
8860 dexSimpleFPDataProc3Source (cpu);
8862 /* instr[21] is clear for floating <-> fixed conversions and set for
8863 all other basic scalar fp instruction groups. */
8864 else if (!INSTR (21, 21))
8865 dexSimpleFPFixedConvert (cpu);
8867 /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
8868 11 ==> cond select, 00 ==> other. */
8870 switch (INSTR (11, 10))
8872 case 1: dexSimpleFPCondCompare (cpu); return;
8873 case 2: dexSimpleFPDataProc2Source (cpu); return;
8874 case 3: dexSimpleFPCondSelect (cpu); return;
8877 /* Now an ordered cascade of tests.
8878 FP immediate has instr [12] == 1.
8879 FP compare has instr [13] == 1.
8880 FP Data Proc 1 Source has instr [14] == 1.
8881 FP floating <--> integer conversions has instr [15] == 0. */
8883 dexSimpleFPImmediate (cpu);
8885 else if (INSTR (13, 13))
8886 dexSimpleFPCompare (cpu);
8888 else if (INSTR (14, 14))
8889 dexSimpleFPDataProc1Source (cpu);
8891 else if (!INSTR (15, 15))
8892 dexSimpleFPIntegerConvert (cpu);
8895 /* If we get here then instr[15] == 1 which means UNALLOC. */
8900 /* PC relative addressing. */
/* ADR/ADRP: rd = PC (or PC page) + sign-extended immhi:immlo
   (page-scaled for ADRP, selected by instr[31]).
   NOTE(review): the union member names are swapped relative to their
   types -- `u64' is declared int64_t and `s64' uint64_t.  Harmless as
   used, but worth renaming -- confirm no other users first.  */
8903 pcadr (sim_cpu *cpu)
8905 /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP
8906 instr[30,29] = immlo
8907 instr[23,5] = immhi. */
8909 unsigned rd = INSTR (4, 0);
8910 uint32_t isPage = INSTR (31, 31);
8911 union { int64_t u64; uint64_t s64; } imm;
8914 imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
8916 offset = (offset << 2) | INSTR (30, 29);
8918 address = aarch64_get_PC (cpu);
8926 aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
8929 /* Specific decode and execute for group Data Processing Immediate. */
8932 dexPCRelAddressing (sim_cpu *cpu)
8934 /* assert instr[28,24] = 10000. */
8938 /* Immediate logical.
8939 The bimm32/64 argument is constructed by replicating a 2, 4, 8,
8940 16, 32 or 64 bit sequence pulled out at decode and possibly
8943 N.B. the output register (dest) can normally be Xn or SP
8944 the exception occurs for flag setting instructions which may
8945 only use Xn for the output (dest). The input register can
/* Hence: the non-flag-setting ops below write rd with SP_OK while
   the flag-setting ANDS variants write with NO_SP, matching the
   note above.  */
8948 /* 32 bit and immediate. */
8950 and32 (sim_cpu *cpu, uint32_t bimm)
8952 unsigned rn = INSTR (9, 5);
8953 unsigned rd = INSTR (4, 0);
8955 aarch64_set_reg_u64 (cpu, rd, SP_OK,
8956 aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
8959 /* 64 bit and immediate. */
8961 and64 (sim_cpu *cpu, uint64_t bimm)
8963 unsigned rn = INSTR (9, 5);
8964 unsigned rd = INSTR (4, 0);
8966 aarch64_set_reg_u64 (cpu, rd, SP_OK,
8967 aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
8970 /* 32 bit and immediate set flags. */
8972 ands32 (sim_cpu *cpu, uint32_t bimm)
8974 unsigned rn = INSTR (9, 5);
8975 unsigned rd = INSTR (4, 0);
8977 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
8978 uint32_t value2 = bimm;
8980 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
8981 set_flags_for_binop32 (cpu, value1 & value2);
8984 /* 64 bit and immediate set flags. */
8986 ands64 (sim_cpu *cpu, uint64_t bimm)
8988 unsigned rn = INSTR (9, 5);
8989 unsigned rd = INSTR (4, 0);
8991 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
8992 uint64_t value2 = bimm;
8994 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
8995 set_flags_for_binop64 (cpu, value1 & value2);
8998 /* 32 bit exclusive or immediate. */
9000 eor32 (sim_cpu *cpu, uint32_t bimm)
9002 unsigned rn = INSTR (9, 5);
9003 unsigned rd = INSTR (4, 0);
9005 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9006 aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
9009 /* 64 bit exclusive or immediate. */
9011 eor64 (sim_cpu *cpu, uint64_t bimm)
9013 unsigned rn = INSTR (9, 5);
9014 unsigned rd = INSTR (4, 0);
9016 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9017 aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
9020 /* 32 bit or immediate. */
9022 orr32 (sim_cpu *cpu, uint32_t bimm)
9024 unsigned rn = INSTR (9, 5);
9025 unsigned rd = INSTR (4, 0);
9027 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9028 aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm);
9031 /* 64 bit or immediate. */
9033 orr64 (sim_cpu *cpu, uint64_t bimm)
9035 unsigned rn = INSTR (9, 5);
9036 unsigned rd = INSTR (4, 0);
9038 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9039 aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm);
9042 /* Logical shifted register.
9043 These allow an optional LSL, ASR, LSR or ROR to the second source
9044 register with a count up to the register bit count.
9045 N.B register args may not be SP. */
/* All of these share the same shape: rm in instr[20,16], rn in
   instr[9,5], rd in instr[4,0]; rm is passed through shifted32/64
   with the decoded Shift kind and count, then combined with rn.
   The *s_* variants additionally set NZCV via set_flags_for_binop*.  */
9047 /* 32 bit AND shifted register. */
9049 and32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9051 unsigned rm = INSTR (20, 16);
9052 unsigned rn = INSTR (9, 5);
9053 unsigned rd = INSTR (4, 0);
9056 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9057 & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9060 /* 64 bit AND shifted register. */
9062 and64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9064 unsigned rm = INSTR (20, 16);
9065 unsigned rn = INSTR (9, 5);
9066 unsigned rd = INSTR (4, 0);
9069 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9070 & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9073 /* 32 bit AND shifted register setting flags. */
9075 ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9077 unsigned rm = INSTR (20, 16);
9078 unsigned rn = INSTR (9, 5);
9079 unsigned rd = INSTR (4, 0);
9081 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9082 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9085 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9086 set_flags_for_binop32 (cpu, value1 & value2);
9089 /* 64 bit AND shifted register setting flags. */
9091 ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9093 unsigned rm = INSTR (20, 16);
9094 unsigned rn = INSTR (9, 5);
9095 unsigned rd = INSTR (4, 0);
9097 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9098 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9101 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9102 set_flags_for_binop64 (cpu, value1 & value2);
9105 /* 32 bit BIC shifted register. */
9107 bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9109 unsigned rm = INSTR (20, 16);
9110 unsigned rn = INSTR (9, 5);
9111 unsigned rd = INSTR (4, 0);
9114 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9115 & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9118 /* 64 bit BIC shifted register. */
9120 bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9122 unsigned rm = INSTR (20, 16);
9123 unsigned rn = INSTR (9, 5);
9124 unsigned rd = INSTR (4, 0);
9127 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9128 & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9131 /* 32 bit BIC shifted register setting flags. */
9133 bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9135 unsigned rm = INSTR (20, 16);
9136 unsigned rn = INSTR (9, 5);
9137 unsigned rd = INSTR (4, 0);
9139 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9140 uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9143 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9144 set_flags_for_binop32 (cpu, value1 & value2);
9147 /* 64 bit BIC shifted register setting flags. */
9149 bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9151 unsigned rm = INSTR (20, 16);
9152 unsigned rn = INSTR (9, 5);
9153 unsigned rd = INSTR (4, 0);
9155 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9156 uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9159 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9160 set_flags_for_binop64 (cpu, value1 & value2);
9163 /* 32 bit EON shifted register. */
9165 eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9167 unsigned rm = INSTR (20, 16);
9168 unsigned rn = INSTR (9, 5);
9169 unsigned rd = INSTR (4, 0);
9172 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9173 ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9176 /* 64 bit EON shifted register. */
9178 eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9180 unsigned rm = INSTR (20, 16);
9181 unsigned rn = INSTR (9, 5);
9182 unsigned rd = INSTR (4, 0);
9185 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9186 ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9189 /* 32 bit EOR shifted register. */
9191 eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9193 unsigned rm = INSTR (20, 16);
9194 unsigned rn = INSTR (9, 5);
9195 unsigned rd = INSTR (4, 0);
9198 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9199 ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9202 /* 64 bit EOR shifted register. */
9204 eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9206 unsigned rm = INSTR (20, 16);
9207 unsigned rn = INSTR (9, 5);
9208 unsigned rd = INSTR (4, 0);
9211 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9212 ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9215 /* 32 bit ORR shifted register. */
9217 orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9219 unsigned rm = INSTR (20, 16);
9220 unsigned rn = INSTR (9, 5);
9221 unsigned rd = INSTR (4, 0);
9224 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9225 | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9228 /* 64 bit ORR shifted register. */
9230 orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9232 unsigned rm = INSTR (20, 16);
9233 unsigned rn = INSTR (9, 5);
9234 unsigned rd = INSTR (4, 0);
9237 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9238 | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9241 /* 32 bit ORN shifted register. */
9243 orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9245 unsigned rm = INSTR (20, 16);
9246 unsigned rn = INSTR (9, 5);
9247 unsigned rd = INSTR (4, 0);
9250 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9251 | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9254 /* 64 bit ORN shifted register. */
9256 orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9258 unsigned rm = INSTR (20, 16);
9259 unsigned rn = INSTR (9, 5);
9260 unsigned rd = INSTR (4, 0);
9263 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9264 | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
/* Decode/dispatch for logical-immediate: looks up the replicated
   bitmask in LITable (indexed by N:immr:imms) and dispatches on
   size and op to the and/orr/eor/ands helpers above.  */
9268 dexLogicalImmediate (sim_cpu *cpu)
9270 /* assert instr[28,23] = 100100
9271 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9272 instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
9273 instr[22] = N : used to construct immediate mask
9279 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9280 uint32_t size = INSTR (31, 31);
9281 uint32_t N = INSTR (22, 22);
9282 /* uint32_t immr = INSTR (21, 16);. */
9283 /* uint32_t imms = INSTR (15, 10);. */
9284 uint32_t index = INSTR (22, 10);
9285 uint64_t bimm64 = LITable [index];
9286 uint32_t dispatch = INSTR (30, 29);
/* 32-bit forms truncate the decoded mask.  */
9296 uint32_t bimm = (uint32_t) bimm64;
9300 case 0: and32 (cpu, bimm); return;
9301 case 1: orr32 (cpu, bimm); return;
9302 case 2: eor32 (cpu, bimm); return;
9303 case 3: ands32 (cpu, bimm); return;
9310 case 0: and64 (cpu, bimm64); return;
9311 case 1: orr64 (cpu, bimm64); return;
9312 case 2: eor64 (cpu, bimm64); return;
9313 case 3: ands64 (cpu, bimm64); return;
/* Move wide immediate helpers (MOVZ/MOVN/MOVK).  */
9320 The uimm argument is a 16 bit value to be inserted into the
9321 target register the pos argument locates the 16 bit word in the
9322 dest register i.e. it is in {0, 1} for 32 bit and {0, 1, 2,
9324 N.B register arg may not be SP so it should be.
9325 accessed using the setGZRegisterXXX accessors. */
9327 /* 32 bit move 16 bit immediate zero remaining shorts. */
9329 movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9331 unsigned rd = INSTR (4, 0);
9333 aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
9336 /* 64 bit move 16 bit immediate zero remaining shorts. */
9338 movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9340 unsigned rd = INSTR (4, 0);
9342 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
9345 /* 32 bit move 16 bit immediate negated. */
9347 movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9349 unsigned rd = INSTR (4, 0);
9351 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
9354 /* 64 bit move 16 bit immediate negated. */
9356 movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9358 unsigned rd = INSTR (4, 0);
9361 (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
9362 ^ 0xffffffffffffffffULL));
/* MOVK: insert the 16-bit immediate at `pos', preserving the other
   halfwords of the current register value.  */
9365 /* 32 bit move 16 bit immediate keep remaining shorts. */
9367 movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9369 unsigned rd = INSTR (4, 0);
9370 uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9371 uint32_t value = val << (pos * 16);
9372 uint32_t mask = ~(0xffffU << (pos * 16));
9374 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9377 /* 64 bit move 16 it immediate keep remaining shorts. */
9379 movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9381 unsigned rd = INSTR (4, 0);
9382 uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
9383 uint64_t value = (uint64_t) val << (pos * 16);
9384 uint64_t mask = ~(0xffffULL << (pos * 16));
9386 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
/* Decode/dispatch for move-wide immediate: rejects the unallocated
   32-bit shifts > 16, then dispatches on size and op to the
   movn/movz/movk helpers above.  */
9390 dexMoveWideImmediate (sim_cpu *cpu)
9392 /* assert instr[28:23] = 100101
9393 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9394 instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
9395 instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
9396 instr[20,5] = uimm16
9399 /* N.B. the (multiple of 16) shift is applied by the called routine,
9400 we just pass the multiplier. */
9403 uint32_t size = INSTR (31, 31);
9404 uint32_t op = INSTR (30, 29);
9405 uint32_t shift = INSTR (22, 21);
9407 /* 32 bit can only shift 0 or 1 lot of 16.
9408 anything else is an unallocated instruction. */
9409 if (size == 0 && (shift > 1))
9415 imm = INSTR (20, 5);
/* 32-bit forms.  */
9420 movn32 (cpu, imm, shift);
9422 movz32 (cpu, imm, shift);
9424 movk32 (cpu, imm, shift);
/* 64-bit forms.  */
9429 movn64 (cpu, imm, shift);
9431 movz64 (cpu, imm, shift);
9433 movk64 (cpu, imm, shift);
9437 /* Bitfield operations.
9438 These take a pair of bit positions r and s which are in {0..31}
9439 or {0..63} depending on the instruction word size.
9440 N.B register args may not be SP. */
9442 /* OK, we start with ubfm which just needs to pick
9443 some bits out of source zero the rest and write
9444 the result to dest. Just need two logical shifts. */
9446 /* 32 bit bitfield move, left and right of affected zeroed
9447 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9449 ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9452 unsigned rn = INSTR (9, 5);
9453 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9455 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
9458 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9459 We want only bits s:xxx:r at the bottom of the word
9460 so we LSL bit s up to bit 31 i.e. by 31 - s
9461 and then we LSR to bring bit 31 down to bit s - r
9462 i.e. by 31 + r - s. */
9464 value >>= 31 + r - s;
9468 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
9469 We want only bits s:xxx:0 starting at it 31-(r-1)
9470 so we LSL bit s up to bit 31 i.e. by 31 - s
9471 and then we LSL to bring bit 31 down to 31-(r-1)+s
9472 i.e. by r - (s + 1). */
9474 value >>= r - (s + 1);
9478 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9481 /* 64 bit bitfield move, left and right of affected zeroed
9482 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9484 ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9487 unsigned rn = INSTR (9, 5);
9488 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9492 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
9493 We want only bits s:xxx:r at the bottom of the word.
9494 So we LSL bit s up to bit 63 i.e. by 63 - s
9495 and then we LSR to bring bit 63 down to bit s - r
9496 i.e. by 63 + r - s. */
9498 value >>= 63 + r - s;
9502 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
9503 We want only bits s:xxx:0 starting at it 63-(r-1).
9504 So we LSL bit s up to bit 63 i.e. by 63 - s
9505 and then we LSL to bring bit 63 down to 63-(r-1)+s
9506 i.e. by r - (s + 1). */
9508 value >>= r - (s + 1);
9512 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9515 /* The signed versions need to insert sign bits
9516 on the left of the inserted bit field. so we do
9517 much the same as the unsigned version except we
9518 use an arithmetic shift right -- this just means
9519 we need to operate on signed values. */
9521 /* 32 bit bitfield move, left of affected sign-extended, right zeroed. */
9522 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9524 sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9527 unsigned rn = INSTR (9, 5);
9528 /* as per ubfm32 but use an ASR instead of an LSR. */
9529 int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);
9534 value >>= 31 + r - s;
9539 value >>= r - (s + 1);
9543 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
9546 /* 64 bit bitfield move, left of affected sign-extended, right zeroed. */
9547 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9549 sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9552 unsigned rn = INSTR (9, 5);
9553 /* acpu per ubfm but use an ASR instead of an LSR. */
9554 int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);
9559 value >>= 63 + r - s;
9564 value >>= r - (s + 1);
9568 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
9571 /* Finally, these versions leave non-affected bits
9572 as is. so we need to generate the bits as per
9573 ubfm and also generate a mask to pick the
9574 bits from the original and computed values. */
9576 /* 32 bit bitfield move, non-affected bits left as is.
9577 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9579 bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9581 unsigned rn = INSTR (9, 5);
9582 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9587 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
9590 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9591 We want only bits s:xxx:r at the bottom of the word
9592 so we LSL bit s up to bit 31 i.e. by 31 - s
9593 and then we LSR to bring bit 31 down to bit s - r
9594 i.e. by 31 + r - s. */
9596 value >>= 31 + r - s;
9597 /* the mask must include the same bits. */
9599 mask >>= 31 + r - s;
9603 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
9604 We want only bits s:xxx:0 starting at it 31-(r-1)
9605 so we LSL bit s up to bit 31 i.e. by 31 - s
9606 and then we LSL to bring bit 31 down to 31-(r-1)+s
9607 i.e. by r - (s + 1). */
9609 value >>= r - (s + 1);
9610 /* The mask must include the same bits. */
9612 mask >>= r - (s + 1);
9616 value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9622 (cpu, rd, NO_SP, (aarch64_get_reg_u32 (cpu, rd, NO_SP) & ~mask) | value);
9625 /* 64 bit bitfield move, non-affected bits left as is.
9626 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9628 bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9631 unsigned rn = INSTR (9, 5);
9632 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9633 uint64_t mask = 0xffffffffffffffffULL;
9637 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
9638 We want only bits s:xxx:r at the bottom of the word
9639 so we LSL bit s up to bit 63 i.e. by 63 - s
9640 and then we LSR to bring bit 63 down to bit s - r
9641 i.e. by 63 + r - s. */
9643 value >>= 63 + r - s;
9644 /* The mask must include the same bits. */
9646 mask >>= 63 + r - s;
9650 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
9651 We want only bits s:xxx:0 starting at it 63-(r-1)
9652 so we LSL bit s up to bit 63 i.e. by 63 - s
9653 and then we LSL to bring bit 63 down to 63-(r-1)+s
9654 i.e. by r - (s + 1). */
9656 value >>= r - (s + 1);
9657 /* The mask must include the same bits. */
9659 mask >>= r - (s + 1);
9664 (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
9668 dexBitfieldImmediate (sim_cpu *cpu)
9670 /* assert instr[28:23] = 100110
9671 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9672 instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
9673 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
9674 instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
9675 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
9679 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9682 uint32_t size = INSTR (31, 31);
9683 uint32_t N = INSTR (22, 22);
9684 /* 32 bit operations must have immr[5] = 0 and imms[5] = 0. */
9685 /* or else we have an UNALLOC. */
9686 uint32_t immr = INSTR (21, 16);
9691 if (!size && uimm (immr, 5, 5))
9694 imms = INSTR (15, 10);
9695 if (!size && uimm (imms, 5, 5))
9698 /* Switch on combined size and op. */
9699 dispatch = INSTR (31, 29);
9702 case 0: sbfm32 (cpu, immr, imms); return;
9703 case 1: bfm32 (cpu, immr, imms); return;
9704 case 2: ubfm32 (cpu, immr, imms); return;
9705 case 4: sbfm (cpu, immr, imms); return;
9706 case 5: bfm (cpu, immr, imms); return;
9707 case 6: ubfm (cpu, immr, imms); return;
9708 default: HALT_UNALLOC;
9713 do_EXTR_32 (sim_cpu *cpu)
9715 /* instr[31:21] = 00010011100
9717 instr[15,10] = imms : 0xxxxx for 32 bit
9720 unsigned rm = INSTR (20, 16);
9721 unsigned imms = INSTR (15, 10) & 31;
9722 unsigned rn = INSTR ( 9, 5);
9723 unsigned rd = INSTR ( 4, 0);
9727 val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
9729 val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9730 val2 <<= (32 - imms);
9732 aarch64_set_reg_u64 (cpu, rd, NO_SP, val1 | val2);
9736 do_EXTR_64 (sim_cpu *cpu)
9738 /* instr[31:21] = 10010011100
9743 unsigned rm = INSTR (20, 16);
9744 unsigned imms = INSTR (15, 10) & 63;
9745 unsigned rn = INSTR ( 9, 5);
9746 unsigned rd = INSTR ( 4, 0);
9749 val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
9751 val |= (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms));
9753 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
9757 dexExtractImmediate (sim_cpu *cpu)
9759 /* assert instr[28:23] = 100111
9760 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9761 instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
9762 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC
9763 instr[21] = op0 : must be 0 or UNALLOC
9765 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
9769 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9770 /* 64 bit operations must have N = 1 or else we have an UNALLOC. */
9772 uint32_t size = INSTR (31, 31);
9773 uint32_t N = INSTR (22, 22);
9774 /* 32 bit operations must have imms[5] = 0
9775 or else we have an UNALLOC. */
9776 uint32_t imms = INSTR (15, 10);
9781 if (!size && uimm (imms, 5, 5))
9784 /* Switch on combined size and op. */
9785 dispatch = INSTR (31, 29);
9790 else if (dispatch == 4)
9793 else if (dispatch == 1)
9800 dexDPImm (sim_cpu *cpu)
9802 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
9803 assert group == GROUP_DPIMM_1000 || grpoup == GROUP_DPIMM_1001
9804 bits [25,23] of a DPImm are the secondary dispatch vector. */
9805 uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));
9809 case DPIMM_PCADR_000:
9810 case DPIMM_PCADR_001:
9811 dexPCRelAddressing (cpu);
9814 case DPIMM_ADDSUB_010:
9815 case DPIMM_ADDSUB_011:
9816 dexAddSubtractImmediate (cpu);
9820 dexLogicalImmediate (cpu);
9824 dexMoveWideImmediate (cpu);
9827 case DPIMM_BITF_110:
9828 dexBitfieldImmediate (cpu);
9831 case DPIMM_EXTR_111:
9832 dexExtractImmediate (cpu);
9836 /* Should never reach here. */
9842 dexLoadUnscaledImmediate (sim_cpu *cpu)
9844 /* instr[29,24] == 111_00
9850 instr[20,12] = simm9
9851 instr[9,5] = rn may be SP. */
9852 /* unsigned rt = INSTR (4, 0); */
9853 uint32_t V = INSTR (26, 26);
9854 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
9855 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
9859 /* GReg operations. */
9862 case 0: sturb (cpu, imm); return;
9863 case 1: ldurb32 (cpu, imm); return;
9864 case 2: ldursb64 (cpu, imm); return;
9865 case 3: ldursb32 (cpu, imm); return;
9866 case 4: sturh (cpu, imm); return;
9867 case 5: ldurh32 (cpu, imm); return;
9868 case 6: ldursh64 (cpu, imm); return;
9869 case 7: ldursh32 (cpu, imm); return;
9870 case 8: stur32 (cpu, imm); return;
9871 case 9: ldur32 (cpu, imm); return;
9872 case 10: ldursw (cpu, imm); return;
9873 case 12: stur64 (cpu, imm); return;
9874 case 13: ldur64 (cpu, imm); return;
9887 /* FReg operations. */
9890 case 2: fsturq (cpu, imm); return;
9891 case 3: fldurq (cpu, imm); return;
9892 case 8: fsturs (cpu, imm); return;
9893 case 9: fldurs (cpu, imm); return;
9894 case 12: fsturd (cpu, imm); return;
9895 case 13: fldurd (cpu, imm); return;
9897 case 0: /* STUR 8 bit FP. */
9898 case 1: /* LDUR 8 bit FP. */
9899 case 4: /* STUR 16 bit FP. */
9900 case 5: /* LDUR 8 bit FP. */
9914 /* N.B. A preliminary note regarding all the ldrs<x>32
9917 The signed value loaded by these instructions is cast to unsigned
9918 before being assigned to aarch64_get_reg_u64 (cpu, N) i.e. to the
9919 64 bit element of the GReg union. this performs a 32 bit sign extension
9920 (as required) but avoids 64 bit sign extension, thus ensuring that the
9921 top half of the register word is zero. this is what the spec demands
9922 when a 32 bit load occurs. */
9924 /* 32 bit load sign-extended byte scaled unsigned 12 bit. */
9926 ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
9928 unsigned int rn = INSTR (9, 5);
9929 unsigned int rt = INSTR (4, 0);
9931 /* The target register may not be SP but the source may be
9932 there is no scaling required for a byte load. */
9933 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
9934 aarch64_set_reg_u64 (cpu, rt, NO_SP,
9935 (int64_t) aarch64_get_mem_s8 (cpu, address));
9938 /* 32 bit load sign-extended byte scaled or unscaled zero-
9939 or sign-extended 32-bit register offset. */
9941 ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
9943 unsigned int rm = INSTR (20, 16);
9944 unsigned int rn = INSTR (9, 5);
9945 unsigned int rt = INSTR (4, 0);
9947 /* rn may reference SP, rm and rt must reference ZR. */
9949 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9950 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9953 /* There is no scaling required for a byte load. */
9955 (cpu, rt, NO_SP, (int64_t) aarch64_get_mem_s8 (cpu, address
9959 /* 32 bit load sign-extended byte unscaled signed 9 bit with
9960 pre- or post-writeback. */
9962 ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
9965 unsigned int rn = INSTR (9, 5);
9966 unsigned int rt = INSTR (4, 0);
9968 if (rn == rt && wb != NoWriteBack)
9971 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9976 aarch64_set_reg_u64 (cpu, rt, NO_SP,
9977 (int64_t) aarch64_get_mem_s8 (cpu, address));
9982 if (wb != NoWriteBack)
9983 aarch64_set_reg_u64 (cpu, rn, NO_SP, address);
9986 /* 8 bit store scaled. */
9988 fstrb_abs (sim_cpu *cpu, uint32_t offset)
9990 unsigned st = INSTR (4, 0);
9991 unsigned rn = INSTR (9, 5);
9993 aarch64_set_mem_u8 (cpu,
9994 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
9995 aarch64_get_vec_u8 (cpu, st, 0));
9998 /* 8 bit store scaled or unscaled zero- or
9999 sign-extended 8-bit register offset. */
10001 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10003 unsigned rm = INSTR (20, 16);
10004 unsigned rn = INSTR (9, 5);
10005 unsigned st = INSTR (4, 0);
10007 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10008 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10010 uint64_t displacement = scaling == Scaled ? extended : 0;
10013 (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0));
10016 /* 16 bit store scaled. */
10018 fstrh_abs (sim_cpu *cpu, uint32_t offset)
10020 unsigned st = INSTR (4, 0);
10021 unsigned rn = INSTR (9, 5);
10023 aarch64_set_mem_u16
10025 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16),
10026 aarch64_get_vec_u16 (cpu, st, 0));
10029 /* 16 bit store scaled or unscaled zero-
10030 or sign-extended 16-bit register offset. */
10032 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10034 unsigned rm = INSTR (20, 16);
10035 unsigned rn = INSTR (9, 5);
10036 unsigned st = INSTR (4, 0);
10038 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10039 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10041 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
10043 aarch64_set_mem_u16
10044 (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0));
10047 /* 32 bit store scaled unsigned 12 bit. */
10049 fstrs_abs (sim_cpu *cpu, uint32_t offset)
10051 unsigned st = INSTR (4, 0);
10052 unsigned rn = INSTR (9, 5);
10054 aarch64_set_mem_u32
10056 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32),
10057 aarch64_get_vec_u32 (cpu, st, 0));
10060 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
10062 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10064 unsigned rn = INSTR (9, 5);
10065 unsigned st = INSTR (4, 0);
10067 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10072 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0));
10077 if (wb != NoWriteBack)
10078 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10081 /* 32 bit store scaled or unscaled zero-
10082 or sign-extended 32-bit register offset. */
10084 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10086 unsigned rm = INSTR (20, 16);
10087 unsigned rn = INSTR (9, 5);
10088 unsigned st = INSTR (4, 0);
10090 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10091 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10093 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
10095 aarch64_set_mem_u32
10096 (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0));
10099 /* 64 bit store scaled unsigned 12 bit. */
10101 fstrd_abs (sim_cpu *cpu, uint32_t offset)
10103 unsigned st = INSTR (4, 0);
10104 unsigned rn = INSTR (9, 5);
10106 aarch64_set_mem_u64
10108 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64),
10109 aarch64_get_vec_u64 (cpu, st, 0));
10112 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
10114 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10116 unsigned rn = INSTR (9, 5);
10117 unsigned st = INSTR (4, 0);
10119 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10124 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0));
10129 if (wb != NoWriteBack)
10130 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10133 /* 64 bit store scaled or unscaled zero-
10134 or sign-extended 32-bit register offset. */
10136 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10138 unsigned rm = INSTR (20, 16);
10139 unsigned rn = INSTR (9, 5);
10140 unsigned st = INSTR (4, 0);
10142 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10143 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10145 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
10147 aarch64_set_mem_u64
10148 (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0));
10151 /* 128 bit store scaled unsigned 12 bit. */
10153 fstrq_abs (sim_cpu *cpu, uint32_t offset)
10156 unsigned st = INSTR (4, 0);
10157 unsigned rn = INSTR (9, 5);
10160 aarch64_get_FP_long_double (cpu, st, & a);
10162 addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
10163 aarch64_set_mem_long_double (cpu, addr, a);
10166 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */
10168 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10171 unsigned rn = INSTR (9, 5);
10172 unsigned st = INSTR (4, 0);
10173 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10178 aarch64_get_FP_long_double (cpu, st, & a);
10179 aarch64_set_mem_long_double (cpu, address, a);
10184 if (wb != NoWriteBack)
10185 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10188 /* 128 bit store scaled or unscaled zero-
10189 or sign-extended 32-bit register offset. */
10191 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10193 unsigned rm = INSTR (20, 16);
10194 unsigned rn = INSTR (9, 5);
10195 unsigned st = INSTR (4, 0);
10197 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10198 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10200 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
10204 aarch64_get_FP_long_double (cpu, st, & a);
10205 aarch64_set_mem_long_double (cpu, address + displacement, a);
10209 dexLoadImmediatePrePost (sim_cpu *cpu)
10211 /* instr[31,30] = size
10217 instr[20,12] = simm9
10218 instr[11] = wb : 0 ==> Post, 1 ==> Pre
10220 instr[9,5] = Rn may be SP.
10223 uint32_t V = INSTR (26, 26);
10224 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10225 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10226 WriteBack wb = INSTR (11, 11);
10230 /* GReg operations. */
10233 case 0: strb_wb (cpu, imm, wb); return;
10234 case 1: ldrb32_wb (cpu, imm, wb); return;
10235 case 2: ldrsb_wb (cpu, imm, wb); return;
10236 case 3: ldrsb32_wb (cpu, imm, wb); return;
10237 case 4: strh_wb (cpu, imm, wb); return;
10238 case 5: ldrh32_wb (cpu, imm, wb); return;
10239 case 6: ldrsh64_wb (cpu, imm, wb); return;
10240 case 7: ldrsh32_wb (cpu, imm, wb); return;
10241 case 8: str32_wb (cpu, imm, wb); return;
10242 case 9: ldr32_wb (cpu, imm, wb); return;
10243 case 10: ldrsw_wb (cpu, imm, wb); return;
10244 case 12: str_wb (cpu, imm, wb); return;
10245 case 13: ldr_wb (cpu, imm, wb); return;
10255 /* FReg operations. */
10258 case 2: fstrq_wb (cpu, imm, wb); return;
10259 case 3: fldrq_wb (cpu, imm, wb); return;
10260 case 8: fstrs_wb (cpu, imm, wb); return;
10261 case 9: fldrs_wb (cpu, imm, wb); return;
10262 case 12: fstrd_wb (cpu, imm, wb); return;
10263 case 13: fldrd_wb (cpu, imm, wb); return;
10265 case 0: /* STUR 8 bit FP. */
10266 case 1: /* LDUR 8 bit FP. */
10267 case 4: /* STUR 16 bit FP. */
10268 case 5: /* LDUR 8 bit FP. */
10283 dexLoadRegisterOffset (sim_cpu *cpu)
10285 /* instr[31,30] = size
10292 instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10293 110 ==> SXTW, 111 ==> SXTX,
10298 instr[4,0] = rt. */
10300 uint32_t V = INSTR (26, 26);
10301 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10302 Scaling scale = INSTR (12, 12);
10303 Extension extensionType = INSTR (15, 13);
10305 /* Check for illegal extension types. */
10306 if (uimm (extensionType, 1, 1) == 0)
10309 if (extensionType == UXTX || extensionType == SXTX)
10310 extensionType = NoExtension;
10314 /* GReg operations. */
10317 case 0: strb_scale_ext (cpu, scale, extensionType); return;
10318 case 1: ldrb32_scale_ext (cpu, scale, extensionType); return;
10319 case 2: ldrsb_scale_ext (cpu, scale, extensionType); return;
10320 case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return;
10321 case 4: strh_scale_ext (cpu, scale, extensionType); return;
10322 case 5: ldrh32_scale_ext (cpu, scale, extensionType); return;
10323 case 6: ldrsh_scale_ext (cpu, scale, extensionType); return;
10324 case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return;
10325 case 8: str32_scale_ext (cpu, scale, extensionType); return;
10326 case 9: ldr32_scale_ext (cpu, scale, extensionType); return;
10327 case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10328 case 12: str_scale_ext (cpu, scale, extensionType); return;
10329 case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10330 case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10339 /* FReg operations. */
10342 case 1: /* LDUR 8 bit FP. */
10344 case 3: fldrq_scale_ext (cpu, scale, extensionType); return;
10345 case 5: /* LDUR 8 bit FP. */
10347 case 9: fldrs_scale_ext (cpu, scale, extensionType); return;
10348 case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
10350 case 0: fstrb_scale_ext (cpu, scale, extensionType); return;
10351 case 2: fstrq_scale_ext (cpu, scale, extensionType); return;
10352 case 4: fstrh_scale_ext (cpu, scale, extensionType); return;
10353 case 8: fstrs_scale_ext (cpu, scale, extensionType); return;
10354 case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
10368 dexLoadUnsignedImmediate (sim_cpu *cpu)
10370 /* instr[29,24] == 111_01
10371 instr[31,30] = size
10374 instr[21,10] = uimm12 : unsigned immediate offset
10375 instr[9,5] = rn may be SP.
10376 instr[4,0] = rt. */
10378 uint32_t V = INSTR (26,26);
10379 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10380 uint32_t imm = INSTR (21, 10);
10384 /* GReg operations. */
10387 case 0: strb_abs (cpu, imm); return;
10388 case 1: ldrb32_abs (cpu, imm); return;
10389 case 2: ldrsb_abs (cpu, imm); return;
10390 case 3: ldrsb32_abs (cpu, imm); return;
10391 case 4: strh_abs (cpu, imm); return;
10392 case 5: ldrh32_abs (cpu, imm); return;
10393 case 6: ldrsh_abs (cpu, imm); return;
10394 case 7: ldrsh32_abs (cpu, imm); return;
10395 case 8: str32_abs (cpu, imm); return;
10396 case 9: ldr32_abs (cpu, imm); return;
10397 case 10: ldrsw_abs (cpu, imm); return;
10398 case 12: str_abs (cpu, imm); return;
10399 case 13: ldr_abs (cpu, imm); return;
10400 case 14: prfm_abs (cpu, imm); return;
10409 /* FReg operations. */
10412 case 0: fstrb_abs (cpu, imm); return;
10413 case 4: fstrh_abs (cpu, imm); return;
10414 case 8: fstrs_abs (cpu, imm); return;
10415 case 12: fstrd_abs (cpu, imm); return;
10416 case 2: fstrq_abs (cpu, imm); return;
10418 case 1: fldrb_abs (cpu, imm); return;
10419 case 5: fldrh_abs (cpu, imm); return;
10420 case 9: fldrs_abs (cpu, imm); return;
10421 case 13: fldrd_abs (cpu, imm); return;
10422 case 3: fldrq_abs (cpu, imm); return;
10436 dexLoadExclusive (sim_cpu *cpu)
10438 /* assert instr[29:24] = 001000;
10439 instr[31,30] = size
10440 instr[23] = 0 if exclusive
10441 instr[22] = L : 1 if load, 0 if store
10442 instr[21] = 1 if pair
10444 instr[15] = o0 : 1 if ordered
10447 instr[4.0] = Rt. */
10449 switch (INSTR (22, 21))
10451 case 2: ldxr (cpu); return;
10452 case 0: stxr (cpu); return;
10458 dexLoadOther (sim_cpu *cpu)
10462 /* instr[29,25] = 111_0
10463 instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
10464 instr[21:11,10] is the secondary dispatch. */
10465 if (INSTR (24, 24))
10467 dexLoadUnsignedImmediate (cpu);
10471 dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10));
10474 case 0: dexLoadUnscaledImmediate (cpu); return;
10475 case 1: dexLoadImmediatePrePost (cpu); return;
10476 case 3: dexLoadImmediatePrePost (cpu); return;
10477 case 6: dexLoadRegisterOffset (cpu); return;
10489 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10491 unsigned rn = INSTR (14, 10);
10492 unsigned rd = INSTR (9, 5);
10493 unsigned rm = INSTR (4, 0);
10494 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10496 if ((rn == rd || rm == rd) && wb != NoWriteBack)
10497 HALT_UNALLOC; /* ??? */
10504 aarch64_set_mem_u32 (cpu, address,
10505 aarch64_get_reg_u32 (cpu, rm, NO_SP));
10506 aarch64_set_mem_u32 (cpu, address + 4,
10507 aarch64_get_reg_u32 (cpu, rn, NO_SP));
10512 if (wb != NoWriteBack)
10513 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10517 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10519 unsigned rn = INSTR (14, 10);
10520 unsigned rd = INSTR (9, 5);
10521 unsigned rm = INSTR (4, 0);
10522 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10524 if ((rn == rd || rm == rd) && wb != NoWriteBack)
10525 HALT_UNALLOC; /* ??? */
10532 aarch64_set_mem_u64 (cpu, address,
10533 aarch64_get_reg_u64 (cpu, rm, NO_SP));
10534 aarch64_set_mem_u64 (cpu, address + 8,
10535 aarch64_get_reg_u64 (cpu, rn, NO_SP));
10540 if (wb != NoWriteBack)
10541 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10545 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10547 unsigned rn = INSTR (14, 10);
10548 unsigned rd = INSTR (9, 5);
10549 unsigned rm = INSTR (4, 0);
10550 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10552 /* Treat this as unalloc to make sure we don't do it. */
10561 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address));
10562 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4));
10567 if (wb != NoWriteBack)
10568 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10572 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10574 unsigned rn = INSTR (14, 10);
10575 unsigned rd = INSTR (9, 5);
10576 unsigned rm = INSTR (4, 0);
10577 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10579 /* Treat this as unalloc to make sure we don't do it. */
10588 aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address));
10589 aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4));
10594 if (wb != NoWriteBack)
10595 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10599 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10601 unsigned rn = INSTR (14, 10);
10602 unsigned rd = INSTR (9, 5);
10603 unsigned rm = INSTR (4, 0);
10604 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10606 /* Treat this as unalloc to make sure we don't do it. */
10615 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address));
10616 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8));
10621 if (wb != NoWriteBack)
10622 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10626 dex_load_store_pair_gr (sim_cpu *cpu)
10628 /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
10629 instr[29,25] = instruction encoding: 101_0
10630 instr[26] = V : 1 if fp 0 if gp
10631 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
10632 instr[22] = load/store (1=> load)
10633 instr[21,15] = signed, scaled, offset
10636 instr[ 4, 0] = Rm. */
10638 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
10639 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
10643 case 2: store_pair_u32 (cpu, offset, Post); return;
10644 case 3: load_pair_u32 (cpu, offset, Post); return;
10645 case 4: store_pair_u32 (cpu, offset, NoWriteBack); return;
10646 case 5: load_pair_u32 (cpu, offset, NoWriteBack); return;
10647 case 6: store_pair_u32 (cpu, offset, Pre); return;
10648 case 7: load_pair_u32 (cpu, offset, Pre); return;
10650 case 11: load_pair_s32 (cpu, offset, Post); return;
10651 case 13: load_pair_s32 (cpu, offset, NoWriteBack); return;
10652 case 15: load_pair_s32 (cpu, offset, Pre); return;
10654 case 18: store_pair_u64 (cpu, offset, Post); return;
10655 case 19: load_pair_u64 (cpu, offset, Post); return;
10656 case 20: store_pair_u64 (cpu, offset, NoWriteBack); return;
10657 case 21: load_pair_u64 (cpu, offset, NoWriteBack); return;
10658 case 22: store_pair_u64 (cpu, offset, Pre); return;
10659 case 23: load_pair_u64 (cpu, offset, Pre); return;
10667 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
10669 unsigned rn = INSTR (14, 10);
10670 unsigned rd = INSTR (9, 5);
10671 unsigned rm = INSTR (4, 0);
10672 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10679 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0));
10680 aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0));
10685 if (wb != NoWriteBack)
10686 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10690 store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
10692 unsigned rn = INSTR (14, 10);
10693 unsigned rd = INSTR (9, 5);
10694 unsigned rm = INSTR (4, 0);
10695 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10702 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0));
10703 aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0));
10708 if (wb != NoWriteBack)
10709 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10713 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
10716 unsigned rn = INSTR (14, 10);
10717 unsigned rd = INSTR (9, 5);
10718 unsigned rm = INSTR (4, 0);
10719 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10726 aarch64_get_FP_long_double (cpu, rm, & a);
10727 aarch64_set_mem_long_double (cpu, address, a);
10728 aarch64_get_FP_long_double (cpu, rn, & a);
10729 aarch64_set_mem_long_double (cpu, address + 16, a);
10734 if (wb != NoWriteBack)
10735 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10739 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
10741 unsigned rn = INSTR (14, 10);
10742 unsigned rd = INSTR (9, 5);
10743 unsigned rm = INSTR (4, 0);
10744 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10754 aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address));
10755 aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4));
10760 if (wb != NoWriteBack)
10761 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10765 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
10767 unsigned rn = INSTR (14, 10);
10768 unsigned rd = INSTR (9, 5);
10769 unsigned rm = INSTR (4, 0);
10770 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10780 aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address));
10781 aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8));
10786 if (wb != NoWriteBack)
10787 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10791 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
10794 unsigned rn = INSTR (14, 10);
10795 unsigned rd = INSTR (9, 5);
10796 unsigned rm = INSTR (4, 0);
10797 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10807 aarch64_get_mem_long_double (cpu, address, & a);
10808 aarch64_set_FP_long_double (cpu, rm, a);
10809 aarch64_get_mem_long_double (cpu, address + 16, & a);
10810 aarch64_set_FP_long_double (cpu, rn, a);
10815 if (wb != NoWriteBack)
10816 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10820 dex_load_store_pair_fp (sim_cpu *cpu)
10822 /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
10823 instr[29,25] = instruction encoding
10824 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
10825 instr[22] = load/store (1=> load)
10826 instr[21,15] = signed, scaled, offset
10829 instr[ 4, 0] = Rm */
10831 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
10832 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
10836 case 2: store_pair_float (cpu, offset, Post); return;
10837 case 3: load_pair_float (cpu, offset, Post); return;
10838 case 4: store_pair_float (cpu, offset, NoWriteBack); return;
10839 case 5: load_pair_float (cpu, offset, NoWriteBack); return;
10840 case 6: store_pair_float (cpu, offset, Pre); return;
10841 case 7: load_pair_float (cpu, offset, Pre); return;
10843 case 10: store_pair_double (cpu, offset, Post); return;
10844 case 11: load_pair_double (cpu, offset, Post); return;
10845 case 12: store_pair_double (cpu, offset, NoWriteBack); return;
10846 case 13: load_pair_double (cpu, offset, NoWriteBack); return;
10847 case 14: store_pair_double (cpu, offset, Pre); return;
10848 case 15: load_pair_double (cpu, offset, Pre); return;
10850 case 18: store_pair_long_double (cpu, offset, Post); return;
10851 case 19: load_pair_long_double (cpu, offset, Post); return;
10852 case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
10853 case 21: load_pair_long_double (cpu, offset, NoWriteBack); return;
10854 case 22: store_pair_long_double (cpu, offset, Pre); return;
10855 case 23: load_pair_long_double (cpu, offset, Pre); return;
/* Return the vector register number O places after V, wrapping modulo
   the 32 vector registers.  LDn/STn address consecutive registers and
   wrap from V31 back to V0.  The previous mask (0x3F) allowed values
   32..34 (vd can be 31, O up to 3), indexing past the 32-entry bank.  */
static inline unsigned
vec_reg (unsigned v, unsigned o)
{
  return (v + o) & 0x1F;
}
10868 /* Load multiple N-element structures to N consecutive registers. */
10870 vec_load (sim_cpu *cpu, uint64_t address, unsigned N)
10872 int all = INSTR (30, 30);
10873 unsigned size = INSTR (11, 10);
10874 unsigned vd = INSTR (4, 0);
10879 case 0: /* 8-bit operations. */
10881 for (i = 0; i < (16 * N); i++)
10882 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15,
10883 aarch64_get_mem_u8 (cpu, address + i));
10885 for (i = 0; i < (8 * N); i++)
10886 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7,
10887 aarch64_get_mem_u8 (cpu, address + i));
10890 case 1: /* 16-bit operations. */
10892 for (i = 0; i < (8 * N); i++)
10893 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7,
10894 aarch64_get_mem_u16 (cpu, address + i * 2));
10896 for (i = 0; i < (4 * N); i++)
10897 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3,
10898 aarch64_get_mem_u16 (cpu, address + i * 2));
10901 case 2: /* 32-bit operations. */
10903 for (i = 0; i < (4 * N); i++)
10904 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3,
10905 aarch64_get_mem_u32 (cpu, address + i * 4));
10907 for (i = 0; i < (2 * N); i++)
10908 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1,
10909 aarch64_get_mem_u32 (cpu, address + i * 4));
10912 case 3: /* 64-bit operations. */
10914 for (i = 0; i < (2 * N); i++)
10915 aarch64_set_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1,
10916 aarch64_get_mem_u64 (cpu, address + i * 8));
10918 for (i = 0; i < N; i++)
10919 aarch64_set_vec_u64 (cpu, vec_reg (vd, i), 0,
10920 aarch64_get_mem_u64 (cpu, address + i * 8));
10925 /* LD4: load multiple 4-element to four consecutive registers. */
10927 LD4 (sim_cpu *cpu, uint64_t address)
10929 vec_load (cpu, address, 4);
10932 /* LD3: load multiple 3-element structures to three consecutive registers. */
10934 LD3 (sim_cpu *cpu, uint64_t address)
10936 vec_load (cpu, address, 3);
10939 /* LD2: load multiple 2-element structures to two consecutive registers. */
10941 LD2 (sim_cpu *cpu, uint64_t address)
10943 vec_load (cpu, address, 2);
10946 /* Load multiple 1-element structures into one register. */
10948 LD1_1 (sim_cpu *cpu, uint64_t address)
10950 int all = INSTR (30, 30);
10951 unsigned size = INSTR (11, 10);
10952 unsigned vd = INSTR (4, 0);
10958 /* LD1 {Vd.16b}, addr, #16 */
10959 /* LD1 {Vd.8b}, addr, #8 */
10960 for (i = 0; i < (all ? 16 : 8); i++)
10961 aarch64_set_vec_u8 (cpu, vd, i,
10962 aarch64_get_mem_u8 (cpu, address + i));
10966 /* LD1 {Vd.8h}, addr, #16 */
10967 /* LD1 {Vd.4h}, addr, #8 */
10968 for (i = 0; i < (all ? 8 : 4); i++)
10969 aarch64_set_vec_u16 (cpu, vd, i,
10970 aarch64_get_mem_u16 (cpu, address + i * 2));
10974 /* LD1 {Vd.4s}, addr, #16 */
10975 /* LD1 {Vd.2s}, addr, #8 */
10976 for (i = 0; i < (all ? 4 : 2); i++)
10977 aarch64_set_vec_u32 (cpu, vd, i,
10978 aarch64_get_mem_u32 (cpu, address + i * 4));
10982 /* LD1 {Vd.2d}, addr, #16 */
10983 /* LD1 {Vd.1d}, addr, #8 */
10984 for (i = 0; i < (all ? 2 : 1); i++)
10985 aarch64_set_vec_u64 (cpu, vd, i,
10986 aarch64_get_mem_u64 (cpu, address + i * 8));
10991 /* Load multiple 1-element structures into two registers. */
10993 LD1_2 (sim_cpu *cpu, uint64_t address)
10995 /* FIXME: This algorithm is *exactly* the same as the LD2 version.
10996 So why have two different instructions ? There must be something
10997 wrong somewhere. */
10998 vec_load (cpu, address, 2);
11001 /* Load multiple 1-element structures into three registers. */
11003 LD1_3 (sim_cpu *cpu, uint64_t address)
11005 /* FIXME: This algorithm is *exactly* the same as the LD3 version.
11006 So why have two different instructions ? There must be something
11007 wrong somewhere. */
11008 vec_load (cpu, address, 3);
11011 /* Load multiple 1-element structures into four registers. */
11013 LD1_4 (sim_cpu *cpu, uint64_t address)
11015 /* FIXME: This algorithm is *exactly* the same as the LD4 version.
11016 So why have two different instructions ? There must be something
11017 wrong somewhere. */
11018 vec_load (cpu, address, 4);
11021 /* Store multiple N-element structures to N consecutive registers. */
11023 vec_store (sim_cpu *cpu, uint64_t address, unsigned N)
11025 int all = INSTR (30, 30);
11026 unsigned size = INSTR (11, 10);
11027 unsigned vd = INSTR (4, 0);
11032 case 0: /* 8-bit operations. */
11034 for (i = 0; i < (16 * N); i++)
11037 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15));
11039 for (i = 0; i < (8 * N); i++)
11042 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7));
11045 case 1: /* 16-bit operations. */
11047 for (i = 0; i < (8 * N); i++)
11048 aarch64_set_mem_u16
11049 (cpu, address + i * 2,
11050 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7));
11052 for (i = 0; i < (4 * N); i++)
11053 aarch64_set_mem_u16
11054 (cpu, address + i * 2,
11055 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3));
11058 case 2: /* 32-bit operations. */
11060 for (i = 0; i < (4 * N); i++)
11061 aarch64_set_mem_u32
11062 (cpu, address + i * 4,
11063 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3));
11065 for (i = 0; i < (2 * N); i++)
11066 aarch64_set_mem_u32
11067 (cpu, address + i * 4,
11068 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1));
11071 case 3: /* 64-bit operations. */
11073 for (i = 0; i < (2 * N); i++)
11074 aarch64_set_mem_u64
11075 (cpu, address + i * 8,
11076 aarch64_get_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1));
11078 for (i = 0; i < N; i++)
11079 aarch64_set_mem_u64
11080 (cpu, address + i * 8,
11081 aarch64_get_vec_u64 (cpu, vec_reg (vd, i), 0));
11086 /* Store multiple 4-element structure to four consecutive registers. */
11088 ST4 (sim_cpu *cpu, uint64_t address)
11090 vec_store (cpu, address, 4);
11093 /* Store multiple 3-element structures to three consecutive registers. */
11095 ST3 (sim_cpu *cpu, uint64_t address)
11097 vec_store (cpu, address, 3);
11100 /* Store multiple 2-element structures to two consecutive registers. */
11102 ST2 (sim_cpu *cpu, uint64_t address)
11104 vec_store (cpu, address, 2);
11107 /* Store multiple 1-element structures into one register. */
11109 ST1_1 (sim_cpu *cpu, uint64_t address)
11111 int all = INSTR (30, 30);
11112 unsigned size = INSTR (11, 10);
11113 unsigned vd = INSTR (4, 0);
11119 for (i = 0; i < (all ? 16 : 8); i++)
11120 aarch64_set_mem_u8 (cpu, address + i,
11121 aarch64_get_vec_u8 (cpu, vd, i));
11125 for (i = 0; i < (all ? 8 : 4); i++)
11126 aarch64_set_mem_u16 (cpu, address + i * 2,
11127 aarch64_get_vec_u16 (cpu, vd, i));
11131 for (i = 0; i < (all ? 4 : 2); i++)
11132 aarch64_set_mem_u32 (cpu, address + i * 4,
11133 aarch64_get_vec_u32 (cpu, vd, i));
11137 for (i = 0; i < (all ? 2 : 1); i++)
11138 aarch64_set_mem_u64 (cpu, address + i * 8,
11139 aarch64_get_vec_u64 (cpu, vd, i));
11144 /* Store multiple 1-element structures into two registers. */
11146 ST1_2 (sim_cpu *cpu, uint64_t address)
11148 /* FIXME: This algorithm is *exactly* the same as the ST2 version.
11149 So why have two different instructions ? There must be
11150 something wrong somewhere. */
11151 vec_store (cpu, address, 2);
11154 /* Store multiple 1-element structures into three registers. */
11156 ST1_3 (sim_cpu *cpu, uint64_t address)
11158 /* FIXME: This algorithm is *exactly* the same as the ST3 version.
11159 So why have two different instructions ? There must be
11160 something wrong somewhere. */
11161 vec_store (cpu, address, 3);
11164 /* Store multiple 1-element structures into four registers. */
11166 ST1_4 (sim_cpu *cpu, uint64_t address)
11168 /* FIXME: This algorithm is *exactly* the same as the ST4 version.
11169 So why have two different instructions ? There must be
11170 something wrong somewhere. */
11171 vec_store (cpu, address, 4);
11175 do_vec_LDnR (sim_cpu *cpu, uint64_t address)
11178 instr[30] = element selector 0=>half, 1=>all elements
11179 instr[29,24] = 00 1101
11180 instr[23] = 0=>simple, 1=>post
11182 instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
11183 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11184 11111 (immediate post inc)
11186 instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
11188 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11189 10=> word(s), 11=> double(d)
11190 instr[9,5] = address
11193 unsigned full = INSTR (30, 30);
11194 unsigned vd = INSTR (4, 0);
11195 unsigned size = INSTR (11, 10);
11198 NYI_assert (29, 24, 0x0D);
11199 NYI_assert (22, 22, 1);
11200 NYI_assert (15, 14, 3);
11201 NYI_assert (12, 12, 0);
11203 switch ((INSTR (13, 13) << 1) | INSTR (21, 21))
11205 case 0: /* LD1R. */
11210 uint8_t val = aarch64_get_mem_u8 (cpu, address);
11211 for (i = 0; i < (full ? 16 : 8); i++)
11212 aarch64_set_vec_u8 (cpu, vd, i, val);
11218 uint16_t val = aarch64_get_mem_u16 (cpu, address);
11219 for (i = 0; i < (full ? 8 : 4); i++)
11220 aarch64_set_vec_u16 (cpu, vd, i, val);
11226 uint32_t val = aarch64_get_mem_u32 (cpu, address);
11227 for (i = 0; i < (full ? 4 : 2); i++)
11228 aarch64_set_vec_u32 (cpu, vd, i, val);
11234 uint64_t val = aarch64_get_mem_u64 (cpu, address);
11235 for (i = 0; i < (full ? 2 : 1); i++)
11236 aarch64_set_vec_u64 (cpu, vd, i, val);
11245 case 1: /* LD2R. */
11250 uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11251 uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11253 for (i = 0; i < (full ? 16 : 8); i++)
11255 aarch64_set_vec_u8 (cpu, vd, 0, val1);
11256 aarch64_set_vec_u8 (cpu, vd + 1, 0, val2);
11263 uint16_t val1 = aarch64_get_mem_u16 (cpu, address);
11264 uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11266 for (i = 0; i < (full ? 8 : 4); i++)
11268 aarch64_set_vec_u16 (cpu, vd, 0, val1);
11269 aarch64_set_vec_u16 (cpu, vd + 1, 0, val2);
11276 uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11277 uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11279 for (i = 0; i < (full ? 4 : 2); i++)
11281 aarch64_set_vec_u32 (cpu, vd, 0, val1);
11282 aarch64_set_vec_u32 (cpu, vd + 1, 0, val2);
11289 uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11290 uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11292 for (i = 0; i < (full ? 2 : 1); i++)
11294 aarch64_set_vec_u64 (cpu, vd, 0, val1);
11295 aarch64_set_vec_u64 (cpu, vd + 1, 0, val2);
11305 case 2: /* LD3R. */
11310 uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11311 uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11312 uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2);
11314 for (i = 0; i < (full ? 16 : 8); i++)
11316 aarch64_set_vec_u8 (cpu, vd, 0, val1);
11317 aarch64_set_vec_u8 (cpu, vd + 1, 0, val2);
11318 aarch64_set_vec_u8 (cpu, vd + 2, 0, val3);
11325 uint32_t val1 = aarch64_get_mem_u16 (cpu, address);
11326 uint32_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11327 uint32_t val3 = aarch64_get_mem_u16 (cpu, address + 4);
11329 for (i = 0; i < (full ? 8 : 4); i++)
11331 aarch64_set_vec_u16 (cpu, vd, 0, val1);
11332 aarch64_set_vec_u16 (cpu, vd + 1, 0, val2);
11333 aarch64_set_vec_u16 (cpu, vd + 2, 0, val3);
11340 uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11341 uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11342 uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8);
11344 for (i = 0; i < (full ? 4 : 2); i++)
11346 aarch64_set_vec_u32 (cpu, vd, 0, val1);
11347 aarch64_set_vec_u32 (cpu, vd + 1, 0, val2);
11348 aarch64_set_vec_u32 (cpu, vd + 2, 0, val3);
11355 uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11356 uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11357 uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16);
11359 for (i = 0; i < (full ? 2 : 1); i++)
11361 aarch64_set_vec_u64 (cpu, vd, 0, val1);
11362 aarch64_set_vec_u64 (cpu, vd + 1, 0, val2);
11363 aarch64_set_vec_u64 (cpu, vd + 2, 0, val3);
11373 case 3: /* LD4R. */
11378 uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11379 uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11380 uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2);
11381 uint8_t val4 = aarch64_get_mem_u8 (cpu, address + 3);
11383 for (i = 0; i < (full ? 16 : 8); i++)
11385 aarch64_set_vec_u8 (cpu, vd, 0, val1);
11386 aarch64_set_vec_u8 (cpu, vd + 1, 0, val2);
11387 aarch64_set_vec_u8 (cpu, vd + 2, 0, val3);
11388 aarch64_set_vec_u8 (cpu, vd + 3, 0, val4);
11395 uint32_t val1 = aarch64_get_mem_u16 (cpu, address);
11396 uint32_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11397 uint32_t val3 = aarch64_get_mem_u16 (cpu, address + 4);
11398 uint32_t val4 = aarch64_get_mem_u16 (cpu, address + 6);
11400 for (i = 0; i < (full ? 8 : 4); i++)
11402 aarch64_set_vec_u16 (cpu, vd, 0, val1);
11403 aarch64_set_vec_u16 (cpu, vd + 1, 0, val2);
11404 aarch64_set_vec_u16 (cpu, vd + 2, 0, val3);
11405 aarch64_set_vec_u16 (cpu, vd + 3, 0, val4);
11412 uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11413 uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11414 uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8);
11415 uint32_t val4 = aarch64_get_mem_u32 (cpu, address + 12);
11417 for (i = 0; i < (full ? 4 : 2); i++)
11419 aarch64_set_vec_u32 (cpu, vd, 0, val1);
11420 aarch64_set_vec_u32 (cpu, vd + 1, 0, val2);
11421 aarch64_set_vec_u32 (cpu, vd + 2, 0, val3);
11422 aarch64_set_vec_u32 (cpu, vd + 3, 0, val4);
11429 uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11430 uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11431 uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16);
11432 uint64_t val4 = aarch64_get_mem_u64 (cpu, address + 24);
11434 for (i = 0; i < (full ? 2 : 1); i++)
11436 aarch64_set_vec_u64 (cpu, vd, 0, val1);
11437 aarch64_set_vec_u64 (cpu, vd + 1, 0, val2);
11438 aarch64_set_vec_u64 (cpu, vd + 2, 0, val3);
11439 aarch64_set_vec_u64 (cpu, vd + 3, 0, val4);
/* Decode an Advanced SIMD multiple-structure load/store: validate the
   encoding, perform any post-increment writeback of the base register,
   then dispatch on instr[15,12] to the LDn/STn/LDnR handlers.  */
11455 do_vec_load_store (sim_cpu *cpu)
11457 /* {LD|ST}<N> {Vd..Vd+N}, vaddr
11460 instr[30] = element selector 0=>half, 1=>all elements
11461 instr[29,25] = 00110
11463 instr[23] = 0=>simple, 1=>post
11464 instr[22] = 0=>store, 1=>load
11465 instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR)
11466 instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
11467 11111 (immediate post inc)
11468 instr[15,12] = elements and destinations. eg for load:
11469 0000=>LD4 => load multiple 4-element to
11470 four consecutive registers
11471 0100=>LD3 => load multiple 3-element to
11472 three consecutive registers
11473 1000=>LD2 => load multiple 2-element to
11474 two consecutive registers
11475 0010=>LD1 => load multiple 1-element to
11476 four consecutive registers
11477 0110=>LD1 => load multiple 1-element to
11478 three consecutive registers
11479 1010=>LD1 => load multiple 1-element to
11480 two consecutive registers
11481 0111=>LD1 => load multiple 1-element to
11485 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11486 10=> word(s), 11=> double(d)
11487 instr[9,5] = Vn, can be SP
11496 if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
11499 type = INSTR (15, 12);
/* NOTE(review): the next test compares "type != 0xE" twice; the second
   term was presumably meant to be 0xC (the other LDnR encoding that is
   dispatched below), so bit 21 is never actually validated — confirm.  */
11500 if (type != 0xE && type != 0xE && INSTR (21, 21) != 0)
11503 post = INSTR (23, 23);
11504 load = INSTR (22, 22);
11506 address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
11510 unsigned vm = INSTR (20, 16);
/* Post-increment: compute the number of bytes transferred so the base
   register can be advanced (vm == 31 selects the immediate form).  */
11514 unsigned sizeof_operation;
11518 case 0: sizeof_operation = 32; break;
11519 case 4: sizeof_operation = 24; break;
11520 case 8: sizeof_operation = 16; break;
11523 sizeof_operation = INSTR (21, 21) ? 2 : 1;
11524 sizeof_operation <<= INSTR (11, 10);
11528 sizeof_operation = INSTR (21, 21) ? 8 : 4;
11529 sizeof_operation <<= INSTR (11, 10);
11533 /* One register, immediate offset variant. */
11534 sizeof_operation = 8;
11538 /* Two registers, immediate offset variant. */
11539 sizeof_operation = 16;
11543 /* Three registers, immediate offset variant. */
11544 sizeof_operation = 24;
11548 /* Four registers, immediate offset variant. */
11549 sizeof_operation = 32;
/* Full (128-bit) register forms transfer twice as many bytes.  */
11556 if (INSTR (30, 30))
11557 sizeof_operation *= 2;
11559 aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
/* Register post-increment form: advance the base by Xm.  */
11562 aarch64_set_reg_u64 (cpu, vn, SP_OK,
11563 address + aarch64_get_reg_u64 (cpu, vm, NO_SP))
11567 NYI_assert (20, 16, 0);
/* Load dispatch on instr[15,12].  */
11574 case 0: LD4 (cpu, address); return;
11575 case 4: LD3 (cpu, address); return;
11576 case 8: LD2 (cpu, address); return;
11577 case 2: LD1_4 (cpu, address); return;
11578 case 6: LD1_3 (cpu, address); return;
11579 case 10: LD1_2 (cpu, address); return;
11580 case 7: LD1_1 (cpu, address); return;
11583 case 0xC: do_vec_LDnR (cpu, address); return;
/* Store dispatch on instr[15,12].  */
11593 case 0: ST4 (cpu, address); return;
11594 case 4: ST3 (cpu, address); return;
11595 case 8: ST2 (cpu, address); return;
11596 case 2: ST1_4 (cpu, address); return;
11597 case 6: ST1_3 (cpu, address); return;
11598 case 10: ST1_2 (cpu, address); return;
11599 case 7: ST1_1 (cpu, address); return;
/* Top-level decode for the load/store instruction group: dispatch on
   the secondary load/store vector (bits [29,28:26]).  */
11606 dexLdSt (sim_cpu *cpu)
11608 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
11609 assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
11610 group == GROUP_LDST_1100 || group == GROUP_LDST_1110
11611 bits [29,28:26] of a LS are the secondary dispatch vector. */
11612 uint32_t group2 = dispatchLS (aarch64_get_instr (cpu));
11617 dexLoadExclusive (cpu); return;
11621 dexLoadLiteral (cpu); return;
11625 dexLoadOther (cpu); return;
11627 case LS_ADVSIMD_001:
11628 do_vec_load_store (cpu); return;
11631 dex_load_store_pair_gr (cpu); return;
11634 dex_load_store_pair_fp (cpu); return;
11637 /* Should never reach here. */
11642 /* Specific decode and execute for group Data Processing Register. */
/* Logical (shifted register): AND/BIC/ORR/ORN/EOR/EON/ANDS/BICS in both
   32- and 64-bit forms, dispatched on size:opc:N.  */
11645 dexLogicalShiftedRegister (sim_cpu *cpu)
11647 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
11649 instr[28:24] = 01010
11650 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
11653 instr[15,10] = count : must be 0xxxxx for 32 bit
11657 uint32_t size = INSTR (31, 31);
11658 Shift shiftType = INSTR (23, 22);
11659 uint32_t count = INSTR (15, 10);
11661 /* 32 bit operations must have count[5] = 0.
11662 or else we have an UNALLOC. */
11663 if (size == 0 && uimm (count, 5, 5))
11666 /* Dispatch on size:op:N. */
11667 switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
11669 case 0: and32_shift (cpu, shiftType, count); return;
11670 case 1: bic32_shift (cpu, shiftType, count); return;
11671 case 2: orr32_shift (cpu, shiftType, count); return;
11672 case 3: orn32_shift (cpu, shiftType, count); return;
11673 case 4: eor32_shift (cpu, shiftType, count); return;
11674 case 5: eon32_shift (cpu, shiftType, count); return;
11675 case 6: ands32_shift (cpu, shiftType, count); return;
11676 case 7: bics32_shift (cpu, shiftType, count); return;
11677 case 8: and64_shift (cpu, shiftType, count); return;
11678 case 9: bic64_shift (cpu, shiftType, count); return;
11679 case 10:orr64_shift (cpu, shiftType, count); return;
11680 case 11:orn64_shift (cpu, shiftType, count); return;
11681 case 12:eor64_shift (cpu, shiftType, count); return;
11682 case 13:eon64_shift (cpu, shiftType, count); return;
11683 case 14:ands64_shift (cpu, shiftType, count); return;
11684 case 15:bics64_shift (cpu, shiftType, count); return;
11688 /* 32 bit conditional select. */
11690 csel32 (sim_cpu *cpu, CondCode cc)
11692 unsigned rm = INSTR (20, 16);
11693 unsigned rn = INSTR (9, 5);
11694 unsigned rd = INSTR (4, 0);
11696 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11697 testConditionCode (cpu, cc)
11698 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
11699 : aarch64_get_reg_u32 (cpu, rm, NO_SP));
11702 /* 64 bit conditional select. */
11704 csel64 (sim_cpu *cpu, CondCode cc)
11706 unsigned rm = INSTR (20, 16);
11707 unsigned rn = INSTR (9, 5);
11708 unsigned rd = INSTR (4, 0);
11710 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11711 testConditionCode (cpu, cc)
11712 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
11713 : aarch64_get_reg_u64 (cpu, rm, NO_SP));
11716 /* 32 bit conditional increment. */
11718 csinc32 (sim_cpu *cpu, CondCode cc)
11720 unsigned rm = INSTR (20, 16);
11721 unsigned rn = INSTR (9, 5);
11722 unsigned rd = INSTR (4, 0);
11724 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11725 testConditionCode (cpu, cc)
11726 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
11727 : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
11730 /* 64 bit conditional increment. */
11732 csinc64 (sim_cpu *cpu, CondCode cc)
11734 unsigned rm = INSTR (20, 16);
11735 unsigned rn = INSTR (9, 5);
11736 unsigned rd = INSTR (4, 0);
11738 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11739 testConditionCode (cpu, cc)
11740 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
11741 : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
11744 /* 32 bit conditional invert. */
11746 csinv32 (sim_cpu *cpu, CondCode cc)
11748 unsigned rm = INSTR (20, 16);
11749 unsigned rn = INSTR (9, 5);
11750 unsigned rd = INSTR (4, 0);
11752 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11753 testConditionCode (cpu, cc)
11754 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
11755 : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
11758 /* 64 bit conditional invert. */
11760 csinv64 (sim_cpu *cpu, CondCode cc)
11762 unsigned rm = INSTR (20, 16);
11763 unsigned rn = INSTR (9, 5);
11764 unsigned rd = INSTR (4, 0);
11766 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11767 testConditionCode (cpu, cc)
11768 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
11769 : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
11772 /* 32 bit conditional negate. */
11774 csneg32 (sim_cpu *cpu, CondCode cc)
11776 unsigned rm = INSTR (20, 16);
11777 unsigned rn = INSTR (9, 5);
11778 unsigned rd = INSTR (4, 0);
11780 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11781 testConditionCode (cpu, cc)
11782 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
11783 : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
11786 /* 64 bit conditional negate. */
11788 csneg64 (sim_cpu *cpu, CondCode cc)
11790 unsigned rm = INSTR (20, 16);
11791 unsigned rn = INSTR (9, 5);
11792 unsigned rd = INSTR (4, 0);
11794 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11795 testConditionCode (cpu, cc)
11796 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
11797 : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
/* Decode conditional select: dispatch CSEL/CSINC/CSINV/CSNEG in 32-
   and 64-bit forms on size:op:op2.  */
11801 dexCondSelect (sim_cpu *cpu)
11803 /* instr[28,21] = 11011011
11804 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
11805 instr[30:11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
11806 100 ==> CSINV, 101 ==> CSNEG,
11808 instr[20,16] = Rm
11809 instr[15,12] = cond
11810 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC */
11812 CondCode cc = INSTR (15, 12);
11813 uint32_t S = INSTR (29, 29);
11814 uint32_t op2 = INSTR (11, 10);
11822 switch ((INSTR (31, 30) << 1) | op2)
11824 case 0: csel32 (cpu, cc); return;
11825 case 1: csinc32 (cpu, cc); return;
11826 case 2: csinv32 (cpu, cc); return;
11827 case 3: csneg32 (cpu, cc); return;
11828 case 4: csel64 (cpu, cc); return;
11829 case 5: csinc64 (cpu, cc); return;
11830 case 6: csinv64 (cpu, cc); return;
11831 case 7: csneg64 (cpu, cc); return;
/* Some helpers for counting leading 1 or 0 bits.  */

/* Counts the number of leading bits which are the same
   in a 32 bit value, in the range 1 to 32.

   Replaces the old binary-search version, which right-shifted a
   negative int32_t mask — implementation-defined behavior in C.
   A straightforward unsigned scan from the top bit is both portable
   and easier to verify.  */
static uint32_t
leading32 (uint32_t value)
{
  uint32_t sign = value >> 31;	/* The leading (top) bit.  */
  uint32_t count = 1;		/* The top bit always matches itself.  */

  /* Scan downwards while each bit matches the top bit.  The
     short-circuit guard keeps the shift amount in range.  */
  while (count < 32 && ((value >> (31 - count)) & 1) == sign)
    count++;

  return count;
}
/* Counts the number of leading bits which are the same
   in a 64 bit value, in the range 1 to 64.

   Replaces the old binary-search version, which right-shifted a
   negative int64_t mask — implementation-defined behavior in C.
   A straightforward unsigned scan from the top bit is both portable
   and easier to verify.  */
static uint64_t
leading64 (uint64_t value)
{
  uint64_t sign = value >> 63;	/* The leading (top) bit.  */
  uint64_t count = 1;		/* The top bit always matches itself.  */

  /* Scan downwards while each bit matches the top bit.  The
     short-circuit guard keeps the shift amount in range.  */
  while (count < 64 && ((value >> (63 - count)) & 1) == sign)
    count++;

  return count;
}
11925 /* Bit operations. */
11926 /* N.B register args may not be SP. */
11928 /* 32 bit count leading sign bits. */
/* CLS Wd, Wn: the count of bits following the sign bit that match it;
   leading32 counts the sign bit itself, hence the "- 1".  */
11930 cls32 (sim_cpu *cpu)
11932 unsigned rn = INSTR (9, 5);
11933 unsigned rd = INSTR (4, 0);
11935 /* N.B. the result needs to exclude the leading bit. */
11936 aarch64_set_reg_u64
11937 (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
11940 /* 64 bit count leading sign bits. */
/* CLS Xd, Xn: as cls32, but on the full 64-bit register.  */
11942 cls64 (sim_cpu *cpu)
11944 unsigned rn = INSTR (9, 5);
11945 unsigned rd = INSTR (4, 0);
11947 /* N.B. the result needs to exclude the leading bit. */
11948 aarch64_set_reg_u64
11949 (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
11952 /* 32 bit count leading zero bits. */
/* CLZ Wd, Wn.  When the top bit is clear, the leading run counted by
   leading32 is a run of zeros, so its result is exactly CLZ.  */
11954 clz32 (sim_cpu *cpu)
11956 unsigned rn = INSTR (9, 5);
11957 unsigned rd = INSTR (4, 0);
11958 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
11960 /* if the sign (top) bit is set then the count is 0. */
11961 if (pick32 (value, 31, 31))
11962 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
11964 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
11967 /* 64 bit count leading zero bits. */
/* CLZ Xd, Xn.  When the top bit is clear, the leading run counted by
   leading64 is a run of zeros, so its result is exactly CLZ.  */
11969 clz64 (sim_cpu *cpu)
11971 unsigned rn = INSTR (9, 5);
11972 unsigned rd = INSTR (4, 0);
11973 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
11975 /* if the sign (top) bit is set then the count is 0. */
11976 if (pick64 (value, 63, 63))
11977 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
11979 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
11982 /* 32 bit reverse bits. */
11984 rbit32 (sim_cpu *cpu)
11986 unsigned rn = INSTR (9, 5);
11987 unsigned rd = INSTR (4, 0);
11988 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
11989 uint32_t result = 0;
11992 for (i = 0; i < 32; i++)
11995 result |= (value & 1);
11998 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12001 /* 64 bit reverse bits. */
12003 rbit64 (sim_cpu *cpu)
12005 unsigned rn = INSTR (9, 5);
12006 unsigned rd = INSTR (4, 0);
12007 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12008 uint64_t result = 0;
12011 for (i = 0; i < 64; i++)
12014 result |= (value & 1UL);
12017 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12020 /* 32 bit reverse bytes. */
12022 rev32 (sim_cpu *cpu)
12024 unsigned rn = INSTR (9, 5);
12025 unsigned rd = INSTR (4, 0);
12026 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12027 uint32_t result = 0;
12030 for (i = 0; i < 4; i++)
12033 result |= (value & 0xff);
12036 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12039 /* 64 bit reverse bytes. */
12041 rev64 (sim_cpu *cpu)
12043 unsigned rn = INSTR (9, 5);
12044 unsigned rd = INSTR (4, 0);
12045 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12046 uint64_t result = 0;
12049 for (i = 0; i < 8; i++)
12052 result |= (value & 0xffULL);
12055 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12058 /* 32 bit reverse shorts. */
12059 /* N.B.this reverses the order of the bytes in each half word. */
12061 revh32 (sim_cpu *cpu)
12063 unsigned rn = INSTR (9, 5);
12064 unsigned rd = INSTR (4, 0);
12065 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12066 uint32_t result = 0;
12069 for (i = 0; i < 2; i++)
12072 result |= (value & 0x00ff00ff);
12075 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12078 /* 64 bit reverse shorts. */
12079 /* N.B.this reverses the order of the bytes in each half word. */
12081 revh64 (sim_cpu *cpu)
12083 unsigned rn = INSTR (9, 5);
12084 unsigned rd = INSTR (4, 0);
12085 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12086 uint64_t result = 0;
12089 for (i = 0; i < 2; i++)
12092 result |= (value & 0x00ff00ff00ff00ffULL);
12095 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
/* Decode data-processing (1 source): RBIT/REV16/REV/CLZ/CLS in 32-
   and 64-bit forms, dispatched on size:opcode.  */
12099 dexDataProc1Source (sim_cpu *cpu)
12102 instr[28,21] = 111010110
12103 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12104 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12105 instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
12106 instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
12107 000010 ==> REV, 000011 ==> UNALLOC
12108 000100 ==> CLZ, 000101 ==> CLS
12110 instr[9,5] = rn : may not be SP
12111 instr[4,0] = rd : may not be SP. */
12113 uint32_t S = INSTR (29, 29);
12114 uint32_t opcode2 = INSTR (20, 16);
12115 uint32_t opcode = INSTR (15, 10);
12116 uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
12129 case 0: rbit32 (cpu); return;
12130 case 1: revh32 (cpu); return;
12131 case 2: rev32 (cpu); return;
12132 case 4: clz32 (cpu); return;
12133 case 5: cls32 (cpu); return;
12134 case 8: rbit64 (cpu); return;
12135 case 9: revh64 (cpu); return;
/* NOTE(review): dispatch 10 is the 64-bit REV32 encoding but is routed
   to rev32, which reads only the low 32 bits of the register — confirm
   whether the upper word should also be byte-reversed.  */
12136 case 10:rev32 (cpu); return;
12137 case 11:rev64 (cpu); return;
12138 case 12:clz64 (cpu); return;
12139 case 13:cls64 (cpu); return;
12140 default: HALT_UNALLOC;
12145 Shifts by count supplied in register.
12146 N.B register args may not be SP.
12147 These all use the shifted auxiliary function for
12148 simplicity and clarity. Writing the actual shift
12149 inline would avoid a branch and so be faster but
12150 would also necessitate getting signs right. */
12152 /* 32 bit arithmetic shift right. */
12154 asrv32 (sim_cpu *cpu)
12156 unsigned rm = INSTR (20, 16);
12157 unsigned rn = INSTR (9, 5);
12158 unsigned rd = INSTR (4, 0);
12160 aarch64_set_reg_u64
12162 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
12163 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12166 /* 64 bit arithmetic shift right. */
12168 asrv64 (sim_cpu *cpu)
12170 unsigned rm = INSTR (20, 16);
12171 unsigned rn = INSTR (9, 5);
12172 unsigned rd = INSTR (4, 0);
12174 aarch64_set_reg_u64
12176 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
12177 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12180 /* 32 bit logical shift left. */
12182 lslv32 (sim_cpu *cpu)
12184 unsigned rm = INSTR (20, 16);
12185 unsigned rn = INSTR (9, 5);
12186 unsigned rd = INSTR (4, 0);
12188 aarch64_set_reg_u64
12190 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
12191 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12194 /* 64 bit arithmetic shift left. */
12196 lslv64 (sim_cpu *cpu)
12198 unsigned rm = INSTR (20, 16);
12199 unsigned rn = INSTR (9, 5);
12200 unsigned rd = INSTR (4, 0);
12202 aarch64_set_reg_u64
12204 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
12205 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12208 /* 32 bit logical shift right. */
12210 lsrv32 (sim_cpu *cpu)
12212 unsigned rm = INSTR (20, 16);
12213 unsigned rn = INSTR (9, 5);
12214 unsigned rd = INSTR (4, 0);
12216 aarch64_set_reg_u64
12218 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
12219 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12222 /* 64 bit logical shift right. */
12224 lsrv64 (sim_cpu *cpu)
12226 unsigned rm = INSTR (20, 16);
12227 unsigned rn = INSTR (9, 5);
12228 unsigned rd = INSTR (4, 0);
12230 aarch64_set_reg_u64
12232 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
12233 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12236 /* 32 bit rotate right. */
12238 rorv32 (sim_cpu *cpu)
12240 unsigned rm = INSTR (20, 16);
12241 unsigned rn = INSTR (9, 5);
12242 unsigned rd = INSTR (4, 0);
12244 aarch64_set_reg_u64
12246 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
12247 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12250 /* 64 bit rotate right. */
12252 rorv64 (sim_cpu *cpu)
12254 unsigned rm = INSTR (20, 16);
12255 unsigned rn = INSTR (9, 5);
12256 unsigned rd = INSTR (4, 0);
12258 aarch64_set_reg_u64
12260 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
12261 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12267 /* 32 bit signed divide. */
12269 cpuiv32 (sim_cpu *cpu)
12271 unsigned rm = INSTR (20, 16);
12272 unsigned rn = INSTR (9, 5);
12273 unsigned rd = INSTR (4, 0);
12274 /* N.B. the pseudo-code does the divide using 64 bit data. */
12275 /* TODO : check that this rounds towards zero as required. */
12276 int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
12277 int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);
12279 aarch64_set_reg_s64 (cpu, rd, NO_SP,
12280 divisor ? ((int32_t) (dividend / divisor)) : 0);
12283 /* 64 bit signed divide. */
12285 cpuiv64 (sim_cpu *cpu)
12287 unsigned rm = INSTR (20, 16);
12288 unsigned rn = INSTR (9, 5);
12289 unsigned rd = INSTR (4, 0);
12291 /* TODO : check that this rounds towards zero as required. */
12292 int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
12294 aarch64_set_reg_s64
12296 divisor ? (aarch64_get_reg_s64 (cpu, rn, NO_SP) / divisor) : 0);
12299 /* 32 bit unsigned divide. */
12301 udiv32 (sim_cpu *cpu)
12303 unsigned rm = INSTR (20, 16);
12304 unsigned rn = INSTR (9, 5);
12305 unsigned rd = INSTR (4, 0);
12307 /* N.B. the pseudo-code does the divide using 64 bit data. */
12308 uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12309 uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);
12311 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12312 divisor ? (uint32_t) (dividend / divisor) : 0);
12315 /* 64 bit unsigned divide. */
12317 udiv64 (sim_cpu *cpu)
12319 unsigned rm = INSTR (20, 16);
12320 unsigned rn = INSTR (9, 5);
12321 unsigned rd = INSTR (4, 0);
12323 /* TODO : check that this rounds towards zero as required. */
12324 uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12326 aarch64_set_reg_u64
12328 divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
12332 dexDataProc2Source (sim_cpu *cpu)
12334 /* assert instr[30] == 0
12335 instr[28,21] == 11010110
12336 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12337 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12338 instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> CPUIV,
12339 001000 ==> LSLV, 001001 ==> LSRV
12340 001010 ==> ASRV, 001011 ==> RORV
12344 uint32_t S = INSTR (29, 29);
12345 uint32_t opcode = INSTR (15, 10);
12353 dispatch = ( (INSTR (31, 31) << 3)
12354 | (uimm (opcode, 3, 3) << 2)
12355 | uimm (opcode, 1, 0));
12358 case 2: udiv32 (cpu); return;
12359 case 3: cpuiv32 (cpu); return;
12360 case 4: lslv32 (cpu); return;
12361 case 5: lsrv32 (cpu); return;
12362 case 6: asrv32 (cpu); return;
12363 case 7: rorv32 (cpu); return;
12364 case 10: udiv64 (cpu); return;
12365 case 11: cpuiv64 (cpu); return;
12366 case 12: lslv64 (cpu); return;
12367 case 13: lsrv64 (cpu); return;
12368 case 14: asrv64 (cpu); return;
12369 case 15: rorv64 (cpu); return;
12370 default: HALT_UNALLOC;
12377 /* 32 bit multiply and add. */
12379 madd32 (sim_cpu *cpu)
12381 unsigned rm = INSTR (20, 16);
12382 unsigned ra = INSTR (14, 10);
12383 unsigned rn = INSTR (9, 5);
12384 unsigned rd = INSTR (4, 0);
12386 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12387 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12388 + aarch64_get_reg_u32 (cpu, rn, NO_SP)
12389 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12392 /* 64 bit multiply and add. */
12394 madd64 (sim_cpu *cpu)
12396 unsigned rm = INSTR (20, 16);
12397 unsigned ra = INSTR (14, 10);
12398 unsigned rn = INSTR (9, 5);
12399 unsigned rd = INSTR (4, 0);
12401 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12402 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12403 + aarch64_get_reg_u64 (cpu, rn, NO_SP)
12404 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
12407 /* 32 bit multiply and sub. */
12409 msub32 (sim_cpu *cpu)
12411 unsigned rm = INSTR (20, 16);
12412 unsigned ra = INSTR (14, 10);
12413 unsigned rn = INSTR (9, 5);
12414 unsigned rd = INSTR (4, 0);
12416 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12417 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12418 - aarch64_get_reg_u32 (cpu, rn, NO_SP)
12419 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12422 /* 64 bit multiply and sub. */
12424 msub64 (sim_cpu *cpu)
12426 unsigned rm = INSTR (20, 16);
12427 unsigned ra = INSTR (14, 10);
12428 unsigned rn = INSTR (9, 5);
12429 unsigned rd = INSTR (4, 0);
12431 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12432 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12433 - aarch64_get_reg_u64 (cpu, rn, NO_SP)
12434 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
12437 /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit. */
12439 smaddl (sim_cpu *cpu)
12441 unsigned rm = INSTR (20, 16);
12442 unsigned ra = INSTR (14, 10);
12443 unsigned rn = INSTR (9, 5);
12444 unsigned rd = INSTR (4, 0);
12446 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12447 obtain a 64 bit product. */
12448 aarch64_set_reg_s64
12450 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12451 + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12452 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12455 /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
12457 smsubl (sim_cpu *cpu)
12459 unsigned rm = INSTR (20, 16);
12460 unsigned ra = INSTR (14, 10);
12461 unsigned rn = INSTR (9, 5);
12462 unsigned rd = INSTR (4, 0);
12464 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12465 obtain a 64 bit product. */
12466 aarch64_set_reg_s64
12468 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12469 - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12470 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
/* Integer Multiply/Divide.  */

/* First some macros and a helper function.  */
/* Macros to test or access elements of 64 bit words.  */

/* Mask used to access lo 32 bits of 64 bit unsigned int.  */
#define LOW_WORD_MASK ((1ULL << 32) - 1)
/* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int.  */
#define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
/* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int.  */
#define highWordToU64(_value_u64) ((_value_u64) >> 32)

/* Offset of sign bit in 64 bit signed integer.  */
#define SIGN_SHIFT_U64 63
/* The sign bit itself -- also identifies the minimum negative int value.
   Use 1ULL rather than 1UL: on hosts where long is 32 bits a 63-bit
   shift of 1UL would be undefined behavior.  */
#define SIGN_BIT_U64 (1ULL << SIGN_SHIFT_U64)
/* Return true if a 64 bit signed int presented as an unsigned int is the
   most negative value.  */
#define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
/* Return true (non-zero) if a 64 bit signed int presented as an unsigned
   int has its sign bit set.  */
#define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
/* Return 1L or -1L according to whether a 64 bit signed int presented as
   an unsigned int has its sign bit set or not.
   Fixed: the expansion referenced 'value_u64' (not the macro parameter
   '_value_u64') and lacked its closing parenthesis.  */
#define signOfU64(_value_u64) (1L + (((_value_u64) >> SIGN_SHIFT_U64) * -2L))
/* Clear the sign bit of a 64 bit signed int presented as an unsigned int.  */
#define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
/* Multiply two 64 bit ints and return
   the hi 64 bits of the 128 bit product.  */
static uint64_t
mul64hi (uint64_t value1, uint64_t value2)
{
  uint64_t resultmid1;
  uint64_t result;
  /* Split each operand into 32-bit halves (open-coded so this helper
     is self-contained).  */
  uint64_t value1_lo = value1 & 0xFFFFFFFFULL;
  uint64_t value1_hi = value1 >> 32;
  uint64_t value2_lo = value2 & 0xFFFFFFFFULL;
  uint64_t value2_hi = value2 >> 32;

  /* Cross-multiply and collect results.  */
  uint64_t xproductlo   = value1_lo * value2_lo;
  uint64_t xproductmid1 = value1_lo * value2_hi;
  uint64_t xproductmid2 = value1_hi * value2_lo;
  uint64_t xproducthi   = value1_hi * value2_hi;
  uint64_t carry = 0;
  /* Start accumulating 64 bit results.  */
  /* Drop bottom half of lowest cross-product.  */
  uint64_t resultmid = xproductlo >> 32;

  /* Add in middle products.  */
  resultmid = resultmid + xproductmid1;

  /* Check for overflow.  */
  if (resultmid < xproductmid1)
    /* Carry over 1 into top cross-product.  */
    carry++;

  resultmid1 = resultmid + xproductmid2;

  /* Check for overflow.  */
  if (resultmid1 < xproductmid2)
    /* Carry over 1 into top cross-product.  */
    carry++;

  /* Drop lowest 32 bits of middle cross-product.  */
  result = resultmid1 >> 32;

  /* Scale the carry up to just above the middle cross-product's
     highest bit: an overflow of the 64-bit mid sum is worth 2^96 in
     the full 128-bit product, i.e. bit 32 of the high word.  Adding
     the raw carry (2^64) here would under-weight it by 2^32.  */
  carry = carry << 32;

  /* Add top cross-product plus and any carry.  */
  result += xproducthi + carry;

  return result;
}
12548 /* Signed multiply high, source, source2 :
12549 64 bit, dest <-- high 64-bit of result. */
12551 smulh (sim_cpu *cpu)
12555 unsigned rm = INSTR (20, 16);
12556 unsigned rn = INSTR (9, 5);
12557 unsigned rd = INSTR (4, 0);
12558 GReg ra = INSTR (14, 10);
12559 int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12560 int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12563 int64_t signum = 1;
12568 /* Convert to unsigned and use the unsigned mul64hi routine
12569 the fix the sign up afterwards. */
12590 uresult = mul64hi (uvalue1, uvalue2);
12594 aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
12597 /* Unsigned multiply add long -- source, source2 :
12598 32 bit, source3 : 64 bit. */
12600 umaddl (sim_cpu *cpu)
12602 unsigned rm = INSTR (20, 16);
12603 unsigned ra = INSTR (14, 10);
12604 unsigned rn = INSTR (9, 5);
12605 unsigned rd = INSTR (4, 0);
12607 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12608 obtain a 64 bit product. */
12609 aarch64_set_reg_u64
12611 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12612 + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
12613 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
12616 /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
12618 umsubl (sim_cpu *cpu)
12620 unsigned rm = INSTR (20, 16);
12621 unsigned ra = INSTR (14, 10);
12622 unsigned rn = INSTR (9, 5);
12623 unsigned rd = INSTR (4, 0);
12625 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12626 obtain a 64 bit product. */
12627 aarch64_set_reg_u64
12629 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12630 - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
12631 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
12634 /* Unsigned multiply high, source, source2 :
12635 64 bit, dest <-- high 64-bit of result. */
12637 umulh (sim_cpu *cpu)
12639 unsigned rm = INSTR (20, 16);
12640 unsigned rn = INSTR (9, 5);
12641 unsigned rd = INSTR (4, 0);
12642 GReg ra = INSTR (14, 10);
12647 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12648 mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
12649 aarch64_get_reg_u64 (cpu, rm, NO_SP)));
12653 dexDataProc3Source (sim_cpu *cpu)
12655 /* assert instr[28,24] == 11011. */
12656 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
12657 instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
12658 instr[23,21] = op31 : 111 ==> UNALLOC, o2 ==> ok
12659 instr[15] = o0 : 0/1 ==> ok
12660 instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB, (32/64 bit)
12661 0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
12662 0100 ==> SMULH, (64 bit only)
12663 1010 ==> UMADDL, 1011 ==> UNSUBL, (64 bit only)
12664 1100 ==> UMULH (64 bit only)
12668 uint32_t size = INSTR (31, 31);
12669 uint32_t op54 = INSTR (30, 29);
12670 uint32_t op31 = INSTR (23, 21);
12671 uint32_t o0 = INSTR (15, 15);
12688 dispatch = (op31 << 1) | o0;
12692 case 0: madd64 (cpu); return;
12693 case 1: msub64 (cpu); return;
12694 case 2: smaddl (cpu); return;
12695 case 3: smsubl (cpu); return;
12696 case 4: smulh (cpu); return;
12697 case 10: umaddl (cpu); return;
12698 case 11: umsubl (cpu); return;
12699 case 12: umulh (cpu); return;
12700 default: HALT_UNALLOC;
12705 dexDPReg (sim_cpu *cpu)
12707 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
12708 assert group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
12709 bits [28:24:21] of a DPReg are the secondary dispatch vector. */
12710 uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
12714 case DPREG_LOG_000:
12715 case DPREG_LOG_001:
12716 dexLogicalShiftedRegister (cpu); return;
12718 case DPREG_ADDSHF_010:
12719 dexAddSubtractShiftedRegister (cpu); return;
12721 case DPREG_ADDEXT_011:
12722 dexAddSubtractExtendedRegister (cpu); return;
12724 case DPREG_ADDCOND_100:
12726 /* This set bundles a variety of different operations. */
12728 /* 1) add/sub w carry. */
12729 uint32_t mask1 = 0x1FE00000U;
12730 uint32_t val1 = 0x1A000000U;
12731 /* 2) cond compare register/immediate. */
12732 uint32_t mask2 = 0x1FE00000U;
12733 uint32_t val2 = 0x1A400000U;
12734 /* 3) cond select. */
12735 uint32_t mask3 = 0x1FE00000U;
12736 uint32_t val3 = 0x1A800000U;
12737 /* 4) data proc 1/2 source. */
12738 uint32_t mask4 = 0x1FE00000U;
12739 uint32_t val4 = 0x1AC00000U;
12741 if ((aarch64_get_instr (cpu) & mask1) == val1)
12742 dexAddSubtractWithCarry (cpu);
12744 else if ((aarch64_get_instr (cpu) & mask2) == val2)
12747 else if ((aarch64_get_instr (cpu) & mask3) == val3)
12748 dexCondSelect (cpu);
12750 else if ((aarch64_get_instr (cpu) & mask4) == val4)
12752 /* Bit 30 is clear for data proc 2 source
12753 and set for data proc 1 source. */
12754 if (aarch64_get_instr (cpu) & (1U << 30))
12755 dexDataProc1Source (cpu);
12757 dexDataProc2Source (cpu);
12761 /* Should not reach here. */
12767 case DPREG_3SRC_110:
12768 dexDataProc3Source (cpu); return;
12770 case DPREG_UNALLOC_101:
12773 case DPREG_3SRC_111:
12774 dexDataProc3Source (cpu); return;
12777 /* Should never reach here. */
12782 /* Unconditional Branch immediate.
12783 Offset is a PC-relative byte offset in the range +/- 128MiB.
12784 The offset is assumed to be raw from the decode i.e. the
12785 simulator is expected to scale them from word offsets to byte. */
12787 /* Unconditional branch. */
12789 buc (sim_cpu *cpu, int32_t offset)
12791 aarch64_set_next_PC_by_offset (cpu, offset);
12794 static unsigned stack_depth = 0;
12796 /* Unconditional branch and link -- writes return PC to LR. */
12798 bl (sim_cpu *cpu, int32_t offset)
12800 aarch64_save_LR (cpu);
12801 aarch64_set_next_PC_by_offset (cpu, offset);
12803 if (TRACE_BRANCH_P (cpu))
12807 " %*scall %" PRIx64 " [%s]"
12808 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
12809 stack_depth, " ", aarch64_get_next_PC (cpu),
12810 aarch64_get_func (aarch64_get_next_PC (cpu)),
12811 aarch64_get_reg_u64 (cpu, 0, NO_SP),
12812 aarch64_get_reg_u64 (cpu, 1, NO_SP),
12813 aarch64_get_reg_u64 (cpu, 2, NO_SP)
12818 /* Unconditional Branch register.
12819 Branch/return address is in source register. */
12821 /* Unconditional branch. */
12825 unsigned rn = INSTR (9, 5);
12826 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
12829 /* Unconditional branch and link -- writes return PC to LR. */
12833 unsigned rn = INSTR (9, 5);
12835 /* The pseudo code in the spec says we update LR before fetching.
12836 the value from the rn. */
12837 aarch64_save_LR (cpu);
12838 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
12840 if (TRACE_BRANCH_P (cpu))
12844 " %*scall %" PRIx64 " [%s]"
12845 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
12846 stack_depth, " ", aarch64_get_next_PC (cpu),
12847 aarch64_get_func (aarch64_get_next_PC (cpu)),
12848 aarch64_get_reg_u64 (cpu, 0, NO_SP),
12849 aarch64_get_reg_u64 (cpu, 1, NO_SP),
12850 aarch64_get_reg_u64 (cpu, 2, NO_SP)
12855 /* Return -- assembler will default source to LR this is functionally
12856 equivalent to br but, presumably, unlike br it side effects the
12857 branch predictor. */
12861 unsigned rn = INSTR (9, 5);
12862 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
12864 if (TRACE_BRANCH_P (cpu))
12867 " %*sreturn [result: %" PRIx64 "]",
12868 stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
12873 /* NOP -- we implement this and call it from the decode in case we
12874 want to intercept it later. */
12881 /* Data synchronization barrier. */
12888 /* Data memory barrier. */
12895 /* Instruction synchronization barrier. */
12903 dexBranchImmediate (sim_cpu *cpu)
12905 /* assert instr[30,26] == 00101
12906 instr[31] ==> 0 == B, 1 == BL
12907 instr[25,0] == imm26 branch offset counted in words. */
12909 uint32_t top = INSTR (31, 31);
12910 /* We have a 26 byte signed word offset which we need to pass to the
12911 execute routine as a signed byte offset. */
12912 int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
12920 /* Control Flow. */
12922 /* Conditional branch
12924 Offset is a PC-relative byte offset in the range +/- 1MiB pos is
12925 a bit position in the range 0 .. 63
12927 cc is a CondCode enum value as pulled out of the decode
12929 N.B. any offset register (source) can only be Xn or Wn. */
12932 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
12934 /* the test returns TRUE if CC is met. */
12935 if (testConditionCode (cpu, cc))
12936 aarch64_set_next_PC_by_offset (cpu, offset);
12939 /* 32 bit branch on register non-zero. */
12941 cbnz32 (sim_cpu *cpu, int32_t offset)
12943 unsigned rt = INSTR (4, 0);
12945 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
12946 aarch64_set_next_PC_by_offset (cpu, offset);
12949 /* 64 bit branch on register zero. */
12951 cbnz (sim_cpu *cpu, int32_t offset)
12953 unsigned rt = INSTR (4, 0);
12955 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
12956 aarch64_set_next_PC_by_offset (cpu, offset);
12959 /* 32 bit branch on register non-zero. */
12961 cbz32 (sim_cpu *cpu, int32_t offset)
12963 unsigned rt = INSTR (4, 0);
12965 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
12966 aarch64_set_next_PC_by_offset (cpu, offset);
12969 /* 64 bit branch on register zero. */
12971 cbz (sim_cpu *cpu, int32_t offset)
12973 unsigned rt = INSTR (4, 0);
12975 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
12976 aarch64_set_next_PC_by_offset (cpu, offset);
12979 /* Branch on register bit test non-zero -- one size fits all. */
12981 tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
12983 unsigned rt = INSTR (4, 0);
12985 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (1 << pos))
12986 aarch64_set_next_PC_by_offset (cpu, offset);
12989 /* branch on register bit test zero -- one size fits all. */
12991 tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
12993 unsigned rt = INSTR (4, 0);
12995 if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (1 << pos)))
12996 aarch64_set_next_PC_by_offset (cpu, offset);
13000 dexCompareBranchImmediate (sim_cpu *cpu)
13002 /* instr[30,25] = 01 1010
13003 instr[31] = size : 0 ==> 32, 1 ==> 64
13004 instr[24] = op : 0 ==> CBZ, 1 ==> CBNZ
13005 instr[23,5] = simm19 branch offset counted in words
13008 uint32_t size = INSTR (31, 31);
13009 uint32_t op = INSTR (24, 24);
13010 int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13015 cbz32 (cpu, offset);
13017 cbnz32 (cpu, offset);
13024 cbnz (cpu, offset);
13029 dexTestBranchImmediate (sim_cpu *cpu)
13031 /* instr[31] = b5 : bit 5 of test bit idx
13032 instr[30,25] = 01 1011
13033 instr[24] = op : 0 ==> TBZ, 1 == TBNZ
13034 instr[23,19] = b40 : bits 4 to 0 of test bit idx
13035 instr[18,5] = simm14 : signed offset counted in words
13036 instr[4,0] = uimm5 */
13038 uint32_t pos = ((INSTR (31, 31) << 4) | INSTR (23, 19));
13039 int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
13041 NYI_assert (30, 25, 0x1b);
13043 if (INSTR (24, 24) == 0)
13044 tbz (cpu, pos, offset);
13046 tbnz (cpu, pos, offset);
13050 dexCondBranchImmediate (sim_cpu *cpu)
13052 /* instr[31,25] = 010 1010
13053 instr[24] = op1; op => 00 ==> B.cond
13054 instr[23,5] = simm19 : signed offset counted in words
13056 instr[3,0] = cond */
13059 uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
13061 NYI_assert (31, 25, 0x2a);
13066 offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13068 bcc (cpu, offset, INSTR (3, 0));
13072 dexBranchRegister (sim_cpu *cpu)
13074 /* instr[31,25] = 110 1011
13075 instr[24,21] = op : 0 ==> BR, 1 => BLR, 2 => RET, 3 => ERET, 4 => DRPS
13076 instr[20,16] = op2 : must be 11111
13077 instr[15,10] = op3 : must be 000000
13078 instr[4,0] = op2 : must be 11111. */
13080 uint32_t op = INSTR (24, 21);
13081 uint32_t op2 = INSTR (20, 16);
13082 uint32_t op3 = INSTR (15, 10);
13083 uint32_t op4 = INSTR (4, 0);
13085 NYI_assert (31, 25, 0x6b);
13087 if (op2 != 0x1F || op3 != 0 || op4 != 0)
13101 /* ERET and DRPS accept 0b11111 for rn = instr [4,0]. */
13102 /* anything else is unallocated. */
13103 uint32_t rn = INSTR (4, 0);
13108 if (op == 4 || op == 5)
/* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
   but this may not be available.  So instead we define the values we need
   here.  */
#define AngelSVC_Reason_Open		0x01
#define AngelSVC_Reason_Close		0x02
#define AngelSVC_Reason_Write		0x05
#define AngelSVC_Reason_Read		0x06
#define AngelSVC_Reason_IsTTY		0x09
#define AngelSVC_Reason_Seek		0x0A
#define AngelSVC_Reason_FLen		0x0C
#define AngelSVC_Reason_Remove		0x0E
#define AngelSVC_Reason_Rename		0x0F
#define AngelSVC_Reason_Clock		0x10
#define AngelSVC_Reason_Time		0x11
#define AngelSVC_Reason_System		0x12
#define AngelSVC_Reason_Errno		0x13
#define AngelSVC_Reason_GetCmdLine	0x15
#define AngelSVC_Reason_HeapInfo	0x16
#define AngelSVC_Reason_ReportException 0x18
#define AngelSVC_Reason_Elapsed		0x30
13138 handle_halt (sim_cpu *cpu, uint32_t val)
13140 uint64_t result = 0;
13144 TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
13145 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13146 sim_stopped, SIM_SIGTRAP);
13149 /* We have encountered an Angel SVC call. See if we can process it. */
13150 switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
13152 case AngelSVC_Reason_HeapInfo:
13154 /* Get the values. */
13155 uint64_t stack_top = aarch64_get_stack_start (cpu);
13156 uint64_t heap_base = aarch64_get_heap_start (cpu);
13158 /* Get the pointer */
13159 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13160 ptr = aarch64_get_mem_u64 (cpu, ptr);
13162 /* Fill in the memory block. */
13163 /* Start addr of heap. */
13164 aarch64_set_mem_u64 (cpu, ptr + 0, heap_base);
13165 /* End addr of heap. */
13166 aarch64_set_mem_u64 (cpu, ptr + 8, stack_top);
13167 /* Lowest stack addr. */
13168 aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
13169 /* Initial stack addr. */
13170 aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);
13172 TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
13176 case AngelSVC_Reason_Open:
13178 /* Get the pointer */
13179 /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);. */
13180 /* FIXME: For now we just assume that we will only be asked
13181 to open the standard file descriptors. */
13185 TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
13189 case AngelSVC_Reason_Close:
13191 uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13192 TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
13197 case AngelSVC_Reason_Errno:
13199 TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
13202 case AngelSVC_Reason_Clock:
13204 #ifdef CLOCKS_PER_SEC
13205 (CLOCKS_PER_SEC >= 100)
13206 ? (clock () / (CLOCKS_PER_SEC / 100))
13207 : ((clock () * 100) / CLOCKS_PER_SEC)
13209 /* Presume unix... clock() returns microseconds. */
13213 TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
13216 case AngelSVC_Reason_GetCmdLine:
13218 /* Get the pointer */
13219 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13220 ptr = aarch64_get_mem_u64 (cpu, ptr);
13222 /* FIXME: No command line for now. */
13223 aarch64_set_mem_u64 (cpu, ptr, 0);
13224 TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
13228 case AngelSVC_Reason_IsTTY:
13230 TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
13233 case AngelSVC_Reason_Write:
13235 /* Get the pointer */
13236 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13237 /* Get the write control block. */
13238 uint64_t fd = aarch64_get_mem_u64 (cpu, ptr);
13239 uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
13240 uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);
13242 TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
13243 PRIx64 " on descriptor %" PRIx64,
13248 TRACE_SYSCALL (cpu,
13249 " AngelSVC: Write: Suspiciously long write: %ld",
13251 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13252 sim_stopped, SIM_SIGBUS);
13256 printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
13260 TRACE (cpu, 0, "\n");
13261 sim_io_eprintf (CPU_STATE (cpu), "%.*s",
13262 (int) len, aarch64_get_mem_ptr (cpu, buf));
13263 TRACE (cpu, 0, "\n");
13267 TRACE_SYSCALL (cpu,
13268 " AngelSVC: Write: Unexpected file handle: %d",
13270 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13271 sim_stopped, SIM_SIGABRT);
13276 case AngelSVC_Reason_ReportException:
13278 /* Get the pointer */
13279 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13280 /*ptr = aarch64_get_mem_u64 (cpu, ptr);. */
13281 uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
13282 uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);
13284 TRACE_SYSCALL (cpu,
13285 "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
13288 if (type == 0x20026)
13289 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13290 sim_exited, state);
13292 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13293 sim_stopped, SIM_SIGINT);
13297 case AngelSVC_Reason_Read:
13298 case AngelSVC_Reason_FLen:
13299 case AngelSVC_Reason_Seek:
13300 case AngelSVC_Reason_Remove:
13301 case AngelSVC_Reason_Time:
13302 case AngelSVC_Reason_System:
13303 case AngelSVC_Reason_Rename:
13304 case AngelSVC_Reason_Elapsed:
13306 TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
13307 aarch64_get_reg_u32 (cpu, 0, NO_SP));
13308 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13309 sim_stopped, SIM_SIGTRAP);
13312 aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
13316 dexExcpnGen (sim_cpu *cpu)
13318 /* instr[31:24] = 11010100
13319 instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
13320 010 ==> HLT, 101 ==> DBG GEN EXCPN
13321 instr[20,5] = imm16
13322 instr[4,2] = opc2 000 ==> OK, ow ==> UNALLOC
13323 instr[1,0] = LL : discriminates opc */
13325 uint32_t opc = INSTR (23, 21);
13326 uint32_t imm16 = INSTR (20, 5);
13327 uint32_t opc2 = INSTR (4, 2);
13330 NYI_assert (31, 24, 0xd4);
13337 /* We only implement HLT and BRK for now. */
13338 if (opc == 1 && LL == 0)
13340 TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
13341 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13342 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13345 if (opc == 2 && LL == 0)
13346 handle_halt (cpu, imm16);
13348 else if (opc == 0 || opc == 5)
13355 /* Stub for accessing system registers. */
13358 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13359 unsigned crm, unsigned op2)
13361 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
13362 /* DCZID_EL0 - the Data Cache Zero ID register.
13363 We do not support DC ZVA at the moment, so
13364 we return a value with the disable bit set.
13365 We implement support for the DCZID register since
13366 it is used by the C library's memset function. */
13367 return ((uint64_t) 1) << 4;
13369 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
13370 /* Cache Type Register. */
13371 return 0x80008000UL;
13373 if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
13374 /* TPIDR_EL0 - thread pointer id. */
13375 return aarch64_get_thread_id (cpu);
13377 if (op1 == 3 && crm == 4 && op2 == 0)
13378 return aarch64_get_FPCR (cpu);
13380 if (op1 == 3 && crm == 4 && op2 == 1)
13381 return aarch64_get_FPSR (cpu);
13383 else if (op1 == 3 && crm == 2 && op2 == 0)
13384 return aarch64_get_CPSR (cpu);
13390 system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13391 unsigned crm, unsigned op2, uint64_t val)
13393 if (op1 == 3 && crm == 4 && op2 == 0)
13394 aarch64_set_FPCR (cpu, val);
13396 else if (op1 == 3 && crm == 4 && op2 == 1)
13397 aarch64_set_FPSR (cpu, val);
13399 else if (op1 == 3 && crm == 2 && op2 == 0)
13400 aarch64_set_CPSR (cpu, val);
13407 do_mrs (sim_cpu *cpu)
13409 /* instr[31:20] = 1101 0101 0001 1
13416 unsigned sys_op0 = INSTR (19, 19) + 2;
13417 unsigned sys_op1 = INSTR (18, 16);
13418 unsigned sys_crn = INSTR (15, 12);
13419 unsigned sys_crm = INSTR (11, 8);
13420 unsigned sys_op2 = INSTR (7, 5);
13421 unsigned rt = INSTR (4, 0);
13423 aarch64_set_reg_u64 (cpu, rt, NO_SP,
13424 system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2));
13428 do_MSR_immediate (sim_cpu *cpu)
13430 /* instr[31:19] = 1101 0101 0000 0
13432 instr[15,12] = 0100
13435 instr[4,0] = 1 1111 */
13437 unsigned op1 = INSTR (18, 16);
13438 /*unsigned crm = INSTR (11, 8);*/
13439 unsigned op2 = INSTR (7, 5);
13441 NYI_assert (31, 19, 0x1AA0);
13442 NYI_assert (15, 12, 0x4);
13443 NYI_assert (4, 0, 0x1F);
13448 HALT_NYI; /* set SPSel. */
13455 HALT_NYI; /* set DAIFset. */
13457 HALT_NYI; /* set DAIFclr. */
13466 do_MSR_reg (sim_cpu *cpu)
13468 /* instr[31:20] = 1101 0101 0001
13476 unsigned sys_op0 = INSTR (19, 19) + 2;
13477 unsigned sys_op1 = INSTR (18, 16);
13478 unsigned sys_crn = INSTR (15, 12);
13479 unsigned sys_crm = INSTR (11, 8);
13480 unsigned sys_op2 = INSTR (7, 5);
13481 unsigned rt = INSTR (4, 0);
13483 NYI_assert (31, 20, 0xD51);
13485 system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
13486 aarch64_get_reg_u64 (cpu, rt, NO_SP));
13490 do_SYS (sim_cpu *cpu)
13492 /* instr[31,19] = 1101 0101 0000 1
13498 NYI_assert (31, 19, 0x1AA1);
13500 /* FIXME: For now we just silently accept system ops. */
13504 dexSystem (sim_cpu *cpu)
13506 /* instr[31:22] = 1101 01010 0
13513 instr[4,0] = uimm5 */
13515 /* We are interested in HINT, DSB, DMB and ISB
13517 Hint #0 encodes NOOP (this is the only hint we care about)
13518 L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111,
13519 CRm op2 != 0000 000 OR CRm op2 == 0000 000 || CRm op > 0000 101
13521 DSB, DMB, ISB are data store barrier, data memory barrier and
13522 instruction store barrier, respectively, where
13524 L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
13525 op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
13526 CRm<3:2> ==> domain, CRm<1:0> ==> types,
13527 domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
13528 10 ==> InerShareable, 11 ==> FullSystem
13529 types : 01 ==> Reads, 10 ==> Writes,
13530 11 ==> All, 00 ==> All (domain == FullSystem). */
13532 unsigned rt = INSTR (4, 0);
13534 NYI_assert (31, 22, 0x354);
13536 switch (INSTR (21, 12))
13541 /* NOP has CRm != 0000 OR. */
13542 /* (CRm == 0000 AND (op2 == 000 OR op2 > 101)). */
13543 uint32_t crm = INSTR (11, 8);
13544 uint32_t op2 = INSTR (7, 5);
13546 if (crm != 0 || (op2 == 0 || op2 > 5))
13548 /* Actually call nop method so we can reimplement it later. */
13557 uint32_t op2 = INSTR (7, 5);
13562 case 4: dsb (cpu); return;
13563 case 5: dmb (cpu); return;
13564 case 6: isb (cpu); return;
13565 default: HALT_UNALLOC;
13576 do_SYS (cpu); /* DC is an alias of SYS. */
13580 if (INSTR (21, 20) == 0x1)
13582 else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
13583 do_MSR_immediate (cpu);
13591 dexBr (sim_cpu *cpu)
13593 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13594 assert group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
13595 bits [31,29] of a BrExSys are the secondary dispatch vector. */
13596 uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
13601 return dexBranchImmediate (cpu);
13603 case BR_IMMCMP_001:
13604 /* Compare has bit 25 clear while test has it set. */
13605 if (!INSTR (25, 25))
13606 dexCompareBranchImmediate (cpu);
13608 dexTestBranchImmediate (cpu);
13611 case BR_IMMCOND_010:
13612 /* This is a conditional branch if bit 25 is clear otherwise
13614 if (!INSTR (25, 25))
13615 dexCondBranchImmediate (cpu);
13620 case BR_UNALLOC_011:
13624 dexBranchImmediate (cpu);
13627 case BR_IMMCMP_101:
13628 /* Compare has bit 25 clear while test has it set. */
13629 if (!INSTR (25, 25))
13630 dexCompareBranchImmediate (cpu);
13632 dexTestBranchImmediate (cpu);
13636 /* Unconditional branch reg has bit 25 set. */
13637 if (INSTR (25, 25))
13638 dexBranchRegister (cpu);
13640 /* This includes both Excpn Gen, System and unalloc operations.
13641 We need to decode the Excpn Gen operation BRK so we can plant
13642 debugger entry points.
13643 Excpn Gen operations have instr [24] = 0.
13644 we need to decode at least one of the System operations NOP
13645 which is an alias for HINT #0.
13646 System operations have instr [24,22] = 100. */
13647 else if (INSTR (24, 24) == 0)
13650 else if (INSTR (24, 22) == 4)
13658 case BR_UNALLOC_111:
13662 /* Should never reach here. */
13668 aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
13670 /* We need to check if gdb wants an in here. */
13671 /* checkBreak (cpu);. */
13673 uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
13677 case GROUP_PSEUDO_0000: dexPseudo (cpu); break;
13678 case GROUP_LDST_0100: dexLdSt (cpu); break;
13679 case GROUP_DPREG_0101: dexDPReg (cpu); break;
13680 case GROUP_LDST_0110: dexLdSt (cpu); break;
13681 case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break;
13682 case GROUP_DPIMM_1000: dexDPImm (cpu); break;
13683 case GROUP_DPIMM_1001: dexDPImm (cpu); break;
13684 case GROUP_BREXSYS_1010: dexBr (cpu); break;
13685 case GROUP_BREXSYS_1011: dexBr (cpu); break;
13686 case GROUP_LDST_1100: dexLdSt (cpu); break;
13687 case GROUP_DPREG_1101: dexDPReg (cpu); break;
13688 case GROUP_LDST_1110: dexLdSt (cpu); break;
13689 case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break;
13691 case GROUP_UNALLOC_0001:
13692 case GROUP_UNALLOC_0010:
13693 case GROUP_UNALLOC_0011:
13697 /* Should never reach here. */
13703 aarch64_step (sim_cpu *cpu)
13705 uint64_t pc = aarch64_get_PC (cpu);
13707 if (pc == TOP_LEVEL_RETURN_PC)
13710 aarch64_set_next_PC (cpu, pc + 4);
13711 aarch64_get_instr (cpu) = aarch64_get_mem_u32 (cpu, pc);
13713 TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
13714 aarch64_get_instr (cpu));
13715 TRACE_DISASM (cpu, pc);
13717 aarch64_decode_and_execute (cpu, pc);
13723 aarch64_run (SIM_DESC sd)
13725 sim_cpu *cpu = STATE_CPU (sd, 0);
13727 while (aarch64_step (cpu))
13728 aarch64_update_PC (cpu);
13730 sim_engine_halt (sd, NULL, NULL, aarch64_get_PC (cpu),
13731 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13735 aarch64_init (sim_cpu *cpu, uint64_t pc)
13737 uint64_t sp = aarch64_get_stack_start (cpu);
13739 /* Install SP, FP and PC and set LR to -20
13740 so we can detect a top-level return. */
13741 aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
13742 aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
13743 aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
13744 aarch64_set_next_PC (cpu, pc);
13745 aarch64_update_PC (cpu);
13746 aarch64_init_LIT_table ();