1 /* simulator.c -- Interface for the AArch64 simulator.
3 Copyright (C) 2015-2016 Free Software Foundation, Inc.
5 Contributed by Red Hat.
7 This file is part of GDB.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>. */
26 #include <sys/types.h>
31 #include "simulator.h"
38 #define TST(_flag) (aarch64_test_CPSR_bit (cpu, _flag))
39 #define IS_SET(_X) (TST (( _X )) ? 1 : 0)
40 #define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)
42 /* Space saver macro. */
43 #define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))
45 #define HALT_UNALLOC \
48 TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
50 "Unallocated instruction detected at sim line %d," \
51 " exe addr %" PRIx64, \
52 __LINE__, aarch64_get_PC (cpu)); \
53 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
54 sim_stopped, SIM_SIGILL); \
61 TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
63 "Unimplemented instruction detected at sim line %d," \
64 " exe addr %" PRIx64, \
65 __LINE__, aarch64_get_PC (cpu)); \
66 if (! TRACE_ANY_P (cpu)) \
68 sim_io_eprintf (CPU_STATE (cpu), "SIM Error: Unimplemented instruction: "); \
69 trace_disasm (CPU_STATE (cpu), cpu, aarch64_get_PC (cpu)); \
71 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
72 sim_stopped, SIM_SIGABRT); \
76 #define NYI_assert(HI, LO, EXPECTED) \
79 if (INSTR ((HI), (LO)) != (EXPECTED)) \
84 /* Helper functions used by expandLogicalImmediate. */
86 /* for i = 1, ... N result<i-1> = 1 other bits are zero */
87 static inline uint64_t
90 return (N == 64 ? (uint64_t)-1UL : ((1UL << N) - 1));
93 /* result<0> to val<N> */
94 static inline uint64_t
95 pickbit (uint64_t val, int N)
97 return pickbits64 (val, N, N);
101 expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
107 /* The immediate value is S+1 bits to 1, left rotated by SIMDsize - R
108 (in other words, right rotated by R), then replicated. */
112 mask = 0xffffffffffffffffull;
118 case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32; break;
119 case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
120 case 0x30 ... 0x37: /* 110xxx */ simd_size = 8; S &= 0x7; break;
121 case 0x38 ... 0x3b: /* 1110xx */ simd_size = 4; S &= 0x3; break;
122 case 0x3c ... 0x3d: /* 11110x */ simd_size = 2; S &= 0x1; break;
125 mask = (1ull << simd_size) - 1;
126 /* Top bits are IGNORED. */
130 /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected. */
131 if (S == simd_size - 1)
134 /* S+1 consecutive bits to 1. */
135 /* NOTE: S can't be 63 due to detection above. */
136 imm = (1ull << (S + 1)) - 1;
138 /* Rotate to the left by simd_size - R. */
140 imm = ((imm << (simd_size - R)) & mask) | (imm >> R);
142 /* Replicate the value according to SIMD size. */
145 case 2: imm = (imm << 2) | imm;
146 case 4: imm = (imm << 4) | imm;
147 case 8: imm = (imm << 8) | imm;
148 case 16: imm = (imm << 16) | imm;
149 case 32: imm = (imm << 32) | imm;
157 /* Instr[22,10] encodes N immr and imms. we want a lookup table
158 for each possible combination i.e. 13 bits worth of int entries. */
159 #define LI_TABLE_SIZE (1 << 13)
160 static uint64_t LITable[LI_TABLE_SIZE];
163 aarch64_init_LIT_table (void)
167 for (index = 0; index < LI_TABLE_SIZE; index++)
169 uint32_t N = uimm (index, 12, 12);
170 uint32_t immr = uimm (index, 11, 6);
171 uint32_t imms = uimm (index, 5, 0);
173 LITable [index] = expand_logical_immediate (imms, immr, N);
178 dexNotify (sim_cpu *cpu)
180 /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
181 2 ==> exit Java, 3 ==> start next bytecode. */
182 uint32_t type = INSTR (14, 0);
184 TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);
189 /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
190 aarch64_get_reg_u64 (cpu, R22, 0)); */
193 /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
194 aarch64_get_reg_u64 (cpu, R22, 0)); */
197 /* aarch64_notifyMethodExit (); */
200 /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
201 aarch64_get_reg_u64 (cpu, R22, 0)); */
206 /* secondary decode within top level groups */
209 dexPseudo (sim_cpu *cpu)
211 /* assert instr[28,27] = 00
213 We provide 2 pseudo instructions:
215 HALT stops execution of the simulator causing an immediate
216 return to the x86 code which entered it.
218 CALLOUT initiates recursive entry into x86 code. A register
219 argument holds the address of the x86 routine. Immediate
220 values in the instruction identify the number of general
221 purpose and floating point register arguments to be passed
222 and the type of any value to be returned. */
224 uint32_t PSEUDO_HALT = 0xE0000000U;
225 uint32_t PSEUDO_CALLOUT = 0x00018000U;
226 uint32_t PSEUDO_CALLOUTR = 0x00018001U;
227 uint32_t PSEUDO_NOTIFY = 0x00014000U;
230 if (aarch64_get_instr (cpu) == PSEUDO_HALT)
232 TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
233 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
234 sim_stopped, SIM_SIGTRAP);
237 dispatch = INSTR (31, 15);
239 /* We do not handle callouts at the moment. */
240 if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
242 TRACE_EVENTS (cpu, " Callout");
243 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
244 sim_stopped, SIM_SIGABRT);
247 else if (dispatch == PSEUDO_NOTIFY)
254 /* Load-store single register (unscaled offset)
255 These instructions employ a base register plus an unscaled signed
258 N.B. the base register (source) can be Xn or SP. all other
259 registers may not be SP. */
261 /* 32 bit load 32 bit unscaled signed 9 bit. */
263 ldur32 (sim_cpu *cpu, int32_t offset)
265 unsigned rn = INSTR (9, 5);
266 unsigned rt = INSTR (4, 0);
268 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
269 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
273 /* 64 bit load 64 bit unscaled signed 9 bit. */
275 ldur64 (sim_cpu *cpu, int32_t offset)
277 unsigned rn = INSTR (9, 5);
278 unsigned rt = INSTR (4, 0);
280 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
281 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
285 /* 32 bit load zero-extended byte unscaled signed 9 bit. */
287 ldurb32 (sim_cpu *cpu, int32_t offset)
289 unsigned rn = INSTR (9, 5);
290 unsigned rt = INSTR (4, 0);
292 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
293 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
297 /* 32 bit load sign-extended byte unscaled signed 9 bit. */
299 ldursb32 (sim_cpu *cpu, int32_t offset)
301 unsigned rn = INSTR (9, 5);
302 unsigned rt = INSTR (4, 0);
304 aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
305 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
309 /* 64 bit load sign-extended byte unscaled signed 9 bit. */
311 ldursb64 (sim_cpu *cpu, int32_t offset)
313 unsigned rn = INSTR (9, 5);
314 unsigned rt = INSTR (4, 0);
316 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
317 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
321 /* 32 bit load zero-extended short unscaled signed 9 bit */
323 ldurh32 (sim_cpu *cpu, int32_t offset)
325 unsigned rn = INSTR (9, 5);
326 unsigned rd = INSTR (4, 0);
328 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
329 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
333 /* 32 bit load sign-extended short unscaled signed 9 bit */
335 ldursh32 (sim_cpu *cpu, int32_t offset)
337 unsigned rn = INSTR (9, 5);
338 unsigned rd = INSTR (4, 0);
340 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
341 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
345 /* 64 bit load sign-extended short unscaled signed 9 bit */
347 ldursh64 (sim_cpu *cpu, int32_t offset)
349 unsigned rn = INSTR (9, 5);
350 unsigned rt = INSTR (4, 0);
352 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
353 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
357 /* 64 bit load sign-extended word unscaled signed 9 bit */
359 ldursw (sim_cpu *cpu, int32_t offset)
361 unsigned rn = INSTR (9, 5);
362 unsigned rd = INSTR (4, 0);
364 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s32
365 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
369 /* N.B. with stores the value in source is written to the address
370 identified by source2 modified by offset. */
372 /* 32 bit store 32 bit unscaled signed 9 bit. */
374 stur32 (sim_cpu *cpu, int32_t offset)
376 unsigned rn = INSTR (9, 5);
377 unsigned rd = INSTR (4, 0);
379 aarch64_set_mem_u32 (cpu,
380 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
381 aarch64_get_reg_u32 (cpu, rd, NO_SP));
384 /* 64 bit store 64 bit unscaled signed 9 bit */
386 stur64 (sim_cpu *cpu, int32_t offset)
388 unsigned rn = INSTR (9, 5);
389 unsigned rd = INSTR (4, 0);
391 aarch64_set_mem_u64 (cpu,
392 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
393 aarch64_get_reg_u64 (cpu, rd, NO_SP));
396 /* 32 bit store byte unscaled signed 9 bit */
398 sturb (sim_cpu *cpu, int32_t offset)
400 unsigned rn = INSTR (9, 5);
401 unsigned rd = INSTR (4, 0);
403 aarch64_set_mem_u8 (cpu,
404 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
405 aarch64_get_reg_u8 (cpu, rd, NO_SP));
408 /* 32 bit store short unscaled signed 9 bit */
410 sturh (sim_cpu *cpu, int32_t offset)
412 unsigned rn = INSTR (9, 5);
413 unsigned rd = INSTR (4, 0);
415 aarch64_set_mem_u16 (cpu,
416 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
417 aarch64_get_reg_u16 (cpu, rd, NO_SP));
420 /* Load single register pc-relative label
421 Offset is a signed 19 bit immediate count in words
424 /* 32 bit pc-relative load */
426 ldr32_pcrel (sim_cpu *cpu, int32_t offset)
428 unsigned rd = INSTR (4, 0);
430 aarch64_set_reg_u64 (cpu, rd, NO_SP,
432 (cpu, aarch64_get_PC (cpu) + offset * 4));
435 /* 64 bit pc-relative load */
437 ldr_pcrel (sim_cpu *cpu, int32_t offset)
439 unsigned rd = INSTR (4, 0);
441 aarch64_set_reg_u64 (cpu, rd, NO_SP,
443 (cpu, aarch64_get_PC (cpu) + offset * 4));
446 /* sign extended 32 bit pc-relative load */
448 ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
450 unsigned rd = INSTR (4, 0);
452 aarch64_set_reg_u64 (cpu, rd, NO_SP,
454 (cpu, aarch64_get_PC (cpu) + offset * 4));
457 /* float pc-relative load */
459 fldrs_pcrel (sim_cpu *cpu, int32_t offset)
461 unsigned int rd = INSTR (4, 0);
463 aarch64_set_vec_u32 (cpu, rd, 0,
465 (cpu, aarch64_get_PC (cpu) + offset * 4));
468 /* double pc-relative load */
470 fldrd_pcrel (sim_cpu *cpu, int32_t offset)
472 unsigned int st = INSTR (4, 0);
474 aarch64_set_vec_u64 (cpu, st, 0,
476 (cpu, aarch64_get_PC (cpu) + offset * 4));
479 /* long double pc-relative load. */
481 fldrq_pcrel (sim_cpu *cpu, int32_t offset)
483 unsigned int st = INSTR (4, 0);
484 uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
487 aarch64_get_mem_long_double (cpu, addr, & a);
488 aarch64_set_FP_long_double (cpu, st, a);
491 /* This can be used to scale an offset by applying
492 the requisite shift. the second argument is either
495 #define SCALE(_offset, _elementSize) \
496 ((_offset) << ScaleShift ## _elementSize)
498 /* This can be used to optionally scale a register derived offset
499 by applying the requisite shift as indicated by the Scaling
500 argument. the second argument is either Byte, Short, Word
501 or Long. The third argument is either Scaled or Unscaled.
502 N.B. when _Scaling is Scaled the shift gets ANDed with
503 all 1s while when it is Unscaled it gets ANDed with 0. */
505 #define OPT_SCALE(_offset, _elementType, _Scaling) \
506 ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))
508 /* This can be used to zero or sign extend a 32 bit register derived
509 value to a 64 bit value. the first argument must be the value as
510 a uint32_t and the second must be either UXTW or SXTW. The result
511 is returned as an int64_t. */
513 static inline int64_t
514 extend (uint32_t value, Extension extension)
522 /* A branchless variant of this ought to be possible. */
523 if (extension == UXTW || extension == NoExtension)
530 /* Scalar Floating Point
532 FP load/store single register (4 addressing modes)
534 N.B. the base register (source) can be the stack pointer.
535 The secondary source register (source2) can only be an Xn register. */
537 /* Load 32 bit unscaled signed 9 bit with pre- or post-writeback. */
539 fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
541 unsigned rn = INSTR (9, 5);
542 unsigned st = INSTR (4, 0);
543 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
548 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));
552 if (wb != NoWriteBack)
553 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
556 /* Load 8 bit with unsigned 12 bit offset. */
558 fldrb_abs (sim_cpu *cpu, uint32_t offset)
560 unsigned rd = INSTR (4, 0);
561 unsigned rn = INSTR (9, 5);
562 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
564 aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
567 /* Load 16 bit scaled unsigned 12 bit. */
569 fldrh_abs (sim_cpu *cpu, uint32_t offset)
571 unsigned rd = INSTR (4, 0);
572 unsigned rn = INSTR (9, 5);
573 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);
575 aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
578 /* Load 32 bit scaled unsigned 12 bit. */
580 fldrs_abs (sim_cpu *cpu, uint32_t offset)
582 unsigned rd = INSTR (4, 0);
583 unsigned rn = INSTR (9, 5);
584 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);
586 aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
589 /* Load 64 bit scaled unsigned 12 bit. */
591 fldrd_abs (sim_cpu *cpu, uint32_t offset)
593 unsigned rd = INSTR (4, 0);
594 unsigned rn = INSTR (9, 5);
595 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);
597 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
600 /* Load 128 bit scaled unsigned 12 bit. */
602 fldrq_abs (sim_cpu *cpu, uint32_t offset)
604 unsigned rd = INSTR (4, 0);
605 unsigned rn = INSTR (9, 5);
606 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
608 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
609 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
612 /* Load 32 bit scaled or unscaled zero- or sign-extended
613 32-bit register offset. */
615 fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
617 unsigned rm = INSTR (20, 16);
618 unsigned rn = INSTR (9, 5);
619 unsigned st = INSTR (4, 0);
620 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
621 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
622 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
624 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
625 (cpu, address + displacement));
628 /* Load 64 bit unscaled signed 9 bit with pre- or post-writeback. */
630 fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
632 unsigned rn = INSTR (9, 5);
633 unsigned st = INSTR (4, 0);
634 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
639 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));
644 if (wb != NoWriteBack)
645 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
648 /* Load 64 bit scaled or unscaled zero- or sign-extended 32-bit register offset. */
650 fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
652 unsigned rm = INSTR (20, 16);
653 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
654 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
656 fldrd_wb (cpu, displacement, NoWriteBack);
659 /* Load 128 bit unscaled signed 9 bit with pre- or post-writeback. */
661 fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
664 unsigned rn = INSTR (9, 5);
665 unsigned st = INSTR (4, 0);
666 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
671 aarch64_get_mem_long_double (cpu, address, & a);
672 aarch64_set_FP_long_double (cpu, st, a);
677 if (wb != NoWriteBack)
678 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
681 /* Load 128 bit scaled or unscaled zero- or sign-extended 32-bit register offset */
683 fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
685 unsigned rm = INSTR (20, 16);
686 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
687 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
689 fldrq_wb (cpu, displacement, NoWriteBack);
694 load-store single register
695 There are four addressing modes available here which all employ a
696 64 bit source (base) register.
698 N.B. the base register (source) can be the stack pointer.
699 The secondary source register (source2)can only be an Xn register.
701 Scaled, 12-bit, unsigned immediate offset, without pre- and
703 Unscaled, 9-bit, signed immediate offset with pre- or post-index
705 scaled or unscaled 64-bit register offset.
706 scaled or unscaled 32-bit extended register offset.
708 All offsets are assumed to be raw from the decode i.e. the
709 simulator is expected to adjust scaled offsets based on the
710 accessed data size with register or extended register offset
711 versions the same applies except that in the latter case the
712 operation may also require a sign extend.
714 A separate method is provided for each possible addressing mode. */
716 /* 32 bit load 32 bit scaled unsigned 12 bit */
718 ldr32_abs (sim_cpu *cpu, uint32_t offset)
720 unsigned rn = INSTR (9, 5);
721 unsigned rt = INSTR (4, 0);
723 /* The target register may not be SP but the source may be. */
724 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
725 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
726 + SCALE (offset, 32)));
729 /* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback. */
731 ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
733 unsigned rn = INSTR (9, 5);
734 unsigned rt = INSTR (4, 0);
737 if (rn == rt && wb != NoWriteBack)
740 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
745 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
750 if (wb != NoWriteBack)
751 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
754 /* 32 bit load 32 bit scaled or unscaled
755 zero- or sign-extended 32-bit register offset */
757 ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
759 unsigned rm = INSTR (20, 16);
760 unsigned rn = INSTR (9, 5);
761 unsigned rt = INSTR (4, 0);
762 /* rn may reference SP, rm and rt must reference ZR */
764 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
765 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
766 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
768 aarch64_set_reg_u64 (cpu, rt, NO_SP,
769 aarch64_get_mem_u32 (cpu, address + displacement));
772 /* 64 bit load 64 bit scaled unsigned 12 bit */
774 ldr_abs (sim_cpu *cpu, uint32_t offset)
776 unsigned rn = INSTR (9, 5);
777 unsigned rt = INSTR (4, 0);
779 /* The target register may not be SP but the source may be. */
780 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
781 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
782 + SCALE (offset, 64)));
785 /* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback. */
787 ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
789 unsigned rn = INSTR (9, 5);
790 unsigned rt = INSTR (4, 0);
793 if (rn == rt && wb != NoWriteBack)
796 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
801 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
806 if (wb != NoWriteBack)
807 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
810 /* 64 bit load 64 bit scaled or unscaled zero-
811 or sign-extended 32-bit register offset. */
813 ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
815 unsigned rm = INSTR (20, 16);
816 unsigned rn = INSTR (9, 5);
817 unsigned rt = INSTR (4, 0);
818 /* rn may reference SP, rm and rt must reference ZR */
820 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
821 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
822 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
824 aarch64_set_reg_u64 (cpu, rt, NO_SP,
825 aarch64_get_mem_u64 (cpu, address + displacement));
828 /* 32 bit load zero-extended byte scaled unsigned 12 bit. */
830 ldrb32_abs (sim_cpu *cpu, uint32_t offset)
832 unsigned rn = INSTR (9, 5);
833 unsigned rt = INSTR (4, 0);
835 /* The target register may not be SP but the source may be
836 there is no scaling required for a byte load. */
837 aarch64_set_reg_u64 (cpu, rt, NO_SP,
839 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
842 /* 32 bit load zero-extended byte unscaled signed 9 bit with pre- or post-writeback. */
844 ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
846 unsigned rn = INSTR (9, 5);
847 unsigned rt = INSTR (4, 0);
850 if (rn == rt && wb != NoWriteBack)
853 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
858 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
863 if (wb != NoWriteBack)
864 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
867 /* 32 bit load zero-extended byte scaled or unscaled zero-
868 or sign-extended 32-bit register offset. */
870 ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
872 unsigned rm = INSTR (20, 16);
873 unsigned rn = INSTR (9, 5);
874 unsigned rt = INSTR (4, 0);
875 /* rn may reference SP, rm and rt must reference ZR */
877 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
878 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
881 /* There is no scaling required for a byte load. */
882 aarch64_set_reg_u64 (cpu, rt, NO_SP,
883 aarch64_get_mem_u8 (cpu, address + displacement));
886 /* 64 bit load sign-extended byte unscaled signed 9 bit
887 with pre- or post-writeback. */
889 ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
891 unsigned rn = INSTR (9, 5);
892 unsigned rt = INSTR (4, 0);
895 if (rn == rt && wb != NoWriteBack)
898 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
903 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_s8 (cpu, address));
908 if (wb != NoWriteBack)
909 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
912 /* 64 bit load sign-extended byte scaled unsigned 12 bit. */
914 ldrsb_abs (sim_cpu *cpu, uint32_t offset)
916 ldrsb_wb (cpu, offset, NoWriteBack);
919 /* 64 bit load sign-extended byte scaled or unscaled zero-
920 or sign-extended 32-bit register offset. */
922 ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
924 unsigned rm = INSTR (20, 16);
925 unsigned rn = INSTR (9, 5);
926 unsigned rt = INSTR (4, 0);
927 /* rn may reference SP, rm and rt must reference ZR */
929 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
930 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
932 /* There is no scaling required for a byte load. */
933 aarch64_set_reg_u64 (cpu, rt, NO_SP,
934 aarch64_get_mem_s8 (cpu, address + displacement));
937 /* 32 bit load zero-extended short scaled unsigned 12 bit. */
939 ldrh32_abs (sim_cpu *cpu, uint32_t offset)
941 unsigned rn = INSTR (9, 5);
942 unsigned rt = INSTR (4, 0);
944 /* The target register may not be SP but the source may be. */
945 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16
946 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
947 + SCALE (offset, 16)));
950 /* 32 bit load zero-extended short unscaled signed 9 bit
951 with pre- or post-writeback. */
953 ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
955 unsigned rn = INSTR (9, 5);
956 unsigned rt = INSTR (4, 0);
959 if (rn == rt && wb != NoWriteBack)
962 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
967 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
972 if (wb != NoWriteBack)
973 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
976 /* 32 bit load zero-extended short scaled or unscaled zero-
977 or sign-extended 32-bit register offset. */
979 ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
981 unsigned rm = INSTR (20, 16);
982 unsigned rn = INSTR (9, 5);
983 unsigned rt = INSTR (4, 0);
984 /* rn may reference SP, rm and rt must reference ZR */
986 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
987 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
988 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
990 aarch64_set_reg_u64 (cpu, rt, NO_SP,
991 aarch64_get_mem_u16 (cpu, address + displacement));
994 /* 32 bit load sign-extended short scaled unsigned 12 bit. */
996 ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
998 unsigned rn = INSTR (9, 5);
999 unsigned rt = INSTR (4, 0);
1001 /* The target register may not be SP but the source may be. */
1002 aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s16
1004 aarch64_get_reg_u64 (cpu, rn, SP_OK)
1005 + SCALE (offset, 16)));
1008 /* 32 bit load sign-extended short unscaled signed 9 bit
1009 with pre- or post-writeback. */
1011 ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1013 unsigned rn = INSTR (9, 5);
1014 unsigned rt = INSTR (4, 0);
1017 if (rn == rt && wb != NoWriteBack)
1020 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1025 aarch64_set_reg_u64 (cpu, rt, NO_SP,
1026 (uint32_t) aarch64_get_mem_s16 (cpu, address));
1031 if (wb != NoWriteBack)
1032 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1035 /* 32 bit load sign-extended short scaled or unscaled zero-
1036 or sign-extended 32-bit register offset. */
1038 ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1040 unsigned rm = INSTR (20, 16);
1041 unsigned rn = INSTR (9, 5);
1042 unsigned rt = INSTR (4, 0);
1043 /* rn may reference SP, rm and rt must reference ZR */
1045 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1046 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1047 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1049 aarch64_set_reg_u64 (cpu, rt, NO_SP,
1050 (uint32_t) aarch64_get_mem_s16
1051 (cpu, address + displacement));
1054 /* 64 bit load sign-extended short scaled unsigned 12 bit. */
1056 ldrsh_abs (sim_cpu *cpu, uint32_t offset)
1058 unsigned rn = INSTR (9, 5);
1059 unsigned rt = INSTR (4, 0);
1061 /* The target register may not be SP but the source may be. */
1062 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_s16
1063 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1064 + SCALE (offset, 16)));
1067 /* 64 bit load sign-extended short unscaled signed 9 bit
1068 with pre- or post-writeback. */
1070 ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1072 unsigned rn = INSTR (9, 5);
1073 unsigned rt = INSTR (4, 0);
1076 if (rn == rt && wb != NoWriteBack)
1079 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1084 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_s16 (cpu, address));
1089 if (wb != NoWriteBack)
1090 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1093 /* 64 bit load sign-extended short scaled or unscaled zero-
1094 or sign-extended 32-bit register offset. */
1096 ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1098 unsigned rm = INSTR (20, 16);
1099 unsigned rn = INSTR (9, 5);
1100 unsigned rt = INSTR (4, 0);
1101 /* rn may reference SP, rm and rt must reference ZR */
1103 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1104 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1105 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1107 aarch64_set_reg_u64 (cpu, rt, NO_SP,
1108 aarch64_get_mem_s16 (cpu, address + displacement));
1111 /* 64 bit load sign-extended 32 bit scaled unsigned 12 bit. */
1113 ldrsw_abs (sim_cpu *cpu, uint32_t offset)
1115 unsigned rn = INSTR (9, 5);
1116 unsigned rt = INSTR (4, 0);
1118 /* The target register may not be SP but the source may be. */
1119 return aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32
1120 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1121 + SCALE (offset, 32)));
1124 /* 64 bit load sign-extended 32 bit unscaled signed 9 bit
1125 with pre- or post-writeback. */
1127 ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1129 unsigned rn = INSTR (9, 5);
1130 unsigned rt = INSTR (4, 0);
1133 if (rn == rt && wb != NoWriteBack)
1136 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1141 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));
1146 if (wb != NoWriteBack)
1147 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1150 /* 64 bit load sign-extended 32 bit scaled or unscaled zero-
1151 or sign-extended 32-bit register offset. */
1153 ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1155 unsigned rm = INSTR (20, 16);
1156 unsigned rn = INSTR (9, 5);
1157 unsigned rt = INSTR (4, 0);
1158 /* rn may reference SP, rm and rt must reference ZR */
1160 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1161 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1162 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
1164 aarch64_set_reg_s64 (cpu, rt, NO_SP,
1165 aarch64_get_mem_s32 (cpu, address + displacement));
1168 /* N.B. with stores the value in source is written to the
1169 address identified by source2 modified by source3/offset. */
1171 /* 32 bit store scaled unsigned 12 bit. */
1173 str32_abs (sim_cpu *cpu, uint32_t offset)
1175 unsigned rn = INSTR (9, 5);
1176 unsigned rt = INSTR (4, 0);
1178 /* The target register may not be SP but the source may be. */
1179 aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
1180 + SCALE (offset, 32)),
1181 aarch64_get_reg_u32 (cpu, rt, NO_SP));
1184 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
1186 str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1188 unsigned rn = INSTR (9, 5);
1189 unsigned rt = INSTR (4, 0);
1192 if (rn == rt && wb != NoWriteBack)
1195 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1199 aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));
1204 if (wb != NoWriteBack)
1205 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1208 /* 32 bit store scaled or unscaled zero- or
1209 sign-extended 32-bit register offset. */
1211 str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1213 unsigned rm = INSTR (20, 16);
1214 unsigned rn = INSTR (9, 5);
1215 unsigned rt = INSTR (4, 0);
1217 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1218 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1219 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
1221 aarch64_set_mem_u32 (cpu, address + displacement,
1222 aarch64_get_reg_u64 (cpu, rt, NO_SP));
1225 /* 64 bit store scaled unsigned 12 bit. */
1227 str_abs (sim_cpu *cpu, uint32_t offset)
1229 unsigned rn = INSTR (9, 5);
1230 unsigned rt = INSTR (4, 0);
1232 aarch64_set_mem_u64 (cpu,
1233 aarch64_get_reg_u64 (cpu, rn, SP_OK)
1234 + SCALE (offset, 64),
1235 aarch64_get_reg_u64 (cpu, rt, NO_SP));
1238 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
1240 str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1242 unsigned rn = INSTR (9, 5);
1243 unsigned rt = INSTR (4, 0);
1246 if (rn == rt && wb != NoWriteBack)
1249 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1254 aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));
1259 if (wb != NoWriteBack)
1260 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1263 /* 64 bit store scaled or unscaled zero-
1264 or sign-extended 32-bit register offset. */
1266 str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1268 unsigned rm = INSTR (20, 16);
1269 unsigned rn = INSTR (9, 5);
1270 unsigned rt = INSTR (4, 0);
1271 /* rn may reference SP, rm and rt must reference ZR */
1273 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1274 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1276 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
1278 aarch64_set_mem_u64 (cpu, address + displacement,
1279 aarch64_get_reg_u64 (cpu, rt, NO_SP));
1282 /* 32 bit store byte scaled unsigned 12 bit. */
1284 strb_abs (sim_cpu *cpu, uint32_t offset)
1286 unsigned rn = INSTR (9, 5);
1287 unsigned rt = INSTR (4, 0);
1289 /* The target register may not be SP but the source may be.
1290 There is no scaling required for a byte load. */
1291 aarch64_set_mem_u8 (cpu,
1292 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
1293 aarch64_get_reg_u8 (cpu, rt, NO_SP));
1296 /* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback. */
1298 strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1300 unsigned rn = INSTR (9, 5);
1301 unsigned rt = INSTR (4, 0);
1304 if (rn == rt && wb != NoWriteBack)
1307 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1312 aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));
1317 if (wb != NoWriteBack)
1318 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1321 /* 32 bit store byte scaled or unscaled zero-
1322 or sign-extended 32-bit register offset. */
1324 strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1326 unsigned rm = INSTR (20, 16);
1327 unsigned rn = INSTR (9, 5);
1328 unsigned rt = INSTR (4, 0);
1329 /* rn may reference SP, rm and rt must reference ZR */
1331 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1332 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1335 /* There is no scaling required for a byte load. */
1336 aarch64_set_mem_u8 (cpu, address + displacement,
1337 aarch64_get_reg_u8 (cpu, rt, NO_SP));
1340 /* 32 bit store short scaled unsigned 12 bit. */
1342 strh_abs (sim_cpu *cpu, uint32_t offset)
1344 unsigned rn = INSTR (9, 5);
1345 unsigned rt = INSTR (4, 0);
1347 /* The target register may not be SP but the source may be. */
1348 aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1349 + SCALE (offset, 16),
1350 aarch64_get_reg_u16 (cpu, rt, NO_SP));
1353 /* 32 bit store short unscaled signed 9 bit with pre- or post-writeback. */
1355 strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1357 unsigned rn = INSTR (9, 5);
1358 unsigned rt = INSTR (4, 0);
1361 if (rn == rt && wb != NoWriteBack)
1364 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1369 aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));
1374 if (wb != NoWriteBack)
1375 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1378 /* 32 bit store short scaled or unscaled zero-
1379 or sign-extended 32-bit register offset. */
1381 strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1383 unsigned rm = INSTR (20, 16);
1384 unsigned rn = INSTR (9, 5);
1385 unsigned rt = INSTR (4, 0);
1386 /* rn may reference SP, rm and rt must reference ZR */
1388 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1389 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1390 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1392 aarch64_set_mem_u16 (cpu, address + displacement,
1393 aarch64_get_reg_u16 (cpu, rt, NO_SP));
1396 /* Prefetch unsigned 12 bit. */
1398 prfm_abs (sim_cpu *cpu, uint32_t offset)
1400 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1401 00010 ==> PLDL2KEEP, 00001 ==> PLDL2STRM,
1402 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1403 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1404 10010 ==> PSTL2KEEP, 10001 ==> PSTL2STRM,
1405 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1407 PrfOp prfop = prfop (instr, 4, 0);
1408 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
1409 + SCALE (offset, 64). */
1411 /* TODO : implement prefetch of address. */
1414 /* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset. */
1416 prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1418 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1419 00010 ==> PLDL2KEEP, 00001 ==> PLDL2STRM,
1420 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1421 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1422 10010 ==> PSTL2KEEP, 10001 ==> PSTL2STRM,
1423 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1425 rn may reference SP, rm may only reference ZR
1426 PrfOp prfop = prfop (instr, 4, 0);
1427 uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1428 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1430 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
1431 uint64_t address = base + displacement. */
1433 /* TODO : implement prefetch of address */
1436 /* 64 bit pc-relative prefetch. */
1438 prfm_pcrel (sim_cpu *cpu, int32_t offset)
1440 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1441 00010 ==> PLDL2KEEP, 00001 ==> PLDL2STRM,
1442 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1443 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1444 10010 ==> PSTL2KEEP, 10001 ==> PSTL2STRM,
1445 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1447 PrfOp prfop = prfop (instr, 4, 0);
1448 uint64_t address = aarch64_get_PC (cpu) + offset. */
1450 /* TODO : implement this */
1453 /* Load-store exclusive. */
1458 unsigned rn = INSTR (9, 5);
1459 unsigned rt = INSTR (4, 0);
1460 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1461 int size = INSTR (31, 30);
1462 /* int ordered = INSTR (15, 15); */
1463 /* int exclusive = ! INSTR (23, 23); */
1468 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
1471 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
1474 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
1477 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
1485 unsigned rn = INSTR (9, 5);
1486 unsigned rt = INSTR (4, 0);
1487 unsigned rs = INSTR (20, 16);
1488 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1489 int size = INSTR (31, 30);
1490 uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);
1494 case 0: aarch64_set_mem_u8 (cpu, address, data); break;
1495 case 1: aarch64_set_mem_u16 (cpu, address, data); break;
1496 case 2: aarch64_set_mem_u32 (cpu, address, data); break;
1497 case 3: aarch64_set_mem_u64 (cpu, address, data); break;
1500 aarch64_set_reg_u64 (cpu, rs, NO_SP, 0); /* Always exclusive... */
1504 dexLoadLiteral (sim_cpu *cpu)
1506 /* instr[29,27] == 011
1508 instr[31,30:26] = opc: 000 ==> LDRW, 001 ==> FLDRS
1509 010 ==> LDRX, 011 ==> FLDRD
1510 100 ==> LDRSW, 101 ==> FLDRQ
1511 110 ==> PRFM, 111 ==> UNALLOC
1512 instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
1513 instr[23, 5] == simm19 */
1515 /* unsigned rt = INSTR (4, 0); */
1516 uint32_t dispatch = ( (INSTR (31, 30) << 1)
1518 int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);
1522 case 0: ldr32_pcrel (cpu, imm); break;
1523 case 1: fldrs_pcrel (cpu, imm); break;
1524 case 2: ldr_pcrel (cpu, imm); break;
1525 case 3: fldrd_pcrel (cpu, imm); break;
1526 case 4: ldrsw_pcrel (cpu, imm); break;
1527 case 5: fldrq_pcrel (cpu, imm); break;
1528 case 6: prfm_pcrel (cpu, imm); break;
1535 /* Immediate arithmetic
1536 The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
1537 value left shifted by 12 bits (done at decode).
1539 N.B. the register args (dest, source) can normally be Xn or SP.
1540 the exception occurs for flag setting instructions which may
1541 only use Xn for the output (dest). */
1543 /* 32 bit add immediate. */
1545 add32 (sim_cpu *cpu, uint32_t aimm)
1547 unsigned rn = INSTR (9, 5);
1548 unsigned rd = INSTR (4, 0);
1550 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1551 aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
1554 /* 64 bit add immediate. */
1556 add64 (sim_cpu *cpu, uint32_t aimm)
1558 unsigned rn = INSTR (9, 5);
1559 unsigned rd = INSTR (4, 0);
1561 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1562 aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
1566 set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
1568 int32_t result = value1 + value2;
1569 int64_t sresult = (int64_t) value1 + (int64_t) value2;
1570 uint64_t uresult = (uint64_t)(uint32_t) value1
1571 + (uint64_t)(uint32_t) value2;
1577 if (result & (1 << 31))
1580 if (uresult != result)
1583 if (sresult != result)
1586 aarch64_set_CPSR (cpu, flags);
1590 set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1592 int64_t sval1 = value1;
1593 int64_t sval2 = value2;
1594 uint64_t result = value1 + value2;
1595 int64_t sresult = sval1 + sval2;
1601 if (result & (1ULL << 63))
1608 /* Negative plus a negative. Overflow happens if
1609 the result is greater than either of the operands. */
1610 if (sresult > sval1 || sresult > sval2)
1613 /* else Negative plus a positive. Overflow cannot happen. */
1615 else /* value1 is +ve. */
1619 /* Overflow can only occur if we computed "0 - MININT". */
1620 if (sval1 == 0 && sval2 == (1LL << 63))
1625 /* Postive plus positive - overflow has happened if the
1626 result is smaller than either of the operands. */
1627 if (result < value1 || result < value2)
1632 aarch64_set_CPSR (cpu, flags);
1635 #define NEG(a) (((a) & signbit) == signbit)
1636 #define POS(a) (((a) & signbit) == 0)
1639 set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
1641 uint32_t result = value1 - value2;
1643 uint32_t signbit = 1U << 31;
1651 if ( (NEG (value1) && POS (value2))
1652 || (NEG (value1) && POS (result))
1653 || (POS (value2) && POS (result)))
1656 if ( (NEG (value1) && POS (value2) && POS (result))
1657 || (POS (value1) && NEG (value2) && NEG (result)))
1660 aarch64_set_CPSR (cpu, flags);
1664 set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1666 uint64_t result = value1 - value2;
1668 uint64_t signbit = 1ULL << 63;
1676 if ( (NEG (value1) && POS (value2))
1677 || (NEG (value1) && POS (result))
1678 || (POS (value2) && POS (result)))
1681 if ( (NEG (value1) && POS (value2) && POS (result))
1682 || (POS (value1) && NEG (value2) && NEG (result)))
1685 aarch64_set_CPSR (cpu, flags);
1689 set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
1698 if (result & (1 << 31))
1703 aarch64_set_CPSR (cpu, flags);
1707 set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
1716 if (result & (1ULL << 63))
1721 aarch64_set_CPSR (cpu, flags);
1724 /* 32 bit add immediate set flags. */
1726 adds32 (sim_cpu *cpu, uint32_t aimm)
1728 unsigned rn = INSTR (9, 5);
1729 unsigned rd = INSTR (4, 0);
1730 /* TODO : do we need to worry about signs here? */
1731 int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);
1733 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
1734 set_flags_for_add32 (cpu, value1, aimm);
1737 /* 64 bit add immediate set flags. */
1739 adds64 (sim_cpu *cpu, uint32_t aimm)
1741 unsigned rn = INSTR (9, 5);
1742 unsigned rd = INSTR (4, 0);
1743 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1744 uint64_t value2 = aimm;
1746 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1747 set_flags_for_add64 (cpu, value1, value2);
1750 /* 32 bit sub immediate. */
1752 sub32 (sim_cpu *cpu, uint32_t aimm)
1754 unsigned rn = INSTR (9, 5);
1755 unsigned rd = INSTR (4, 0);
1757 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1758 aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
1761 /* 64 bit sub immediate. */
1763 sub64 (sim_cpu *cpu, uint32_t aimm)
1765 unsigned rn = INSTR (9, 5);
1766 unsigned rd = INSTR (4, 0);
1768 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1769 aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
1772 /* 32 bit sub immediate set flags. */
1774 subs32 (sim_cpu *cpu, uint32_t aimm)
1776 unsigned rn = INSTR (9, 5);
1777 unsigned rd = INSTR (4, 0);
1778 uint32_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1779 uint32_t value2 = aimm;
1781 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1782 set_flags_for_sub32 (cpu, value1, value2);
1785 /* 64 bit sub immediate set flags. */
1787 subs64 (sim_cpu *cpu, uint32_t aimm)
1789 unsigned rn = INSTR (9, 5);
1790 unsigned rd = INSTR (4, 0);
1791 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1792 uint32_t value2 = aimm;
1794 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1795 set_flags_for_sub64 (cpu, value1, value2);
1798 /* Data Processing Register. */
1800 /* First two helpers to perform the shift operations. */
1802 static inline uint32_t
1803 shifted32 (uint32_t value, Shift shift, uint32_t count)
1809 return (value << count);
1811 return (value >> count);
1814 int32_t svalue = value;
1815 return (svalue >> count);
1819 uint32_t top = value >> count;
1820 uint32_t bottom = value << (32 - count);
1821 return (bottom | top);
1826 static inline uint64_t
1827 shifted64 (uint64_t value, Shift shift, uint32_t count)
1833 return (value << count);
1835 return (value >> count);
1838 int64_t svalue = value;
1839 return (svalue >> count);
1843 uint64_t top = value >> count;
1844 uint64_t bottom = value << (64 - count);
1845 return (bottom | top);
1850 /* Arithmetic shifted register.
1851 These allow an optional LSL, ASR or LSR to the second source
1852 register with a count up to the register bit count.
1854 N.B register args may not be SP. */
1856 /* 32 bit ADD shifted register. */
1858 add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1860 unsigned rm = INSTR (20, 16);
1861 unsigned rn = INSTR (9, 5);
1862 unsigned rd = INSTR (4, 0);
1864 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1865 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1866 + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1870 /* 64 bit ADD shifted register. */
1872 add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1874 unsigned rm = INSTR (20, 16);
1875 unsigned rn = INSTR (9, 5);
1876 unsigned rd = INSTR (4, 0);
1878 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1879 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1880 + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1884 /* 32 bit ADD shifted register setting flags. */
1886 adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1888 unsigned rm = INSTR (20, 16);
1889 unsigned rn = INSTR (9, 5);
1890 unsigned rd = INSTR (4, 0);
1892 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1893 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1896 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1897 set_flags_for_add32 (cpu, value1, value2);
1900 /* 64 bit ADD shifted register setting flags. */
1902 adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1904 unsigned rm = INSTR (20, 16);
1905 unsigned rn = INSTR (9, 5);
1906 unsigned rd = INSTR (4, 0);
1908 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1909 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1912 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1913 set_flags_for_add64 (cpu, value1, value2);
1916 /* 32 bit SUB shifted register. */
1918 sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1920 unsigned rm = INSTR (20, 16);
1921 unsigned rn = INSTR (9, 5);
1922 unsigned rd = INSTR (4, 0);
1924 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1925 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1926 - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1930 /* 64 bit SUB shifted register. */
1932 sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1934 unsigned rm = INSTR (20, 16);
1935 unsigned rn = INSTR (9, 5);
1936 unsigned rd = INSTR (4, 0);
1938 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1939 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1940 - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1944 /* 32 bit SUB shifted register setting flags. */
1946 subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1948 unsigned rm = INSTR (20, 16);
1949 unsigned rn = INSTR (9, 5);
1950 unsigned rd = INSTR (4, 0);
1952 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1953 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1956 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1957 set_flags_for_sub32 (cpu, value1, value2);
1960 /* 64 bit SUB shifted register setting flags. */
1962 subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1964 unsigned rm = INSTR (20, 16);
1965 unsigned rn = INSTR (9, 5);
1966 unsigned rd = INSTR (4, 0);
1968 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1969 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1972 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1973 set_flags_for_sub64 (cpu, value1, value2);
1976 /* First a couple more helpers to fetch the
1977 relevant source register element either
1978 sign or zero extended as required by the
1982 extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
1986 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
1987 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
1988 case UXTW: /* Fall through. */
1989 case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
1990 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
1991 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
1992 case SXTW: /* Fall through. */
1993 case SXTX: /* Fall through. */
1994 default: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
1999 extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
2003 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2004 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2005 case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2006 case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
2007 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2008 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2009 case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2011 default: return aarch64_get_reg_s64 (cpu, lo, NO_SP);
2015 /* Arithmetic extending register
2016 These allow an optional sign extension of some portion of the
2017 second source register followed by an optional left shift of
2018 between 1 and 4 bits (i.e. a shift of 0-4 bits???)
2020 N.B output (dest) and first input arg (source) may normally be Xn
2021 or SP. However, for flag setting operations dest can only be
2022 Xn. Second input registers are always Xn. */
2024 /* 32 bit ADD extending register. */
2026 add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2028 unsigned rm = INSTR (20, 16);
2029 unsigned rn = INSTR (9, 5);
2030 unsigned rd = INSTR (4, 0);
2032 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2033 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2034 + (extreg32 (cpu, rm, extension) << shift));
2037 /* 64 bit ADD extending register.
2038 N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2040 add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2042 unsigned rm = INSTR (20, 16);
2043 unsigned rn = INSTR (9, 5);
2044 unsigned rd = INSTR (4, 0);
2046 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2047 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2048 + (extreg64 (cpu, rm, extension) << shift));
2051 /* 32 bit ADD extending register setting flags. */
2053 adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2055 unsigned rm = INSTR (20, 16);
2056 unsigned rn = INSTR (9, 5);
2057 unsigned rd = INSTR (4, 0);
2059 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2060 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2062 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2063 set_flags_for_add32 (cpu, value1, value2);
2066 /* 64 bit ADD extending register setting flags */
2067 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2069 adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2071 unsigned rm = INSTR (20, 16);
2072 unsigned rn = INSTR (9, 5);
2073 unsigned rd = INSTR (4, 0);
2075 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2076 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2078 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2079 set_flags_for_add64 (cpu, value1, value2);
2082 /* 32 bit SUB extending register. */
2084 sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2086 unsigned rm = INSTR (20, 16);
2087 unsigned rn = INSTR (9, 5);
2088 unsigned rd = INSTR (4, 0);
2090 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2091 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2092 - (extreg32 (cpu, rm, extension) << shift));
2095 /* 64 bit SUB extending register. */
2096 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2098 sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2100 unsigned rm = INSTR (20, 16);
2101 unsigned rn = INSTR (9, 5);
2102 unsigned rd = INSTR (4, 0);
2104 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2105 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2106 - (extreg64 (cpu, rm, extension) << shift));
2109 /* 32 bit SUB extending register setting flags. */
2111 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2113 unsigned rm = INSTR (20, 16);
2114 unsigned rn = INSTR (9, 5);
2115 unsigned rd = INSTR (4, 0);
2117 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2118 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2120 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2121 set_flags_for_sub32 (cpu, value1, value2);
2124 /* 64 bit SUB extending register setting flags */
2125 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2127 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2129 unsigned rm = INSTR (20, 16);
2130 unsigned rn = INSTR (9, 5);
2131 unsigned rd = INSTR (4, 0);
2133 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2134 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2136 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2137 set_flags_for_sub64 (cpu, value1, value2);
2141 dexAddSubtractImmediate (sim_cpu *cpu)
2143 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2144 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2145 instr[29] = set : 0 ==> no flags, 1 ==> set flags
2146 instr[28,24] = 10001
2147 instr[23,22] = shift : 00 == LSL#0, 01 = LSL#12 1x = UNALLOC
2148 instr[21,10] = uimm12
2152 /* N.B. the shift is applied at decode before calling the add/sub routine. */
2153 uint32_t shift = INSTR (23, 22);
2154 uint32_t imm = INSTR (21, 10);
2155 uint32_t dispatch = INSTR (31, 29);
2157 NYI_assert (28, 24, 0x11);
2167 case 0: add32 (cpu, imm); break;
2168 case 1: adds32 (cpu, imm); break;
2169 case 2: sub32 (cpu, imm); break;
2170 case 3: subs32 (cpu, imm); break;
2171 case 4: add64 (cpu, imm); break;
2172 case 5: adds64 (cpu, imm); break;
2173 case 6: sub64 (cpu, imm); break;
2174 case 7: subs64 (cpu, imm); break;
2179 dexAddSubtractShiftedRegister (sim_cpu *cpu)
2181 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2182 instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
2183 instr[28,24] = 01011
2184 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
2187 instr[15,10] = count : must be 0xxxxx for 32 bit
2191 uint32_t size = INSTR (31, 31);
2192 uint32_t count = INSTR (15, 10);
2193 Shift shiftType = INSTR (23, 22);
2195 NYI_assert (28, 24, 0x0B);
2196 NYI_assert (21, 21, 0);
2198 /* Shift encoded as ROR is unallocated. */
2199 if (shiftType == ROR)
2202 /* 32 bit operations must have count[5] = 0
2203 or else we have an UNALLOC. */
2204 if (size == 0 && uimm (count, 5, 5))
2207 /* Dispatch on size:op i.e instr [31,29]. */
2208 switch (INSTR (31, 29))
2210 case 0: add32_shift (cpu, shiftType, count); break;
2211 case 1: adds32_shift (cpu, shiftType, count); break;
2212 case 2: sub32_shift (cpu, shiftType, count); break;
2213 case 3: subs32_shift (cpu, shiftType, count); break;
2214 case 4: add64_shift (cpu, shiftType, count); break;
2215 case 5: adds64_shift (cpu, shiftType, count); break;
2216 case 6: sub64_shift (cpu, shiftType, count); break;
2217 case 7: subs64_shift (cpu, shiftType, count); break;
2222 dexAddSubtractExtendedRegister (sim_cpu *cpu)
2224 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2225 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2226 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2227 instr[28,24] = 01011
2228 instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
2231 instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
2232 000 ==> LSL|UXTW, 001 ==> UXTZ,
2233 000 ==> SXTB, 001 ==> SXTH,
2234 000 ==> SXTW, 001 ==> SXTX,
2235 instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
2239 Extension extensionType = INSTR (15, 13);
2240 uint32_t shift = INSTR (12, 10);
2242 NYI_assert (28, 24, 0x0B);
2243 NYI_assert (21, 21, 1);
2245 /* Shift may not exceed 4. */
2249 /* Dispatch on size:op:set?. */
2250 switch (INSTR (31, 29))
2252 case 0: add32_ext (cpu, extensionType, shift); break;
2253 case 1: adds32_ext (cpu, extensionType, shift); break;
2254 case 2: sub32_ext (cpu, extensionType, shift); break;
2255 case 3: subs32_ext (cpu, extensionType, shift); break;
2256 case 4: add64_ext (cpu, extensionType, shift); break;
2257 case 5: adds64_ext (cpu, extensionType, shift); break;
2258 case 6: sub64_ext (cpu, extensionType, shift); break;
2259 case 7: subs64_ext (cpu, extensionType, shift); break;
2263 /* Conditional data processing
2264 Condition register is implicit 3rd source. */
2266 /* 32 bit add with carry. */
2267 /* N.B register args may not be SP. */
2270 adc32 (sim_cpu *cpu)
2272 unsigned rm = INSTR (20, 16);
2273 unsigned rn = INSTR (9, 5);
2274 unsigned rd = INSTR (4, 0);
2276 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2277 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2278 + aarch64_get_reg_u32 (cpu, rm, NO_SP)
2282 /* 64 bit add with carry */
2284 adc64 (sim_cpu *cpu)
2286 unsigned rm = INSTR (20, 16);
2287 unsigned rn = INSTR (9, 5);
2288 unsigned rd = INSTR (4, 0);
2290 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2291 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2292 + aarch64_get_reg_u64 (cpu, rm, NO_SP)
2296 /* 32 bit add with carry setting flags. */
2298 adcs32 (sim_cpu *cpu)
2300 unsigned rm = INSTR (20, 16);
2301 unsigned rn = INSTR (9, 5);
2302 unsigned rd = INSTR (4, 0);
2304 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2305 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2306 uint32_t carry = IS_SET (C);
2308 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2309 set_flags_for_add32 (cpu, value1, value2 + carry);
2312 /* 64 bit add with carry setting flags. */
2314 adcs64 (sim_cpu *cpu)
2316 unsigned rm = INSTR (20, 16);
2317 unsigned rn = INSTR (9, 5);
2318 unsigned rd = INSTR (4, 0);
2320 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2321 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2322 uint64_t carry = IS_SET (C);
2324 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2325 set_flags_for_add64 (cpu, value1, value2 + carry);
2328 /* 32 bit sub with carry. */
2330 sbc32 (sim_cpu *cpu)
2332 unsigned rm = INSTR (20, 16);
2333 unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. */
2334 unsigned rd = INSTR (4, 0);
2336 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2337 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2338 - aarch64_get_reg_u32 (cpu, rm, NO_SP)
2342 /* 64 bit sub with carry */
2344 sbc64 (sim_cpu *cpu)
2346 unsigned rm = INSTR (20, 16);
2347 unsigned rn = INSTR (9, 5);
2348 unsigned rd = INSTR (4, 0);
2350 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2351 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2352 - aarch64_get_reg_u64 (cpu, rm, NO_SP)
2356 /* 32 bit sub with carry setting flags */
2358 sbcs32 (sim_cpu *cpu)
2360 unsigned rm = INSTR (20, 16);
2361 unsigned rn = INSTR (9, 5);
2362 unsigned rd = INSTR (4, 0);
2364 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2365 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2366 uint32_t carry = IS_SET (C);
2367 uint32_t result = value1 - value2 + 1 - carry;
2369 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2370 set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
2373 /* 64 bit sub with carry setting flags */
2375 sbcs64 (sim_cpu *cpu)
2377 unsigned rm = INSTR (20, 16);
2378 unsigned rn = INSTR (9, 5);
2379 unsigned rd = INSTR (4, 0);
2381 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2382 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2383 uint64_t carry = IS_SET (C);
2384 uint64_t result = value1 - value2 + 1 - carry;
2386 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2387 set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
2391 dexAddSubtractWithCarry (sim_cpu *cpu)
2393 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2394 instr[30] = op : 0 ==> ADC, 1 ==> SBC
2395 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2396 instr[28,21] = 1 1010 000
2398 instr[15,10] = op2 : 00000 ==> ok, ow ==> UNALLOC
2402 uint32_t op2 = INSTR (15, 10);
2404 NYI_assert (28, 21, 0xD0);
2409 /* Dispatch on size:op:set?. */
2410 switch (INSTR (31, 29))
2412 case 0: adc32 (cpu); break;
2413 case 1: adcs32 (cpu); break;
2414 case 2: sbc32 (cpu); break;
2415 case 3: sbcs32 (cpu); break;
2416 case 4: adc64 (cpu); break;
2417 case 5: adcs64 (cpu); break;
2418 case 6: sbc64 (cpu); break;
2419 case 7: sbcs64 (cpu); break;
2424 testConditionCode (sim_cpu *cpu, CondCode cc)
2426 /* This should be reduceable to branchless logic
2427 by some careful testing of bits in CC followed
2428 by the requisite masking and combining of bits
2429 from the flag register.
2431 For now we do it with a switch. */
2436 case EQ: res = IS_SET (Z); break;
2437 case NE: res = IS_CLEAR (Z); break;
2438 case CS: res = IS_SET (C); break;
2439 case CC: res = IS_CLEAR (C); break;
2440 case MI: res = IS_SET (N); break;
2441 case PL: res = IS_CLEAR (N); break;
2442 case VS: res = IS_SET (V); break;
2443 case VC: res = IS_CLEAR (V); break;
2444 case HI: res = IS_SET (C) && IS_CLEAR (Z); break;
2445 case LS: res = IS_CLEAR (C) || IS_SET (Z); break;
2446 case GE: res = IS_SET (N) == IS_SET (V); break;
2447 case LT: res = IS_SET (N) != IS_SET (V); break;
2448 case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break;
2449 case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break;
2460 CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */
2462 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2463 instr[30] = compare with positive (1) or negative value (0)
2464 instr[29,21] = 1 1101 0010
2465 instr[20,16] = Rm or const
2467 instr[11] = compare reg (0) or const (1)
2471 instr[3,0] = value for CPSR bits if the comparison does not take place. */
2476 NYI_assert (29, 21, 0x1d2);
2477 NYI_assert (10, 10, 0);
2478 NYI_assert (4, 4, 0);
2480 if (! testConditionCode (cpu, INSTR (15, 12)))
2482 aarch64_set_CPSR (cpu, INSTR (3, 0));
2486 negate = INSTR (30, 30) ? 1 : -1;
2487 rm = INSTR (20, 16);
2493 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2494 negate * (uint64_t) rm);
2496 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2497 negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
2502 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2505 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2506 negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
2511 do_vec_MOV_whole_vector (sim_cpu *cpu)
2513 /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)
2516 instr[30] = half(0)/full(1)
2517 instr[29,21] = 001110101
2519 instr[15,10] = 000111
2523 unsigned vs = INSTR (9, 5);
2524 unsigned vd = INSTR (4, 0);
2526 NYI_assert (29, 21, 0x075);
2527 NYI_assert (15, 10, 0x07);
2529 if (INSTR (20, 16) != vs)
2533 aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));
2535 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
2539 do_vec_MOV_into_scalar (sim_cpu *cpu)
2542 instr[30] = word(0)/long(1)
2543 instr[29,21] = 00 1110 000
2544 instr[20,18] = element size and index
2545 instr[17,10] = 00 0011 11
2546 instr[9,5] = V source
2547 instr[4,0] = R dest */
2549 unsigned vs = INSTR (9, 5);
2550 unsigned rd = INSTR (4, 0);
2552 NYI_assert (29, 21, 0x070);
2553 NYI_assert (17, 10, 0x0F);
2555 switch (INSTR (20, 18))
2558 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 0));
2562 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 1));
2569 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u32
2570 (cpu, vs, INSTR (20, 19)));
2579 do_vec_INS (sim_cpu *cpu)
2581 /* instr[31,21] = 01001110000
2582 instr[20,16] = element size and index
2583 instr[15,10] = 000111
2584 instr[9,5] = W source
2585 instr[4,0] = V dest */
2588 unsigned rs = INSTR (9, 5);
2589 unsigned vd = INSTR (4, 0);
2591 NYI_assert (31, 21, 0x270);
2592 NYI_assert (15, 10, 0x07);
2596 index = INSTR (20, 17);
2597 aarch64_set_vec_u8 (cpu, vd, index,
2598 aarch64_get_reg_u8 (cpu, rs, NO_SP));
2600 else if (INSTR (17, 17))
2602 index = INSTR (20, 18);
2603 aarch64_set_vec_u16 (cpu, vd, index,
2604 aarch64_get_reg_u16 (cpu, rs, NO_SP));
2606 else if (INSTR (18, 18))
2608 index = INSTR (20, 19);
2609 aarch64_set_vec_u32 (cpu, vd, index,
2610 aarch64_get_reg_u32 (cpu, rs, NO_SP));
2612 else if (INSTR (19, 19))
2614 index = INSTR (20, 20);
2615 aarch64_set_vec_u64 (cpu, vd, index,
2616 aarch64_get_reg_u64 (cpu, rs, NO_SP));
2623 do_vec_DUP_vector_into_vector (sim_cpu *cpu)
2626 instr[30] = half(0)/full(1)
2627 instr[29,21] = 00 1110 000
2628 instr[20,16] = element size and index
2629 instr[15,10] = 0000 01
2630 instr[9,5] = V source
2631 instr[4,0] = V dest. */
2633 unsigned full = INSTR (30, 30);
2634 unsigned vs = INSTR (9, 5);
2635 unsigned vd = INSTR (4, 0);
2638 NYI_assert (29, 21, 0x070);
2639 NYI_assert (15, 10, 0x01);
2643 index = INSTR (20, 17);
2645 for (i = 0; i < (full ? 16 : 8); i++)
2646 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index));
2648 else if (INSTR (17, 17))
2650 index = INSTR (20, 18);
2652 for (i = 0; i < (full ? 8 : 4); i++)
2653 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index));
2655 else if (INSTR (18, 18))
2657 index = INSTR (20, 19);
2659 for (i = 0; i < (full ? 4 : 2); i++)
2660 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index));
2664 if (INSTR (19, 19) == 0)
2670 index = INSTR (20, 20);
2672 for (i = 0; i < 2; i++)
2673 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index));
2678 do_vec_TBL (sim_cpu *cpu)
2681 instr[30] = half(0)/full(1)
2682 instr[29,21] = 00 1110 000
2685 instr[14,13] = vec length
2687 instr[9,5] = V start
2688 instr[4,0] = V dest */
2690 int full = INSTR (30, 30);
2691 int len = INSTR (14, 13) + 1;
2692 unsigned vm = INSTR (20, 16);
2693 unsigned vn = INSTR (9, 5);
2694 unsigned vd = INSTR (4, 0);
2697 NYI_assert (29, 21, 0x070);
2698 NYI_assert (12, 10, 0);
2700 for (i = 0; i < (full ? 16 : 8); i++)
2702 unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i);
2706 val = aarch64_get_vec_u8 (cpu, vn, selector);
2707 else if (selector < 32)
2708 val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16);
2709 else if (selector < 48)
2710 val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32);
2711 else if (selector < 64)
2712 val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48);
2716 aarch64_set_vec_u8 (cpu, vd, i, val);
2721 do_vec_TRN (sim_cpu *cpu)
2724 instr[30] = half(0)/full(1)
2725 instr[29,24] = 00 1110
2730 instr[14] = TRN1 (0) / TRN2 (1)
2732 instr[9,5] = V source
2733 instr[4,0] = V dest. */
2735 int full = INSTR (30, 30);
2736 int second = INSTR (14, 14);
2737 unsigned vm = INSTR (20, 16);
2738 unsigned vn = INSTR (9, 5);
2739 unsigned vd = INSTR (4, 0);
2742 NYI_assert (29, 24, 0x0E);
2743 NYI_assert (13, 10, 0xA);
2745 switch (INSTR (23, 22))
2748 for (i = 0; i < (full ? 8 : 4); i++)
2752 aarch64_get_vec_u8 (cpu, second ? vm : vn, i * 2));
2754 (cpu, vd, 1 * 2 + 1,
2755 aarch64_get_vec_u8 (cpu, second ? vn : vm, i * 2 + 1));
2760 for (i = 0; i < (full ? 4 : 2); i++)
2764 aarch64_get_vec_u16 (cpu, second ? vm : vn, i * 2));
2766 (cpu, vd, 1 * 2 + 1,
2767 aarch64_get_vec_u16 (cpu, second ? vn : vm, i * 2 + 1));
2773 (cpu, vd, 0, aarch64_get_vec_u32 (cpu, second ? vm : vn, 0));
2775 (cpu, vd, 1, aarch64_get_vec_u32 (cpu, second ? vn : vm, 1));
2777 (cpu, vd, 2, aarch64_get_vec_u32 (cpu, second ? vm : vn, 2));
2779 (cpu, vd, 3, aarch64_get_vec_u32 (cpu, second ? vn : vm, 3));
2786 aarch64_set_vec_u64 (cpu, vd, 0,
2787 aarch64_get_vec_u64 (cpu, second ? vm : vn, 0));
2788 aarch64_set_vec_u64 (cpu, vd, 1,
2789 aarch64_get_vec_u64 (cpu, second ? vn : vm, 1));
2795 do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
2798 instr[30] = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
2799 [must be 1 for 64-bit xfer]
2800 instr[29,20] = 00 1110 0000
2801 instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
2802 0100=> 32-bits. 1000=>64-bits
2803 instr[15,10] = 0000 11
2804 instr[9,5] = W source
2805 instr[4,0] = V dest. */
2808 unsigned Vd = INSTR (4, 0);
2809 unsigned Rs = INSTR (9, 5);
2810 int both = INSTR (30, 30);
2812 NYI_assert (29, 20, 0x0E0);
2813 NYI_assert (15, 10, 0x03);
2815 switch (INSTR (19, 16))
2818 for (i = 0; i < (both ? 16 : 8); i++)
2819 aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
2823 for (i = 0; i < (both ? 8 : 4); i++)
2824 aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
2828 for (i = 0; i < (both ? 4 : 2); i++)
2829 aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
2835 aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2836 aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2845 do_vec_UZP (sim_cpu *cpu)
2848 instr[30] = half(0)/full(1)
2849 instr[29,24] = 00 1110
2850 instr[23,22] = size: byte(00), half(01), word (10), long (11)
2854 instr[14] = lower (0) / upper (1)
2859 int full = INSTR (30, 30);
2860 int upper = INSTR (14, 14);
2862 unsigned vm = INSTR (20, 16);
2863 unsigned vn = INSTR (9, 5);
2864 unsigned vd = INSTR (4, 0);
2866 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
2867 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
2868 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
2869 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
2874 uint64_t input1 = upper ? val_n1 : val_m1;
2875 uint64_t input2 = upper ? val_n2 : val_m2;
2878 NYI_assert (29, 24, 0x0E);
2879 NYI_assert (21, 21, 0);
2880 NYI_assert (15, 15, 0);
2881 NYI_assert (13, 10, 6);
2883 switch (INSTR (23, 23))
2886 for (i = 0; i < 8; i++)
2888 val1 |= (input1 >> (i * 8)) & (0xFFULL << (i * 8));
2889 val2 |= (input2 >> (i * 8)) & (0xFFULL << (i * 8));
2894 for (i = 0; i < 4; i++)
2896 val1 |= (input1 >> (i * 16)) & (0xFFFFULL << (i * 16));
2897 val2 |= (input2 >> (i * 16)) & (0xFFFFULL << (i * 16));
2902 val1 = ((input1 & 0xFFFFFFFF) | ((input1 >> 32) & 0xFFFFFFFF00000000ULL));
2903 val2 = ((input2 & 0xFFFFFFFF) | ((input2 >> 32) & 0xFFFFFFFF00000000ULL));
2911 aarch64_set_vec_u64 (cpu, vd, 0, val1);
2913 aarch64_set_vec_u64 (cpu, vd, 1, val2);
2917 do_vec_ZIP (sim_cpu *cpu)
2920 instr[30] = half(0)/full(1)
2921 instr[29,24] = 00 1110
2922 instr[23,22] = size: byte(00), hald(01), word (10), long (11)
2926 instr[14] = lower (0) / upper (1)
2931 int full = INSTR (30, 30);
2932 int upper = INSTR (14, 14);
2934 unsigned vm = INSTR (20, 16);
2935 unsigned vn = INSTR (9, 5);
2936 unsigned vd = INSTR (4, 0);
2938 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
2939 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
2940 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
2941 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
2946 uint64_t input1 = upper ? val_n1 : val_m1;
2947 uint64_t input2 = upper ? val_n2 : val_m2;
2949 NYI_assert (29, 24, 0x0E);
2950 NYI_assert (21, 21, 0);
2951 NYI_assert (15, 15, 0);
2952 NYI_assert (13, 10, 0xE);
2954 switch (INSTR (23, 23))
2958 ((input1 << 0) & (0xFF << 0))
2959 | ((input2 << 8) & (0xFF << 8))
2960 | ((input1 << 8) & (0xFF << 16))
2961 | ((input2 << 16) & (0xFF << 24))
2962 | ((input1 << 16) & (0xFFULL << 32))
2963 | ((input2 << 24) & (0xFFULL << 40))
2964 | ((input1 << 24) & (0xFFULL << 48))
2965 | ((input2 << 32) & (0xFFULL << 56));
2968 ((input1 >> 32) & (0xFF << 0))
2969 | ((input2 >> 24) & (0xFF << 8))
2970 | ((input1 >> 24) & (0xFF << 16))
2971 | ((input2 >> 16) & (0xFF << 24))
2972 | ((input1 >> 16) & (0xFFULL << 32))
2973 | ((input2 >> 8) & (0xFFULL << 40))
2974 | ((input1 >> 8) & (0xFFULL << 48))
2975 | ((input2 >> 0) & (0xFFULL << 56));
2980 ((input1 << 0) & (0xFFFF << 0))
2981 | ((input2 << 16) & (0xFFFF << 16))
2982 | ((input1 << 16) & (0xFFFFULL << 32))
2983 | ((input2 << 32) & (0xFFFFULL << 48));
2986 ((input1 >> 32) & (0xFFFF << 0))
2987 | ((input2 >> 16) & (0xFFFF << 16))
2988 | ((input1 >> 16) & (0xFFFFULL << 32))
2989 | ((input2 >> 0) & (0xFFFFULL << 48));
2993 val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
2994 val2 = (input2 & 0xFFFFFFFFULL) | (input1 << 32);
3003 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3005 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3008 /* Floating point immediates are encoded in 8 bits.
3009 fpimm[7] = sign bit.
3010 fpimm[6:4] = signed exponent.
3011 fpimm[3:0] = fraction (assuming leading 1).
3012 i.e. F = s * 1.f * 2^(e - b). */
3015 fp_immediate_for_encoding_32 (uint32_t imm8)
3018 uint32_t s, e, f, i;
3020 s = (imm8 >> 7) & 0x1;
3021 e = (imm8 >> 4) & 0x7;
3024 /* The fp value is s * n/16 * 2r where n is 16+e. */
3025 u = (16.0 + f) / 16.0;
3027 /* N.B. exponent is signed. */
3032 for (i = 0; i <= epos; i++)
3039 for (i = 0; i < eneg; i++)
3050 fp_immediate_for_encoding_64 (uint32_t imm8)
3053 uint32_t s, e, f, i;
3055 s = (imm8 >> 7) & 0x1;
3056 e = (imm8 >> 4) & 0x7;
3059 /* The fp value is s * n/16 * 2r where n is 16+e. */
3060 u = (16.0 + f) / 16.0;
3062 /* N.B. exponent is signed. */
3067 for (i = 0; i <= epos; i++)
3074 for (i = 0; i < eneg; i++)
3085 do_vec_MOV_immediate (sim_cpu *cpu)
3088 instr[30] = full/half selector
3089 instr[29,19] = 00111100000
3090 instr[18,16] = high 3 bits of uimm8
3091 instr[15,12] = size & shift:
3093 0010 => 32-bit + LSL#8
3094 0100 => 32-bit + LSL#16
3095 0110 => 32-bit + LSL#24
3096 1010 => 16-bit + LSL#8
3098 1101 => 32-bit + MSL#16
3099 1100 => 32-bit + MSL#8
3103 instr[9,5] = low 5-bits of uimm8
3106 int full = INSTR (30, 30);
3107 unsigned vd = INSTR (4, 0);
3108 unsigned val = INSTR (18, 16) << 5
3112 NYI_assert (29, 19, 0x1E0);
3113 NYI_assert (11, 10, 1);
3115 switch (INSTR (15, 12))
3117 case 0x0: /* 32-bit, no shift. */
3118 case 0x2: /* 32-bit, shift by 8. */
3119 case 0x4: /* 32-bit, shift by 16. */
3120 case 0x6: /* 32-bit, shift by 24. */
3121 val <<= (8 * INSTR (14, 13));
3122 for (i = 0; i < (full ? 4 : 2); i++)
3123 aarch64_set_vec_u32 (cpu, vd, i, val);
3126 case 0xa: /* 16-bit, shift by 8. */
3129 case 0x8: /* 16-bit, no shift. */
3130 for (i = 0; i < (full ? 8 : 4); i++)
3131 aarch64_set_vec_u16 (cpu, vd, i, val);
3133 case 0xd: /* 32-bit, mask shift by 16. */
3137 case 0xc: /* 32-bit, mask shift by 8. */
3140 for (i = 0; i < (full ? 4 : 2); i++)
3141 aarch64_set_vec_u32 (cpu, vd, i, val);
3144 case 0xe: /* 8-bit, no shift. */
3145 for (i = 0; i < (full ? 16 : 8); i++)
3146 aarch64_set_vec_u8 (cpu, vd, i, val);
3149 case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */
3151 float u = fp_immediate_for_encoding_32 (val);
3152 for (i = 0; i < (full ? 4 : 2); i++)
3153 aarch64_set_vec_float (cpu, vd, i, u);
3163 do_vec_MVNI (sim_cpu *cpu)
3166 instr[30] = full/half selector
3167 instr[29,19] = 10111100000
3168 instr[18,16] = high 3 bits of uimm8
3169 instr[15,12] = selector
3171 instr[9,5] = low 5-bits of uimm8
3174 int full = INSTR (30, 30);
3175 unsigned vd = INSTR (4, 0);
3176 unsigned val = INSTR (18, 16) << 5
3180 NYI_assert (29, 19, 0x5E0);
3181 NYI_assert (11, 10, 1);
3183 switch (INSTR (15, 12))
3185 case 0x0: /* 32-bit, no shift. */
3186 case 0x2: /* 32-bit, shift by 8. */
3187 case 0x4: /* 32-bit, shift by 16. */
3188 case 0x6: /* 32-bit, shift by 24. */
3189 val <<= (8 * INSTR (14, 13));
3191 for (i = 0; i < (full ? 4 : 2); i++)
3192 aarch64_set_vec_u32 (cpu, vd, i, val);
3195 case 0xa: /* 16-bit, 8 bit shift. */
3197 case 0x8: /* 16-bit, no shift. */
3199 for (i = 0; i < (full ? 8 : 4); i++)
3200 aarch64_set_vec_u16 (cpu, vd, i, val);
3203 case 0xd: /* 32-bit, mask shift by 16. */
3206 case 0xc: /* 32-bit, mask shift by 8. */
3210 for (i = 0; i < (full ? 4 : 2); i++)
3211 aarch64_set_vec_u32 (cpu, vd, i, val);
3214 case 0xE: /* MOVI Dn, #mask64 */
3218 for (i = 0; i < 8; i++)
3220 mask |= (0xF << (i * 4));
3221 aarch64_set_vec_u64 (cpu, vd, 0, mask);
3222 aarch64_set_vec_u64 (cpu, vd, 1, 0);
3226 case 0xf: /* FMOV Vd.2D, #fpimm. */
3228 double u = fp_immediate_for_encoding_64 (val);
3233 aarch64_set_vec_double (cpu, vd, 0, u);
3234 aarch64_set_vec_double (cpu, vd, 1, u);
3243 #define ABS(A) ((A) < 0 ? - (A) : (A))
3246 do_vec_ABS (sim_cpu *cpu)
3249 instr[30] = half(0)/full(1)
3250 instr[29,24] = 00 1110
3251 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3252 instr[21,10] = 10 0000 1011 10
3256 unsigned vn = INSTR (9, 5);
3257 unsigned vd = INSTR (4, 0);
3258 unsigned full = INSTR (30, 30);
3261 NYI_assert (29, 24, 0x0E);
3262 NYI_assert (21, 10, 0x82E);
3264 switch (INSTR (23, 22))
3267 for (i = 0; i < (full ? 16 : 8); i++)
3268 aarch64_set_vec_s8 (cpu, vd, i,
3269 ABS (aarch64_get_vec_s8 (cpu, vn, i)));
3273 for (i = 0; i < (full ? 8 : 4); i++)
3274 aarch64_set_vec_s16 (cpu, vd, i,
3275 ABS (aarch64_get_vec_s16 (cpu, vn, i)));
3279 for (i = 0; i < (full ? 4 : 2); i++)
3280 aarch64_set_vec_s32 (cpu, vd, i,
3281 ABS (aarch64_get_vec_s32 (cpu, vn, i)));
3287 for (i = 0; i < 2; i++)
3288 aarch64_set_vec_s64 (cpu, vd, i,
3289 ABS (aarch64_get_vec_s64 (cpu, vn, i)));
3295 do_vec_ADDV (sim_cpu *cpu)
3298 instr[30] = full/half selector
3299 instr[29,24] = 00 1110
3300 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3301 instr[21,10] = 11 0001 1011 10
3305 unsigned vm = INSTR (9, 5);
3306 unsigned rd = INSTR (4, 0);
3309 int full = INSTR (30, 30);
3311 NYI_assert (29, 24, 0x0E);
3312 NYI_assert (21, 10, 0xC6E);
3314 switch (INSTR (23, 22))
3317 for (i = 0; i < (full ? 16 : 8); i++)
3318 val += aarch64_get_vec_u8 (cpu, vm, i);
3319 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3323 for (i = 0; i < (full ? 8 : 4); i++)
3324 val += aarch64_get_vec_u16 (cpu, vm, i);
3325 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3329 for (i = 0; i < (full ? 4 : 2); i++)
3330 val += aarch64_get_vec_u32 (cpu, vm, i);
3331 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3337 val = aarch64_get_vec_u64 (cpu, vm, 0);
3338 val += aarch64_get_vec_u64 (cpu, vm, 1);
3339 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3345 do_vec_ins_2 (sim_cpu *cpu)
3347 /* instr[31,21] = 01001110000
3348 instr[20,18] = size & element selector
3350 instr[13] = direction: to vec(0), from vec (1)
3356 unsigned vm = INSTR (9, 5);
3357 unsigned vd = INSTR (4, 0);
3359 NYI_assert (31, 21, 0x270);
3360 NYI_assert (17, 14, 0);
3361 NYI_assert (12, 10, 7);
3363 if (INSTR (13, 13) == 1)
3365 if (INSTR (18, 18) == 1)
3368 elem = INSTR (20, 19);
3369 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3370 aarch64_get_vec_u32 (cpu, vm, elem));
3375 if (INSTR (19, 19) != 1)
3378 elem = INSTR (20, 20);
3379 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3380 aarch64_get_vec_u64 (cpu, vm, elem));
3385 if (INSTR (18, 18) == 1)
3388 elem = INSTR (20, 19);
3389 aarch64_set_vec_u32 (cpu, vd, elem,
3390 aarch64_get_reg_u32 (cpu, vm, NO_SP));
3395 if (INSTR (19, 19) != 1)
3398 elem = INSTR (20, 20);
3399 aarch64_set_vec_u64 (cpu, vd, elem,
3400 aarch64_get_reg_u64 (cpu, vm, NO_SP));
3406 do_vec_mull (sim_cpu *cpu)
3409 instr[30] = lower(0)/upper(1) selector
3410 instr[29] = signed(0)/unsigned(1)
3411 instr[28,24] = 0 1110
3412 instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
3415 instr[15,10] = 11 0000
3419 int unsign = INSTR (29, 29);
3420 int bias = INSTR (30, 30);
3421 unsigned vm = INSTR (20, 16);
3422 unsigned vn = INSTR ( 9, 5);
3423 unsigned vd = INSTR ( 4, 0);
3426 NYI_assert (28, 24, 0x0E);
3427 NYI_assert (15, 10, 0x30);
3429 switch (INSTR (23, 22))
3435 for (i = 0; i < 8; i++)
3436 aarch64_set_vec_u16 (cpu, vd, i,
3437 aarch64_get_vec_u8 (cpu, vn, i + bias)
3438 * aarch64_get_vec_u8 (cpu, vm, i + bias));
3440 for (i = 0; i < 8; i++)
3441 aarch64_set_vec_s16 (cpu, vd, i,
3442 aarch64_get_vec_s8 (cpu, vn, i + bias)
3443 * aarch64_get_vec_s8 (cpu, vm, i + bias));
3450 for (i = 0; i < 4; i++)
3451 aarch64_set_vec_u32 (cpu, vd, i,
3452 aarch64_get_vec_u16 (cpu, vn, i + bias)
3453 * aarch64_get_vec_u16 (cpu, vm, i + bias));
3455 for (i = 0; i < 4; i++)
3456 aarch64_set_vec_s32 (cpu, vd, i,
3457 aarch64_get_vec_s16 (cpu, vn, i + bias)
3458 * aarch64_get_vec_s16 (cpu, vm, i + bias));
3465 for (i = 0; i < 2; i++)
3466 aarch64_set_vec_u64 (cpu, vd, i,
3467 (uint64_t) aarch64_get_vec_u32 (cpu, vn,
3469 * (uint64_t) aarch64_get_vec_u32 (cpu, vm,
3472 for (i = 0; i < 2; i++)
3473 aarch64_set_vec_s64 (cpu, vd, i,
3474 aarch64_get_vec_s32 (cpu, vn, i + bias)
3475 * aarch64_get_vec_s32 (cpu, vm, i + bias));
3484 do_vec_fadd (sim_cpu *cpu)
3487 instr[30] = half(0)/full(1)
3488 instr[29,24] = 001110
3489 instr[23] = FADD(0)/FSUB(1)
3490 instr[22] = float (0)/double(1)
3493 instr[15,10] = 110101
3497 unsigned vm = INSTR (20, 16);
3498 unsigned vn = INSTR (9, 5);
3499 unsigned vd = INSTR (4, 0);
3501 int full = INSTR (30, 30);
3503 NYI_assert (29, 24, 0x0E);
3504 NYI_assert (21, 21, 1);
3505 NYI_assert (15, 10, 0x35);
3514 for (i = 0; i < 2; i++)
3515 aarch64_set_vec_double (cpu, vd, i,
3516 aarch64_get_vec_double (cpu, vn, i)
3517 - aarch64_get_vec_double (cpu, vm, i));
3521 for (i = 0; i < (full ? 4 : 2); i++)
3522 aarch64_set_vec_float (cpu, vd, i,
3523 aarch64_get_vec_float (cpu, vn, i)
3524 - aarch64_get_vec_float (cpu, vm, i));
3534 for (i = 0; i < 2; i++)
3535 aarch64_set_vec_double (cpu, vd, i,
3536 aarch64_get_vec_double (cpu, vm, i)
3537 + aarch64_get_vec_double (cpu, vn, i));
3541 for (i = 0; i < (full ? 4 : 2); i++)
3542 aarch64_set_vec_float (cpu, vd, i,
3543 aarch64_get_vec_float (cpu, vm, i)
3544 + aarch64_get_vec_float (cpu, vn, i));
3550 do_vec_add (sim_cpu *cpu)
3553 instr[30] = full/half selector
3554 instr[29,24] = 001110
3555 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3558 instr[15,10] = 100001
3562 unsigned vm = INSTR (20, 16);
3563 unsigned vn = INSTR (9, 5);
3564 unsigned vd = INSTR (4, 0);
3566 int full = INSTR (30, 30);
3568 NYI_assert (29, 24, 0x0E);
3569 NYI_assert (21, 21, 1);
3570 NYI_assert (15, 10, 0x21);
3572 switch (INSTR (23, 22))
3575 for (i = 0; i < (full ? 16 : 8); i++)
3576 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
3577 + aarch64_get_vec_u8 (cpu, vm, i));
3581 for (i = 0; i < (full ? 8 : 4); i++)
3582 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
3583 + aarch64_get_vec_u16 (cpu, vm, i));
3587 for (i = 0; i < (full ? 4 : 2); i++)
3588 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
3589 + aarch64_get_vec_u32 (cpu, vm, i));
3595 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0)
3596 + aarch64_get_vec_u64 (cpu, vm, 0));
3597 aarch64_set_vec_u64 (cpu, vd, 1,
3598 aarch64_get_vec_u64 (cpu, vn, 1)
3599 + aarch64_get_vec_u64 (cpu, vm, 1));
3605 do_vec_mul (sim_cpu *cpu)
3608 instr[30] = full/half selector
3609 instr[29,24] = 00 1110
3610 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3613 instr[15,10] = 10 0111
3617 unsigned vm = INSTR (20, 16);
3618 unsigned vn = INSTR (9, 5);
3619 unsigned vd = INSTR (4, 0);
3621 int full = INSTR (30, 30);
3623 NYI_assert (29, 24, 0x0E);
3624 NYI_assert (21, 21, 1);
3625 NYI_assert (15, 10, 0x27);
3627 switch (INSTR (23, 22))
3630 for (i = 0; i < (full ? 16 : 8); i++)
3632 uint16_t val = aarch64_get_vec_u8 (cpu, vn, i);
3633 val *= aarch64_get_vec_u8 (cpu, vm, i);
3635 aarch64_set_vec_u16 (cpu, vd, i, val);
3640 for (i = 0; i < (full ? 8 : 4); i++)
3642 uint32_t val = aarch64_get_vec_u16 (cpu, vn, i);
3643 val *= aarch64_get_vec_u16 (cpu, vm, i);
3645 aarch64_set_vec_u32 (cpu, vd, i, val);
3650 for (i = 0; i < (full ? 4 : 2); i++)
3652 uint64_t val = aarch64_get_vec_u32 (cpu, vn, i);
3653 val *= aarch64_get_vec_u32 (cpu, vm, i);
3655 aarch64_set_vec_u64 (cpu, vd, i, val);
3665 do_vec_MLA (sim_cpu *cpu)
3668 instr[30] = full/half selector
3669 instr[29,24] = 00 1110
3670 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3673 instr[15,10] = 1001 01
3677 unsigned vm = INSTR (20, 16);
3678 unsigned vn = INSTR (9, 5);
3679 unsigned vd = INSTR (4, 0);
3681 int full = INSTR (30, 30);
3683 NYI_assert (29, 24, 0x0E);
3684 NYI_assert (21, 21, 1);
3685 NYI_assert (15, 10, 0x25);
3687 switch (INSTR (23, 22))
3690 for (i = 0; i < (full ? 16 : 8); i++)
3692 uint16_t val = aarch64_get_vec_u8 (cpu, vn, i);
3693 val *= aarch64_get_vec_u8 (cpu, vm, i);
3694 val += aarch64_get_vec_u8 (cpu, vd, i);
3696 aarch64_set_vec_u16 (cpu, vd, i, val);
3701 for (i = 0; i < (full ? 8 : 4); i++)
3703 uint32_t val = aarch64_get_vec_u16 (cpu, vn, i);
3704 val *= aarch64_get_vec_u16 (cpu, vm, i);
3705 val += aarch64_get_vec_u16 (cpu, vd, i);
3707 aarch64_set_vec_u32 (cpu, vd, i, val);
3712 for (i = 0; i < (full ? 4 : 2); i++)
3714 uint64_t val = aarch64_get_vec_u32 (cpu, vn, i);
3715 val *= aarch64_get_vec_u32 (cpu, vm, i);
3716 val += aarch64_get_vec_u32 (cpu, vd, i);
3718 aarch64_set_vec_u64 (cpu, vd, i, val);
3728 fmaxnm (float a, float b)
3730 if (fpclassify (a) == FP_NORMAL)
3732 if (fpclassify (b) == FP_NORMAL)
3733 return a > b ? a : b;
3736 else if (fpclassify (b) == FP_NORMAL)
3742 fminnm (float a, float b)
3744 if (fpclassify (a) == FP_NORMAL)
3746 if (fpclassify (b) == FP_NORMAL)
3747 return a < b ? a : b;
3750 else if (fpclassify (b) == FP_NORMAL)
3756 dmaxnm (double a, double b)
3758 if (fpclassify (a) == FP_NORMAL)
3760 if (fpclassify (b) == FP_NORMAL)
3761 return a > b ? a : b;
3764 else if (fpclassify (b) == FP_NORMAL)
3770 dminnm (double a, double b)
3772 if (fpclassify (a) == FP_NORMAL)
3774 if (fpclassify (b) == FP_NORMAL)
3775 return a < b ? a : b;
3778 else if (fpclassify (b) == FP_NORMAL)
3784 do_vec_FminmaxNMP (sim_cpu *cpu)
3787 instr [30] = half (0)/full (1)
3788 instr [29,24] = 10 1110
3789 instr [23] = max(0)/min(1)
3790 instr [22] = float (0)/double (1)
3793 instr [15,10] = 1100 01
3795 instr [4.0] = Vd. */
3797 unsigned vm = INSTR (20, 16);
3798 unsigned vn = INSTR (9, 5);
3799 unsigned vd = INSTR (4, 0);
3800 int full = INSTR (30, 30);
3802 NYI_assert (29, 24, 0x2E);
3803 NYI_assert (21, 21, 1);
3804 NYI_assert (15, 10, 0x31);
3808 double (* fn)(double, double) = INSTR (23, 23)
3813 aarch64_set_vec_double (cpu, vd, 0,
3814 fn (aarch64_get_vec_double (cpu, vn, 0),
3815 aarch64_get_vec_double (cpu, vn, 1)));
3816 aarch64_set_vec_double (cpu, vd, 0,
3817 fn (aarch64_get_vec_double (cpu, vm, 0),
3818 aarch64_get_vec_double (cpu, vm, 1)));
3822 float (* fn)(float, float) = INSTR (23, 23)
3825 aarch64_set_vec_float (cpu, vd, 0,
3826 fn (aarch64_get_vec_float (cpu, vn, 0),
3827 aarch64_get_vec_float (cpu, vn, 1)));
3829 aarch64_set_vec_float (cpu, vd, 1,
3830 fn (aarch64_get_vec_float (cpu, vn, 2),
3831 aarch64_get_vec_float (cpu, vn, 3)));
3833 aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
3834 fn (aarch64_get_vec_float (cpu, vm, 0),
3835 aarch64_get_vec_float (cpu, vm, 1)));
3837 aarch64_set_vec_float (cpu, vd, 3,
3838 fn (aarch64_get_vec_float (cpu, vm, 2),
3839 aarch64_get_vec_float (cpu, vm, 3)));
3844 do_vec_AND (sim_cpu *cpu)
3847 instr[30] = half (0)/full (1)
3848 instr[29,21] = 001110001
3850 instr[15,10] = 000111
3854 unsigned vm = INSTR (20, 16);
3855 unsigned vn = INSTR (9, 5);
3856 unsigned vd = INSTR (4, 0);
3858 int full = INSTR (30, 30);
3860 NYI_assert (29, 21, 0x071);
3861 NYI_assert (15, 10, 0x07);
3863 for (i = 0; i < (full ? 4 : 2); i++)
3864 aarch64_set_vec_u32 (cpu, vd, i,
3865 aarch64_get_vec_u32 (cpu, vn, i)
3866 & aarch64_get_vec_u32 (cpu, vm, i));
3870 do_vec_BSL (sim_cpu *cpu)
3873 instr[30] = half (0)/full (1)
3874 instr[29,21] = 101110011
3876 instr[15,10] = 000111
3880 unsigned vm = INSTR (20, 16);
3881 unsigned vn = INSTR (9, 5);
3882 unsigned vd = INSTR (4, 0);
3884 int full = INSTR (30, 30);
3886 NYI_assert (29, 21, 0x173);
3887 NYI_assert (15, 10, 0x07);
3889 for (i = 0; i < (full ? 16 : 8); i++)
3890 aarch64_set_vec_u8 (cpu, vd, i,
3891 ( aarch64_get_vec_u8 (cpu, vd, i)
3892 & aarch64_get_vec_u8 (cpu, vn, i))
3893 | ((~ aarch64_get_vec_u8 (cpu, vd, i))
3894 & aarch64_get_vec_u8 (cpu, vm, i)));
3898 do_vec_EOR (sim_cpu *cpu)
3901 instr[30] = half (0)/full (1)
3902 instr[29,21] = 10 1110 001
3904 instr[15,10] = 000111
3908 unsigned vm = INSTR (20, 16);
3909 unsigned vn = INSTR (9, 5);
3910 unsigned vd = INSTR (4, 0);
3912 int full = INSTR (30, 30);
3914 NYI_assert (29, 21, 0x171);
3915 NYI_assert (15, 10, 0x07);
3917 for (i = 0; i < (full ? 4 : 2); i++)
3918 aarch64_set_vec_u32 (cpu, vd, i,
3919 aarch64_get_vec_u32 (cpu, vn, i)
3920 ^ aarch64_get_vec_u32 (cpu, vm, i));
3924 do_vec_bit (sim_cpu *cpu)
3927 instr[30] = half (0)/full (1)
3928 instr[29,23] = 10 1110 1
3929 instr[22] = BIT (0) / BIF (1)
3932 instr[15,10] = 0001 11
3936 unsigned vm = INSTR (20, 16);
3937 unsigned vn = INSTR (9, 5);
3938 unsigned vd = INSTR (4, 0);
3939 unsigned full = INSTR (30, 30);
3940 unsigned test_false = INSTR (22, 22);
3943 NYI_assert (29, 23, 0x5D);
3944 NYI_assert (21, 21, 1);
3945 NYI_assert (15, 10, 0x07);
3949 for (i = 0; i < (full ? 16 : 8); i++)
3950 if (aarch64_get_vec_u32 (cpu, vn, i) == 0)
3951 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vm, i));
3955 for (i = 0; i < (full ? 16 : 8); i++)
3956 if (aarch64_get_vec_u32 (cpu, vn, i) != 0)
3957 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vm, i));
3962 do_vec_ORN (sim_cpu *cpu)
3965 instr[30] = half (0)/full (1)
3966 instr[29,21] = 00 1110 111
3968 instr[15,10] = 00 0111
3972 unsigned vm = INSTR (20, 16);
3973 unsigned vn = INSTR (9, 5);
3974 unsigned vd = INSTR (4, 0);
3976 int full = INSTR (30, 30);
3978 NYI_assert (29, 21, 0x077);
3979 NYI_assert (15, 10, 0x07);
3981 for (i = 0; i < (full ? 16 : 8); i++)
3982 aarch64_set_vec_u8 (cpu, vd, i,
3983 aarch64_get_vec_u8 (cpu, vn, i)
3984 | ~ aarch64_get_vec_u8 (cpu, vm, i));
3988 do_vec_ORR (sim_cpu *cpu)
3991 instr[30] = half (0)/full (1)
3992 instr[29,21] = 00 1110 101
3994 instr[15,10] = 0001 11
3998 unsigned vm = INSTR (20, 16);
3999 unsigned vn = INSTR (9, 5);
4000 unsigned vd = INSTR (4, 0);
4002 int full = INSTR (30, 30);
4004 NYI_assert (29, 21, 0x075);
4005 NYI_assert (15, 10, 0x07);
4007 for (i = 0; i < (full ? 16 : 8); i++)
4008 aarch64_set_vec_u8 (cpu, vd, i,
4009 aarch64_get_vec_u8 (cpu, vn, i)
4010 | aarch64_get_vec_u8 (cpu, vm, i));
4014 do_vec_BIC (sim_cpu *cpu)
4017 instr[30] = half (0)/full (1)
4018 instr[29,21] = 00 1110 011
4020 instr[15,10] = 00 0111
4024 unsigned vm = INSTR (20, 16);
4025 unsigned vn = INSTR (9, 5);
4026 unsigned vd = INSTR (4, 0);
4028 int full = INSTR (30, 30);
4030 NYI_assert (29, 21, 0x073);
4031 NYI_assert (15, 10, 0x07);
4033 for (i = 0; i < (full ? 16 : 8); i++)
4034 aarch64_set_vec_u8 (cpu, vd, i,
4035 aarch64_get_vec_u8 (cpu, vn, i)
4036 & ~ aarch64_get_vec_u8 (cpu, vm, i));
4040 do_vec_XTN (sim_cpu *cpu)
4043 instr[30] = first part (0)/ second part (1)
4044 instr[29,24] = 00 1110
4045 instr[23,22] = size: byte(00), half(01), word (10)
4046 instr[21,10] = 1000 0100 1010
4050 unsigned vs = INSTR (9, 5);
4051 unsigned vd = INSTR (4, 0);
4052 unsigned bias = INSTR (30, 30);
4055 NYI_assert (29, 24, 0x0E);
4056 NYI_assert (21, 10, 0x84A);
4058 switch (INSTR (23, 22))
4062 for (i = 0; i < 8; i++)
4063 aarch64_set_vec_u8 (cpu, vd, i + 8,
4064 aarch64_get_vec_u16 (cpu, vs, i) >> 8);
4066 for (i = 0; i < 8; i++)
4067 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, i));
4072 for (i = 0; i < 4; i++)
4073 aarch64_set_vec_u16 (cpu, vd, i + 4,
4074 aarch64_get_vec_u32 (cpu, vs, i) >> 16);
4076 for (i = 0; i < 4; i++)
4077 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, i));
4082 for (i = 0; i < 2; i++)
4083 aarch64_set_vec_u32 (cpu, vd, i + 4,
4084 aarch64_get_vec_u64 (cpu, vs, i) >> 32);
4086 for (i = 0; i < 2; i++)
4087 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, i));
4093 do_vec_maxv (sim_cpu *cpu)
4096 instr[30] = half(0)/full(1)
4097 instr[29] = signed (0)/unsigned(1)
4098 instr[28,24] = 0 1110
4099 instr[23,22] = size: byte(00), half(01), word (10)
4101 instr[20,17] = 1 000
4102 instr[16] = max(0)/min(1)
4103 instr[15,10] = 1010 10
4104 instr[9,5] = V source
4105 instr[4.0] = R dest. */
4107 unsigned vs = INSTR (9, 5);
4108 unsigned rd = INSTR (4, 0);
4109 unsigned full = INSTR (30, 30);
4112 NYI_assert (28, 24, 0x0E);
4113 NYI_assert (21, 21, 1);
4114 NYI_assert (20, 17, 8);
4115 NYI_assert (15, 10, 0x2A);
4117 switch ((INSTR (29, 29) << 1)
4120 case 0: /* SMAXV. */
4123 switch (INSTR (23, 22))
4126 smax = aarch64_get_vec_s8 (cpu, vs, 0);
4127 for (i = 1; i < (full ? 16 : 8); i++)
4128 smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i));
4131 smax = aarch64_get_vec_s16 (cpu, vs, 0);
4132 for (i = 1; i < (full ? 8 : 4); i++)
4133 smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i));
4136 smax = aarch64_get_vec_s32 (cpu, vs, 0);
4137 for (i = 1; i < (full ? 4 : 2); i++)
4138 smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i));
4143 aarch64_set_reg_s64 (cpu, rd, NO_SP, smax);
4147 case 1: /* SMINV. */
4150 switch (INSTR (23, 22))
4153 smin = aarch64_get_vec_s8 (cpu, vs, 0);
4154 for (i = 1; i < (full ? 16 : 8); i++)
4155 smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i));
4158 smin = aarch64_get_vec_s16 (cpu, vs, 0);
4159 for (i = 1; i < (full ? 8 : 4); i++)
4160 smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i));
4163 smin = aarch64_get_vec_s32 (cpu, vs, 0);
4164 for (i = 1; i < (full ? 4 : 2); i++)
4165 smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i));
4171 aarch64_set_reg_s64 (cpu, rd, NO_SP, smin);
4175 case 2: /* UMAXV. */
4178 switch (INSTR (23, 22))
4181 umax = aarch64_get_vec_u8 (cpu, vs, 0);
4182 for (i = 1; i < (full ? 16 : 8); i++)
4183 umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i));
4186 umax = aarch64_get_vec_u16 (cpu, vs, 0);
4187 for (i = 1; i < (full ? 8 : 4); i++)
4188 umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i));
4191 umax = aarch64_get_vec_u32 (cpu, vs, 0);
4192 for (i = 1; i < (full ? 4 : 2); i++)
4193 umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i));
4199 aarch64_set_reg_u64 (cpu, rd, NO_SP, umax);
4203 case 3: /* UMINV. */
4206 switch (INSTR (23, 22))
4209 umin = aarch64_get_vec_u8 (cpu, vs, 0);
4210 for (i = 1; i < (full ? 16 : 8); i++)
4211 umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i));
4214 umin = aarch64_get_vec_u16 (cpu, vs, 0);
4215 for (i = 1; i < (full ? 8 : 4); i++)
4216 umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i));
4219 umin = aarch64_get_vec_u32 (cpu, vs, 0);
4220 for (i = 1; i < (full ? 4 : 2); i++)
4221 umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i));
4227 aarch64_set_reg_u64 (cpu, rd, NO_SP, umin);
4234 do_vec_fminmaxV (sim_cpu *cpu)
4236 /* instr[31,24] = 0110 1110
4237 instr[23] = max(0)/min(1)
4238 instr[22,14] = 011 0000 11
4239 instr[13,12] = nm(00)/normal(11)
4241 instr[9,5] = V source
4242 instr[4.0] = R dest. */
4244 unsigned vs = INSTR (9, 5);
4245 unsigned rd = INSTR (4, 0);
4247 float res = aarch64_get_vec_float (cpu, vs, 0);
4249 NYI_assert (31, 24, 0x6E);
4250 NYI_assert (22, 14, 0x0C3);
4251 NYI_assert (11, 10, 2);
4255 switch (INSTR (13, 12))
4257 case 0: /* FMNINNMV. */
4258 for (i = 1; i < 4; i++)
4259 res = fminnm (res, aarch64_get_vec_float (cpu, vs, i));
4262 case 3: /* FMINV. */
4263 for (i = 1; i < 4; i++)
4264 res = min (res, aarch64_get_vec_float (cpu, vs, i));
4273 switch (INSTR (13, 12))
4275 case 0: /* FMNAXNMV. */
4276 for (i = 1; i < 4; i++)
4277 res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
4280 case 3: /* FMAXV. */
4281 for (i = 1; i < 4; i++)
4282 res = max (res, aarch64_get_vec_float (cpu, vs, i));
4290 aarch64_set_FP_float (cpu, rd, res);
4294 do_vec_Fminmax (sim_cpu *cpu)
4297 instr[30] = half(0)/full(1)
4298 instr[29,24] = 00 1110
4299 instr[23] = max(0)/min(1)
4300 instr[22] = float(0)/double(1)
4304 instr[13,12] = nm(00)/normal(11)
4309 unsigned vm = INSTR (20, 16);
4310 unsigned vn = INSTR (9, 5);
4311 unsigned vd = INSTR (4, 0);
4312 unsigned full = INSTR (30, 30);
4313 unsigned min = INSTR (23, 23);
4316 NYI_assert (29, 24, 0x0E);
4317 NYI_assert (21, 21, 1);
4318 NYI_assert (15, 14, 3);
4319 NYI_assert (11, 10, 1);
4323 double (* func)(double, double);
4328 if (INSTR (13, 12) == 0)
4329 func = min ? dminnm : dmaxnm;
4330 else if (INSTR (13, 12) == 3)
4331 func = min ? fmin : fmax;
4335 for (i = 0; i < 2; i++)
4336 aarch64_set_vec_double (cpu, vd, i,
4337 func (aarch64_get_vec_double (cpu, vn, i),
4338 aarch64_get_vec_double (cpu, vm, i)));
4342 float (* func)(float, float);
4344 if (INSTR (13, 12) == 0)
4345 func = min ? fminnm : fmaxnm;
4346 else if (INSTR (13, 12) == 3)
4347 func = min ? fminf : fmaxf;
4351 for (i = 0; i < (full ? 4 : 2); i++)
4352 aarch64_set_vec_float (cpu, vd, i,
4353 func (aarch64_get_vec_float (cpu, vn, i),
4354 aarch64_get_vec_float (cpu, vm, i)));
4359 do_vec_SCVTF (sim_cpu *cpu)
4363 instr[29,23] = 00 1110 0
4364 instr[22] = float(0)/double(1)
4365 instr[21,10] = 10 0001 1101 10
4369 unsigned vn = INSTR (9, 5);
4370 unsigned vd = INSTR (4, 0);
4371 unsigned full = INSTR (30, 30);
4372 unsigned size = INSTR (22, 22);
4375 NYI_assert (29, 23, 0x1C);
4376 NYI_assert (21, 10, 0x876);
4383 for (i = 0; i < 2; i++)
4385 double val = (double) aarch64_get_vec_u64 (cpu, vn, i);
4386 aarch64_set_vec_double (cpu, vd, i, val);
4391 for (i = 0; i < (full ? 4 : 2); i++)
4393 float val = (float) aarch64_get_vec_u32 (cpu, vn, i);
4394 aarch64_set_vec_float (cpu, vd, i, val);
4399 #define VEC_CMP(SOURCE, CMP) \
4405 for (i = 0; i < (full ? 16 : 8); i++) \
4406 aarch64_set_vec_u8 (cpu, vd, i, \
4407 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4409 aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \
4413 for (i = 0; i < (full ? 8 : 4); i++) \
4414 aarch64_set_vec_u16 (cpu, vd, i, \
4415 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4417 aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \
4421 for (i = 0; i < (full ? 4 : 2); i++) \
4422 aarch64_set_vec_u32 (cpu, vd, i, \
4423 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4425 aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \
4431 for (i = 0; i < 2; i++) \
4432 aarch64_set_vec_u64 (cpu, vd, i, \
4433 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4435 aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \
4442 #define VEC_CMP0(SOURCE, CMP) \
4448 for (i = 0; i < (full ? 16 : 8); i++) \
4449 aarch64_set_vec_u8 (cpu, vd, i, \
4450 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4454 for (i = 0; i < (full ? 8 : 4); i++) \
4455 aarch64_set_vec_u16 (cpu, vd, i, \
4456 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4460 for (i = 0; i < (full ? 4 : 2); i++) \
4461 aarch64_set_vec_u32 (cpu, vd, i, \
4462 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4468 for (i = 0; i < 2; i++) \
4469 aarch64_set_vec_u64 (cpu, vd, i, \
4470 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4471 CMP 0 ? -1ULL : 0); \
4477 #define VEC_FCMP0(CMP) \
4482 if (INSTR (22, 22)) \
4486 for (i = 0; i < 2; i++) \
4487 aarch64_set_vec_u64 (cpu, vd, i, \
4488 aarch64_get_vec_double (cpu, vn, i) \
4489 CMP 0.0 ? -1 : 0); \
4493 for (i = 0; i < (full ? 4 : 2); i++) \
4494 aarch64_set_vec_u32 (cpu, vd, i, \
4495 aarch64_get_vec_float (cpu, vn, i) \
4496 CMP 0.0 ? -1 : 0); \
4502 #define VEC_FCMP(CMP) \
4505 if (INSTR (22, 22)) \
4509 for (i = 0; i < 2; i++) \
4510 aarch64_set_vec_u64 (cpu, vd, i, \
4511 aarch64_get_vec_double (cpu, vn, i) \
4513 aarch64_get_vec_double (cpu, vm, i) \
4518 for (i = 0; i < (full ? 4 : 2); i++) \
4519 aarch64_set_vec_u32 (cpu, vd, i, \
4520 aarch64_get_vec_float (cpu, vn, i) \
4522 aarch64_get_vec_float (cpu, vm, i) \
4530 do_vec_compare (sim_cpu *cpu)
4533 instr[30] = half(0)/full(1)
4534 instr[29] = part-of-comparison-type
4535 instr[28,24] = 0 1110
4536 instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
4537 type of float compares: single (-0) / double (-1)
4539 instr[20,16] = Vm or 00000 (compare vs 0)
4540 instr[15,10] = part-of-comparison-type
4544 int full = INSTR (30, 30);
4545 int size = INSTR (23, 22);
4546 unsigned vm = INSTR (20, 16);
4547 unsigned vn = INSTR (9, 5);
4548 unsigned vd = INSTR (4, 0);
4551 NYI_assert (28, 24, 0x0E);
4552 NYI_assert (21, 21, 1);
4556 || ((INSTR (11, 11) == 0
4557 && INSTR (10, 10) == 0)))
4559 /* A compare vs 0. */
4562 if (INSTR (15, 10) == 0x2A)
4564 else if (INSTR (15, 10) == 0x32
4565 || INSTR (15, 10) == 0x3E)
4566 do_vec_fminmaxV (cpu);
4567 else if (INSTR (29, 23) == 0x1C
4568 && INSTR (21, 10) == 0x876)
4578 /* A floating point compare. */
4579 unsigned decode = (INSTR (29, 29) << 5)
4580 | (INSTR (23, 23) << 4)
4583 NYI_assert (15, 15, 1);
4587 case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
4588 case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
4589 case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
4590 case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
4591 case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
4592 case /* 0b111001: GT */ 0x39: VEC_FCMP (>);
4593 case /* 0b101001: GE */ 0x29: VEC_FCMP (>=);
4594 case /* 0b001001: EQ */ 0x09: VEC_FCMP (==);
4602 unsigned decode = (INSTR (29, 29) << 6)
4607 case 0x0D: /* 0001101 GT */ VEC_CMP (s, > );
4608 case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= );
4609 case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > );
4610 case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == );
4611 case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < );
4612 case 0x4D: /* 1001101 HI */ VEC_CMP (u, > );
4613 case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= );
4614 case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= );
4615 case 0x63: /* 1100011 EQ */ VEC_CMP (u, == );
4616 case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= );
4626 do_vec_SSHL (sim_cpu *cpu)
4629 instr[30] = first part (0)/ second part (1)
4630 instr[29,24] = 00 1110
4631 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4634 instr[15,10] = 0100 01
4638 unsigned full = INSTR (30, 30);
4639 unsigned vm = INSTR (20, 16);
4640 unsigned vn = INSTR (9, 5);
4641 unsigned vd = INSTR (4, 0);
4645 NYI_assert (29, 24, 0x0E);
4646 NYI_assert (21, 21, 1);
4647 NYI_assert (15, 10, 0x11);
4649 /* FIXME: What is a signed shift left in this context ?. */
4651 switch (INSTR (23, 22))
4654 for (i = 0; i < (full ? 16 : 8); i++)
4656 shift = aarch64_get_vec_s8 (cpu, vm, i);
4658 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4661 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4667 for (i = 0; i < (full ? 8 : 4); i++)
4669 shift = aarch64_get_vec_s8 (cpu, vm, i);
4671 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4674 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4680 for (i = 0; i < (full ? 4 : 2); i++)
4682 shift = aarch64_get_vec_s8 (cpu, vm, i);
4684 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4687 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4695 for (i = 0; i < 2; i++)
4697 shift = aarch64_get_vec_s8 (cpu, vm, i);
4699 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4702 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4710 do_vec_USHL (sim_cpu *cpu)
4713 instr[30] = first part (0)/ second part (1)
4714 instr[29,24] = 10 1110
4715 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4718 instr[15,10] = 0100 01
4722 unsigned full = INSTR (30, 30);
4723 unsigned vm = INSTR (20, 16);
4724 unsigned vn = INSTR (9, 5);
4725 unsigned vd = INSTR (4, 0);
4729 NYI_assert (29, 24, 0x2E);
4730 NYI_assert (15, 10, 0x11);
4732 switch (INSTR (23, 22))
4735 for (i = 0; i < (full ? 16 : 8); i++)
4737 shift = aarch64_get_vec_s8 (cpu, vm, i);
4739 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4742 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4748 for (i = 0; i < (full ? 8 : 4); i++)
4750 shift = aarch64_get_vec_s8 (cpu, vm, i);
4752 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4755 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4761 for (i = 0; i < (full ? 4 : 2); i++)
4763 shift = aarch64_get_vec_s8 (cpu, vm, i);
4765 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4768 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4776 for (i = 0; i < 2; i++)
4778 shift = aarch64_get_vec_s8 (cpu, vm, i);
4780 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4783 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4791 do_vec_FMLA (sim_cpu *cpu)
4794 instr[30] = full/half selector
4795 instr[29,23] = 0011100
4796 instr[22] = size: 0=>float, 1=>double
4799 instr[15,10] = 1100 11
4803 unsigned vm = INSTR (20, 16);
4804 unsigned vn = INSTR (9, 5);
4805 unsigned vd = INSTR (4, 0);
4807 int full = INSTR (30, 30);
4809 NYI_assert (29, 23, 0x1C);
4810 NYI_assert (21, 21, 1);
4811 NYI_assert (15, 10, 0x33);
4817 for (i = 0; i < 2; i++)
4818 aarch64_set_vec_double (cpu, vd, i,
4819 aarch64_get_vec_double (cpu, vn, i) *
4820 aarch64_get_vec_double (cpu, vm, i) +
4821 aarch64_get_vec_double (cpu, vd, i));
4825 for (i = 0; i < (full ? 4 : 2); i++)
4826 aarch64_set_vec_float (cpu, vd, i,
4827 aarch64_get_vec_float (cpu, vn, i) *
4828 aarch64_get_vec_float (cpu, vm, i) +
4829 aarch64_get_vec_float (cpu, vd, i));
4834 do_vec_max (sim_cpu *cpu)
4837 instr[30] = full/half selector
4838 instr[29] = SMAX (0) / UMAX (1)
4839 instr[28,24] = 0 1110
4840 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
4843 instr[15,10] = 0110 01
4847 unsigned vm = INSTR (20, 16);
4848 unsigned vn = INSTR (9, 5);
4849 unsigned vd = INSTR (4, 0);
4851 int full = INSTR (30, 30);
4853 NYI_assert (28, 24, 0x0E);
4854 NYI_assert (21, 21, 1);
4855 NYI_assert (15, 10, 0x19);
4859 switch (INSTR (23, 22))
4862 for (i = 0; i < (full ? 16 : 8); i++)
4863 aarch64_set_vec_u8 (cpu, vd, i,
4864 aarch64_get_vec_u8 (cpu, vn, i)
4865 > aarch64_get_vec_u8 (cpu, vm, i)
4866 ? aarch64_get_vec_u8 (cpu, vn, i)
4867 : aarch64_get_vec_u8 (cpu, vm, i));
4871 for (i = 0; i < (full ? 8 : 4); i++)
4872 aarch64_set_vec_u16 (cpu, vd, i,
4873 aarch64_get_vec_u16 (cpu, vn, i)
4874 > aarch64_get_vec_u16 (cpu, vm, i)
4875 ? aarch64_get_vec_u16 (cpu, vn, i)
4876 : aarch64_get_vec_u16 (cpu, vm, i));
4880 for (i = 0; i < (full ? 4 : 2); i++)
4881 aarch64_set_vec_u32 (cpu, vd, i,
4882 aarch64_get_vec_u32 (cpu, vn, i)
4883 > aarch64_get_vec_u32 (cpu, vm, i)
4884 ? aarch64_get_vec_u32 (cpu, vn, i)
4885 : aarch64_get_vec_u32 (cpu, vm, i));
4894 switch (INSTR (23, 22))
4897 for (i = 0; i < (full ? 16 : 8); i++)
4898 aarch64_set_vec_s8 (cpu, vd, i,
4899 aarch64_get_vec_s8 (cpu, vn, i)
4900 > aarch64_get_vec_s8 (cpu, vm, i)
4901 ? aarch64_get_vec_s8 (cpu, vn, i)
4902 : aarch64_get_vec_s8 (cpu, vm, i));
4906 for (i = 0; i < (full ? 8 : 4); i++)
4907 aarch64_set_vec_s16 (cpu, vd, i,
4908 aarch64_get_vec_s16 (cpu, vn, i)
4909 > aarch64_get_vec_s16 (cpu, vm, i)
4910 ? aarch64_get_vec_s16 (cpu, vn, i)
4911 : aarch64_get_vec_s16 (cpu, vm, i));
4915 for (i = 0; i < (full ? 4 : 2); i++)
4916 aarch64_set_vec_s32 (cpu, vd, i,
4917 aarch64_get_vec_s32 (cpu, vn, i)
4918 > aarch64_get_vec_s32 (cpu, vm, i)
4919 ? aarch64_get_vec_s32 (cpu, vn, i)
4920 : aarch64_get_vec_s32 (cpu, vm, i));
4930 do_vec_min (sim_cpu *cpu)
4933 instr[30] = full/half selector
4934 instr[29] = SMIN (0) / UMIN (1)
4935 instr[28,24] = 0 1110
4936 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
4939 instr[15,10] = 0110 11
4943 unsigned vm = INSTR (20, 16);
4944 unsigned vn = INSTR (9, 5);
4945 unsigned vd = INSTR (4, 0);
4947 int full = INSTR (30, 30);
4949 NYI_assert (28, 24, 0x0E);
4950 NYI_assert (21, 21, 1);
4951 NYI_assert (15, 10, 0x1B);
4955 switch (INSTR (23, 22))
4958 for (i = 0; i < (full ? 16 : 8); i++)
4959 aarch64_set_vec_u8 (cpu, vd, i,
4960 aarch64_get_vec_u8 (cpu, vn, i)
4961 < aarch64_get_vec_u8 (cpu, vm, i)
4962 ? aarch64_get_vec_u8 (cpu, vn, i)
4963 : aarch64_get_vec_u8 (cpu, vm, i));
4967 for (i = 0; i < (full ? 8 : 4); i++)
4968 aarch64_set_vec_u16 (cpu, vd, i,
4969 aarch64_get_vec_u16 (cpu, vn, i)
4970 < aarch64_get_vec_u16 (cpu, vm, i)
4971 ? aarch64_get_vec_u16 (cpu, vn, i)
4972 : aarch64_get_vec_u16 (cpu, vm, i));
4976 for (i = 0; i < (full ? 4 : 2); i++)
4977 aarch64_set_vec_u32 (cpu, vd, i,
4978 aarch64_get_vec_u32 (cpu, vn, i)
4979 < aarch64_get_vec_u32 (cpu, vm, i)
4980 ? aarch64_get_vec_u32 (cpu, vn, i)
4981 : aarch64_get_vec_u32 (cpu, vm, i));
4990 switch (INSTR (23, 22))
4993 for (i = 0; i < (full ? 16 : 8); i++)
4994 aarch64_set_vec_s8 (cpu, vd, i,
4995 aarch64_get_vec_s8 (cpu, vn, i)
4996 < aarch64_get_vec_s8 (cpu, vm, i)
4997 ? aarch64_get_vec_s8 (cpu, vn, i)
4998 : aarch64_get_vec_s8 (cpu, vm, i));
5002 for (i = 0; i < (full ? 8 : 4); i++)
5003 aarch64_set_vec_s16 (cpu, vd, i,
5004 aarch64_get_vec_s16 (cpu, vn, i)
5005 < aarch64_get_vec_s16 (cpu, vm, i)
5006 ? aarch64_get_vec_s16 (cpu, vn, i)
5007 : aarch64_get_vec_s16 (cpu, vm, i));
5011 for (i = 0; i < (full ? 4 : 2); i++)
5012 aarch64_set_vec_s32 (cpu, vd, i,
5013 aarch64_get_vec_s32 (cpu, vn, i)
5014 < aarch64_get_vec_s32 (cpu, vm, i)
5015 ? aarch64_get_vec_s32 (cpu, vn, i)
5016 : aarch64_get_vec_s32 (cpu, vm, i));
5026 do_vec_sub_long (sim_cpu *cpu)
5029 instr[30] = lower (0) / upper (1)
5030 instr[29] = signed (0) / unsigned (1)
5031 instr[28,24] = 0 1110
5032 instr[23,22] = size: bytes (00), half (01), word (10)
5035 instr[15,10] = 0010 00
5037 instr[4,0] = V dest. */
5039 unsigned size = INSTR (23, 22);
5040 unsigned vm = INSTR (20, 16);
5041 unsigned vn = INSTR (9, 5);
5042 unsigned vd = INSTR (4, 0);
5046 NYI_assert (28, 24, 0x0E);
5047 NYI_assert (21, 21, 1);
5048 NYI_assert (15, 10, 0x08);
5053 switch (INSTR (30, 29))
5055 case 2: /* SSUBL2. */
5057 case 0: /* SSUBL. */
5062 for (i = 0; i < 8; i++)
5063 aarch64_set_vec_s16 (cpu, vd, i,
5064 aarch64_get_vec_s8 (cpu, vn, i + bias)
5065 - aarch64_get_vec_s8 (cpu, vm, i + bias));
5070 for (i = 0; i < 4; i++)
5071 aarch64_set_vec_s32 (cpu, vd, i,
5072 aarch64_get_vec_s16 (cpu, vn, i + bias)
5073 - aarch64_get_vec_s16 (cpu, vm, i + bias));
5077 for (i = 0; i < 2; i++)
5078 aarch64_set_vec_s64 (cpu, vd, i,
5079 aarch64_get_vec_s32 (cpu, vn, i + bias)
5080 - aarch64_get_vec_s32 (cpu, vm, i + bias));
5088 case 3: /* USUBL2. */
5090 case 1: /* USUBL. */
5095 for (i = 0; i < 8; i++)
5096 aarch64_set_vec_u16 (cpu, vd, i,
5097 aarch64_get_vec_u8 (cpu, vn, i + bias)
5098 - aarch64_get_vec_u8 (cpu, vm, i + bias));
5103 for (i = 0; i < 4; i++)
5104 aarch64_set_vec_u32 (cpu, vd, i,
5105 aarch64_get_vec_u16 (cpu, vn, i + bias)
5106 - aarch64_get_vec_u16 (cpu, vm, i + bias));
5110 for (i = 0; i < 2; i++)
5111 aarch64_set_vec_u64 (cpu, vd, i,
5112 aarch64_get_vec_u32 (cpu, vn, i + bias)
5113 - aarch64_get_vec_u32 (cpu, vm, i + bias));
5124 do_vec_ADDP (sim_cpu *cpu)
5127 instr[30] = half(0)/full(1)
5128 instr[29,24] = 00 1110
5129 instr[23,22] = size: bytes (00), half (01), word (10), long (11)
5132 instr[15,10] = 1011 11
5134 instr[4,0] = V dest. */
5138 unsigned full = INSTR (30, 30);
5139 unsigned size = INSTR (23, 22);
5140 unsigned vm = INSTR (20, 16);
5141 unsigned vn = INSTR (9, 5);
5142 unsigned vd = INSTR (4, 0);
5145 NYI_assert (29, 24, 0x0E);
5146 NYI_assert (21, 21, 1);
5147 NYI_assert (15, 10, 0x2F);
5149 /* Make copies of the source registers in case vd == vn/vm. */
5150 copy_vn = cpu->fr[vn];
5151 copy_vm = cpu->fr[vm];
5156 range = full ? 8 : 4;
5157 for (i = 0; i < range; i++)
5159 aarch64_set_vec_u8 (cpu, vd, i,
5160 copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
5161 aarch64_set_vec_u8 (cpu, vd, i + range,
5162 copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);
5167 range = full ? 4 : 2;
5168 for (i = 0; i < range; i++)
5170 aarch64_set_vec_u16 (cpu, vd, i,
5171 copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
5172 aarch64_set_vec_u16 (cpu, vd, i + range,
5173 copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);
5178 range = full ? 2 : 1;
5179 for (i = 0; i < range; i++)
5181 aarch64_set_vec_u32 (cpu, vd, i,
5182 copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
5183 aarch64_set_vec_u32 (cpu, vd, i + range,
5184 copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);
5191 aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
5192 aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
5198 do_vec_UMOV (sim_cpu *cpu)
5201 instr[30] = 32-bit(0)/64-bit(1)
5202 instr[29,21] = 00 1110 000
5203 insrt[20,16] = size & index
5204 instr[15,10] = 0011 11
5205 instr[9,5] = V source
5206 instr[4,0] = R dest. */
5208 unsigned vs = INSTR (9, 5);
5209 unsigned rd = INSTR (4, 0);
5212 NYI_assert (29, 21, 0x070);
5213 NYI_assert (15, 10, 0x0F);
5217 /* Byte transfer. */
5218 index = INSTR (20, 17);
5219 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5220 aarch64_get_vec_u8 (cpu, vs, index));
5222 else if (INSTR (17, 17))
5224 index = INSTR (20, 18);
5225 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5226 aarch64_get_vec_u16 (cpu, vs, index));
5228 else if (INSTR (18, 18))
5230 index = INSTR (20, 19);
5231 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5232 aarch64_get_vec_u32 (cpu, vs, index));
5236 if (INSTR (30, 30) != 1)
5239 index = INSTR (20, 20);
5240 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5241 aarch64_get_vec_u64 (cpu, vs, index));
5246 do_vec_FABS (sim_cpu *cpu)
5249 instr[30] = half(0)/full(1)
5250 instr[29,23] = 00 1110 1
5251 instr[22] = float(0)/double(1)
5252 instr[21,16] = 10 0000
5253 instr[15,10] = 1111 10
5257 unsigned vn = INSTR (9, 5);
5258 unsigned vd = INSTR (4, 0);
5259 unsigned full = INSTR (30, 30);
5262 NYI_assert (29, 23, 0x1D);
5263 NYI_assert (21, 10, 0x83E);
5270 for (i = 0; i < 2; i++)
5271 aarch64_set_vec_double (cpu, vd, i,
5272 fabs (aarch64_get_vec_double (cpu, vn, i)));
5276 for (i = 0; i < (full ? 4 : 2); i++)
5277 aarch64_set_vec_float (cpu, vd, i,
5278 fabsf (aarch64_get_vec_float (cpu, vn, i)));
5283 do_vec_FCVTZS (sim_cpu *cpu)
5286 instr[30] = half (0) / all (1)
5287 instr[29,23] = 00 1110 1
5288 instr[22] = single (0) / double (1)
5289 instr[21,10] = 10 0001 1011 10
5293 unsigned rn = INSTR (9, 5);
5294 unsigned rd = INSTR (4, 0);
5295 unsigned full = INSTR (30, 30);
5298 NYI_assert (31, 31, 0);
5299 NYI_assert (29, 23, 0x1D);
5300 NYI_assert (21, 10, 0x86E);
5307 for (i = 0; i < 2; i++)
5308 aarch64_set_vec_s64 (cpu, rd, i,
5309 (int64_t) aarch64_get_vec_double (cpu, rn, i));
5312 for (i = 0; i < (full ? 4 : 2); i++)
5313 aarch64_set_vec_s32 (cpu, rd, i,
5314 (int32_t) aarch64_get_vec_float (cpu, rn, i));
5318 do_vec_op1 (sim_cpu *cpu)
5321 instr[30] = half/full
5322 instr[29,24] = 00 1110
5325 instr[15,10] = sub-opcode
5328 NYI_assert (29, 24, 0x0E);
5330 if (INSTR (21, 21) == 0)
5332 if (INSTR (23, 22) == 0)
5334 if (INSTR (30, 30) == 1
5335 && INSTR (17, 14) == 0
5336 && INSTR (12, 10) == 7)
5337 return do_vec_ins_2 (cpu);
5339 switch (INSTR (15, 10))
5341 case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
5342 case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
5343 case 0x07: do_vec_INS (cpu); return;
5344 case 0x0A: do_vec_TRN (cpu); return;
5347 if (INSTR (17, 16) == 0)
5349 do_vec_MOV_into_scalar (cpu);
5358 do_vec_TBL (cpu); return;
5362 do_vec_UZP (cpu); return;
5366 do_vec_ZIP (cpu); return;
5373 switch (INSTR (13, 10))
5375 case 0x6: do_vec_UZP (cpu); return;
5376 case 0xE: do_vec_ZIP (cpu); return;
5377 case 0xA: do_vec_TRN (cpu); return;
5378 case 0xF: do_vec_UMOV (cpu); return;
5383 switch (INSTR (15, 10))
5386 switch (INSTR (23, 21))
5388 case 1: do_vec_AND (cpu); return;
5389 case 3: do_vec_BIC (cpu); return;
5390 case 5: do_vec_ORR (cpu); return;
5391 case 7: do_vec_ORN (cpu); return;
5395 case 0x08: do_vec_sub_long (cpu); return;
5396 case 0x0a: do_vec_XTN (cpu); return;
5397 case 0x11: do_vec_SSHL (cpu); return;
5398 case 0x19: do_vec_max (cpu); return;
5399 case 0x1B: do_vec_min (cpu); return;
5400 case 0x21: do_vec_add (cpu); return;
5401 case 0x25: do_vec_MLA (cpu); return;
5402 case 0x27: do_vec_mul (cpu); return;
5403 case 0x2F: do_vec_ADDP (cpu); return;
5404 case 0x30: do_vec_mull (cpu); return;
5405 case 0x33: do_vec_FMLA (cpu); return;
5406 case 0x35: do_vec_fadd (cpu); return;
5409 switch (INSTR (20, 16))
5411 case 0x00: do_vec_ABS (cpu); return;
5412 case 0x01: do_vec_FCVTZS (cpu); return;
5413 case 0x11: do_vec_ADDV (cpu); return;
5419 do_vec_Fminmax (cpu); return;
5431 do_vec_compare (cpu); return;
5434 do_vec_FABS (cpu); return;
5442 do_vec_xtl (sim_cpu *cpu)
5445 instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
5446 instr[28,22] = 0 1111 00
5447 instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
5448 instr[15,10] = 1010 01
5449 instr[9,5] = V source
5450 instr[4,0] = V dest. */
5452 unsigned vs = INSTR (9, 5);
5453 unsigned vd = INSTR (4, 0);
5454 unsigned i, shift, bias = 0;
5456 NYI_assert (28, 22, 0x3C);
5457 NYI_assert (15, 10, 0x29);
5459 switch (INSTR (30, 29))
5461 case 2: /* SXTL2, SSHLL2. */
5463 case 0: /* SXTL, SSHLL. */
5466 shift = INSTR (20, 16);
5468 (cpu, vd, 0, aarch64_get_vec_s32 (cpu, vs, bias) << shift);
5470 (cpu, vd, 1, aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift);
5472 else if (INSTR (20, 20))
5474 shift = INSTR (19, 16);
5476 for (i = 0; i < 4; i++)
5478 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vs, i + bias) << shift);
5482 NYI_assert (19, 19, 1);
5484 shift = INSTR (18, 16);
5486 for (i = 0; i < 8; i++)
5488 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vs, i + bias) << shift);
5492 case 3: /* UXTL2, USHLL2. */
5494 case 1: /* UXTL, USHLL. */
5497 shift = INSTR (20, 16);
5499 (cpu, vd, 0, aarch64_get_vec_u32 (cpu, vs, bias) << shift);
5501 (cpu, vd, 1, aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift);
5503 else if (INSTR (20, 20))
5505 shift = INSTR (19, 16);
5507 for (i = 0; i < 4; i++)
5509 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, i + bias) << shift);
5513 NYI_assert (19, 19, 1);
5515 shift = INSTR (18, 16);
5517 for (i = 0; i < 8; i++)
5519 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, i + bias) << shift);
5526 do_vec_SHL (sim_cpu *cpu)
5529 instr [30] = half(0)/full(1)
5530 instr [29,23] = 001 1110
5531 instr [22,16] = size and shift amount
5532 instr [15,10] = 01 0101
5534 instr [4, 0] = Vd. */
5537 int full = INSTR (30, 30);
5538 unsigned vs = INSTR (9, 5);
5539 unsigned vd = INSTR (4, 0);
5542 NYI_assert (29, 23, 0x1E);
5543 NYI_assert (15, 10, 0x15);
5547 shift = INSTR (21, 16);
5552 for (i = 0; i < 2; i++)
5554 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5555 aarch64_set_vec_u64 (cpu, vd, i, val << shift);
5563 shift = INSTR (20, 16);
5565 for (i = 0; i < (full ? 4 : 2); i++)
5567 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5568 aarch64_set_vec_u32 (cpu, vd, i, val << shift);
5576 shift = INSTR (19, 16);
5578 for (i = 0; i < (full ? 8 : 4); i++)
5580 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5581 aarch64_set_vec_u16 (cpu, vd, i, val << shift);
5587 if (INSTR (19, 19) == 0)
5590 shift = INSTR (18, 16);
5592 for (i = 0; i < (full ? 16 : 8); i++)
5594 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5595 aarch64_set_vec_u8 (cpu, vd, i, val << shift);
5600 do_vec_SSHR_USHR (sim_cpu *cpu)
5603 instr [30] = half(0)/full(1)
5604 instr [29] = signed(0)/unsigned(1)
5605 instr [28,23] = 0 1111 0
5606 instr [22,16] = size and shift amount
5607 instr [15,10] = 0000 01
5609 instr [4, 0] = Vd. */
5611 int full = INSTR (30, 30);
5612 int sign = ! INSTR (29, 29);
5613 unsigned shift = INSTR (22, 16);
5614 unsigned vs = INSTR (9, 5);
5615 unsigned vd = INSTR (4, 0);
5618 NYI_assert (28, 23, 0x1E);
5619 NYI_assert (15, 10, 0x01);
5623 shift = 128 - shift;
5629 for (i = 0; i < 2; i++)
5631 int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
5632 aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
5635 for (i = 0; i < 2; i++)
5637 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5638 aarch64_set_vec_u64 (cpu, vd, i, val >> shift);
5649 for (i = 0; i < (full ? 4 : 2); i++)
5651 int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
5652 aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
5655 for (i = 0; i < (full ? 4 : 2); i++)
5657 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5658 aarch64_set_vec_u32 (cpu, vd, i, val >> shift);
5669 for (i = 0; i < (full ? 8 : 4); i++)
5671 int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
5672 aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
5675 for (i = 0; i < (full ? 8 : 4); i++)
5677 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5678 aarch64_set_vec_u16 (cpu, vd, i, val >> shift);
5684 if (INSTR (19, 19) == 0)
5690 for (i = 0; i < (full ? 16 : 8); i++)
5692 int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
5693 aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
5696 for (i = 0; i < (full ? 16 : 8); i++)
5698 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5699 aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
5704 do_vec_MUL_by_element (sim_cpu *cpu)
5707 instr[30] = half/full
5708 instr[29,24] = 00 1111
5719 unsigned full = INSTR (30, 30);
5720 unsigned L = INSTR (21, 21);
5721 unsigned H = INSTR (11, 11);
5722 unsigned vn = INSTR (9, 5);
5723 unsigned vd = INSTR (4, 0);
5724 unsigned size = INSTR (23, 22);
5729 NYI_assert (29, 24, 0x0F);
5730 NYI_assert (15, 12, 0x8);
5731 NYI_assert (10, 10, 0);
5737 /* 16 bit products. */
5742 index = (H << 2) | (L << 1) | INSTR (20, 20);
5743 vm = INSTR (19, 16);
5744 element2 = aarch64_get_vec_u16 (cpu, vm, index);
5746 for (e = 0; e < (full ? 8 : 4); e ++)
5748 element1 = aarch64_get_vec_u16 (cpu, vn, e);
5749 product = element1 * element2;
5750 aarch64_set_vec_u16 (cpu, vd, e, product);
5757 /* 32 bit products. */
5762 index = (H << 1) | L;
5763 vm = INSTR (20, 16);
5764 element2 = aarch64_get_vec_u32 (cpu, vm, index);
5766 for (e = 0; e < (full ? 4 : 2); e ++)
5768 element1 = aarch64_get_vec_u32 (cpu, vn, e);
5769 product = element1 * element2;
5770 aarch64_set_vec_u32 (cpu, vd, e, product);
5781 do_vec_op2 (sim_cpu *cpu)
5784 instr[30] = half/full
5785 instr[29,24] = 00 1111
5787 instr[22,16] = element size & index
5788 instr[15,10] = sub-opcode
5792 NYI_assert (29, 24, 0x0F);
5794 if (INSTR (23, 23) != 0)
5796 switch (INSTR (15, 10))
5799 case 0x22: do_vec_MUL_by_element (cpu); return;
5805 switch (INSTR (15, 10))
5807 case 0x01: do_vec_SSHR_USHR (cpu); return;
5808 case 0x15: do_vec_SHL (cpu); return;
5810 case 0x22: do_vec_MUL_by_element (cpu); return;
5811 case 0x29: do_vec_xtl (cpu); return;
5818 do_vec_neg (sim_cpu *cpu)
5821 instr[30] = full(1)/half(0)
5822 instr[29,24] = 10 1110
5823 instr[23,22] = size: byte(00), half (01), word (10), long (11)
5824 instr[21,10] = 1000 0010 1110
5828 int full = INSTR (30, 30);
5829 unsigned vs = INSTR (9, 5);
5830 unsigned vd = INSTR (4, 0);
5833 NYI_assert (29, 24, 0x2E);
5834 NYI_assert (21, 10, 0x82E);
5836 switch (INSTR (23, 22))
5839 for (i = 0; i < (full ? 16 : 8); i++)
5840 aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));
5844 for (i = 0; i < (full ? 8 : 4); i++)
5845 aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));
5849 for (i = 0; i < (full ? 4 : 2); i++)
5850 aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));
5856 for (i = 0; i < 2; i++)
5857 aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
5863 do_vec_sqrt (sim_cpu *cpu)
5866 instr[30] = full(1)/half(0)
5867 instr[29,23] = 101 1101
5868 instr[22] = single(0)/double(1)
5869 instr[21,10] = 1000 0111 1110
5873 int full = INSTR (30, 30);
5874 unsigned vs = INSTR (9, 5);
5875 unsigned vd = INSTR (4, 0);
5878 NYI_assert (29, 23, 0x5B);
5879 NYI_assert (21, 10, 0x87E);
5881 if (INSTR (22, 22) == 0)
5882 for (i = 0; i < (full ? 4 : 2); i++)
5883 aarch64_set_vec_float (cpu, vd, i,
5884 sqrtf (aarch64_get_vec_float (cpu, vs, i)));
5886 for (i = 0; i < 2; i++)
5887 aarch64_set_vec_double (cpu, vd, i,
5888 sqrt (aarch64_get_vec_double (cpu, vs, i)));
5892 do_vec_mls_indexed (sim_cpu *cpu)
5895 instr[30] = half(0)/full(1)
5896 instr[29,24] = 10 1111
5897 instr[23,22] = 16-bit(01)/32-bit(10)
5898 instr[21,20+11] = index (if 16-bit)
5899 instr[21+11] = index (if 32-bit)
5902 instr[11] = part of index
5907 int full = INSTR (30, 30);
5908 unsigned vs = INSTR (9, 5);
5909 unsigned vd = INSTR (4, 0);
5910 unsigned vm = INSTR (20, 16);
5913 NYI_assert (15, 12, 4);
5914 NYI_assert (10, 10, 0);
5916 switch (INSTR (23, 22))
5926 elem = (INSTR (21, 20) << 1)
5928 val = aarch64_get_vec_u16 (cpu, vm, elem);
5930 for (i = 0; i < (full ? 8 : 4); i++)
5931 aarch64_set_vec_u32 (cpu, vd, i,
5932 aarch64_get_vec_u32 (cpu, vd, i) -
5933 (aarch64_get_vec_u32 (cpu, vs, i) * val));
5939 unsigned elem = (INSTR (21, 21) << 1)
5941 uint64_t val = aarch64_get_vec_u32 (cpu, vm, elem);
5943 for (i = 0; i < (full ? 4 : 2); i++)
5944 aarch64_set_vec_u64 (cpu, vd, i,
5945 aarch64_get_vec_u64 (cpu, vd, i) -
5946 (aarch64_get_vec_u64 (cpu, vs, i) * val));
5958 do_vec_SUB (sim_cpu *cpu)
5961 instr [30] = half(0)/full(1)
5962 instr [29,24] = 10 1110
5963 instr [23,22] = size: byte(00, half(01), word (10), long (11)
5966 instr [15,10] = 10 0001
5968 instr [4, 0] = Vd. */
5970 unsigned full = INSTR (30, 30);
5971 unsigned vm = INSTR (20, 16);
5972 unsigned vn = INSTR (9, 5);
5973 unsigned vd = INSTR (4, 0);
5976 NYI_assert (29, 24, 0x2E);
5977 NYI_assert (21, 21, 1);
5978 NYI_assert (15, 10, 0x21);
5980 switch (INSTR (23, 22))
5983 for (i = 0; i < (full ? 16 : 8); i++)
5984 aarch64_set_vec_s8 (cpu, vd, i,
5985 aarch64_get_vec_s8 (cpu, vn, i)
5986 - aarch64_get_vec_s8 (cpu, vm, i));
5990 for (i = 0; i < (full ? 8 : 4); i++)
5991 aarch64_set_vec_s16 (cpu, vd, i,
5992 aarch64_get_vec_s16 (cpu, vn, i)
5993 - aarch64_get_vec_s16 (cpu, vm, i));
5997 for (i = 0; i < (full ? 4 : 2); i++)
5998 aarch64_set_vec_s32 (cpu, vd, i,
5999 aarch64_get_vec_s32 (cpu, vn, i)
6000 - aarch64_get_vec_s32 (cpu, vm, i));
6007 for (i = 0; i < 2; i++)
6008 aarch64_set_vec_s64 (cpu, vd, i,
6009 aarch64_get_vec_s64 (cpu, vn, i)
6010 - aarch64_get_vec_s64 (cpu, vm, i));
6016 do_vec_MLS (sim_cpu *cpu)
6019 instr [30] = half(0)/full(1)
6020 instr [29,24] = 10 1110
6021 instr [23,22] = size: byte(00, half(01), word (10)
6024 instr [15,10] = 10 0101
6026 instr [4, 0] = Vd. */
6028 unsigned full = INSTR (30, 30);
6029 unsigned vm = INSTR (20, 16);
6030 unsigned vn = INSTR (9, 5);
6031 unsigned vd = INSTR (4, 0);
6034 NYI_assert (29, 24, 0x2E);
6035 NYI_assert (21, 21, 1);
6036 NYI_assert (15, 10, 0x25);
6038 switch (INSTR (23, 22))
6041 for (i = 0; i < (full ? 16 : 8); i++)
6042 aarch64_set_vec_u8 (cpu, vd, i,
6043 (aarch64_get_vec_u8 (cpu, vn, i)
6044 * aarch64_get_vec_u8 (cpu, vm, i))
6045 - aarch64_get_vec_u8 (cpu, vd, i));
6049 for (i = 0; i < (full ? 8 : 4); i++)
6050 aarch64_set_vec_u16 (cpu, vd, i,
6051 (aarch64_get_vec_u16 (cpu, vn, i)
6052 * aarch64_get_vec_u16 (cpu, vm, i))
6053 - aarch64_get_vec_u16 (cpu, vd, i));
6057 for (i = 0; i < (full ? 4 : 2); i++)
6058 aarch64_set_vec_u32 (cpu, vd, i,
6059 (aarch64_get_vec_u32 (cpu, vn, i)
6060 * aarch64_get_vec_u32 (cpu, vm, i))
6061 - aarch64_get_vec_u32 (cpu, vd, i));
6070 do_vec_FDIV (sim_cpu *cpu)
6073 instr [30] = half(0)/full(1)
6074 instr [29,23] = 10 1110 0
6075 instr [22] = float()/double(1)
6078 instr [15,10] = 1111 11
6080 instr [4, 0] = Vd. */
6082 unsigned full = INSTR (30, 30);
6083 unsigned vm = INSTR (20, 16);
6084 unsigned vn = INSTR (9, 5);
6085 unsigned vd = INSTR (4, 0);
6088 NYI_assert (29, 23, 0x5C);
6089 NYI_assert (21, 21, 1);
6090 NYI_assert (15, 10, 0x3F);
6097 for (i = 0; i < 2; i++)
6098 aarch64_set_vec_double (cpu, vd, i,
6099 aarch64_get_vec_double (cpu, vn, i)
6100 / aarch64_get_vec_double (cpu, vm, i));
6103 for (i = 0; i < (full ? 4 : 2); i++)
6104 aarch64_set_vec_float (cpu, vd, i,
6105 aarch64_get_vec_float (cpu, vn, i)
6106 / aarch64_get_vec_float (cpu, vm, i));
6110 do_vec_FMUL (sim_cpu *cpu)
6113 instr [30] = half(0)/full(1)
6114 instr [29,23] = 10 1110 0
6115 instr [22] = float(0)/double(1)
6118 instr [15,10] = 1101 11
6120 instr [4, 0] = Vd. */
6122 unsigned full = INSTR (30, 30);
6123 unsigned vm = INSTR (20, 16);
6124 unsigned vn = INSTR (9, 5);
6125 unsigned vd = INSTR (4, 0);
6128 NYI_assert (29, 23, 0x5C);
6129 NYI_assert (21, 21, 1);
6130 NYI_assert (15, 10, 0x37);
6137 for (i = 0; i < 2; i++)
6138 aarch64_set_vec_double (cpu, vd, i,
6139 aarch64_get_vec_double (cpu, vn, i)
6140 * aarch64_get_vec_double (cpu, vm, i));
6143 for (i = 0; i < (full ? 4 : 2); i++)
6144 aarch64_set_vec_float (cpu, vd, i,
6145 aarch64_get_vec_float (cpu, vn, i)
6146 * aarch64_get_vec_float (cpu, vm, i));
6150 do_vec_FADDP (sim_cpu *cpu)
6153 instr [30] = half(0)/full(1)
6154 instr [29,23] = 10 1110 0
6155 instr [22] = float(0)/double(1)
6158 instr [15,10] = 1101 01
6160 instr [4, 0] = Vd. */
6162 unsigned full = INSTR (30, 30);
6163 unsigned vm = INSTR (20, 16);
6164 unsigned vn = INSTR (9, 5);
6165 unsigned vd = INSTR (4, 0);
6167 NYI_assert (29, 23, 0x5C);
6168 NYI_assert (21, 21, 1);
6169 NYI_assert (15, 10, 0x35);
6173 /* Extract values before adding them incase vd == vn/vm. */
6174 double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
6175 double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
6176 double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
6177 double tmp4 = aarch64_get_vec_double (cpu, vm, 1);
6182 aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
6183 aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
6187 /* Extract values before adding them incase vd == vn/vm. */
6188 float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
6189 float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
6190 float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
6191 float tmp6 = aarch64_get_vec_float (cpu, vm, 1);
6195 float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
6196 float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
6197 float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
6198 float tmp8 = aarch64_get_vec_float (cpu, vm, 3);
6200 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6201 aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
6202 aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
6203 aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
6207 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6208 aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
6214 do_vec_FSQRT (sim_cpu *cpu)
6217 instr[30] = half(0)/full(1)
6218 instr[29,23] = 10 1110 1
6219 instr[22] = single(0)/double(1)
6220 instr[21,10] = 10 0001 1111 10
6222 instr[4,0] = Vdest. */
6224 unsigned vn = INSTR (9, 5);
6225 unsigned vd = INSTR (4, 0);
6226 unsigned full = INSTR (30, 30);
6229 NYI_assert (29, 23, 0x5D);
6230 NYI_assert (21, 10, 0x87E);
6237 for (i = 0; i < 2; i++)
6238 aarch64_set_vec_double (cpu, vd, i,
6239 sqrt (aarch64_get_vec_double (cpu, vn, i)));
6243 for (i = 0; i < (full ? 4 : 2); i++)
6244 aarch64_set_vec_float (cpu, vd, i,
6245 sqrtf (aarch64_get_vec_float (cpu, vn, i)));
6250 do_vec_FNEG (sim_cpu *cpu)
6253 instr[30] = half (0)/full (1)
6254 instr[29,23] = 10 1110 1
6255 instr[22] = single (0)/double (1)
6256 instr[21,10] = 10 0000 1111 10
6258 instr[4,0] = Vdest. */
6260 unsigned vn = INSTR (9, 5);
6261 unsigned vd = INSTR (4, 0);
6262 unsigned full = INSTR (30, 30);
6265 NYI_assert (29, 23, 0x5D);
6266 NYI_assert (21, 10, 0x83E);
6273 for (i = 0; i < 2; i++)
6274 aarch64_set_vec_double (cpu, vd, i,
6275 - aarch64_get_vec_double (cpu, vn, i));
6279 for (i = 0; i < (full ? 4 : 2); i++)
6280 aarch64_set_vec_float (cpu, vd, i,
6281 - aarch64_get_vec_float (cpu, vn, i));
6286 do_vec_NOT (sim_cpu *cpu)
6289 instr[30] = half (0)/full (1)
6290 instr[29,10] = 10 1110 0010 0000 0101 10
6294 unsigned vn = INSTR (9, 5);
6295 unsigned vd = INSTR (4, 0);
6297 int full = INSTR (30, 30);
6299 NYI_assert (29, 10, 0xB8816);
6301 for (i = 0; i < (full ? 16 : 8); i++)
6302 aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
6306 clz (uint64_t val, unsigned size)
6311 mask <<= (size - 1);
6326 do_vec_CLZ (sim_cpu *cpu)
6329 instr[30] = half (0)/full (1)
6330 instr[29,24] = 10 1110
6332 instr[21,10] = 10 0000 0100 10
6336 unsigned vn = INSTR (9, 5);
6337 unsigned vd = INSTR (4, 0);
6339 int full = INSTR (30,30);
6341 NYI_assert (29, 24, 0x2E);
6342 NYI_assert (21, 10, 0x812);
6344 switch (INSTR (23, 22))
6347 for (i = 0; i < (full ? 16 : 8); i++)
6348 aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8));
6351 for (i = 0; i < (full ? 8 : 4); i++)
6352 aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16));
6355 for (i = 0; i < (full ? 4 : 2); i++)
6356 aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32));
6361 aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
6362 aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
6368 do_vec_MOV_element (sim_cpu *cpu)
6370 /* instr[31,21] = 0110 1110 000
6371 instr[20,16] = size & dest index
6373 instr[14,11] = source index
6378 unsigned vs = INSTR (9, 5);
6379 unsigned vd = INSTR (4, 0);
6383 NYI_assert (31, 21, 0x370);
6384 NYI_assert (15, 15, 0);
6385 NYI_assert (10, 10, 1);
6390 src_index = INSTR (14, 11);
6391 dst_index = INSTR (20, 17);
6392 aarch64_set_vec_u8 (cpu, vd, dst_index,
6393 aarch64_get_vec_u8 (cpu, vs, src_index));
6395 else if (INSTR (17, 17))
6398 NYI_assert (11, 11, 0);
6399 src_index = INSTR (14, 12);
6400 dst_index = INSTR (20, 18);
6401 aarch64_set_vec_u16 (cpu, vd, dst_index,
6402 aarch64_get_vec_u16 (cpu, vs, src_index));
6404 else if (INSTR (18, 18))
6407 NYI_assert (12, 11, 0);
6408 src_index = INSTR (14, 13);
6409 dst_index = INSTR (20, 19);
6410 aarch64_set_vec_u32 (cpu, vd, dst_index,
6411 aarch64_get_vec_u32 (cpu, vs, src_index));
6415 NYI_assert (19, 19, 1);
6416 NYI_assert (13, 11, 0);
6417 src_index = INSTR (14, 14);
6418 dst_index = INSTR (20, 20);
6419 aarch64_set_vec_u64 (cpu, vd, dst_index,
6420 aarch64_get_vec_u64 (cpu, vs, src_index));
6425 dexAdvSIMD0 (sim_cpu *cpu)
6427 /* instr [28,25] = 0 111. */
6428 if ( INSTR (15, 10) == 0x07
6432 if (INSTR (31, 21) == 0x075
6433 || INSTR (31, 21) == 0x275)
6435 do_vec_MOV_whole_vector (cpu);
6440 if (INSTR (29, 19) == 0x1E0)
6442 do_vec_MOV_immediate (cpu);
6446 if (INSTR (29, 19) == 0x5E0)
6452 if (INSTR (29, 19) == 0x1C0
6453 || INSTR (29, 19) == 0x1C1)
6455 if (INSTR (15, 10) == 0x03)
6457 do_vec_DUP_scalar_into_vector (cpu);
6462 switch (INSTR (29, 24))
6464 case 0x0E: do_vec_op1 (cpu); return;
6465 case 0x0F: do_vec_op2 (cpu); return;
6468 switch (INSTR (15, 10))
6470 case 0x01: do_vec_SSHR_USHR (cpu); return;
6472 case 0x12: do_vec_mls_indexed (cpu); return;
6473 case 0x29: do_vec_xtl (cpu); return;
6479 if (INSTR (21, 21) == 1)
6481 switch (INSTR (15, 10))
6484 switch (INSTR (23, 22))
6486 case 0: do_vec_EOR (cpu); return;
6487 case 1: do_vec_BSL (cpu); return;
6489 case 3: do_vec_bit (cpu); return;
6493 case 0x08: do_vec_sub_long (cpu); return;
6494 case 0x11: do_vec_USHL (cpu); return;
6495 case 0x12: do_vec_CLZ (cpu); return;
6496 case 0x16: do_vec_NOT (cpu); return;
6497 case 0x19: do_vec_max (cpu); return;
6498 case 0x1B: do_vec_min (cpu); return;
6499 case 0x21: do_vec_SUB (cpu); return;
6500 case 0x25: do_vec_MLS (cpu); return;
6501 case 0x31: do_vec_FminmaxNMP (cpu); return;
6502 case 0x35: do_vec_FADDP (cpu); return;
6503 case 0x37: do_vec_FMUL (cpu); return;
6504 case 0x3F: do_vec_FDIV (cpu); return;
6507 switch (INSTR (20, 16))
6509 case 0x00: do_vec_FNEG (cpu); return;
6510 case 0x01: do_vec_FSQRT (cpu); return;
6524 do_vec_compare (cpu); return;
6531 if (INSTR (31, 21) == 0x370)
6533 do_vec_MOV_element (cpu);
6537 switch (INSTR (21, 10))
6539 case 0x82E: do_vec_neg (cpu); return;
6540 case 0x87E: do_vec_sqrt (cpu); return;
6542 if (INSTR (15, 10) == 0x30)
6560 /* Float multiply add. */
6562 fmadds (sim_cpu *cpu)
6564 unsigned sa = INSTR (14, 10);
6565 unsigned sm = INSTR (20, 16);
6566 unsigned sn = INSTR ( 9, 5);
6567 unsigned sd = INSTR ( 4, 0);
6569 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
6570 + aarch64_get_FP_float (cpu, sn)
6571 * aarch64_get_FP_float (cpu, sm));
6574 /* Double multiply add. */
6576 fmaddd (sim_cpu *cpu)
6578 unsigned sa = INSTR (14, 10);
6579 unsigned sm = INSTR (20, 16);
6580 unsigned sn = INSTR ( 9, 5);
6581 unsigned sd = INSTR ( 4, 0);
6583 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
6584 + aarch64_get_FP_double (cpu, sn)
6585 * aarch64_get_FP_double (cpu, sm));
6588 /* Float multiply subtract. */
6590 fmsubs (sim_cpu *cpu)
6592 unsigned sa = INSTR (14, 10);
6593 unsigned sm = INSTR (20, 16);
6594 unsigned sn = INSTR ( 9, 5);
6595 unsigned sd = INSTR ( 4, 0);
6597 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
6598 - aarch64_get_FP_float (cpu, sn)
6599 * aarch64_get_FP_float (cpu, sm));
6602 /* Double multiply subtract. */
6604 fmsubd (sim_cpu *cpu)
6606 unsigned sa = INSTR (14, 10);
6607 unsigned sm = INSTR (20, 16);
6608 unsigned sn = INSTR ( 9, 5);
6609 unsigned sd = INSTR ( 4, 0);
6611 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
6612 - aarch64_get_FP_double (cpu, sn)
6613 * aarch64_get_FP_double (cpu, sm));
6616 /* Float negative multiply add. */
6618 fnmadds (sim_cpu *cpu)
6620 unsigned sa = INSTR (14, 10);
6621 unsigned sm = INSTR (20, 16);
6622 unsigned sn = INSTR ( 9, 5);
6623 unsigned sd = INSTR ( 4, 0);
6625 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
6626 + (- aarch64_get_FP_float (cpu, sn))
6627 * aarch64_get_FP_float (cpu, sm));
6630 /* Double negative multiply add. */
6632 fnmaddd (sim_cpu *cpu)
6634 unsigned sa = INSTR (14, 10);
6635 unsigned sm = INSTR (20, 16);
6636 unsigned sn = INSTR ( 9, 5);
6637 unsigned sd = INSTR ( 4, 0);
6639 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
6640 + (- aarch64_get_FP_double (cpu, sn))
6641 * aarch64_get_FP_double (cpu, sm));
6644 /* Float negative multiply subtract. */
6646 fnmsubs (sim_cpu *cpu)
6648 unsigned sa = INSTR (14, 10);
6649 unsigned sm = INSTR (20, 16);
6650 unsigned sn = INSTR ( 9, 5);
6651 unsigned sd = INSTR ( 4, 0);
6653 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
6654 + aarch64_get_FP_float (cpu, sn)
6655 * aarch64_get_FP_float (cpu, sm));
6658 /* Double negative multiply subtract. */
6660 fnmsubd (sim_cpu *cpu)
6662 unsigned sa = INSTR (14, 10);
6663 unsigned sm = INSTR (20, 16);
6664 unsigned sn = INSTR ( 9, 5);
6665 unsigned sd = INSTR ( 4, 0);
6667 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
6668 + aarch64_get_FP_double (cpu, sn)
6669 * aarch64_get_FP_double (cpu, sm));
6673 dexSimpleFPDataProc3Source (sim_cpu *cpu)
6675 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
6677 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
6680 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
6681 instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated
6682 instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */
6684 uint32_t M_S = (INSTR (31, 31) << 1)
6686 /* dispatch on combined type:o1:o2. */
6687 uint32_t dispatch = (INSTR (23, 21) << 1)
6695 case 0: fmadds (cpu); return;
6696 case 1: fmsubs (cpu); return;
6697 case 2: fnmadds (cpu); return;
6698 case 3: fnmsubs (cpu); return;
6699 case 4: fmaddd (cpu); return;
6700 case 5: fmsubd (cpu); return;
6701 case 6: fnmaddd (cpu); return;
6702 case 7: fnmsubd (cpu); return;
6704 /* type > 1 is currently unallocated. */
6710 dexSimpleFPFixedConvert (sim_cpu *cpu)
6716 dexSimpleFPCondCompare (sim_cpu *cpu)
6718 /* instr [31,23] = 0001 1110 0
6722 instr [15,12] = condition
6726 instr [3,0] = nzcv */
6728 unsigned rm = INSTR (20, 16);
6729 unsigned rn = INSTR (9, 5);
6731 NYI_assert (31, 23, 0x3C);
6732 NYI_assert (11, 10, 0x1);
6733 NYI_assert (4, 4, 0);
6735 if (! testConditionCode (cpu, INSTR (15, 12)))
6737 aarch64_set_CPSR (cpu, INSTR (3, 0));
6743 /* Double precision. */
6744 double val1 = aarch64_get_vec_double (cpu, rn, 0);
6745 double val2 = aarch64_get_vec_double (cpu, rm, 0);
6747 /* FIXME: Check for NaNs. */
6749 aarch64_set_CPSR (cpu, (Z | C));
6750 else if (val1 < val2)
6751 aarch64_set_CPSR (cpu, N);
6752 else /* val1 > val2 */
6753 aarch64_set_CPSR (cpu, C);
6757 /* Single precision. */
6758 float val1 = aarch64_get_vec_float (cpu, rn, 0);
6759 float val2 = aarch64_get_vec_float (cpu, rm, 0);
6761 /* FIXME: Check for NaNs. */
6763 aarch64_set_CPSR (cpu, (Z | C));
6764 else if (val1 < val2)
6765 aarch64_set_CPSR (cpu, N);
6766 else /* val1 > val2 */
6767 aarch64_set_CPSR (cpu, C);
6775 fadds (sim_cpu *cpu)
6777 unsigned sm = INSTR (20, 16);
6778 unsigned sn = INSTR ( 9, 5);
6779 unsigned sd = INSTR ( 4, 0);
6781 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
6782 + aarch64_get_FP_float (cpu, sm));
6787 faddd (sim_cpu *cpu)
6789 unsigned sm = INSTR (20, 16);
6790 unsigned sn = INSTR ( 9, 5);
6791 unsigned sd = INSTR ( 4, 0);
6793 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
6794 + aarch64_get_FP_double (cpu, sm));
6799 fdivs (sim_cpu *cpu)
6801 unsigned sm = INSTR (20, 16);
6802 unsigned sn = INSTR ( 9, 5);
6803 unsigned sd = INSTR ( 4, 0);
6805 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
6806 / aarch64_get_FP_float (cpu, sm));
6809 /* Double divide. */
6811 fdivd (sim_cpu *cpu)
6813 unsigned sm = INSTR (20, 16);
6814 unsigned sn = INSTR ( 9, 5);
6815 unsigned sd = INSTR ( 4, 0);
6817 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
6818 / aarch64_get_FP_double (cpu, sm));
6821 /* Float multiply. */
6823 fmuls (sim_cpu *cpu)
6825 unsigned sm = INSTR (20, 16);
6826 unsigned sn = INSTR ( 9, 5);
6827 unsigned sd = INSTR ( 4, 0);
6829 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
6830 * aarch64_get_FP_float (cpu, sm));
6833 /* Double multiply. */
6835 fmuld (sim_cpu *cpu)
6837 unsigned sm = INSTR (20, 16);
6838 unsigned sn = INSTR ( 9, 5);
6839 unsigned sd = INSTR ( 4, 0);
6841 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
6842 * aarch64_get_FP_double (cpu, sm));
6845 /* Float negate and multiply. */
6847 fnmuls (sim_cpu *cpu)
6849 unsigned sm = INSTR (20, 16);
6850 unsigned sn = INSTR ( 9, 5);
6851 unsigned sd = INSTR ( 4, 0);
6853 aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
6854 * aarch64_get_FP_float (cpu, sm)));
6857 /* Double negate and multiply. */
6859 fnmuld (sim_cpu *cpu)
6861 unsigned sm = INSTR (20, 16);
6862 unsigned sn = INSTR ( 9, 5);
6863 unsigned sd = INSTR ( 4, 0);
6865 aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
6866 * aarch64_get_FP_double (cpu, sm)));
6869 /* Float subtract. */
6871 fsubs (sim_cpu *cpu)
6873 unsigned sm = INSTR (20, 16);
6874 unsigned sn = INSTR ( 9, 5);
6875 unsigned sd = INSTR ( 4, 0);
6877 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
6878 - aarch64_get_FP_float (cpu, sm));
6881 /* Double subtract. */
6883 fsubd (sim_cpu *cpu)
6885 unsigned sm = INSTR (20, 16);
6886 unsigned sn = INSTR ( 9, 5);
6887 unsigned sd = INSTR ( 4, 0);
6889 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
6890 - aarch64_get_FP_double (cpu, sm));
6894 do_FMINNM (sim_cpu *cpu)
6896 /* instr[31,23] = 0 0011 1100
6897 instr[22] = float(0)/double(1)
6900 instr[15,10] = 01 1110
6904 unsigned sm = INSTR (20, 16);
6905 unsigned sn = INSTR ( 9, 5);
6906 unsigned sd = INSTR ( 4, 0);
6908 NYI_assert (31, 23, 0x03C);
6909 NYI_assert (15, 10, 0x1E);
6912 aarch64_set_FP_double (cpu, sd,
6913 dminnm (aarch64_get_FP_double (cpu, sn),
6914 aarch64_get_FP_double (cpu, sm)));
6916 aarch64_set_FP_float (cpu, sd,
6917 fminnm (aarch64_get_FP_float (cpu, sn),
6918 aarch64_get_FP_float (cpu, sm)));
6922 do_FMAXNM (sim_cpu *cpu)
6924 /* instr[31,23] = 0 0011 1100
6925 instr[22] = float(0)/double(1)
6928 instr[15,10] = 01 1010
6932 unsigned sm = INSTR (20, 16);
6933 unsigned sn = INSTR ( 9, 5);
6934 unsigned sd = INSTR ( 4, 0);
6936 NYI_assert (31, 23, 0x03C);
6937 NYI_assert (15, 10, 0x1A);
6940 aarch64_set_FP_double (cpu, sd,
6941 dmaxnm (aarch64_get_FP_double (cpu, sn),
6942 aarch64_get_FP_double (cpu, sm)));
6944 aarch64_set_FP_float (cpu, sd,
6945 fmaxnm (aarch64_get_FP_float (cpu, sn),
6946 aarch64_get_FP_float (cpu, sm)));
6950 dexSimpleFPDataProc2Source (sim_cpu *cpu)
6952 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
6954 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
6957 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
6960 instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
6961 0010 ==> FADD, 0011 ==> FSUB,
6962 0100 ==> FMAX, 0101 ==> FMIN
6963 0110 ==> FMAXNM, 0111 ==> FMINNM
6964 1000 ==> FNMUL, ow ==> UNALLOC
6969 uint32_t M_S = (INSTR (31, 31) << 1)
6971 uint32_t type = INSTR (23, 22);
6972 /* Dispatch on opcode. */
6973 uint32_t dispatch = INSTR (15, 12);
6984 case 0: fmuld (cpu); return;
6985 case 1: fdivd (cpu); return;
6986 case 2: faddd (cpu); return;
6987 case 3: fsubd (cpu); return;
6988 case 6: do_FMAXNM (cpu); return;
6989 case 7: do_FMINNM (cpu); return;
6990 case 8: fnmuld (cpu); return;
6992 /* Have not yet implemented fmax and fmin. */
7000 else /* type == 0 => floats. */
7003 case 0: fmuls (cpu); return;
7004 case 1: fdivs (cpu); return;
7005 case 2: fadds (cpu); return;
7006 case 3: fsubs (cpu); return;
7007 case 6: do_FMAXNM (cpu); return;
7008 case 7: do_FMINNM (cpu); return;
7009 case 8: fnmuls (cpu); return;
7021 dexSimpleFPCondSelect (sim_cpu *cpu)
7024 instr[31,23] = 0 0011 1100
7025 instr[22] = 0=>single 1=>double
7032 unsigned sm = INSTR (20, 16);
7033 unsigned sn = INSTR ( 9, 5);
7034 unsigned sd = INSTR ( 4, 0);
7035 uint32_t set = testConditionCode (cpu, INSTR (15, 12));
7037 NYI_assert (31, 23, 0x03C);
7038 NYI_assert (11, 10, 0x3);
7041 aarch64_set_FP_double (cpu, sd, set ? sn : sm);
7043 aarch64_set_FP_float (cpu, sd, set ? sn : sm);
7046 /* Store 32 bit unscaled signed 9 bit. */
7048 fsturs (sim_cpu *cpu, int32_t offset)
7050 unsigned int rn = INSTR (9, 5);
7051 unsigned int st = INSTR (4, 0);
7053 aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, st, 1) + offset,
7054 aarch64_get_vec_u32 (cpu, rn, 0));
7057 /* Store 64 bit unscaled signed 9 bit. */
7059 fsturd (sim_cpu *cpu, int32_t offset)
7061 unsigned int rn = INSTR (9, 5);
7062 unsigned int st = INSTR (4, 0);
7064 aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, st, 1) + offset,
7065 aarch64_get_vec_u64 (cpu, rn, 0));
7068 /* Store 128 bit unscaled signed 9 bit. */
7070 fsturq (sim_cpu *cpu, int32_t offset)
7072 unsigned int rn = INSTR (9, 5);
7073 unsigned int st = INSTR (4, 0);
7076 aarch64_get_FP_long_double (cpu, rn, & a);
7077 aarch64_set_mem_long_double (cpu,
7078 aarch64_get_reg_u64 (cpu, st, 1)
7082 /* TODO FP move register. */
7084 /* 32 bit fp to fp move register. */
7086 ffmovs (sim_cpu *cpu)
7088 unsigned int rn = INSTR (9, 5);
7089 unsigned int st = INSTR (4, 0);
7091 aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
7094 /* 64 bit fp to fp move register. */
7096 ffmovd (sim_cpu *cpu)
7098 unsigned int rn = INSTR (9, 5);
7099 unsigned int st = INSTR (4, 0);
7101 aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
7104 /* 32 bit GReg to Vec move register. */
7106 fgmovs (sim_cpu *cpu)
7108 unsigned int rn = INSTR (9, 5);
7109 unsigned int st = INSTR (4, 0);
7111 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
7114 /* 64 bit g to fp move register. */
7116 fgmovd (sim_cpu *cpu)
7118 unsigned int rn = INSTR (9, 5);
7119 unsigned int st = INSTR (4, 0);
7121 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7124 /* 32 bit fp to g move register. */
7126 gfmovs (sim_cpu *cpu)
7128 unsigned int rn = INSTR (9, 5);
7129 unsigned int st = INSTR (4, 0);
7131 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
7134 /* 64 bit fp to g move register. */
7136 gfmovd (sim_cpu *cpu)
7138 unsigned int rn = INSTR (9, 5);
7139 unsigned int st = INSTR (4, 0);
7141 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
7144 /* FP move immediate
7146 These install an immediate 8 bit value in the target register
7147 where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
7151 fmovs (sim_cpu *cpu)
7153 unsigned int sd = INSTR (4, 0);
7154 uint32_t imm = INSTR (20, 13);
7155 float f = fp_immediate_for_encoding_32 (imm);
7157 aarch64_set_FP_float (cpu, sd, f);
7161 fmovd (sim_cpu *cpu)
7163 unsigned int sd = INSTR (4, 0);
7164 uint32_t imm = INSTR (20, 13);
7165 double d = fp_immediate_for_encoding_64 (imm);
7167 aarch64_set_FP_double (cpu, sd, d);
7171 dexSimpleFPImmediate (sim_cpu *cpu)
7173 /* instr[31,23] == 00111100
7174 instr[22] == type : single(0)/double(1)
7176 instr[20,13] == imm8
7178 instr[9,5] == imm5 : 00000 ==> PK, ow ==> UNALLOC
7180 uint32_t imm5 = INSTR (9, 5);
7182 NYI_assert (31, 23, 0x3C);
7193 /* TODO specific decode and execute for group Load Store. */
7195 /* TODO FP load/store single register (unscaled offset). */
7197 /* TODO load 8 bit unscaled signed 9 bit. */
7198 /* TODO load 16 bit unscaled signed 9 bit. */
7200 /* Load 32 bit unscaled signed 9 bit. */
7202 fldurs (sim_cpu *cpu, int32_t offset)
7204 unsigned int rn = INSTR (9, 5);
7205 unsigned int st = INSTR (4, 0);
7207 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
7208 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7211 /* Load 64 bit unscaled signed 9 bit. */
7213 fldurd (sim_cpu *cpu, int32_t offset)
7215 unsigned int rn = INSTR (9, 5);
7216 unsigned int st = INSTR (4, 0);
7218 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
7219 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7222 /* Load 128 bit unscaled signed 9 bit. */
7224 fldurq (sim_cpu *cpu, int32_t offset)
7226 unsigned int rn = INSTR (9, 5);
7227 unsigned int st = INSTR (4, 0);
7229 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
7231 aarch64_get_mem_long_double (cpu, addr, & a);
7232 aarch64_set_FP_long_double (cpu, st, a);
7235 /* TODO store 8 bit unscaled signed 9 bit. */
7236 /* TODO store 16 bit unscaled signed 9 bit. */
7241 /* Float absolute value. */
7243 fabss (sim_cpu *cpu)
7245 unsigned sn = INSTR (9, 5);
7246 unsigned sd = INSTR (4, 0);
7247 float value = aarch64_get_FP_float (cpu, sn);
7249 aarch64_set_FP_float (cpu, sd, fabsf (value));
7252 /* Double absolute value. */
7254 fabcpu (sim_cpu *cpu)
7256 unsigned sn = INSTR (9, 5);
7257 unsigned sd = INSTR (4, 0);
7258 double value = aarch64_get_FP_double (cpu, sn);
7260 aarch64_set_FP_double (cpu, sd, fabs (value));
7263 /* Float negative value. */
7265 fnegs (sim_cpu *cpu)
7267 unsigned sn = INSTR (9, 5);
7268 unsigned sd = INSTR (4, 0);
7270 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
7273 /* Double negative value. */
7275 fnegd (sim_cpu *cpu)
7277 unsigned sn = INSTR (9, 5);
7278 unsigned sd = INSTR (4, 0);
7280 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
7283 /* Float square root. */
7285 fsqrts (sim_cpu *cpu)
7287 unsigned sn = INSTR (9, 5);
7288 unsigned sd = INSTR (4, 0);
7290 aarch64_set_FP_float (cpu, sd, sqrt (aarch64_get_FP_float (cpu, sn)));
7293 /* Double square root. */
7295 fsqrtd (sim_cpu *cpu)
7297 unsigned sn = INSTR (9, 5);
7298 unsigned sd = INSTR (4, 0);
7300 aarch64_set_FP_double (cpu, sd,
7301 sqrt (aarch64_get_FP_double (cpu, sn)));
7304 /* Convert double to float. */
7306 fcvtds (sim_cpu *cpu)
7308 unsigned sn = INSTR (9, 5);
7309 unsigned sd = INSTR (4, 0);
7311 aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
7314 /* Convert float to double. */
7316 fcvtcpu (sim_cpu *cpu)
7318 unsigned sn = INSTR (9, 5);
7319 unsigned sd = INSTR (4, 0);
7321 aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
7325 do_FRINT (sim_cpu *cpu)
7327 /* instr[31,23] = 0001 1110 0
7328 instr[22] = single(0)/double(1)
7330 instr[17,15] = rounding mode
7331 instr[14,10] = 10000
7333 instr[4,0] = dest */
7336 unsigned rs = INSTR (9, 5);
7337 unsigned rd = INSTR (4, 0);
7338 unsigned int rmode = INSTR (17, 15);
7340 NYI_assert (31, 23, 0x03C);
7341 NYI_assert (21, 18, 0x9);
7342 NYI_assert (14, 10, 0x10);
7344 if (rmode == 6 || rmode == 7)
7345 /* FIXME: Add support for rmode == 6 exactness check. */
7346 rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
7350 double val = aarch64_get_FP_double (cpu, rs);
7354 case 0: /* mode N: nearest or even. */
7356 double rval = round (val);
7358 if (val - rval == 0.5)
7360 if (((rval / 2.0) * 2.0) != rval)
7364 aarch64_set_FP_double (cpu, rd, round (val));
7368 case 1: /* mode P: towards +inf. */
7370 aarch64_set_FP_double (cpu, rd, trunc (val));
7372 aarch64_set_FP_double (cpu, rd, round (val));
7375 case 2: /* mode M: towards -inf. */
7377 aarch64_set_FP_double (cpu, rd, round (val));
7379 aarch64_set_FP_double (cpu, rd, trunc (val));
7382 case 3: /* mode Z: towards 0. */
7383 aarch64_set_FP_double (cpu, rd, trunc (val));
7386 case 4: /* mode A: away from 0. */
7387 aarch64_set_FP_double (cpu, rd, round (val));
7390 case 6: /* mode X: use FPCR with exactness check. */
7391 case 7: /* mode I: use FPCR mode. */
7399 val = aarch64_get_FP_float (cpu, rs);
7403 case 0: /* mode N: nearest or even. */
7405 float rval = roundf (val);
7407 if (val - rval == 0.5)
7409 if (((rval / 2.0) * 2.0) != rval)
7413 aarch64_set_FP_float (cpu, rd, rval);
7417 case 1: /* mode P: towards +inf. */
7419 aarch64_set_FP_float (cpu, rd, truncf (val));
7421 aarch64_set_FP_float (cpu, rd, roundf (val));
7424 case 2: /* mode M: towards -inf. */
7426 aarch64_set_FP_float (cpu, rd, truncf (val));
7428 aarch64_set_FP_float (cpu, rd, roundf (val));
7431 case 3: /* mode Z: towards 0. */
7432 aarch64_set_FP_float (cpu, rd, truncf (val));
7435 case 4: /* mode A: away from 0. */
7436 aarch64_set_FP_float (cpu, rd, roundf (val));
7439 case 6: /* mode X: use FPCR with exactness check. */
7440 case 7: /* mode I: use FPCR mode. */
7448 /* Convert half to float. */
7450 do_FCVT_half_to_single (sim_cpu *cpu)
7452 unsigned rn = INSTR (9, 5);
7453 unsigned rd = INSTR (4, 0);
7455 NYI_assert (31, 10, 0x7B890);
7457 aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn));
7460 /* Convert half to float. */
7462 do_FCVT_half_to_double (sim_cpu *cpu)
7464 unsigned rn = INSTR (9, 5);
7465 unsigned rd = INSTR (4, 0);
7467 NYI_assert (31, 10, 0x7B8B0);
7469 aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn));
7473 do_FCVT_single_to_half (sim_cpu *cpu)
7475 unsigned rn = INSTR (9, 5);
7476 unsigned rd = INSTR (4, 0);
7478 NYI_assert (31, 10, 0x788F0);
7480 aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn));
7483 /* Convert half to float. */
7485 do_FCVT_double_to_half (sim_cpu *cpu)
7487 unsigned rn = INSTR (9, 5);
7488 unsigned rd = INSTR (4, 0);
7490 NYI_assert (31, 10, 0x798F0);
7492 aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn));
7496 dexSimpleFPDataProc1Source (sim_cpu *cpu)
7498 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7500 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7503 instr[23,22] ==> type : 00 ==> source is single,
7504 01 ==> source is double
7506 11 ==> UNALLOC or source is half
7508 instr[20,15] ==> opcode : with type 00 or 01
7509 000000 ==> FMOV, 000001 ==> FABS,
7510 000010 ==> FNEG, 000011 ==> FSQRT,
7511 000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double)
7512 000110 ==> UNALLOC, 000111 ==> FCVT (to half)
7513 001000 ==> FRINTN, 001001 ==> FRINTP,
7514 001010 ==> FRINTM, 001011 ==> FRINTZ,
7515 001100 ==> FRINTA, 001101 ==> UNALLOC
7516 001110 ==> FRINTX, 001111 ==> FRINTI
7518 000100 ==> FCVT (half-to-single)
7519 000101 ==> FCVT (half-to-double)
7520 instr[14,10] = 10000. */
7522 uint32_t M_S = (INSTR (31, 31) << 1)
7524 uint32_t type = INSTR (23, 22);
7525 uint32_t opcode = INSTR (20, 15);
7533 do_FCVT_half_to_single (cpu);
7534 else if (opcode == 5)
7535 do_FCVT_half_to_double (cpu);
7587 case 8: /* FRINTN etc. */
7599 do_FCVT_double_to_half (cpu);
7601 do_FCVT_single_to_half (cpu);
7612 /* 32 bit signed int to float. */
7614 scvtf32 (sim_cpu *cpu)
7616 unsigned rn = INSTR (9, 5);
7617 unsigned sd = INSTR (4, 0);
7619 aarch64_set_FP_float
7620 (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
7623 /* signed int to float. */
7625 scvtf (sim_cpu *cpu)
7627 unsigned rn = INSTR (9, 5);
7628 unsigned sd = INSTR (4, 0);
7630 aarch64_set_FP_float
7631 (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
7634 /* 32 bit signed int to double. */
7636 scvtd32 (sim_cpu *cpu)
7638 unsigned rn = INSTR (9, 5);
7639 unsigned sd = INSTR (4, 0);
7641 aarch64_set_FP_double
7642 (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
7645 /* signed int to double. */
7647 scvtd (sim_cpu *cpu)
7649 unsigned rn = INSTR (9, 5);
7650 unsigned sd = INSTR (4, 0);
7652 aarch64_set_FP_double
7653 (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
7656 static const float FLOAT_INT_MAX = (float) INT_MAX;
7657 static const float FLOAT_INT_MIN = (float) INT_MIN;
7658 static const double DOUBLE_INT_MAX = (double) INT_MAX;
7659 static const double DOUBLE_INT_MIN = (double) INT_MIN;
7660 static const float FLOAT_LONG_MAX = (float) LONG_MAX;
7661 static const float FLOAT_LONG_MIN = (float) LONG_MIN;
7662 static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
7663 static const double DOUBLE_LONG_MIN = (double) LONG_MIN;
7665 /* Check for FP exception conditions:
7668 Out of Range raises IO and IX and saturates value
7669 Denormal raises ID and IX and sets to zero. */
7670 #define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE) \
7673 switch (fpclassify (F)) \
7677 aarch64_set_FPSR (cpu, IO); \
7679 VALUE = ITYPE##_MAX; \
7681 VALUE = ITYPE##_MIN; \
7685 if (F >= FTYPE##_##ITYPE##_MAX) \
7687 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
7688 VALUE = ITYPE##_MAX; \
7690 else if (F <= FTYPE##_##ITYPE##_MIN) \
7692 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
7693 VALUE = ITYPE##_MIN; \
7697 case FP_SUBNORMAL: \
7698 aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID); \
7710 /* 32 bit convert float to signed int truncate towards zero. */
7712 fcvtszs32 (sim_cpu *cpu)
7714 unsigned sn = INSTR (9, 5);
7715 unsigned rd = INSTR (4, 0);
7716 /* TODO : check that this rounds toward zero. */
7717 float f = aarch64_get_FP_float (cpu, sn);
7718 int32_t value = (int32_t) f;
7720 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
7722 /* Avoid sign extension to 64 bit. */
7723 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
7726 /* 64 bit convert float to signed int truncate towards zero. */
7728 fcvtszs (sim_cpu *cpu)
7730 unsigned sn = INSTR (9, 5);
7731 unsigned rd = INSTR (4, 0);
7732 float f = aarch64_get_FP_float (cpu, sn);
7733 int64_t value = (int64_t) f;
7735 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
7737 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
7740 /* 32 bit convert double to signed int truncate towards zero. */
7742 fcvtszd32 (sim_cpu *cpu)
7744 unsigned sn = INSTR (9, 5);
7745 unsigned rd = INSTR (4, 0);
7746 /* TODO : check that this rounds toward zero. */
7747 double d = aarch64_get_FP_double (cpu, sn);
7748 int32_t value = (int32_t) d;
7750 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
7752 /* Avoid sign extension to 64 bit. */
7753 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
7756 /* 64 bit convert double to signed int truncate towards zero. */
7758 fcvtszd (sim_cpu *cpu)
7760 unsigned sn = INSTR (9, 5);
7761 unsigned rd = INSTR (4, 0);
7762 /* TODO : check that this rounds toward zero. */
7763 double d = aarch64_get_FP_double (cpu, sn);
7766 value = (int64_t) d;
7768 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
7770 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
7774 do_fcvtzu (sim_cpu *cpu)
7776 /* instr[31] = size: 32-bit (0), 64-bit (1)
7777 instr[30,23] = 00111100
7778 instr[22] = type: single (0)/ double (1)
7779 instr[21] = enable (0)/disable(1) precision
7780 instr[20,16] = 11001
7781 instr[15,10] = precision
7785 unsigned rs = INSTR (9, 5);
7786 unsigned rd = INSTR (4, 0);
7788 NYI_assert (30, 23, 0x3C);
7789 NYI_assert (20, 16, 0x19);
7791 if (INSTR (21, 21) != 1)
7792 /* Convert to fixed point. */
7797 /* Convert to unsigned 64-bit integer. */
7800 double d = aarch64_get_FP_double (cpu, rs);
7801 uint64_t value = (uint64_t) d;
7803 /* Do not raise an exception if we have reached ULONG_MAX. */
7804 if (value != (1UL << 63))
7805 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
7807 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
7811 float f = aarch64_get_FP_float (cpu, rs);
7812 uint64_t value = (uint64_t) f;
7814 /* Do not raise an exception if we have reached ULONG_MAX. */
7815 if (value != (1UL << 63))
7816 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
7818 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
7825 /* Convert to unsigned 32-bit integer. */
7828 double d = aarch64_get_FP_double (cpu, rs);
7830 value = (uint32_t) d;
7831 /* Do not raise an exception if we have reached UINT_MAX. */
7832 if (value != (1UL << 31))
7833 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
7837 float f = aarch64_get_FP_float (cpu, rs);
7839 value = (uint32_t) f;
7840 /* Do not raise an exception if we have reached UINT_MAX. */
7841 if (value != (1UL << 31))
7842 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
7845 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
7850 do_UCVTF (sim_cpu *cpu)
7852 /* instr[31] = size: 32-bit (0), 64-bit (1)
7853 instr[30,23] = 001 1110 0
7854 instr[22] = type: single (0)/ double (1)
7855 instr[21] = enable (0)/disable(1) precision
7856 instr[20,16] = 0 0011
7857 instr[15,10] = precision
7861 unsigned rs = INSTR (9, 5);
7862 unsigned rd = INSTR (4, 0);
7864 NYI_assert (30, 23, 0x3C);
7865 NYI_assert (20, 16, 0x03);
7867 if (INSTR (21, 21) != 1)
7870 /* FIXME: Add exception raising. */
7873 uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);
7876 aarch64_set_FP_double (cpu, rd, (double) value);
7878 aarch64_set_FP_float (cpu, rd, (float) value);
7882 uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP);
7885 aarch64_set_FP_double (cpu, rd, (double) value);
7887 aarch64_set_FP_float (cpu, rd, (float) value);
7892 float_vector_move (sim_cpu *cpu)
7894 /* instr[31,17] == 100 1111 0101 0111
7895 instr[16] ==> direction 0=> to GR, 1=> from GR
7897 instr[9,5] ==> source
7898 instr[4,0] ==> dest. */
7900 unsigned rn = INSTR (9, 5);
7901 unsigned rd = INSTR (4, 0);
7903 NYI_assert (31, 17, 0x4F57);
7905 if (INSTR (15, 10) != 0)
7909 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7911 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
7915 dexSimpleFPIntegerConvert (sim_cpu *cpu)
7917 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
7919 instr[29] = S : 0 ==> OK, 1 ==> UNALLOC
7922 instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
7924 instr[20,19] = rmode
7925 instr[18,16] = opcode
7926 instr[15,10] = 10 0000 */
7928 uint32_t rmode_opcode;
7934 if (INSTR (31, 17) == 0x4F57)
7936 float_vector_move (cpu);
7940 size = INSTR (31, 31);
7945 type = INSTR (23, 22);
7949 rmode_opcode = INSTR (20, 16);
7950 size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */
7952 switch (rmode_opcode)
7954 case 2: /* SCVTF. */
7957 case 0: scvtf32 (cpu); return;
7958 case 1: scvtd32 (cpu); return;
7959 case 2: scvtf (cpu); return;
7960 case 3: scvtd (cpu); return;
7963 case 6: /* FMOV GR, Vec. */
7966 case 0: gfmovs (cpu); return;
7967 case 3: gfmovd (cpu); return;
7968 default: HALT_UNALLOC;
7971 case 7: /* FMOV vec, GR. */
7974 case 0: fgmovs (cpu); return;
7975 case 3: fgmovd (cpu); return;
7976 default: HALT_UNALLOC;
7979 case 24: /* FCVTZS. */
7982 case 0: fcvtszs32 (cpu); return;
7983 case 1: fcvtszd32 (cpu); return;
7984 case 2: fcvtszs (cpu); return;
7985 case 3: fcvtszd (cpu); return;
7988 case 25: do_fcvtzu (cpu); return;
7989 case 3: do_UCVTF (cpu); return;
7991 case 0: /* FCVTNS. */
7992 case 1: /* FCVTNU. */
7993 case 4: /* FCVTAS. */
7994 case 5: /* FCVTAU. */
7995 case 8: /* FCVPTS. */
7996 case 9: /* FCVTPU. */
7997 case 16: /* FCVTMS. */
7998 case 17: /* FCVTMU. */
8005 set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
8009 if (isnan (fvalue1) || isnan (fvalue2))
8013 float result = fvalue1 - fvalue2;
8017 else if (result < 0)
8019 else /* (result > 0). */
8023 aarch64_set_CPSR (cpu, flags);
8027 fcmps (sim_cpu *cpu)
8029 unsigned sm = INSTR (20, 16);
8030 unsigned sn = INSTR ( 9, 5);
8032 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8033 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8035 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8038 /* Float compare to zero -- Invalid Operation exception
8039 only on signaling NaNs. */
8041 fcmpzs (sim_cpu *cpu)
8043 unsigned sn = INSTR ( 9, 5);
8044 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8046 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8049 /* Float compare -- Invalid Operation exception on all NaNs. */
8051 fcmpes (sim_cpu *cpu)
8053 unsigned sm = INSTR (20, 16);
8054 unsigned sn = INSTR ( 9, 5);
8056 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8057 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8059 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8062 /* Float compare to zero -- Invalid Operation exception on all NaNs. */
8064 fcmpzes (sim_cpu *cpu)
8066 unsigned sn = INSTR ( 9, 5);
8067 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8069 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8073 set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
8077 if (isnan (dval1) || isnan (dval2))
8081 double result = dval1 - dval2;
8085 else if (result < 0)
8087 else /* (result > 0). */
8091 aarch64_set_CPSR (cpu, flags);
8094 /* Double compare -- Invalid Operation exception only on signaling NaNs. */
8096 fcmpd (sim_cpu *cpu)
8098 unsigned sm = INSTR (20, 16);
8099 unsigned sn = INSTR ( 9, 5);
8101 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8102 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8104 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8107 /* Double compare to zero -- Invalid Operation exception
8108 only on signaling NaNs. */
8110 fcmpzd (sim_cpu *cpu)
8112 unsigned sn = INSTR ( 9, 5);
8113 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8115 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8118 /* Double compare -- Invalid Operation exception on all NaNs. */
8120 fcmped (sim_cpu *cpu)
8122 unsigned sm = INSTR (20, 16);
8123 unsigned sn = INSTR ( 9, 5);
8125 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8126 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8128 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8131 /* Double compare to zero -- Invalid Operation exception on all NaNs. */
8133 fcmpzed (sim_cpu *cpu)
8135 unsigned sn = INSTR ( 9, 5);
8136 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8138 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8142 dexSimpleFPCompare (sim_cpu *cpu)
8144 /* assert instr[28,25] == 1111
8145 instr[30:24:21:13,10] = 0011000
8146 instr[31] = M : 0 ==> OK, 1 ==> UNALLOC
8147 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8148 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
8149 instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC
8150 instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
8151 01000 ==> FCMPZ, 11000 ==> FCMPEZ,
8154 uint32_t M_S = (INSTR (31, 31) << 1)
8156 uint32_t type = INSTR (23, 22);
8157 uint32_t op = INSTR (15, 14);
8158 uint32_t op2_2_0 = INSTR (2, 0);
8172 /* dispatch on type and top 2 bits of opcode. */
8173 dispatch = (type << 2) | INSTR (4, 3);
8177 case 0: fcmps (cpu); return;
8178 case 1: fcmpzs (cpu); return;
8179 case 2: fcmpes (cpu); return;
8180 case 3: fcmpzes (cpu); return;
8181 case 4: fcmpd (cpu); return;
8182 case 5: fcmpzd (cpu); return;
8183 case 6: fcmped (cpu); return;
8184 case 7: fcmpzed (cpu); return;
8189 do_scalar_FADDP (sim_cpu *cpu)
8191 /* instr [31,23] = 011111100
8192 instr [22] = single(0)/double(1)
8193 instr [21,10] = 1100 0011 0110
8195 instr [4,0] = Fd. */
8197 unsigned Fn = INSTR (9, 5);
8198 unsigned Fd = INSTR (4, 0);
8200 NYI_assert (31, 23, 0x0FC);
8201 NYI_assert (21, 10, 0xC36);
8205 double val1 = aarch64_get_vec_double (cpu, Fn, 0);
8206 double val2 = aarch64_get_vec_double (cpu, Fn, 1);
8208 aarch64_set_FP_double (cpu, Fd, val1 + val2);
8212 float val1 = aarch64_get_vec_float (cpu, Fn, 0);
8213 float val2 = aarch64_get_vec_float (cpu, Fn, 1);
8215 aarch64_set_FP_float (cpu, Fd, val1 + val2);
8219 /* Floating point absolute difference. */
8222 do_scalar_FABD (sim_cpu *cpu)
8224 /* instr [31,23] = 0111 1110 1
8225 instr [22] = float(0)/double(1)
8228 instr [15,10] = 1101 01
8230 instr [4, 0] = Rd. */
8232 unsigned rm = INSTR (20, 16);
8233 unsigned rn = INSTR (9, 5);
8234 unsigned rd = INSTR (4, 0);
8236 NYI_assert (31, 23, 0x0FD);
8237 NYI_assert (21, 21, 1);
8238 NYI_assert (15, 10, 0x35);
8241 aarch64_set_FP_double (cpu, rd,
8242 fabs (aarch64_get_FP_double (cpu, rn)
8243 - aarch64_get_FP_double (cpu, rm)));
8245 aarch64_set_FP_float (cpu, rd,
8246 fabsf (aarch64_get_FP_float (cpu, rn)
8247 - aarch64_get_FP_float (cpu, rm)));
8251 do_scalar_CMGT (sim_cpu *cpu)
8253 /* instr [31,21] = 0101 1110 111
8255 instr [15,10] = 00 1101
8257 instr [4, 0] = Rd. */
8259 unsigned rm = INSTR (20, 16);
8260 unsigned rn = INSTR (9, 5);
8261 unsigned rd = INSTR (4, 0);
8263 NYI_assert (31, 21, 0x2F7);
8264 NYI_assert (15, 10, 0x0D);
8266 aarch64_set_vec_u64 (cpu, rd, 0,
8267 aarch64_get_vec_u64 (cpu, rn, 0) >
8268 aarch64_get_vec_u64 (cpu, rm, 0) ? -1L : 0L);
8272 do_scalar_USHR (sim_cpu *cpu)
8274 /* instr [31,23] = 0111 1111 0
8275 instr [22,16] = shift amount
8276 instr [15,10] = 0000 01
8278 instr [4, 0] = Rd. */
8280 unsigned amount = 128 - INSTR (22, 16);
8281 unsigned rn = INSTR (9, 5);
8282 unsigned rd = INSTR (4, 0);
8284 NYI_assert (31, 23, 0x0FE);
8285 NYI_assert (15, 10, 0x01);
8287 aarch64_set_vec_u64 (cpu, rd, 0,
8288 aarch64_get_vec_u64 (cpu, rn, 0) >> amount);
8292 do_scalar_SSHL (sim_cpu *cpu)
8294 /* instr [31,21] = 0101 1110 111
8296 instr [15,10] = 0100 01
8298 instr [4, 0] = Rd. */
8300 unsigned rm = INSTR (20, 16);
8301 unsigned rn = INSTR (9, 5);
8302 unsigned rd = INSTR (4, 0);
8303 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8305 NYI_assert (31, 21, 0x2F7);
8306 NYI_assert (15, 10, 0x11);
8309 aarch64_set_vec_s64 (cpu, rd, 0,
8310 aarch64_get_vec_s64 (cpu, rn, 0) << shift);
8312 aarch64_set_vec_s64 (cpu, rd, 0,
8313 aarch64_get_vec_s64 (cpu, rn, 0) >> - shift);
8317 do_scalar_shift (sim_cpu *cpu)
8319 /* instr [31,23] = 0101 1111 0
8320 instr [22,16] = shift amount
8321 instr [15,10] = 0101 01 [SHL]
8322 instr [15,10] = 0000 01 [SSHR]
8324 instr [4, 0] = Rd. */
8326 unsigned rn = INSTR (9, 5);
8327 unsigned rd = INSTR (4, 0);
8330 NYI_assert (31, 23, 0x0BE);
8332 if (INSTR (22, 22) == 0)
8335 switch (INSTR (15, 10))
8337 case 0x01: /* SSHR */
8338 amount = 128 - INSTR (22, 16);
8339 aarch64_set_vec_s64 (cpu, rd, 0,
8340 aarch64_get_vec_s64 (cpu, rn, 0) >> amount);
8342 case 0x15: /* SHL */
8343 amount = INSTR (22, 16) - 64;
8344 aarch64_set_vec_u64 (cpu, rd, 0,
8345 aarch64_get_vec_u64 (cpu, rn, 0) << amount);
8352 /* FCMEQ FCMGT FCMGE. */
8354 do_scalar_FCM (sim_cpu *cpu)
8356 /* instr [31,30] = 01
8358 instr [28,24] = 1 1110
8363 instr [15,12] = 1110
8367 instr [4, 0] = Rd. */
8369 unsigned rm = INSTR (20, 16);
8370 unsigned rn = INSTR (9, 5);
8371 unsigned rd = INSTR (4, 0);
8372 unsigned EUac = (INSTR (23, 23) << 2)
8373 | (INSTR (29, 29) << 1)
8379 NYI_assert (31, 30, 1);
8380 NYI_assert (28, 24, 0x1E);
8381 NYI_assert (21, 21, 1);
8382 NYI_assert (15, 12, 0xE);
8383 NYI_assert (10, 10, 1);
8387 double val1 = aarch64_get_FP_double (cpu, rn);
8388 double val2 = aarch64_get_FP_double (cpu, rm);
8393 result = val1 == val2;
8401 result = val1 >= val2;
8409 result = val1 > val2;
8416 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
8420 val1 = aarch64_get_FP_float (cpu, rn);
8421 val2 = aarch64_get_FP_float (cpu, rm);
8426 result = val1 == val2;
8430 val1 = fabsf (val1);
8431 val2 = fabsf (val2);
8434 result = val1 >= val2;
8438 val1 = fabsf (val1);
8439 val2 = fabsf (val2);
8442 result = val1 > val2;
8449 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
8452 /* An alias of DUP. */
8454 do_scalar_MOV (sim_cpu *cpu)
8456 /* instr [31,21] = 0101 1110 000
8457 instr [20,16] = imm5
8458 instr [15,10] = 0000 01
8460 instr [4, 0] = Rd. */
8462 unsigned rn = INSTR (9, 5);
8463 unsigned rd = INSTR (4, 0);
8466 NYI_assert (31, 21, 0x2F0);
8467 NYI_assert (15, 10, 0x01);
8472 index = INSTR (20, 17);
8474 (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index));
8476 else if (INSTR (17, 17))
8479 index = INSTR (20, 18);
8481 (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index));
8483 else if (INSTR (18, 18))
8486 index = INSTR (20, 19);
8488 (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index));
8490 else if (INSTR (19, 19))
8493 index = INSTR (20, 20);
8495 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index));
8502 do_scalar_NEG (sim_cpu *cpu)
8504 /* instr [31,10] = 0111 1110 1110 0000 1011 10
8506 instr [4, 0] = Rd. */
8508 unsigned rn = INSTR (9, 5);
8509 unsigned rd = INSTR (4, 0);
8511 NYI_assert (31, 10, 0x1FB82E);
8513 aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0));
8517 do_scalar_USHL (sim_cpu *cpu)
8519 /* instr [31,21] = 0111 1110 111
8521 instr [15,10] = 0100 01
8523 instr [4, 0] = Rd. */
8525 unsigned rm = INSTR (20, 16);
8526 unsigned rn = INSTR (9, 5);
8527 unsigned rd = INSTR (4, 0);
8528 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8530 NYI_assert (31, 21, 0x3F7);
8531 NYI_assert (15, 10, 0x11);
8534 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift);
8536 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift);
8540 do_double_add (sim_cpu *cpu)
8542 /* instr [31,21] = 0101 1110 111
8544 instr [15,10] = 1000 01
8546 instr [4,0] = Fd. */
8553 NYI_assert (31, 21, 0x2F7);
8554 NYI_assert (15, 10, 0x21);
8558 Fn = INSTR (20, 16);
8560 val1 = aarch64_get_FP_double (cpu, Fm);
8561 val2 = aarch64_get_FP_double (cpu, Fn);
8563 aarch64_set_FP_double (cpu, Fd, val1 + val2);
8567 do_scalar_vec (sim_cpu *cpu)
8569 /* instr [30] = 1. */
8570 /* instr [28,25] = 1111. */
8571 switch (INSTR (31, 23))
8574 switch (INSTR (15, 10))
8576 case 0x01: do_scalar_MOV (cpu); return;
8577 case 0x39: do_scalar_FCM (cpu); return;
8578 case 0x3B: do_scalar_FCM (cpu); return;
8582 case 0xBE: do_scalar_shift (cpu); return;
8585 switch (INSTR (15, 10))
8587 case 0x36: do_scalar_FADDP (cpu); return;
8588 case 0x39: do_scalar_FCM (cpu); return;
8589 case 0x3B: do_scalar_FCM (cpu); return;
8594 switch (INSTR (15, 10))
8596 case 0x0D: do_scalar_CMGT (cpu); return;
8597 case 0x11: do_scalar_USHL (cpu); return;
8598 case 0x2E: do_scalar_NEG (cpu); return;
8599 case 0x35: do_scalar_FABD (cpu); return;
8600 case 0x39: do_scalar_FCM (cpu); return;
8601 case 0x3B: do_scalar_FCM (cpu); return;
8606 case 0xFE: do_scalar_USHR (cpu); return;
8609 switch (INSTR (15, 10))
8611 case 0x21: do_double_add (cpu); return;
8612 case 0x11: do_scalar_SSHL (cpu); return;
8623 dexAdvSIMD1 (sim_cpu *cpu)
8625 /* instr [28,25] = 1 111. */
8627 /* We are currently only interested in the basic
8628 scalar fp routines which all have bit 30 = 0. */
8630 do_scalar_vec (cpu);
8632 /* instr[24] is set for FP data processing 3-source and clear for
8633 all other basic scalar fp instruction groups. */
8634 else if (INSTR (24, 24))
8635 dexSimpleFPDataProc3Source (cpu);
8637 /* instr[21] is clear for floating <-> fixed conversions and set for
8638 all other basic scalar fp instruction groups. */
8639 else if (!INSTR (21, 21))
8640 dexSimpleFPFixedConvert (cpu);
8642 /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
8643 11 ==> cond select, 00 ==> other. */
8645 switch (INSTR (11, 10))
8647 case 1: dexSimpleFPCondCompare (cpu); return;
8648 case 2: dexSimpleFPDataProc2Source (cpu); return;
8649 case 3: dexSimpleFPCondSelect (cpu); return;
8652 /* Now an ordered cascade of tests.
8653 FP immediate has instr [12] == 1.
8654 FP compare has instr [13] == 1.
8655 FP Data Proc 1 Source has instr [14] == 1.
8656 FP floating <--> integer conversions has instr [15] == 0. */
8658 dexSimpleFPImmediate (cpu);
8660 else if (INSTR (13, 13))
8661 dexSimpleFPCompare (cpu);
8663 else if (INSTR (14, 14))
8664 dexSimpleFPDataProc1Source (cpu);
8666 else if (!INSTR (15, 15))
8667 dexSimpleFPIntegerConvert (cpu);
8670 /* If we get here then instr[15] == 1 which means UNALLOC. */
8675 /* PC relative addressing. */
8678 pcadr (sim_cpu *cpu)
8680 /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP
8681 instr[30,29] = immlo
8682 instr[23,5] = immhi. */
8684 unsigned rd = INSTR (4, 0);
8685 uint32_t isPage = INSTR (31, 31);
8686 union { int64_t u64; uint64_t s64; } imm;
8689 imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
8691 offset = (offset << 2) | INSTR (30, 29);
8693 address = aarch64_get_PC (cpu);
8701 aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
8704 /* Specific decode and execute for group Data Processing Immediate. */
8707 dexPCRelAddressing (sim_cpu *cpu)
8709 /* assert instr[28,24] = 10000. */
8713 /* Immediate logical.
8714 The bimm32/64 argument is constructed by replicating a 2, 4, 8,
8715 16, 32 or 64 bit sequence pulled out at decode and possibly
8718 N.B. the output register (dest) can normally be Xn or SP
8719 the exception occurs for flag setting instructions which may
8720 only use Xn for the output (dest). The input register can
8723 /* 32 bit and immediate. */
8725 and32 (sim_cpu *cpu, uint32_t bimm)
8727 unsigned rn = INSTR (9, 5);
8728 unsigned rd = INSTR (4, 0);
8730 aarch64_set_reg_u64 (cpu, rd, SP_OK,
8731 aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
8734 /* 64 bit and immediate. */
8736 and64 (sim_cpu *cpu, uint64_t bimm)
8738 unsigned rn = INSTR (9, 5);
8739 unsigned rd = INSTR (4, 0);
8741 aarch64_set_reg_u64 (cpu, rd, SP_OK,
8742 aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
8745 /* 32 bit and immediate set flags. */
8747 ands32 (sim_cpu *cpu, uint32_t bimm)
8749 unsigned rn = INSTR (9, 5);
8750 unsigned rd = INSTR (4, 0);
8752 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
8753 uint32_t value2 = bimm;
8755 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
8756 set_flags_for_binop32 (cpu, value1 & value2);
8759 /* 64 bit and immediate set flags. */
8761 ands64 (sim_cpu *cpu, uint64_t bimm)
8763 unsigned rn = INSTR (9, 5);
8764 unsigned rd = INSTR (4, 0);
8766 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
8767 uint64_t value2 = bimm;
8769 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
8770 set_flags_for_binop64 (cpu, value1 & value2);
8773 /* 32 bit exclusive or immediate. */
8775 eor32 (sim_cpu *cpu, uint32_t bimm)
8777 unsigned rn = INSTR (9, 5);
8778 unsigned rd = INSTR (4, 0);
8780 aarch64_set_reg_u64 (cpu, rd, SP_OK,
8781 aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
8784 /* 64 bit exclusive or immediate. */
8786 eor64 (sim_cpu *cpu, uint64_t bimm)
8788 unsigned rn = INSTR (9, 5);
8789 unsigned rd = INSTR (4, 0);
8791 aarch64_set_reg_u64 (cpu, rd, SP_OK,
8792 aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
8795 /* 32 bit or immediate. */
8797 orr32 (sim_cpu *cpu, uint32_t bimm)
8799 unsigned rn = INSTR (9, 5);
8800 unsigned rd = INSTR (4, 0);
8802 aarch64_set_reg_u64 (cpu, rd, SP_OK,
8803 aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm);
8806 /* 64 bit or immediate. */
8808 orr64 (sim_cpu *cpu, uint64_t bimm)
8810 unsigned rn = INSTR (9, 5);
8811 unsigned rd = INSTR (4, 0);
8813 aarch64_set_reg_u64 (cpu, rd, SP_OK,
8814 aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm);
8817 /* Logical shifted register.
8818 These allow an optional LSL, ASR, LSR or ROR to the second source
8819 register with a count up to the register bit count.
8820 N.B register args may not be SP. */
8822 /* 32 bit AND shifted register. */
8824 and32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8826 unsigned rm = INSTR (20, 16);
8827 unsigned rn = INSTR (9, 5);
8828 unsigned rd = INSTR (4, 0);
8831 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
8832 & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
8835 /* 64 bit AND shifted register. */
8837 and64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8839 unsigned rm = INSTR (20, 16);
8840 unsigned rn = INSTR (9, 5);
8841 unsigned rd = INSTR (4, 0);
8844 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
8845 & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
8848 /* 32 bit AND shifted register setting flags. */
8850 ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8852 unsigned rm = INSTR (20, 16);
8853 unsigned rn = INSTR (9, 5);
8854 unsigned rd = INSTR (4, 0);
8856 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
8857 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
8860 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
8861 set_flags_for_binop32 (cpu, value1 & value2);
8864 /* 64 bit AND shifted register setting flags. */
8866 ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8868 unsigned rm = INSTR (20, 16);
8869 unsigned rn = INSTR (9, 5);
8870 unsigned rd = INSTR (4, 0);
8872 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
8873 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
8876 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
8877 set_flags_for_binop64 (cpu, value1 & value2);
8880 /* 32 bit BIC shifted register. */
8882 bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8884 unsigned rm = INSTR (20, 16);
8885 unsigned rn = INSTR (9, 5);
8886 unsigned rd = INSTR (4, 0);
8889 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
8890 & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
8893 /* 64 bit BIC shifted register. */
8895 bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8897 unsigned rm = INSTR (20, 16);
8898 unsigned rn = INSTR (9, 5);
8899 unsigned rd = INSTR (4, 0);
8902 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
8903 & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
8906 /* 32 bit BIC shifted register setting flags. */
8908 bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8910 unsigned rm = INSTR (20, 16);
8911 unsigned rn = INSTR (9, 5);
8912 unsigned rd = INSTR (4, 0);
8914 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
8915 uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
8918 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
8919 set_flags_for_binop32 (cpu, value1 & value2);
8922 /* 64 bit BIC shifted register setting flags. */
8924 bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8926 unsigned rm = INSTR (20, 16);
8927 unsigned rn = INSTR (9, 5);
8928 unsigned rd = INSTR (4, 0);
8930 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
8931 uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
8934 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
8935 set_flags_for_binop64 (cpu, value1 & value2);
8938 /* 32 bit EON shifted register. */
8940 eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8942 unsigned rm = INSTR (20, 16);
8943 unsigned rn = INSTR (9, 5);
8944 unsigned rd = INSTR (4, 0);
8947 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
8948 ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
8951 /* 64 bit EON shifted register. */
8953 eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8955 unsigned rm = INSTR (20, 16);
8956 unsigned rn = INSTR (9, 5);
8957 unsigned rd = INSTR (4, 0);
8960 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
8961 ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
8964 /* 32 bit EOR shifted register. */
8966 eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8968 unsigned rm = INSTR (20, 16);
8969 unsigned rn = INSTR (9, 5);
8970 unsigned rd = INSTR (4, 0);
8973 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
8974 ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
8977 /* 64 bit EOR shifted register. */
8979 eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8981 unsigned rm = INSTR (20, 16);
8982 unsigned rn = INSTR (9, 5);
8983 unsigned rd = INSTR (4, 0);
8986 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
8987 ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
8990 /* 32 bit ORR shifted register. */
8992 orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8994 unsigned rm = INSTR (20, 16);
8995 unsigned rn = INSTR (9, 5);
8996 unsigned rd = INSTR (4, 0);
8999 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9000 | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9003 /* 64 bit ORR shifted register. */
9005 orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9007 unsigned rm = INSTR (20, 16);
9008 unsigned rn = INSTR (9, 5);
9009 unsigned rd = INSTR (4, 0);
9012 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9013 | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9016 /* 32 bit ORN shifted register. */
9018 orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9020 unsigned rm = INSTR (20, 16);
9021 unsigned rn = INSTR (9, 5);
9022 unsigned rd = INSTR (4, 0);
9025 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9026 | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9029 /* 64 bit ORN shifted register. */
9031 orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9033 unsigned rm = INSTR (20, 16);
9034 unsigned rn = INSTR (9, 5);
9035 unsigned rd = INSTR (4, 0);
9038 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9039 | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9043 dexLogicalImmediate (sim_cpu *cpu)
9045 /* assert instr[28,23] = 1001000
9046 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9047 instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
9048 instr[22] = N : used to construct immediate mask
9054 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9055 uint32_t size = INSTR (31, 31);
9056 uint32_t N = INSTR (22, 22);
9057 /* uint32_t immr = INSTR (21, 16);. */
9058 /* uint32_t imms = INSTR (15, 10);. */
9059 uint32_t index = INSTR (22, 10);
9060 uint64_t bimm64 = LITable [index];
9061 uint32_t dispatch = INSTR (30, 29);
9071 uint32_t bimm = (uint32_t) bimm64;
9075 case 0: and32 (cpu, bimm); return;
9076 case 1: orr32 (cpu, bimm); return;
9077 case 2: eor32 (cpu, bimm); return;
9078 case 3: ands32 (cpu, bimm); return;
9085 case 0: and64 (cpu, bimm64); return;
9086 case 1: orr64 (cpu, bimm64); return;
9087 case 2: eor64 (cpu, bimm64); return;
9088 case 3: ands64 (cpu, bimm64); return;
9095 The uimm argument is a 16 bit value to be inserted into the
9096 target register the pos argument locates the 16 bit word in the
9097 dest register i.e. it is in {0, 1} for 32 bit and {0, 1, 2,
9099 N.B register arg may not be SP so it should be.
9100 accessed using the setGZRegisterXXX accessors. */
9102 /* 32 bit move 16 bit immediate zero remaining shorts. */
9104 movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9106 unsigned rd = INSTR (4, 0);
9108 aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
9111 /* 64 bit move 16 bit immediate zero remaining shorts. */
9113 movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9115 unsigned rd = INSTR (4, 0);
9117 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
9120 /* 32 bit move 16 bit immediate negated. */
9122 movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9124 unsigned rd = INSTR (4, 0);
9126 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
9129 /* 64 bit move 16 bit immediate negated. */
9131 movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9133 unsigned rd = INSTR (4, 0);
9136 (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
9137 ^ 0xffffffffffffffffULL));
9140 /* 32 bit move 16 bit immediate keep remaining shorts. */
9142 movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9144 unsigned rd = INSTR (4, 0);
9145 uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9146 uint32_t value = val << (pos * 16);
9147 uint32_t mask = ~(0xffffU << (pos * 16));
9149 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9152 /* 64 bit move 16 it immediate keep remaining shorts. */
9154 movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9156 unsigned rd = INSTR (4, 0);
9157 uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
9158 uint64_t value = (uint64_t) val << (pos * 16);
9159 uint64_t mask = ~(0xffffULL << (pos * 16));
9161 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9165 dexMoveWideImmediate (sim_cpu *cpu)
9167 /* assert instr[28:23] = 100101
9168 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9169 instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
9170 instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
9171 instr[20,5] = uimm16
9174 /* N.B. the (multiple of 16) shift is applied by the called routine,
9175 we just pass the multiplier. */
9178 uint32_t size = INSTR (31, 31);
9179 uint32_t op = INSTR (30, 29);
9180 uint32_t shift = INSTR (22, 21);
9182 /* 32 bit can only shift 0 or 1 lot of 16.
9183 anything else is an unallocated instruction. */
9184 if (size == 0 && (shift > 1))
9190 imm = INSTR (20, 5);
9195 movn32 (cpu, imm, shift);
9197 movz32 (cpu, imm, shift);
9199 movk32 (cpu, imm, shift);
9204 movn64 (cpu, imm, shift);
9206 movz64 (cpu, imm, shift);
9208 movk64 (cpu, imm, shift);
9212 /* Bitfield operations.
9213 These take a pair of bit positions r and s which are in {0..31}
9214 or {0..63} depending on the instruction word size.
9215 N.B register args may not be SP. */
9217 /* OK, we start with ubfm which just needs to pick
9218 some bits out of source zero the rest and write
9219 the result to dest. Just need two logical shifts. */
9221 /* 32 bit bitfield move, left and right of affected zeroed
9222 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9224 ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9227 unsigned rn = INSTR (9, 5);
9228 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9230 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
9233 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9234 We want only bits s:xxx:r at the bottom of the word
9235 so we LSL bit s up to bit 31 i.e. by 31 - s
9236 and then we LSR to bring bit 31 down to bit s - r
9237 i.e. by 31 + r - s. */
9239 value >>= 31 + r - s;
9243 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
9244 We want only bits s:xxx:0 starting at it 31-(r-1)
9245 so we LSL bit s up to bit 31 i.e. by 31 - s
9246 and then we LSL to bring bit 31 down to 31-(r-1)+s
9247 i.e. by r - (s + 1). */
9249 value >>= r - (s + 1);
9253 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9256 /* 64 bit bitfield move, left and right of affected zeroed
9257 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9259 ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9262 unsigned rn = INSTR (9, 5);
9263 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9267 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
9268 We want only bits s:xxx:r at the bottom of the word.
9269 So we LSL bit s up to bit 63 i.e. by 63 - s
9270 and then we LSR to bring bit 63 down to bit s - r
9271 i.e. by 63 + r - s. */
9273 value >>= 63 + r - s;
9277 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
9278 We want only bits s:xxx:0 starting at it 63-(r-1).
9279 So we LSL bit s up to bit 63 i.e. by 63 - s
9280 and then we LSL to bring bit 63 down to 63-(r-1)+s
9281 i.e. by r - (s + 1). */
9283 value >>= r - (s + 1);
9287 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9290 /* The signed versions need to insert sign bits
9291 on the left of the inserted bit field. so we do
9292 much the same as the unsigned version except we
9293 use an arithmetic shift right -- this just means
9294 we need to operate on signed values. */
9296 /* 32 bit bitfield move, left of affected sign-extended, right zeroed. */
9297 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9299 sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9302 unsigned rn = INSTR (9, 5);
9303 /* as per ubfm32 but use an ASR instead of an LSR. */
9304 int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);
9309 value >>= 31 + r - s;
9314 value >>= r - (s + 1);
9318 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
9321 /* 64 bit bitfield move, left of affected sign-extended, right zeroed. */
9322 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9324 sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9327 unsigned rn = INSTR (9, 5);
9328 /* acpu per ubfm but use an ASR instead of an LSR. */
9329 int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);
9334 value >>= 63 + r - s;
9339 value >>= r - (s + 1);
9343 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
9346 /* Finally, these versions leave non-affected bits
9347 as is. so we need to generate the bits as per
9348 ubfm and also generate a mask to pick the
9349 bits from the original and computed values. */
9351 /* 32 bit bitfield move, non-affected bits left as is.
9352 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9354 bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9356 unsigned rn = INSTR (9, 5);
9357 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9362 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
9365 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9366 We want only bits s:xxx:r at the bottom of the word
9367 so we LSL bit s up to bit 31 i.e. by 31 - s
9368 and then we LSR to bring bit 31 down to bit s - r
9369 i.e. by 31 + r - s. */
9371 value >>= 31 + r - s;
9372 /* the mask must include the same bits. */
9374 mask >>= 31 + r - s;
9378 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
9379 We want only bits s:xxx:0 starting at it 31-(r-1)
9380 so we LSL bit s up to bit 31 i.e. by 31 - s
9381 and then we LSL to bring bit 31 down to 31-(r-1)+s
9382 i.e. by r - (s + 1). */
9384 value >>= r - (s + 1);
9385 /* The mask must include the same bits. */
9387 mask >>= r - (s + 1);
9391 value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9397 (cpu, rd, NO_SP, (aarch64_get_reg_u32 (cpu, rd, NO_SP) & ~mask) | value);
9400 /* 64 bit bitfield move, non-affected bits left as is.
9401 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9403 bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9406 unsigned rn = INSTR (9, 5);
9407 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9408 uint64_t mask = 0xffffffffffffffffULL;
9412 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
9413 We want only bits s:xxx:r at the bottom of the word
9414 so we LSL bit s up to bit 63 i.e. by 63 - s
9415 and then we LSR to bring bit 63 down to bit s - r
9416 i.e. by 63 + r - s. */
9418 value >>= 63 + r - s;
9419 /* The mask must include the same bits. */
9421 mask >>= 63 + r - s;
9425 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
9426 We want only bits s:xxx:0 starting at it 63-(r-1)
9427 so we LSL bit s up to bit 63 i.e. by 63 - s
9428 and then we LSL to bring bit 63 down to 63-(r-1)+s
9429 i.e. by r - (s + 1). */
9431 value >>= r - (s + 1);
9432 /* The mask must include the same bits. */
9434 mask >>= r - (s + 1);
9439 (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
9443 dexBitfieldImmediate (sim_cpu *cpu)
9445 /* assert instr[28:23] = 100110
9446 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9447 instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
9448 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
9449 instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
9450 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
9454 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9457 uint32_t size = INSTR (31, 31);
9458 uint32_t N = INSTR (22, 22);
9459 /* 32 bit operations must have immr[5] = 0 and imms[5] = 0. */
9460 /* or else we have an UNALLOC. */
9461 uint32_t immr = INSTR (21, 16);
9466 if (!size && uimm (immr, 5, 5))
9469 imms = INSTR (15, 10);
9470 if (!size && uimm (imms, 5, 5))
9473 /* Switch on combined size and op. */
9474 dispatch = INSTR (31, 29);
9477 case 0: sbfm32 (cpu, immr, imms); return;
9478 case 1: bfm32 (cpu, immr, imms); return;
9479 case 2: ubfm32 (cpu, immr, imms); return;
9480 case 4: sbfm (cpu, immr, imms); return;
9481 case 5: bfm (cpu, immr, imms); return;
9482 case 6: ubfm (cpu, immr, imms); return;
9483 default: HALT_UNALLOC;
9488 do_EXTR_32 (sim_cpu *cpu)
9490 /* instr[31:21] = 00010011100
9492 instr[15,10] = imms : 0xxxxx for 32 bit
9495 unsigned rm = INSTR (20, 16);
9496 unsigned imms = INSTR (15, 10) & 31;
9497 unsigned rn = INSTR ( 9, 5);
9498 unsigned rd = INSTR ( 4, 0);
9502 val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
9504 val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9505 val2 <<= (32 - imms);
9507 aarch64_set_reg_u64 (cpu, rd, NO_SP, val1 | val2);
9511 do_EXTR_64 (sim_cpu *cpu)
9513 /* instr[31:21] = 10010011100
9518 unsigned rm = INSTR (20, 16);
9519 unsigned imms = INSTR (15, 10) & 63;
9520 unsigned rn = INSTR ( 9, 5);
9521 unsigned rd = INSTR ( 4, 0);
9524 val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
9526 val |= (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms));
9528 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
9532 dexExtractImmediate (sim_cpu *cpu)
9534 /* assert instr[28:23] = 100111
9535 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9536 instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
9537 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC
9538 instr[21] = op0 : must be 0 or UNALLOC
9540 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
9544 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9545 /* 64 bit operations must have N = 1 or else we have an UNALLOC. */
9547 uint32_t size = INSTR (31, 31);
9548 uint32_t N = INSTR (22, 22);
9549 /* 32 bit operations must have imms[5] = 0
9550 or else we have an UNALLOC. */
9551 uint32_t imms = INSTR (15, 10);
9556 if (!size && uimm (imms, 5, 5))
9559 /* Switch on combined size and op. */
9560 dispatch = INSTR (31, 29);
9565 else if (dispatch == 4)
9568 else if (dispatch == 1)
9575 dexDPImm (sim_cpu *cpu)
9577 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
9578 assert group == GROUP_DPIMM_1000 || grpoup == GROUP_DPIMM_1001
9579 bits [25,23] of a DPImm are the secondary dispatch vector. */
9580 uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));
9584 case DPIMM_PCADR_000:
9585 case DPIMM_PCADR_001:
9586 dexPCRelAddressing (cpu);
9589 case DPIMM_ADDSUB_010:
9590 case DPIMM_ADDSUB_011:
9591 dexAddSubtractImmediate (cpu);
9595 dexLogicalImmediate (cpu);
9599 dexMoveWideImmediate (cpu);
9602 case DPIMM_BITF_110:
9603 dexBitfieldImmediate (cpu);
9606 case DPIMM_EXTR_111:
9607 dexExtractImmediate (cpu);
9611 /* Should never reach here. */
9617 dexLoadUnscaledImmediate (sim_cpu *cpu)
9619 /* instr[29,24] == 111_00
9625 instr[20,12] = simm9
9626 instr[9,5] = rn may be SP. */
9627 /* unsigned rt = INSTR (4, 0); */
9628 uint32_t V = INSTR (26, 26);
9629 uint32_t dispatch = ( (INSTR (31, 30) << 2)
9631 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
9635 /* GReg operations. */
9638 case 0: sturb (cpu, imm); return;
9639 case 1: ldurb32 (cpu, imm); return;
9640 case 2: ldursb64 (cpu, imm); return;
9641 case 3: ldursb32 (cpu, imm); return;
9642 case 4: sturh (cpu, imm); return;
9643 case 5: ldurh32 (cpu, imm); return;
9644 case 6: ldursh64 (cpu, imm); return;
9645 case 7: ldursh32 (cpu, imm); return;
9646 case 8: stur32 (cpu, imm); return;
9647 case 9: ldur32 (cpu, imm); return;
9648 case 10: ldursw (cpu, imm); return;
9649 case 12: stur64 (cpu, imm); return;
9650 case 13: ldur64 (cpu, imm); return;
9663 /* FReg operations. */
9666 case 2: fsturq (cpu, imm); return;
9667 case 3: fldurq (cpu, imm); return;
9668 case 8: fsturs (cpu, imm); return;
9669 case 9: fldurs (cpu, imm); return;
9670 case 12: fsturd (cpu, imm); return;
9671 case 13: fldurd (cpu, imm); return;
9673 case 0: /* STUR 8 bit FP. */
9674 case 1: /* LDUR 8 bit FP. */
9675 case 4: /* STUR 16 bit FP. */
9676 case 5: /* LDUR 8 bit FP. */
9690 /* N.B. A preliminary note regarding all the ldrs<x>32
9693 The signed value loaded by these instructions is cast to unsigned
9694 before being assigned to aarch64_get_reg_u64 (cpu, N) i.e. to the
9695 64 bit element of the GReg union. this performs a 32 bit sign extension
9696 (as required) but avoids 64 bit sign extension, thus ensuring that the
9697 top half of the register word is zero. this is what the spec demands
9698 when a 32 bit load occurs. */
9700 /* 32 bit load sign-extended byte scaled unsigned 12 bit. */
9702 ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
9704 unsigned int rn = INSTR (9, 5);
9705 unsigned int rt = INSTR (4, 0);
9707 /* The target register may not be SP but the source may be
9708 there is no scaling required for a byte load. */
9709 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
9710 aarch64_set_reg_u64 (cpu, rt, NO_SP,
9711 (int64_t) aarch64_get_mem_s8 (cpu, address));
9714 /* 32 bit load sign-extended byte scaled or unscaled zero-
9715 or sign-extended 32-bit register offset. */
9717 ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
9719 unsigned int rm = INSTR (20, 16);
9720 unsigned int rn = INSTR (9, 5);
9721 unsigned int rt = INSTR (4, 0);
9723 /* rn may reference SP, rm and rt must reference ZR. */
9725 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9726 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9729 /* There is no scaling required for a byte load. */
9731 (cpu, rt, NO_SP, (int64_t) aarch64_get_mem_s8 (cpu, address
9735 /* 32 bit load sign-extended byte unscaled signed 9 bit with
9736 pre- or post-writeback. */
9738 ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
9741 unsigned int rn = INSTR (9, 5);
9742 unsigned int rt = INSTR (4, 0);
9744 if (rn == rt && wb != NoWriteBack)
9747 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9752 aarch64_set_reg_u64 (cpu, rt, NO_SP,
9753 (int64_t) aarch64_get_mem_s8 (cpu, address));
9758 if (wb != NoWriteBack)
9759 aarch64_set_reg_u64 (cpu, rn, NO_SP, address);
9762 /* 8 bit store scaled. */
9764 fstrb_abs (sim_cpu *cpu, uint32_t offset)
9766 unsigned st = INSTR (4, 0);
9767 unsigned rn = INSTR (9, 5);
9769 aarch64_set_mem_u8 (cpu,
9770 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
9771 aarch64_get_vec_u8 (cpu, st, 0));
9774 /* 8 bit store scaled or unscaled zero- or
9775 sign-extended 8-bit register offset. */
9777 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
9779 unsigned rm = INSTR (20, 16);
9780 unsigned rn = INSTR (9, 5);
9781 unsigned st = INSTR (4, 0);
9783 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9784 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9786 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
9789 (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0));
9792 /* 16 bit store scaled. */
9794 fstrh_abs (sim_cpu *cpu, uint32_t offset)
9796 unsigned st = INSTR (4, 0);
9797 unsigned rn = INSTR (9, 5);
9801 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16),
9802 aarch64_get_vec_u16 (cpu, st, 0));
9805 /* 16 bit store scaled or unscaled zero-
9806 or sign-extended 16-bit register offset. */
9808 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
9810 unsigned rm = INSTR (20, 16);
9811 unsigned rn = INSTR (9, 5);
9812 unsigned st = INSTR (4, 0);
9814 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9815 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9817 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
9820 (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0));
9823 /* 32 bit store scaled unsigned 12 bit. */
9825 fstrs_abs (sim_cpu *cpu, uint32_t offset)
9827 unsigned st = INSTR (4, 0);
9828 unsigned rn = INSTR (9, 5);
9832 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32),
9833 aarch64_get_vec_u32 (cpu, st, 0));
9836 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
9838 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
9840 unsigned rn = INSTR (9, 5);
9841 unsigned st = INSTR (4, 0);
9843 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9848 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0));
9853 if (wb != NoWriteBack)
9854 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
9857 /* 32 bit store scaled or unscaled zero-
9858 or sign-extended 32-bit register offset. */
9860 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
9862 unsigned rm = INSTR (20, 16);
9863 unsigned rn = INSTR (9, 5);
9864 unsigned st = INSTR (4, 0);
9866 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9867 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9869 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
9872 (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0));
9875 /* 64 bit store scaled unsigned 12 bit. */
9877 fstrd_abs (sim_cpu *cpu, uint32_t offset)
9879 unsigned st = INSTR (4, 0);
9880 unsigned rn = INSTR (9, 5);
9884 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64),
9885 aarch64_get_vec_u64 (cpu, st, 0));
9888 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
9890 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
9892 unsigned rn = INSTR (9, 5);
9893 unsigned st = INSTR (4, 0);
9895 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9900 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0));
9905 if (wb != NoWriteBack)
9906 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
9909 /* 64 bit store scaled or unscaled zero-
9910 or sign-extended 32-bit register offset. */
9912 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
9914 unsigned rm = INSTR (20, 16);
9915 unsigned rn = INSTR (9, 5);
9916 unsigned st = INSTR (4, 0);
9918 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9919 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9921 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
9924 (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0));
9927 /* 128 bit store scaled unsigned 12 bit. */
9929 fstrq_abs (sim_cpu *cpu, uint32_t offset)
9932 unsigned st = INSTR (4, 0);
9933 unsigned rn = INSTR (9, 5);
9936 aarch64_get_FP_long_double (cpu, st, & a);
9938 addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
9939 aarch64_set_mem_long_double (cpu, addr, a);
9942 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */
9944 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
9947 unsigned rn = INSTR (9, 5);
9948 unsigned st = INSTR (4, 0);
9949 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9954 aarch64_get_FP_long_double (cpu, st, & a);
9955 aarch64_set_mem_long_double (cpu, address, a);
9960 if (wb != NoWriteBack)
9961 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
9964 /* 128 bit store scaled or unscaled zero-
9965 or sign-extended 32-bit register offset. */
9967 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
9969 unsigned rm = INSTR (20, 16);
9970 unsigned rn = INSTR (9, 5);
9971 unsigned st = INSTR (4, 0);
9973 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9974 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9976 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
9980 aarch64_get_FP_long_double (cpu, st, & a);
9981 aarch64_set_mem_long_double (cpu, address + displacement, a);
9985 dexLoadImmediatePrePost (sim_cpu *cpu)
9987 /* instr[31,30] = size
9993 instr[20,12] = simm9
9994 instr[11] = wb : 0 ==> Post, 1 ==> Pre
9996 instr[9,5] = Rn may be SP.
9999 uint32_t V = INSTR (26, 26);
10000 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10001 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10002 WriteBack wb = INSTR (11, 11);
10006 /* GReg operations. */
10009 case 0: strb_wb (cpu, imm, wb); return;
10010 case 1: ldrb32_wb (cpu, imm, wb); return;
10011 case 2: ldrsb_wb (cpu, imm, wb); return;
10012 case 3: ldrsb32_wb (cpu, imm, wb); return;
10013 case 4: strh_wb (cpu, imm, wb); return;
10014 case 5: ldrh32_wb (cpu, imm, wb); return;
10015 case 6: ldrsh64_wb (cpu, imm, wb); return;
10016 case 7: ldrsh32_wb (cpu, imm, wb); return;
10017 case 8: str32_wb (cpu, imm, wb); return;
10018 case 9: ldr32_wb (cpu, imm, wb); return;
10019 case 10: ldrsw_wb (cpu, imm, wb); return;
10020 case 12: str_wb (cpu, imm, wb); return;
10021 case 13: ldr_wb (cpu, imm, wb); return;
10031 /* FReg operations. */
10034 case 2: fstrq_wb (cpu, imm, wb); return;
10035 case 3: fldrq_wb (cpu, imm, wb); return;
10036 case 8: fstrs_wb (cpu, imm, wb); return;
10037 case 9: fldrs_wb (cpu, imm, wb); return;
10038 case 12: fstrd_wb (cpu, imm, wb); return;
10039 case 13: fldrd_wb (cpu, imm, wb); return;
10041 case 0: /* STUR 8 bit FP. */
10042 case 1: /* LDUR 8 bit FP. */
10043 case 4: /* STUR 16 bit FP. */
10044 case 5: /* LDUR 8 bit FP. */
10059 dexLoadRegisterOffset (sim_cpu *cpu)
10061 /* instr[31,30] = size
10068 instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10069 110 ==> SXTW, 111 ==> SXTX,
10074 instr[4,0] = rt. */
10076 uint32_t V = INSTR (26, 26);
10077 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10078 Scaling scale = INSTR (12, 12);
10079 Extension extensionType = INSTR (15, 13);
10081 /* Check for illegal extension types. */
10082 if (uimm (extensionType, 1, 1) == 0)
10085 if (extensionType == UXTX || extensionType == SXTX)
10086 extensionType = NoExtension;
10090 /* GReg operations. */
10093 case 0: strb_scale_ext (cpu, scale, extensionType); return;
10094 case 1: ldrb32_scale_ext (cpu, scale, extensionType); return;
10095 case 2: ldrsb_scale_ext (cpu, scale, extensionType); return;
10096 case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return;
10097 case 4: strh_scale_ext (cpu, scale, extensionType); return;
10098 case 5: ldrh32_scale_ext (cpu, scale, extensionType); return;
10099 case 6: ldrsh_scale_ext (cpu, scale, extensionType); return;
10100 case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return;
10101 case 8: str32_scale_ext (cpu, scale, extensionType); return;
10102 case 9: ldr32_scale_ext (cpu, scale, extensionType); return;
10103 case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10104 case 12: str_scale_ext (cpu, scale, extensionType); return;
10105 case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10106 case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10115 /* FReg operations. */
10118 case 1: /* LDUR 8 bit FP. */
10120 case 3: fldrq_scale_ext (cpu, scale, extensionType); return;
10121 case 5: /* LDUR 8 bit FP. */
10123 case 9: fldrs_scale_ext (cpu, scale, extensionType); return;
10124 case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
10126 case 0: fstrb_scale_ext (cpu, scale, extensionType); return;
10127 case 2: fstrq_scale_ext (cpu, scale, extensionType); return;
10128 case 4: fstrh_scale_ext (cpu, scale, extensionType); return;
10129 case 8: fstrs_scale_ext (cpu, scale, extensionType); return;
10130 case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
10144 dexLoadUnsignedImmediate (sim_cpu *cpu)
10146 /* instr[29,24] == 111_01
10147 instr[31,30] = size
10150 instr[21,10] = uimm12 : unsigned immediate offset
10151 instr[9,5] = rn may be SP.
10152 instr[4,0] = rt. */
10154 uint32_t V = INSTR (26,26);
10155 uint32_t dispatch = ( (INSTR (31, 30) << 2)
10157 uint32_t imm = INSTR (21, 10);
10161 /* GReg operations. */
10164 case 0: strb_abs (cpu, imm); return;
10165 case 1: ldrb32_abs (cpu, imm); return;
10166 case 2: ldrsb_abs (cpu, imm); return;
10167 case 3: ldrsb32_abs (cpu, imm); return;
10168 case 4: strh_abs (cpu, imm); return;
10169 case 5: ldrh32_abs (cpu, imm); return;
10170 case 6: ldrsh_abs (cpu, imm); return;
10171 case 7: ldrsh32_abs (cpu, imm); return;
10172 case 8: str32_abs (cpu, imm); return;
10173 case 9: ldr32_abs (cpu, imm); return;
10174 case 10: ldrsw_abs (cpu, imm); return;
10175 case 12: str_abs (cpu, imm); return;
10176 case 13: ldr_abs (cpu, imm); return;
10177 case 14: prfm_abs (cpu, imm); return;
10186 /* FReg operations. */
10189 case 0: fstrb_abs (cpu, imm); return;
10190 case 4: fstrh_abs (cpu, imm); return;
10191 case 8: fstrs_abs (cpu, imm); return;
10192 case 12: fstrd_abs (cpu, imm); return;
10193 case 2: fstrq_abs (cpu, imm); return;
10195 case 1: fldrb_abs (cpu, imm); return;
10196 case 5: fldrh_abs (cpu, imm); return;
10197 case 9: fldrs_abs (cpu, imm); return;
10198 case 13: fldrd_abs (cpu, imm); return;
10199 case 3: fldrq_abs (cpu, imm); return;
10213 dexLoadExclusive (sim_cpu *cpu)
10215 /* assert instr[29:24] = 001000;
10216 instr[31,30] = size
10217 instr[23] = 0 if exclusive
10218 instr[22] = L : 1 if load, 0 if store
10219 instr[21] = 1 if pair
10221 instr[15] = o0 : 1 if ordered
10224 instr[4.0] = Rt. */
10226 switch (INSTR (22, 21))
10228 case 2: ldxr (cpu); return;
10229 case 0: stxr (cpu); return;
10235 dexLoadOther (sim_cpu *cpu)
10239 /* instr[29,25] = 111_0
10240 instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
10241 instr[21:11,10] is the secondary dispatch. */
10242 if (INSTR (24, 24))
10244 dexLoadUnsignedImmediate (cpu);
10248 dispatch = ( (INSTR (21, 21) << 2)
10252 case 0: dexLoadUnscaledImmediate (cpu); return;
10253 case 1: dexLoadImmediatePrePost (cpu); return;
10254 case 3: dexLoadImmediatePrePost (cpu); return;
10255 case 6: dexLoadRegisterOffset (cpu); return;
10267 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10269 unsigned rn = INSTR (14, 10);
10270 unsigned rd = INSTR (9, 5);
10271 unsigned rm = INSTR (4, 0);
10272 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10274 if ((rn == rd || rm == rd) && wb != NoWriteBack)
10275 HALT_UNALLOC; /* ??? */
10282 aarch64_set_mem_u32 (cpu, address,
10283 aarch64_get_reg_u32 (cpu, rm, NO_SP));
10284 aarch64_set_mem_u32 (cpu, address + 4,
10285 aarch64_get_reg_u32 (cpu, rn, NO_SP));
10290 if (wb != NoWriteBack)
10291 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10295 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10297 unsigned rn = INSTR (14, 10);
10298 unsigned rd = INSTR (9, 5);
10299 unsigned rm = INSTR (4, 0);
10300 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10302 if ((rn == rd || rm == rd) && wb != NoWriteBack)
10303 HALT_UNALLOC; /* ??? */
10310 aarch64_set_mem_u64 (cpu, address,
10311 aarch64_get_reg_u64 (cpu, rm, SP_OK));
10312 aarch64_set_mem_u64 (cpu, address + 8,
10313 aarch64_get_reg_u64 (cpu, rn, SP_OK));
10318 if (wb != NoWriteBack)
10319 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10323 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10325 unsigned rn = INSTR (14, 10);
10326 unsigned rd = INSTR (9, 5);
10327 unsigned rm = INSTR (4, 0);
10328 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10330 /* treat this as unalloc to make sure we don't do it. */
10339 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address));
10340 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4));
10345 if (wb != NoWriteBack)
10346 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10350 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10352 unsigned rn = INSTR (14, 10);
10353 unsigned rd = INSTR (9, 5);
10354 unsigned rm = INSTR (4, 0);
10355 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10357 /* Treat this as unalloc to make sure we don't do it. */
10366 aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address));
10367 aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4));
10372 if (wb != NoWriteBack)
10373 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10377 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10379 unsigned rn = INSTR (14, 10);
10380 unsigned rd = INSTR (9, 5);
10381 unsigned rm = INSTR (4, 0);
10382 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10384 /* Treat this as unalloc to make sure we don't do it. */
10393 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address));
10394 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8));
10399 if (wb != NoWriteBack)
10400 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10404 dex_load_store_pair_gr (sim_cpu *cpu)
10406 /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
10407 instr[29,25] = instruction encoding: 101_0
10408 instr[26] = V : 1 if fp 0 if gp
10409 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
10410 instr[22] = load/store (1=> load)
10411 instr[21,15] = signed, scaled, offset
10414 instr[ 4, 0] = Rm. */
10416 uint32_t dispatch = ((INSTR (31, 30) << 3)
10418 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
10422 case 2: store_pair_u32 (cpu, offset, Post); return;
10423 case 3: load_pair_u32 (cpu, offset, Post); return;
10424 case 4: store_pair_u32 (cpu, offset, NoWriteBack); return;
10425 case 5: load_pair_u32 (cpu, offset, NoWriteBack); return;
10426 case 6: store_pair_u32 (cpu, offset, Pre); return;
10427 case 7: load_pair_u32 (cpu, offset, Pre); return;
10429 case 11: load_pair_s32 (cpu, offset, Post); return;
10430 case 13: load_pair_s32 (cpu, offset, NoWriteBack); return;
10431 case 15: load_pair_s32 (cpu, offset, Pre); return;
10433 case 18: store_pair_u64 (cpu, offset, Post); return;
10434 case 19: load_pair_u64 (cpu, offset, Post); return;
10435 case 20: store_pair_u64 (cpu, offset, NoWriteBack); return;
10436 case 21: load_pair_u64 (cpu, offset, NoWriteBack); return;
10437 case 22: store_pair_u64 (cpu, offset, Pre); return;
10438 case 23: load_pair_u64 (cpu, offset, Pre); return;
10446 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
10448 unsigned rn = INSTR (14, 10);
10449 unsigned rd = INSTR (9, 5);
10450 unsigned rm = INSTR (4, 0);
10451 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10458 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0));
10459 aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0));
10464 if (wb != NoWriteBack)
10465 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10469 store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
10471 unsigned rn = INSTR (14, 10);
10472 unsigned rd = INSTR (9, 5);
10473 unsigned rm = INSTR (4, 0);
10474 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10481 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0));
10482 aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0));
10487 if (wb != NoWriteBack)
10488 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10492 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
10495 unsigned rn = INSTR (14, 10);
10496 unsigned rd = INSTR (9, 5);
10497 unsigned rm = INSTR (4, 0);
10498 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10505 aarch64_get_FP_long_double (cpu, rm, & a);
10506 aarch64_set_mem_long_double (cpu, address, a);
10507 aarch64_get_FP_long_double (cpu, rn, & a);
10508 aarch64_set_mem_long_double (cpu, address + 16, a);
10513 if (wb != NoWriteBack)
10514 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10518 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
10520 unsigned rn = INSTR (14, 10);
10521 unsigned rd = INSTR (9, 5);
10522 unsigned rm = INSTR (4, 0);
10523 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10533 aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address));
10534 aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4));
10539 if (wb != NoWriteBack)
10540 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10544 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
10546 unsigned rn = INSTR (14, 10);
10547 unsigned rd = INSTR (9, 5);
10548 unsigned rm = INSTR (4, 0);
10549 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10559 aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address));
10560 aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8));
10565 if (wb != NoWriteBack)
10566 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10570 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
10573 unsigned rn = INSTR (14, 10);
10574 unsigned rd = INSTR (9, 5);
10575 unsigned rm = INSTR (4, 0);
10576 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10586 aarch64_get_mem_long_double (cpu, address, & a);
10587 aarch64_set_FP_long_double (cpu, rm, a);
10588 aarch64_get_mem_long_double (cpu, address + 16, & a);
10589 aarch64_set_FP_long_double (cpu, rn, a);
10594 if (wb != NoWriteBack)
10595 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10599 dex_load_store_pair_fp (sim_cpu *cpu)
10601 /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
10602 instr[29,25] = instruction encoding
10603 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
10604 instr[22] = load/store (1=> load)
10605 instr[21,15] = signed, scaled, offset
10608 instr[ 4, 0] = Rm */
10610 uint32_t dispatch = ((INSTR (31, 30) << 3)
10612 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
10616 case 2: store_pair_float (cpu, offset, Post); return;
10617 case 3: load_pair_float (cpu, offset, Post); return;
10618 case 4: store_pair_float (cpu, offset, NoWriteBack); return;
10619 case 5: load_pair_float (cpu, offset, NoWriteBack); return;
10620 case 6: store_pair_float (cpu, offset, Pre); return;
10621 case 7: load_pair_float (cpu, offset, Pre); return;
10623 case 10: store_pair_double (cpu, offset, Post); return;
10624 case 11: load_pair_double (cpu, offset, Post); return;
10625 case 12: store_pair_double (cpu, offset, NoWriteBack); return;
10626 case 13: load_pair_double (cpu, offset, NoWriteBack); return;
10627 case 14: store_pair_double (cpu, offset, Pre); return;
10628 case 15: load_pair_double (cpu, offset, Pre); return;
10630 case 18: store_pair_long_double (cpu, offset, Post); return;
10631 case 19: load_pair_long_double (cpu, offset, Post); return;
10632 case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
10633 case 21: load_pair_long_double (cpu, offset, NoWriteBack); return;
10634 case 22: store_pair_long_double (cpu, offset, Pre); return;
10635 case 23: load_pair_long_double (cpu, offset, Pre); return;
10642 static inline unsigned
10643 vec_reg (unsigned v, unsigned o)
10645 return (v + o) & 0x3F;
10648 /* Load multiple N-element structures to N consecutive registers. */
10650 vec_load (sim_cpu *cpu, uint64_t address, unsigned N)
10652 int all = INSTR (30, 30);
10653 unsigned size = INSTR (11, 10);
10654 unsigned vd = INSTR (4, 0);
10659 case 0: /* 8-bit operations. */
10661 for (i = 0; i < (16 * N); i++)
10662 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15,
10663 aarch64_get_mem_u8 (cpu, address + i));
10665 for (i = 0; i < (8 * N); i++)
10666 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7,
10667 aarch64_get_mem_u8 (cpu, address + i));
10670 case 1: /* 16-bit operations. */
10672 for (i = 0; i < (8 * N); i++)
10673 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7,
10674 aarch64_get_mem_u16 (cpu, address + i * 2));
10676 for (i = 0; i < (4 * N); i++)
10677 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3,
10678 aarch64_get_mem_u16 (cpu, address + i * 2));
10681 case 2: /* 32-bit operations. */
10683 for (i = 0; i < (4 * N); i++)
10684 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3,
10685 aarch64_get_mem_u32 (cpu, address + i * 4));
10687 for (i = 0; i < (2 * N); i++)
10688 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1,
10689 aarch64_get_mem_u32 (cpu, address + i * 4));
10692 case 3: /* 64-bit operations. */
10694 for (i = 0; i < (2 * N); i++)
10695 aarch64_set_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1,
10696 aarch64_get_mem_u64 (cpu, address + i * 8));
10698 for (i = 0; i < N; i++)
10699 aarch64_set_vec_u64 (cpu, vec_reg (vd, i), 0,
10700 aarch64_get_mem_u64 (cpu, address + i * 8));
10705 /* LD4: load multiple 4-element to four consecutive registers. */
10707 LD4 (sim_cpu *cpu, uint64_t address)
10709 vec_load (cpu, address, 4);
10712 /* LD3: load multiple 3-element structures to three consecutive registers. */
10714 LD3 (sim_cpu *cpu, uint64_t address)
10716 vec_load (cpu, address, 3);
10719 /* LD2: load multiple 2-element structures to two consecutive registers. */
10721 LD2 (sim_cpu *cpu, uint64_t address)
10723 vec_load (cpu, address, 2);
10726 /* Load multiple 1-element structures into one register. */
10728 LD1_1 (sim_cpu *cpu, uint64_t address)
10730 int all = INSTR (30, 30);
10731 unsigned size = INSTR (11, 10);
10732 unsigned vd = INSTR (4, 0);
10738 /* LD1 {Vd.16b}, addr, #16 */
10739 /* LD1 {Vd.8b}, addr, #8 */
10740 for (i = 0; i < (all ? 16 : 8); i++)
10741 aarch64_set_vec_u8 (cpu, vd, i,
10742 aarch64_get_mem_u8 (cpu, address + i));
10746 /* LD1 {Vd.8h}, addr, #16 */
10747 /* LD1 {Vd.4h}, addr, #8 */
10748 for (i = 0; i < (all ? 8 : 4); i++)
10749 aarch64_set_vec_u16 (cpu, vd, i,
10750 aarch64_get_mem_u16 (cpu, address + i * 2));
10754 /* LD1 {Vd.4s}, addr, #16 */
10755 /* LD1 {Vd.2s}, addr, #8 */
10756 for (i = 0; i < (all ? 4 : 2); i++)
10757 aarch64_set_vec_u32 (cpu, vd, i,
10758 aarch64_get_mem_u32 (cpu, address + i * 4));
10762 /* LD1 {Vd.2d}, addr, #16 */
10763 /* LD1 {Vd.1d}, addr, #8 */
10764 for (i = 0; i < (all ? 2 : 1); i++)
10765 aarch64_set_vec_u64 (cpu, vd, i,
10766 aarch64_get_mem_u64 (cpu, address + i * 8));
10771 /* Load multiple 1-element structures into two registers. */
10773 LD1_2 (sim_cpu *cpu, uint64_t address)
10775 /* FIXME: This algorithm is *exactly* the same as the LD2 version.
10776 So why have two different instructions ? There must be something
10777 wrong somewhere. */
10778 vec_load (cpu, address, 2);
10781 /* Load multiple 1-element structures into three registers. */
10783 LD1_3 (sim_cpu *cpu, uint64_t address)
10785 /* FIXME: This algorithm is *exactly* the same as the LD3 version.
10786 So why have two different instructions ? There must be something
10787 wrong somewhere. */
10788 vec_load (cpu, address, 3);
10791 /* Load multiple 1-element structures into four registers. */
10793 LD1_4 (sim_cpu *cpu, uint64_t address)
10795 /* FIXME: This algorithm is *exactly* the same as the LD4 version.
10796 So why have two different instructions ? There must be something
10797 wrong somewhere. */
10798 vec_load (cpu, address, 4);
10801 /* Store multiple N-element structures to N consecutive registers. */
10803 vec_store (sim_cpu *cpu, uint64_t address, unsigned N)
10805 int all = INSTR (30, 30);
10806 unsigned size = INSTR (11, 10);
10807 unsigned vd = INSTR (4, 0);
10812 case 0: /* 8-bit operations. */
10814 for (i = 0; i < (16 * N); i++)
10817 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15));
10819 for (i = 0; i < (8 * N); i++)
10822 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7));
10825 case 1: /* 16-bit operations. */
10827 for (i = 0; i < (8 * N); i++)
10828 aarch64_set_mem_u16
10829 (cpu, address + i * 2,
10830 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7));
10832 for (i = 0; i < (4 * N); i++)
10833 aarch64_set_mem_u16
10834 (cpu, address + i * 2,
10835 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3));
10838 case 2: /* 32-bit operations. */
10840 for (i = 0; i < (4 * N); i++)
10841 aarch64_set_mem_u32
10842 (cpu, address + i * 4,
10843 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3));
10845 for (i = 0; i < (2 * N); i++)
10846 aarch64_set_mem_u32
10847 (cpu, address + i * 4,
10848 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1));
10851 case 3: /* 64-bit operations. */
10853 for (i = 0; i < (2 * N); i++)
10854 aarch64_set_mem_u64
10855 (cpu, address + i * 8,
10856 aarch64_get_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1));
10858 for (i = 0; i < N; i++)
10859 aarch64_set_mem_u64
10860 (cpu, address + i * 8,
10861 aarch64_get_vec_u64 (cpu, vec_reg (vd, i), 0));
10866 /* Store multiple 4-element structure to four consecutive registers. */
10868 ST4 (sim_cpu *cpu, uint64_t address)
10870 vec_store (cpu, address, 4);
10873 /* Store multiple 3-element structures to three consecutive registers. */
10875 ST3 (sim_cpu *cpu, uint64_t address)
10877 vec_store (cpu, address, 3);
10880 /* Store multiple 2-element structures to two consecutive registers. */
10882 ST2 (sim_cpu *cpu, uint64_t address)
10884 vec_store (cpu, address, 2);
10887 /* Store multiple 1-element structures into one register. */
10889 ST1_1 (sim_cpu *cpu, uint64_t address)
10891 int all = INSTR (30, 30);
10892 unsigned size = INSTR (11, 10);
10893 unsigned vd = INSTR (4, 0);
10899 for (i = 0; i < (all ? 16 : 8); i++)
10900 aarch64_set_mem_u8 (cpu, address + i,
10901 aarch64_get_vec_u8 (cpu, vd, i));
10905 for (i = 0; i < (all ? 8 : 4); i++)
10906 aarch64_set_mem_u16 (cpu, address + i * 2,
10907 aarch64_get_vec_u16 (cpu, vd, i));
10911 for (i = 0; i < (all ? 4 : 2); i++)
10912 aarch64_set_mem_u32 (cpu, address + i * 4,
10913 aarch64_get_vec_u32 (cpu, vd, i));
10917 for (i = 0; i < (all ? 2 : 1); i++)
10918 aarch64_set_mem_u64 (cpu, address + i * 8,
10919 aarch64_get_vec_u64 (cpu, vd, i));
10924 /* Store multiple 1-element structures into two registers. */
10926 ST1_2 (sim_cpu *cpu, uint64_t address)
10928 /* FIXME: This algorithm is *exactly* the same as the ST2 version.
10929 So why have two different instructions ? There must be
10930 something wrong somewhere. */
10931 vec_store (cpu, address, 2);
10934 /* Store multiple 1-element structures into three registers. */
10936 ST1_3 (sim_cpu *cpu, uint64_t address)
10938 /* FIXME: This algorithm is *exactly* the same as the ST3 version.
10939 So why have two different instructions ? There must be
10940 something wrong somewhere. */
10941 vec_store (cpu, address, 3);
10944 /* Store multiple 1-element structures into four registers. */
10946 ST1_4 (sim_cpu *cpu, uint64_t address)
10948 /* FIXME: This algorithm is *exactly* the same as the ST4 version.
10949 So why have two different instructions ? There must be
10950 something wrong somewhere. */
10951 vec_store (cpu, address, 4);
10955 do_vec_LDnR (sim_cpu *cpu, uint64_t address)
10958 instr[30] = element selector 0=>half, 1=>all elements
10959 instr[29,24] = 00 1101
10960 instr[23] = 0=>simple, 1=>post
10962 instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
10963 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
10964 11111 (immediate post inc)
10966 instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
10968 instr[11,10] = element size 00=> byte(b), 01=> half(h),
10969 10=> word(s), 11=> double(d)
10970 instr[9,5] = address
10973 unsigned full = INSTR (30, 30);
10974 unsigned vd = INSTR (4, 0);
10975 unsigned size = INSTR (11, 10);
10978 NYI_assert (29, 24, 0x0D);
10979 NYI_assert (22, 22, 1);
10980 NYI_assert (15, 14, 3);
10981 NYI_assert (12, 12, 0);
10983 switch ((INSTR (13, 13) << 1)
10986 case 0: /* LD1R. */
10991 uint8_t val = aarch64_get_mem_u8 (cpu, address);
10992 for (i = 0; i < (full ? 16 : 8); i++)
10993 aarch64_set_vec_u8 (cpu, vd, i, val);
10999 uint16_t val = aarch64_get_mem_u16 (cpu, address);
11000 for (i = 0; i < (full ? 8 : 4); i++)
11001 aarch64_set_vec_u16 (cpu, vd, i, val);
11007 uint32_t val = aarch64_get_mem_u32 (cpu, address);
11008 for (i = 0; i < (full ? 4 : 2); i++)
11009 aarch64_set_vec_u32 (cpu, vd, i, val);
11015 uint64_t val = aarch64_get_mem_u64 (cpu, address);
11016 for (i = 0; i < (full ? 2 : 1); i++)
11017 aarch64_set_vec_u64 (cpu, vd, i, val);
11026 case 1: /* LD2R. */
11031 uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11032 uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11034 for (i = 0; i < (full ? 16 : 8); i++)
11036 aarch64_set_vec_u8 (cpu, vd, 0, val1);
11037 aarch64_set_vec_u8 (cpu, vd + 1, 0, val2);
11044 uint16_t val1 = aarch64_get_mem_u16 (cpu, address);
11045 uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11047 for (i = 0; i < (full ? 8 : 4); i++)
11049 aarch64_set_vec_u16 (cpu, vd, 0, val1);
11050 aarch64_set_vec_u16 (cpu, vd + 1, 0, val2);
11057 uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11058 uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11060 for (i = 0; i < (full ? 4 : 2); i++)
11062 aarch64_set_vec_u32 (cpu, vd, 0, val1);
11063 aarch64_set_vec_u32 (cpu, vd + 1, 0, val2);
11070 uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11071 uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11073 for (i = 0; i < (full ? 2 : 1); i++)
11075 aarch64_set_vec_u64 (cpu, vd, 0, val1);
11076 aarch64_set_vec_u64 (cpu, vd + 1, 0, val2);
11086 case 2: /* LD3R. */
11091 uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11092 uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11093 uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2);
11095 for (i = 0; i < (full ? 16 : 8); i++)
11097 aarch64_set_vec_u8 (cpu, vd, 0, val1);
11098 aarch64_set_vec_u8 (cpu, vd + 1, 0, val2);
11099 aarch64_set_vec_u8 (cpu, vd + 2, 0, val3);
11106 uint32_t val1 = aarch64_get_mem_u16 (cpu, address);
11107 uint32_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11108 uint32_t val3 = aarch64_get_mem_u16 (cpu, address + 4);
11110 for (i = 0; i < (full ? 8 : 4); i++)
11112 aarch64_set_vec_u16 (cpu, vd, 0, val1);
11113 aarch64_set_vec_u16 (cpu, vd + 1, 0, val2);
11114 aarch64_set_vec_u16 (cpu, vd + 2, 0, val3);
11121 uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11122 uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11123 uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8);
11125 for (i = 0; i < (full ? 4 : 2); i++)
11127 aarch64_set_vec_u32 (cpu, vd, 0, val1);
11128 aarch64_set_vec_u32 (cpu, vd + 1, 0, val2);
11129 aarch64_set_vec_u32 (cpu, vd + 2, 0, val3);
11136 uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11137 uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11138 uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16);
11140 for (i = 0; i < (full ? 2 : 1); i++)
11142 aarch64_set_vec_u64 (cpu, vd, 0, val1);
11143 aarch64_set_vec_u64 (cpu, vd + 1, 0, val2);
11144 aarch64_set_vec_u64 (cpu, vd + 2, 0, val3);
11154 case 3: /* LD4R. */
11159 uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11160 uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11161 uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2);
11162 uint8_t val4 = aarch64_get_mem_u8 (cpu, address + 3);
11164 for (i = 0; i < (full ? 16 : 8); i++)
11166 aarch64_set_vec_u8 (cpu, vd, 0, val1);
11167 aarch64_set_vec_u8 (cpu, vd + 1, 0, val2);
11168 aarch64_set_vec_u8 (cpu, vd + 2, 0, val3);
11169 aarch64_set_vec_u8 (cpu, vd + 3, 0, val4);
11176 uint32_t val1 = aarch64_get_mem_u16 (cpu, address);
11177 uint32_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11178 uint32_t val3 = aarch64_get_mem_u16 (cpu, address + 4);
11179 uint32_t val4 = aarch64_get_mem_u16 (cpu, address + 6);
11181 for (i = 0; i < (full ? 8 : 4); i++)
11183 aarch64_set_vec_u16 (cpu, vd, 0, val1);
11184 aarch64_set_vec_u16 (cpu, vd + 1, 0, val2);
11185 aarch64_set_vec_u16 (cpu, vd + 2, 0, val3);
11186 aarch64_set_vec_u16 (cpu, vd + 3, 0, val4);
11193 uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11194 uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11195 uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8);
11196 uint32_t val4 = aarch64_get_mem_u32 (cpu, address + 12);
11198 for (i = 0; i < (full ? 4 : 2); i++)
11200 aarch64_set_vec_u32 (cpu, vd, 0, val1);
11201 aarch64_set_vec_u32 (cpu, vd + 1, 0, val2);
11202 aarch64_set_vec_u32 (cpu, vd + 2, 0, val3);
11203 aarch64_set_vec_u32 (cpu, vd + 3, 0, val4);
11210 uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11211 uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11212 uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16);
11213 uint64_t val4 = aarch64_get_mem_u64 (cpu, address + 24);
11215 for (i = 0; i < (full ? 2 : 1); i++)
11217 aarch64_set_vec_u64 (cpu, vd, 0, val1);
11218 aarch64_set_vec_u64 (cpu, vd + 1, 0, val2);
11219 aarch64_set_vec_u64 (cpu, vd + 2, 0, val3);
11220 aarch64_set_vec_u64 (cpu, vd + 3, 0, val4);
11236 do_vec_load_store (sim_cpu *cpu)
11238 /* {LD|ST}<N> {Vd..Vd+N}, vaddr
11241 instr[30] = element selector 0=>half, 1=>all elements
11242 instr[29,25] = 00110
11244 instr[23] = 0=>simple, 1=>post
11245 instr[22] = 0=>store, 1=>load
11246 instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR)
11247 instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
11248 11111 (immediate post inc)
11249 instr[15,12] = elements and destinations. eg for load:
11250 0000=>LD4 => load multiple 4-element to
11251 four consecutive registers
11252 0100=>LD3 => load multiple 3-element to
11253 three consecutive registers
11254 1000=>LD2 => load multiple 2-element to
11255 two consecutive registers
11256 0010=>LD1 => load multiple 1-element to
11257 four consecutive registers
11258 0110=>LD1 => load multiple 1-element to
11259 three consecutive registers
11260 1010=>LD1 => load multiple 1-element to
11261 two consecutive registers
11262 0111=>LD1 => load multiple 1-element to
11266 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11267 10=> word(s), 11=> double(d)
11268 instr[9,5] = Vn, can be SP
11277 if (INSTR (31, 31) != 0
11278 || INSTR (29, 25) != 0x06)
11281 type = INSTR (15, 12);
11282 if (type != 0xE && type != 0xE && INSTR (21, 21) != 0)
11285 post = INSTR (23, 23);
11286 load = INSTR (22, 22);
11288 address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
11292 unsigned vm = INSTR (20, 16);
11296 unsigned sizeof_operation;
11300 case 0: sizeof_operation = 32; break;
11301 case 4: sizeof_operation = 24; break;
11302 case 8: sizeof_operation = 16; break;
11305 sizeof_operation = INSTR (21, 21) ? 2 : 1;
11306 sizeof_operation <<= INSTR (11, 10);
11310 sizeof_operation = INSTR (21, 21) ? 8 : 4;
11311 sizeof_operation <<= INSTR (11, 10);
11315 /* One register, immediate offset variant. */
11316 sizeof_operation = 8;
11320 /* Two registers, immediate offset variant. */
11321 sizeof_operation = 16;
11325 /* Three registers, immediate offset variant. */
11326 sizeof_operation = 24;
11330 /* Four registers, immediate offset variant. */
11331 sizeof_operation = 32;
11338 if (INSTR (30, 30))
11339 sizeof_operation *= 2;
11341 aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
11344 aarch64_set_reg_u64 (cpu, vn, SP_OK,
11345 address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
11349 NYI_assert (20, 16, 0);
11356 case 0: LD4 (cpu, address); return;
11357 case 4: LD3 (cpu, address); return;
11358 case 8: LD2 (cpu, address); return;
11359 case 2: LD1_4 (cpu, address); return;
11360 case 6: LD1_3 (cpu, address); return;
11361 case 10: LD1_2 (cpu, address); return;
11362 case 7: LD1_1 (cpu, address); return;
11365 case 0xC: do_vec_LDnR (cpu, address); return;
11375 case 0: ST4 (cpu, address); return;
11376 case 4: ST3 (cpu, address); return;
11377 case 8: ST2 (cpu, address); return;
11378 case 2: ST1_4 (cpu, address); return;
11379 case 6: ST1_3 (cpu, address); return;
11380 case 10: ST1_2 (cpu, address); return;
11381 case 7: ST1_1 (cpu, address); return;
11388 dexLdSt (sim_cpu *cpu)
11390 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
11391 assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
11392 group == GROUP_LDST_1100 || group == GROUP_LDST_1110
11393 bits [29,28:26] of a LS are the secondary dispatch vector. */
11394 uint32_t group2 = dispatchLS (aarch64_get_instr (cpu));
11399 dexLoadExclusive (cpu); return;
11403 dexLoadLiteral (cpu); return;
11407 dexLoadOther (cpu); return;
11409 case LS_ADVSIMD_001:
11410 do_vec_load_store (cpu); return;
11413 dex_load_store_pair_gr (cpu); return;
11416 dex_load_store_pair_fp (cpu); return;
11419 /* Should never reach here. */
11424 /* Specific decode and execute for group Data Processing Register. */
11427 dexLogicalShiftedRegister (sim_cpu *cpu)
11429 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
11431 instr[28:24] = 01010
11432 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
11435 instr[15,10] = count : must be 0xxxxx for 32 bit
11439 uint32_t size = INSTR (31, 31);
11440 Shift shiftType = INSTR (23, 22);
11441 uint32_t count = INSTR (15, 10);
11443 /* 32 bit operations must have count[5] = 0.
11444 or else we have an UNALLOC. */
11445 if (size == 0 && uimm (count, 5, 5))
11448 /* Dispatch on size:op:N. */
11449 switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
11451 case 0: and32_shift (cpu, shiftType, count); return;
11452 case 1: bic32_shift (cpu, shiftType, count); return;
11453 case 2: orr32_shift (cpu, shiftType, count); return;
11454 case 3: orn32_shift (cpu, shiftType, count); return;
11455 case 4: eor32_shift (cpu, shiftType, count); return;
11456 case 5: eon32_shift (cpu, shiftType, count); return;
11457 case 6: ands32_shift (cpu, shiftType, count); return;
11458 case 7: bics32_shift (cpu, shiftType, count); return;
11459 case 8: and64_shift (cpu, shiftType, count); return;
11460 case 9: bic64_shift (cpu, shiftType, count); return;
11461 case 10:orr64_shift (cpu, shiftType, count); return;
11462 case 11:orn64_shift (cpu, shiftType, count); return;
11463 case 12:eor64_shift (cpu, shiftType, count); return;
11464 case 13:eon64_shift (cpu, shiftType, count); return;
11465 case 14:ands64_shift (cpu, shiftType, count); return;
11466 case 15:bics64_shift (cpu, shiftType, count); return;
11470 /* 32 bit conditional select. */
11472 csel32 (sim_cpu *cpu, CondCode cc)
11474 unsigned rm = INSTR (20, 16);
11475 unsigned rn = INSTR (9, 5);
11476 unsigned rd = INSTR (4, 0);
11478 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11479 testConditionCode (cpu, cc)
11480 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
11481 : aarch64_get_reg_u32 (cpu, rm, NO_SP));
11484 /* 64 bit conditional select. */
11486 csel64 (sim_cpu *cpu, CondCode cc)
11488 unsigned rm = INSTR (20, 16);
11489 unsigned rn = INSTR (9, 5);
11490 unsigned rd = INSTR (4, 0);
11492 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11493 testConditionCode (cpu, cc)
11494 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
11495 : aarch64_get_reg_u64 (cpu, rm, NO_SP));
11498 /* 32 bit conditional increment. */
11500 csinc32 (sim_cpu *cpu, CondCode cc)
11502 unsigned rm = INSTR (20, 16);
11503 unsigned rn = INSTR (9, 5);
11504 unsigned rd = INSTR (4, 0);
11506 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11507 testConditionCode (cpu, cc)
11508 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
11509 : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
11512 /* 64 bit conditional increment. */
11514 csinc64 (sim_cpu *cpu, CondCode cc)
11516 unsigned rm = INSTR (20, 16);
11517 unsigned rn = INSTR (9, 5);
11518 unsigned rd = INSTR (4, 0);
11520 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11521 testConditionCode (cpu, cc)
11522 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
11523 : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
11526 /* 32 bit conditional invert. */
11528 csinv32 (sim_cpu *cpu, CondCode cc)
11530 unsigned rm = INSTR (20, 16);
11531 unsigned rn = INSTR (9, 5);
11532 unsigned rd = INSTR (4, 0);
11534 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11535 testConditionCode (cpu, cc)
11536 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
11537 : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
11540 /* 64 bit conditional invert. */
11542 csinv64 (sim_cpu *cpu, CondCode cc)
11544 unsigned rm = INSTR (20, 16);
11545 unsigned rn = INSTR (9, 5);
11546 unsigned rd = INSTR (4, 0);
11548 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11549 testConditionCode (cpu, cc)
11550 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
11551 : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
11554 /* 32 bit conditional negate. */
11556 csneg32 (sim_cpu *cpu, CondCode cc)
11558 unsigned rm = INSTR (20, 16);
11559 unsigned rn = INSTR (9, 5);
11560 unsigned rd = INSTR (4, 0);
11562 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11563 testConditionCode (cpu, cc)
11564 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
11565 : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
11568 /* 64 bit conditional negate. */
11570 csneg64 (sim_cpu *cpu, CondCode cc)
11572 unsigned rm = INSTR (20, 16);
11573 unsigned rn = INSTR (9, 5);
11574 unsigned rd = INSTR (4, 0);
11576 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11577 testConditionCode (cpu, cc)
11578 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
11579 : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
11583 dexCondSelect (sim_cpu *cpu)
11585 /* instr[28,21] = 11011011
11586 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
11587 instr[30:11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
11588 100 ==> CSINV, 101 ==> CSNEG,
11590 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
11591 instr[15,12] = cond
11592 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC */
11594 CondCode cc = INSTR (15, 12);
11595 uint32_t S = INSTR (29, 29);
11596 uint32_t op2 = INSTR (11, 10);
11604 switch ((INSTR (31, 30) << 1) | op2)
11606 case 0: csel32 (cpu, cc); return;
11607 case 1: csinc32 (cpu, cc); return;
11608 case 2: csinv32 (cpu, cc); return;
11609 case 3: csneg32 (cpu, cc); return;
11610 case 4: csel64 (cpu, cc); return;
11611 case 5: csinc64 (cpu, cc); return;
11612 case 6: csinv64 (cpu, cc); return;
11613 case 7: csneg64 (cpu, cc); return;
11617 /* Some helpers for counting leading 1 or 0 bits. */
11619 /* Counts the number of leading bits which are the same
11620 in a 32 bit value in the range 1 to 32. */
11622 leading32 (uint32_t value)
11624 int32_t mask= 0xffff0000;
11625 uint32_t count= 16; /* Counts number of bits set in mask. */
11626 uint32_t lo = 1; /* Lower bound for number of sign bits. */
11627 uint32_t hi = 32; /* Upper bound for number of sign bits. */
11629 while (lo + 1 < hi)
11631 int32_t test = (value & mask);
11633 if (test == 0 || test == mask)
11636 count = (lo + hi) / 2;
11637 mask >>= (count - lo);
11642 count = (lo + hi) / 2;
11643 mask <<= hi - count;
11652 test = (value & mask);
11654 if (test == 0 || test == mask)
11663 /* Counts the number of leading bits which are the same
11664 in a 64 bit value in the range 1 to 64. */
11666 leading64 (uint64_t value)
11668 int64_t mask= 0xffffffff00000000LL;
11669 uint64_t count = 32; /* Counts number of bits set in mask. */
11670 uint64_t lo = 1; /* Lower bound for number of sign bits. */
11671 uint64_t hi = 64; /* Upper bound for number of sign bits. */
11673 while (lo + 1 < hi)
11675 int64_t test = (value & mask);
11677 if (test == 0 || test == mask)
11680 count = (lo + hi) / 2;
11681 mask >>= (count - lo);
11686 count = (lo + hi) / 2;
11687 mask <<= hi - count;
11696 test = (value & mask);
11698 if (test == 0 || test == mask)
11707 /* Bit operations. */
11708 /* N.B register args may not be SP. */
11710 /* 32 bit count leading sign bits. */
11712 cls32 (sim_cpu *cpu)
11714 unsigned rn = INSTR (9, 5);
11715 unsigned rd = INSTR (4, 0);
11717 /* N.B. the result needs to exclude the leading bit. */
11718 aarch64_set_reg_u64
11719 (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
11722 /* 64 bit count leading sign bits. */
11724 cls64 (sim_cpu *cpu)
11726 unsigned rn = INSTR (9, 5);
11727 unsigned rd = INSTR (4, 0);
11729 /* N.B. the result needs to exclude the leading bit. */
11730 aarch64_set_reg_u64
11731 (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
11734 /* 32 bit count leading zero bits. */
11736 clz32 (sim_cpu *cpu)
11738 unsigned rn = INSTR (9, 5);
11739 unsigned rd = INSTR (4, 0);
11740 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
11742 /* if the sign (top) bit is set then the count is 0. */
11743 if (pick32 (value, 31, 31))
11744 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
11746 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
11749 /* 64 bit count leading zero bits. */
11751 clz64 (sim_cpu *cpu)
11753 unsigned rn = INSTR (9, 5);
11754 unsigned rd = INSTR (4, 0);
11755 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
11757 /* if the sign (top) bit is set then the count is 0. */
11758 if (pick64 (value, 63, 63))
11759 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
11761 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
11764 /* 32 bit reverse bits. */
11766 rbit32 (sim_cpu *cpu)
11768 unsigned rn = INSTR (9, 5);
11769 unsigned rd = INSTR (4, 0);
11770 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
11771 uint32_t result = 0;
11774 for (i = 0; i < 32; i++)
11777 result |= (value & 1);
11780 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
11783 /* 64 bit reverse bits. */
11785 rbit64 (sim_cpu *cpu)
11787 unsigned rn = INSTR (9, 5);
11788 unsigned rd = INSTR (4, 0);
11789 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
11790 uint64_t result = 0;
11793 for (i = 0; i < 64; i++)
11796 result |= (value & 1UL);
11799 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
11802 /* 32 bit reverse bytes. */
11804 rev32 (sim_cpu *cpu)
11806 unsigned rn = INSTR (9, 5);
11807 unsigned rd = INSTR (4, 0);
11808 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
11809 uint32_t result = 0;
11812 for (i = 0; i < 4; i++)
11815 result |= (value & 0xff);
11818 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
11821 /* 64 bit reverse bytes. */
11823 rev64 (sim_cpu *cpu)
11825 unsigned rn = INSTR (9, 5);
11826 unsigned rd = INSTR (4, 0);
11827 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
11828 uint64_t result = 0;
11831 for (i = 0; i < 8; i++)
11834 result |= (value & 0xffULL);
11837 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
11840 /* 32 bit reverse shorts. */
11841 /* N.B.this reverses the order of the bytes in each half word. */
11843 revh32 (sim_cpu *cpu)
11845 unsigned rn = INSTR (9, 5);
11846 unsigned rd = INSTR (4, 0);
11847 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
11848 uint32_t result = 0;
11851 for (i = 0; i < 2; i++)
11854 result |= (value & 0x00ff00ff);
11857 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
11860 /* 64 bit reverse shorts. */
11861 /* N.B.this reverses the order of the bytes in each half word. */
11863 revh64 (sim_cpu *cpu)
11865 unsigned rn = INSTR (9, 5);
11866 unsigned rd = INSTR (4, 0);
11867 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
11868 uint64_t result = 0;
11871 for (i = 0; i < 2; i++)
11874 result |= (value & 0x00ff00ff00ff00ffULL);
11877 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
11881 dexDataProc1Source (sim_cpu *cpu)
11884 instr[28,21] = 111010110
11885 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
11886 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
11887 instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
11888 instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
11889 000010 ==> REV, 000011 ==> UNALLOC
11890 000100 ==> CLZ, 000101 ==> CLS
11892 instr[9,5] = rn : may not be SP
11893 instr[4,0] = rd : may not be SP. */
11895 uint32_t S = INSTR (29, 29);
11896 uint32_t opcode2 = INSTR (20, 16);
11897 uint32_t opcode = INSTR (15, 10);
11898 uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
11911 case 0: rbit32 (cpu); return;
11912 case 1: revh32 (cpu); return;
11913 case 2: rev32 (cpu); return;
11914 case 4: clz32 (cpu); return;
11915 case 5: cls32 (cpu); return;
11916 case 8: rbit64 (cpu); return;
11917 case 9: revh64 (cpu); return;
11918 case 10:rev32 (cpu); return;
11919 case 11:rev64 (cpu); return;
11920 case 12:clz64 (cpu); return;
11921 case 13:cls64 (cpu); return;
11922 default: HALT_UNALLOC;
11927 Shifts by count supplied in register.
11928 N.B register args may not be SP.
11929 These all use the shifted auxiliary function for
11930 simplicity and clarity. Writing the actual shift
11931 inline would avoid a branch and so be faster but
11932 would also necessitate getting signs right. */
11934 /* 32 bit arithmetic shift right. */
11936 asrv32 (sim_cpu *cpu)
11938 unsigned rm = INSTR (20, 16);
11939 unsigned rn = INSTR (9, 5);
11940 unsigned rd = INSTR (4, 0);
11942 aarch64_set_reg_u64
11944 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
11945 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
11948 /* 64 bit arithmetic shift right. */
11950 asrv64 (sim_cpu *cpu)
11952 unsigned rm = INSTR (20, 16);
11953 unsigned rn = INSTR (9, 5);
11954 unsigned rd = INSTR (4, 0);
11956 aarch64_set_reg_u64
11958 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
11959 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
11962 /* 32 bit logical shift left. */
11964 lslv32 (sim_cpu *cpu)
11966 unsigned rm = INSTR (20, 16);
11967 unsigned rn = INSTR (9, 5);
11968 unsigned rd = INSTR (4, 0);
11970 aarch64_set_reg_u64
11972 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
11973 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
11976 /* 64 bit arithmetic shift left. */
11978 lslv64 (sim_cpu *cpu)
11980 unsigned rm = INSTR (20, 16);
11981 unsigned rn = INSTR (9, 5);
11982 unsigned rd = INSTR (4, 0);
11984 aarch64_set_reg_u64
11986 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
11987 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
11990 /* 32 bit logical shift right. */
11992 lsrv32 (sim_cpu *cpu)
11994 unsigned rm = INSTR (20, 16);
11995 unsigned rn = INSTR (9, 5);
11996 unsigned rd = INSTR (4, 0);
11998 aarch64_set_reg_u64
12000 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
12001 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12004 /* 64 bit logical shift right. */
12006 lsrv64 (sim_cpu *cpu)
12008 unsigned rm = INSTR (20, 16);
12009 unsigned rn = INSTR (9, 5);
12010 unsigned rd = INSTR (4, 0);
12012 aarch64_set_reg_u64
12014 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
12015 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12018 /* 32 bit rotate right. */
12020 rorv32 (sim_cpu *cpu)
12022 unsigned rm = INSTR (20, 16);
12023 unsigned rn = INSTR (9, 5);
12024 unsigned rd = INSTR (4, 0);
12026 aarch64_set_reg_u64
12028 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
12029 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12032 /* 64 bit rotate right. */
12034 rorv64 (sim_cpu *cpu)
12036 unsigned rm = INSTR (20, 16);
12037 unsigned rn = INSTR (9, 5);
12038 unsigned rd = INSTR (4, 0);
12040 aarch64_set_reg_u64
12042 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
12043 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12049 /* 32 bit signed divide. */
12051 cpuiv32 (sim_cpu *cpu)
12053 unsigned rm = INSTR (20, 16);
12054 unsigned rn = INSTR (9, 5);
12055 unsigned rd = INSTR (4, 0);
12056 /* N.B. the pseudo-code does the divide using 64 bit data. */
12057 /* TODO : check that this rounds towards zero as required. */
12058 int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
12059 int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);
12061 aarch64_set_reg_s64 (cpu, rd, NO_SP,
12062 divisor ? ((int32_t) (dividend / divisor)) : 0);
12065 /* 64 bit signed divide. */
12067 cpuiv64 (sim_cpu *cpu)
12069 unsigned rm = INSTR (20, 16);
12070 unsigned rn = INSTR (9, 5);
12071 unsigned rd = INSTR (4, 0);
12073 /* TODO : check that this rounds towards zero as required. */
12074 int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
12076 aarch64_set_reg_s64
12078 divisor ? (aarch64_get_reg_s64 (cpu, rn, NO_SP) / divisor) : 0);
12081 /* 32 bit unsigned divide. */
12083 udiv32 (sim_cpu *cpu)
12085 unsigned rm = INSTR (20, 16);
12086 unsigned rn = INSTR (9, 5);
12087 unsigned rd = INSTR (4, 0);
12089 /* N.B. the pseudo-code does the divide using 64 bit data. */
12090 uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12091 uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);
12093 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12094 divisor ? (uint32_t) (dividend / divisor) : 0);
12097 /* 64 bit unsigned divide. */
12099 udiv64 (sim_cpu *cpu)
12101 unsigned rm = INSTR (20, 16);
12102 unsigned rn = INSTR (9, 5);
12103 unsigned rd = INSTR (4, 0);
12105 /* TODO : check that this rounds towards zero as required. */
12106 uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12108 aarch64_set_reg_u64
12110 divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
12114 dexDataProc2Source (sim_cpu *cpu)
12116 /* assert instr[30] == 0
12117 instr[28,21] == 11010110
12118 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12119 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12120 instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> CPUIV,
12121 001000 ==> LSLV, 001001 ==> LSRV
12122 001010 ==> ASRV, 001011 ==> RORV
12126 uint32_t S = INSTR (29, 29);
12127 uint32_t opcode = INSTR (15, 10);
12135 dispatch = ( (INSTR (31, 31) << 3)
12136 | (uimm (opcode, 3, 3) << 2)
12137 | uimm (opcode, 1, 0));
12140 case 2: udiv32 (cpu); return;
12141 case 3: cpuiv32 (cpu); return;
12142 case 4: lslv32 (cpu); return;
12143 case 5: lsrv32 (cpu); return;
12144 case 6: asrv32 (cpu); return;
12145 case 7: rorv32 (cpu); return;
12146 case 10: udiv64 (cpu); return;
12147 case 11: cpuiv64 (cpu); return;
12148 case 12: lslv64 (cpu); return;
12149 case 13: lsrv64 (cpu); return;
12150 case 14: asrv64 (cpu); return;
12151 case 15: rorv64 (cpu); return;
12152 default: HALT_UNALLOC;
12159 /* 32 bit multiply and add. */
12161 madd32 (sim_cpu *cpu)
12163 unsigned rm = INSTR (20, 16);
12164 unsigned ra = INSTR (14, 10);
12165 unsigned rn = INSTR (9, 5);
12166 unsigned rd = INSTR (4, 0);
12168 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12169 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12170 + aarch64_get_reg_u32 (cpu, rn, NO_SP)
12171 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12174 /* 64 bit multiply and add. */
12176 madd64 (sim_cpu *cpu)
12178 unsigned rm = INSTR (20, 16);
12179 unsigned ra = INSTR (14, 10);
12180 unsigned rn = INSTR (9, 5);
12181 unsigned rd = INSTR (4, 0);
12183 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12184 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12185 + aarch64_get_reg_u64 (cpu, rn, NO_SP)
12186 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
12189 /* 32 bit multiply and sub. */
12191 msub32 (sim_cpu *cpu)
12193 unsigned rm = INSTR (20, 16);
12194 unsigned ra = INSTR (14, 10);
12195 unsigned rn = INSTR (9, 5);
12196 unsigned rd = INSTR (4, 0);
12198 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12199 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12200 - aarch64_get_reg_u32 (cpu, rn, NO_SP)
12201 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12204 /* 64 bit multiply and sub. */
12206 msub64 (sim_cpu *cpu)
12208 unsigned rm = INSTR (20, 16);
12209 unsigned ra = INSTR (14, 10);
12210 unsigned rn = INSTR (9, 5);
12211 unsigned rd = INSTR (4, 0);
12213 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12214 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12215 - aarch64_get_reg_u64 (cpu, rn, NO_SP)
12216 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
12219 /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit. */
12221 smaddl (sim_cpu *cpu)
12223 unsigned rm = INSTR (20, 16);
12224 unsigned ra = INSTR (14, 10);
12225 unsigned rn = INSTR (9, 5);
12226 unsigned rd = INSTR (4, 0);
12228 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12229 obtain a 64 bit product. */
12230 aarch64_set_reg_s64
12232 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12233 + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12234 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12237 /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
12239 smsubl (sim_cpu *cpu)
12241 unsigned rm = INSTR (20, 16);
12242 unsigned ra = INSTR (14, 10);
12243 unsigned rn = INSTR (9, 5);
12244 unsigned rd = INSTR (4, 0);
12246 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12247 obtain a 64 bit product. */
12248 aarch64_set_reg_s64
12250 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12251 - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12252 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12255 /* Integer Multiply/Divide. */
12257 /* First some macros and a helper function. */
12258 /* Macros to test or access elements of 64 bit words. */
12260 /* Mask used to access lo 32 bits of 64 bit unsigned int. */
12261 #define LOW_WORD_MASK ((1ULL << 32) - 1)
12262 /* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
12263 #define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
12264 /* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
12265 #define highWordToU64(_value_u64) ((_value_u64) >> 32)
12267 /* Offset of sign bit in 64 bit signed integger. */
12268 #define SIGN_SHIFT_U64 63
12269 /* The sign bit itself -- also identifies the minimum negative int value. */
12270 #define SIGN_BIT_U64 (1UL << SIGN_SHIFT_U64)
12271 /* Return true if a 64 bit signed int presented as an unsigned int is the
12272 most negative value. */
12273 #define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
12274 /* Return true (non-zero) if a 64 bit signed int presented as an unsigned
12275 int has its sign bit set to false. */
12276 #define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
12277 /* Return 1L or -1L according to whether a 64 bit signed int presented as
12278 an unsigned int has its sign bit set or not. */
12279 #define signOfU64(_value_u64) (1L + (((value_u64) >> SIGN_SHIFT_U64) * -2L)
12280 /* Clear the sign bit of a 64 bit signed int presented as an unsigned int. */
12281 #define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
12283 /* Multiply two 64 bit ints and return.
12284 the hi 64 bits of the 128 bit product. */
12287 mul64hi (uint64_t value1, uint64_t value2)
12289 uint64_t resultmid1;
12291 uint64_t value1_lo = lowWordToU64 (value1);
12292 uint64_t value1_hi = highWordToU64 (value1) ;
12293 uint64_t value2_lo = lowWordToU64 (value2);
12294 uint64_t value2_hi = highWordToU64 (value2);
12296 /* Cross-multiply and collect results. */
12298 uint64_t xproductlo = value1_lo * value2_lo;
12299 uint64_t xproductmid1 = value1_lo * value2_hi;
12300 uint64_t xproductmid2 = value1_hi * value2_lo;
12301 uint64_t xproducthi = value1_hi * value2_hi;
12302 uint64_t carry = 0;
12303 /* Start accumulating 64 bit results. */
12304 /* Drop bottom half of lowest cross-product. */
12305 uint64_t resultmid = xproductlo >> 32;
12306 /* Add in middle products. */
12307 resultmid = resultmid + xproductmid1;
12309 /* Check for overflow. */
12310 if (resultmid < xproductmid1)
12311 /* Carry over 1 into top cross-product. */
12314 resultmid1 = resultmid + xproductmid2;
12316 /* Check for overflow. */
12317 if (resultmid1 < xproductmid2)
12318 /* Carry over 1 into top cross-product. */
12321 /* Drop lowest 32 bits of middle cross-product. */
12322 result = resultmid1 >> 32;
12324 /* Add top cross-product plus and any carry. */
12325 result += xproducthi + carry;
12330 /* Signed multiply high, source, source2 :
12331 64 bit, dest <-- high 64-bit of result. */
12333 smulh (sim_cpu *cpu)
12337 unsigned rm = INSTR (20, 16);
12338 unsigned rn = INSTR (9, 5);
12339 unsigned rd = INSTR (4, 0);
12340 GReg ra = INSTR (14, 10);
12341 int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12342 int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12345 int64_t signum = 1;
12350 /* Convert to unsigned and use the unsigned mul64hi routine
12351 the fix the sign up afterwards. */
12372 uresult = mul64hi (uvalue1, uvalue2);
12376 aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
12379 /* Unsigned multiply add long -- source, source2 :
12380 32 bit, source3 : 64 bit. */
12382 umaddl (sim_cpu *cpu)
12384 unsigned rm = INSTR (20, 16);
12385 unsigned ra = INSTR (14, 10);
12386 unsigned rn = INSTR (9, 5);
12387 unsigned rd = INSTR (4, 0);
12389 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12390 obtain a 64 bit product. */
12391 aarch64_set_reg_u64
12393 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12394 + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
12395 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
12398 /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
12400 umsubl (sim_cpu *cpu)
12402 unsigned rm = INSTR (20, 16);
12403 unsigned ra = INSTR (14, 10);
12404 unsigned rn = INSTR (9, 5);
12405 unsigned rd = INSTR (4, 0);
12407 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12408 obtain a 64 bit product. */
12409 aarch64_set_reg_u64
12411 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12412 - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
12413 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
12416 /* Unsigned multiply high, source, source2 :
12417 64 bit, dest <-- high 64-bit of result. */
12419 umulh (sim_cpu *cpu)
12421 unsigned rm = INSTR (20, 16);
12422 unsigned rn = INSTR (9, 5);
12423 unsigned rd = INSTR (4, 0);
12424 GReg ra = INSTR (14, 10);
12429 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12430 mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
12431 aarch64_get_reg_u64 (cpu, rm, NO_SP)));
12435 dexDataProc3Source (sim_cpu *cpu)
12437 /* assert instr[28,24] == 11011. */
12438 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
12439 instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
12440 instr[23,21] = op31 : 111 ==> UNALLOC, o2 ==> ok
12441 instr[15] = o0 : 0/1 ==> ok
12442 instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB, (32/64 bit)
12443 0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
12444 0100 ==> SMULH, (64 bit only)
12445 1010 ==> UMADDL, 1011 ==> UNSUBL, (64 bit only)
12446 1100 ==> UMULH (64 bit only)
12450 uint32_t size = INSTR (31, 31);
12451 uint32_t op54 = INSTR (30, 29);
12452 uint32_t op31 = INSTR (23, 21);
12453 uint32_t o0 = INSTR (15, 15);
12470 dispatch = (op31 << 1) | o0;
12474 case 0: madd64 (cpu); return;
12475 case 1: msub64 (cpu); return;
12476 case 2: smaddl (cpu); return;
12477 case 3: smsubl (cpu); return;
12478 case 4: smulh (cpu); return;
12479 case 10: umaddl (cpu); return;
12480 case 11: umsubl (cpu); return;
12481 case 12: umulh (cpu); return;
12482 default: HALT_UNALLOC;
12487 dexDPReg (sim_cpu *cpu)
12489 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
12490 assert group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
12491 bits [28:24:21] of a DPReg are the secondary dispatch vector. */
12492 uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
12496 case DPREG_LOG_000:
12497 case DPREG_LOG_001:
12498 dexLogicalShiftedRegister (cpu); return;
12500 case DPREG_ADDSHF_010:
12501 dexAddSubtractShiftedRegister (cpu); return;
12503 case DPREG_ADDEXT_011:
12504 dexAddSubtractExtendedRegister (cpu); return;
12506 case DPREG_ADDCOND_100:
12508 /* This set bundles a variety of different operations. */
12510 /* 1) add/sub w carry. */
12511 uint32_t mask1 = 0x1FE00000U;
12512 uint32_t val1 = 0x1A000000U;
12513 /* 2) cond compare register/immediate. */
12514 uint32_t mask2 = 0x1FE00000U;
12515 uint32_t val2 = 0x1A400000U;
12516 /* 3) cond select. */
12517 uint32_t mask3 = 0x1FE00000U;
12518 uint32_t val3 = 0x1A800000U;
12519 /* 4) data proc 1/2 source. */
12520 uint32_t mask4 = 0x1FE00000U;
12521 uint32_t val4 = 0x1AC00000U;
12523 if ((aarch64_get_instr (cpu) & mask1) == val1)
12524 dexAddSubtractWithCarry (cpu);
12526 else if ((aarch64_get_instr (cpu) & mask2) == val2)
12529 else if ((aarch64_get_instr (cpu) & mask3) == val3)
12530 dexCondSelect (cpu);
12532 else if ((aarch64_get_instr (cpu) & mask4) == val4)
12534 /* Bit 30 is clear for data proc 2 source
12535 and set for data proc 1 source. */
12536 if (aarch64_get_instr (cpu) & (1U << 30))
12537 dexDataProc1Source (cpu);
12539 dexDataProc2Source (cpu);
12543 /* Should not reach here. */
12549 case DPREG_3SRC_110:
12550 dexDataProc3Source (cpu); return;
12552 case DPREG_UNALLOC_101:
12555 case DPREG_3SRC_111:
12556 dexDataProc3Source (cpu); return;
12559 /* Should never reach here. */
12564 /* Unconditional Branch immediate.
12565 Offset is a PC-relative byte offset in the range +/- 128MiB.
12566 The offset is assumed to be raw from the decode i.e. the
12567 simulator is expected to scale them from word offsets to byte. */
12569 /* Unconditional branch. */
12571 buc (sim_cpu *cpu, int32_t offset)
12573 aarch64_set_next_PC_by_offset (cpu, offset);
12576 static unsigned stack_depth = 0;
12578 /* Unconditional branch and link -- writes return PC to LR. */
12580 bl (sim_cpu *cpu, int32_t offset)
12582 aarch64_save_LR (cpu);
12583 aarch64_set_next_PC_by_offset (cpu, offset);
12585 if (TRACE_BRANCH_P (cpu))
12589 " %*scall %" PRIx64 " [%s]"
12590 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
12591 stack_depth, " ", aarch64_get_next_PC (cpu),
12592 aarch64_get_func (aarch64_get_next_PC (cpu)),
12593 aarch64_get_reg_u64 (cpu, 0, NO_SP),
12594 aarch64_get_reg_u64 (cpu, 1, NO_SP),
12595 aarch64_get_reg_u64 (cpu, 2, NO_SP)
12600 /* Unconditional Branch register.
12601 Branch/return address is in source register. */
12603 /* Unconditional branch. */
12607 unsigned rn = INSTR (9, 5);
12608 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
12611 /* Unconditional branch and link -- writes return PC to LR. */
12615 unsigned rn = INSTR (9, 5);
12617 /* The pseudo code in the spec says we update LR before fetching.
12618 the value from the rn. */
12619 aarch64_save_LR (cpu);
12620 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
12622 if (TRACE_BRANCH_P (cpu))
12626 " %*scall %" PRIx64 " [%s]"
12627 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
12628 stack_depth, " ", aarch64_get_next_PC (cpu),
12629 aarch64_get_func (aarch64_get_next_PC (cpu)),
12630 aarch64_get_reg_u64 (cpu, 0, NO_SP),
12631 aarch64_get_reg_u64 (cpu, 1, NO_SP),
12632 aarch64_get_reg_u64 (cpu, 2, NO_SP)
12637 /* Return -- assembler will default source to LR this is functionally
12638 equivalent to br but, presumably, unlike br it side effects the
12639 branch predictor. */
12643 unsigned rn = INSTR (9, 5);
12644 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
12646 if (TRACE_BRANCH_P (cpu))
12649 " %*sreturn [result: %" PRIx64 "]",
12650 stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
12655 /* NOP -- we implement this and call it from the decode in case we
12656 want to intercept it later. */
12663 /* Data synchronization barrier. */
12670 /* Data memory barrier. */
12677 /* Instruction synchronization barrier. */
12685 dexBranchImmediate (sim_cpu *cpu)
12687 /* assert instr[30,26] == 00101
12688 instr[31] ==> 0 == B, 1 == BL
12689 instr[25,0] == imm26 branch offset counted in words. */
12691 uint32_t top = INSTR (31, 31);
12692 /* We have a 26 byte signed word offset which we need to pass to the
12693 execute routine as a signed byte offset. */
12694 int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
12702 /* Control Flow. */
12704 /* Conditional branch
12706 Offset is a PC-relative byte offset in the range +/- 1MiB pos is
12707 a bit position in the range 0 .. 63
12709 cc is a CondCode enum value as pulled out of the decode
12711 N.B. any offset register (source) can only be Xn or Wn. */
12714 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
12716 /* the test returns TRUE if CC is met. */
12717 if (testConditionCode (cpu, cc))
12718 aarch64_set_next_PC_by_offset (cpu, offset);
12721 /* 32 bit branch on register non-zero. */
12723 cbnz32 (sim_cpu *cpu, int32_t offset)
12725 unsigned rt = INSTR (4, 0);
12727 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
12728 aarch64_set_next_PC_by_offset (cpu, offset);
12731 /* 64 bit branch on register zero. */
12733 cbnz (sim_cpu *cpu, int32_t offset)
12735 unsigned rt = INSTR (4, 0);
12737 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
12738 aarch64_set_next_PC_by_offset (cpu, offset);
12741 /* 32 bit branch on register non-zero. */
12743 cbz32 (sim_cpu *cpu, int32_t offset)
12745 unsigned rt = INSTR (4, 0);
12747 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
12748 aarch64_set_next_PC_by_offset (cpu, offset);
12751 /* 64 bit branch on register zero. */
12753 cbz (sim_cpu *cpu, int32_t offset)
12755 unsigned rt = INSTR (4, 0);
12757 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
12758 aarch64_set_next_PC_by_offset (cpu, offset);
12761 /* Branch on register bit test non-zero -- one size fits all. */
12763 tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
12765 unsigned rt = INSTR (4, 0);
12767 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (1 << pos))
12768 aarch64_set_next_PC_by_offset (cpu, offset);
12771 /* branch on register bit test zero -- one size fits all. */
12773 tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
12775 unsigned rt = INSTR (4, 0);
12777 if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (1 << pos)))
12778 aarch64_set_next_PC_by_offset (cpu, offset);
12782 dexCompareBranchImmediate (sim_cpu *cpu)
12784 /* instr[30,25] = 01 1010
12785 instr[31] = size : 0 ==> 32, 1 ==> 64
12786 instr[24] = op : 0 ==> CBZ, 1 ==> CBNZ
12787 instr[23,5] = simm19 branch offset counted in words
12790 uint32_t size = INSTR (31, 31);
12791 uint32_t op = INSTR (24, 24);
12792 int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
12797 cbz32 (cpu, offset);
12799 cbnz32 (cpu, offset);
12806 cbnz (cpu, offset);
12811 dexTestBranchImmediate (sim_cpu *cpu)
12813 /* instr[31] = b5 : bit 5 of test bit idx
12814 instr[30,25] = 01 1011
12815 instr[24] = op : 0 ==> TBZ, 1 == TBNZ
12816 instr[23,19] = b40 : bits 4 to 0 of test bit idx
12817 instr[18,5] = simm14 : signed offset counted in words
12818 instr[4,0] = uimm5 */
12820 uint32_t pos = ((INSTR (31, 31) << 4)
12822 int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
12824 NYI_assert (30, 25, 0x1b);
12826 if (INSTR (24, 24) == 0)
12827 tbz (cpu, pos, offset);
12829 tbnz (cpu, pos, offset);
12833 dexCondBranchImmediate (sim_cpu *cpu)
12835 /* instr[31,25] = 010 1010
12836 instr[24] = op1; op => 00 ==> B.cond
12837 instr[23,5] = simm19 : signed offset counted in words
12839 instr[3,0] = cond */
12842 uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
12844 NYI_assert (31, 25, 0x2a);
12849 offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
12851 bcc (cpu, offset, INSTR (3, 0));
12855 dexBranchRegister (sim_cpu *cpu)
12857 /* instr[31,25] = 110 1011
12858 instr[24,21] = op : 0 ==> BR, 1 => BLR, 2 => RET, 3 => ERET, 4 => DRPS
12859 instr[20,16] = op2 : must be 11111
12860 instr[15,10] = op3 : must be 000000
12861 instr[4,0] = op2 : must be 11111. */
12863 uint32_t op = INSTR (24, 21);
12864 uint32_t op2 = INSTR (20, 16);
12865 uint32_t op3 = INSTR (15, 10);
12866 uint32_t op4 = INSTR (4, 0);
12868 NYI_assert (31, 25, 0x6b);
12870 if (op2 != 0x1F || op3 != 0 || op4 != 0)
12884 /* ERET and DRPS accept 0b11111 for rn = instr [4,0]. */
12885 /* anything else is unallocated. */
12886 uint32_t rn = INSTR (4, 0);
12891 if (op == 4 || op == 5)
12898 /* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
12899 but this may not be available. So instead we define the values we need
12901 #define AngelSVC_Reason_Open 0x01
12902 #define AngelSVC_Reason_Close 0x02
12903 #define AngelSVC_Reason_Write 0x05
12904 #define AngelSVC_Reason_Read 0x06
12905 #define AngelSVC_Reason_IsTTY 0x09
12906 #define AngelSVC_Reason_Seek 0x0A
12907 #define AngelSVC_Reason_FLen 0x0C
12908 #define AngelSVC_Reason_Remove 0x0E
12909 #define AngelSVC_Reason_Rename 0x0F
12910 #define AngelSVC_Reason_Clock 0x10
12911 #define AngelSVC_Reason_Time 0x11
12912 #define AngelSVC_Reason_System 0x12
12913 #define AngelSVC_Reason_Errno 0x13
12914 #define AngelSVC_Reason_GetCmdLine 0x15
12915 #define AngelSVC_Reason_HeapInfo 0x16
12916 #define AngelSVC_Reason_ReportException 0x18
12917 #define AngelSVC_Reason_Elapsed 0x30
12921 handle_halt (sim_cpu *cpu, uint32_t val)
12923 uint64_t result = 0;
12927 TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
12928 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
12929 sim_stopped, SIM_SIGTRAP);
12932 /* We have encountered an Angel SVC call. See if we can process it. */
12933 switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
12935 case AngelSVC_Reason_HeapInfo:
12937 /* Get the values. */
12938 uint64_t stack_top = aarch64_get_stack_start (cpu);
12939 uint64_t heap_base = aarch64_get_heap_start (cpu);
12941 /* Get the pointer */
12942 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
12943 ptr = aarch64_get_mem_u64 (cpu, ptr);
12945 /* Fill in the memory block. */
12946 /* Start addr of heap. */
12947 aarch64_set_mem_u64 (cpu, ptr + 0, heap_base);
12948 /* End addr of heap. */
12949 aarch64_set_mem_u64 (cpu, ptr + 8, stack_top);
12950 /* Lowest stack addr. */
12951 aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
12952 /* Initial stack addr. */
12953 aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);
12955 TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
12959 case AngelSVC_Reason_Open:
12961 /* Get the pointer */
12962 /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);. */
12963 /* FIXME: For now we just assume that we will only be asked
12964 to open the standard file descriptors. */
12968 TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
12972 case AngelSVC_Reason_Close:
12974 uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
12975 TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
12980 case AngelSVC_Reason_Errno:
12982 TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
12985 case AngelSVC_Reason_Clock:
12987 #ifdef CLOCKS_PER_SEC
12988 (CLOCKS_PER_SEC >= 100)
12989 ? (clock () / (CLOCKS_PER_SEC / 100))
12990 : ((clock () * 100) / CLOCKS_PER_SEC)
12992 /* Presume unix... clock() returns microseconds. */
12996 TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
12999 case AngelSVC_Reason_GetCmdLine:
13001 /* Get the pointer */
13002 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13003 ptr = aarch64_get_mem_u64 (cpu, ptr);
13005 /* FIXME: No command line for now. */
13006 aarch64_set_mem_u64 (cpu, ptr, 0);
13007 TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
13011 case AngelSVC_Reason_IsTTY:
13013 TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
13016 case AngelSVC_Reason_Write:
13018 /* Get the pointer */
13019 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13020 /* Get the write control block. */
13021 uint64_t fd = aarch64_get_mem_u64 (cpu, ptr);
13022 uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
13023 uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);
13025 TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
13026 PRIx64 " on descriptor %" PRIx64,
13031 TRACE_SYSCALL (cpu,
13032 " AngelSVC: Write: Suspiciously long write: %ld",
13034 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13035 sim_stopped, SIM_SIGBUS);
13039 printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
13043 TRACE (cpu, 0, "\n");
13044 sim_io_eprintf (CPU_STATE (cpu), "%.*s",
13045 (int) len, aarch64_get_mem_ptr (cpu, buf));
13046 TRACE (cpu, 0, "\n");
13050 TRACE_SYSCALL (cpu,
13051 " AngelSVC: Write: Unexpected file handle: %d",
13053 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13054 sim_stopped, SIM_SIGABRT);
13059 case AngelSVC_Reason_ReportException:
13061 /* Get the pointer */
13062 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13063 /*ptr = aarch64_get_mem_u64 (cpu, ptr);. */
13064 uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
13065 uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);
13067 TRACE_SYSCALL (cpu,
13068 "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
13071 if (type == 0x20026)
13072 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13073 sim_exited, state);
13075 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13076 sim_stopped, SIM_SIGINT);
13080 case AngelSVC_Reason_Read:
13081 case AngelSVC_Reason_FLen:
13082 case AngelSVC_Reason_Seek:
13083 case AngelSVC_Reason_Remove:
13084 case AngelSVC_Reason_Time:
13085 case AngelSVC_Reason_System:
13086 case AngelSVC_Reason_Rename:
13087 case AngelSVC_Reason_Elapsed:
13089 TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
13090 aarch64_get_reg_u32 (cpu, 0, NO_SP));
13091 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13092 sim_stopped, SIM_SIGTRAP);
13095 aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
13099 dexExcpnGen (sim_cpu *cpu)
13101 /* instr[31:24] = 11010100
13102 instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
13103 010 ==> HLT, 101 ==> DBG GEN EXCPN
13104 instr[20,5] = imm16
13105 instr[4,2] = opc2 000 ==> OK, ow ==> UNALLOC
13106 instr[1,0] = LL : discriminates opc */
13108 uint32_t opc = INSTR (23, 21);
13109 uint32_t imm16 = INSTR (20, 5);
13110 uint32_t opc2 = INSTR (4, 2);
13113 NYI_assert (31, 24, 0xd4);
13120 /* We only implement HLT and BRK for now. */
13121 if (opc == 1 && LL == 0)
13123 TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
13124 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13125 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13128 if (opc == 2 && LL == 0)
13129 handle_halt (cpu, imm16);
13131 else if (opc == 0 || opc == 5)
13138 /* Stub for accessing system registers. */
13141 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13142 unsigned crm, unsigned op2)
13144 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
13145 /* DCZID_EL0 - the Data Cache Zero ID register.
13146 We do not support DC ZVA at the moment, so
13147 we return a value with the disable bit set.
13148 We implement support for the DCZID register since
13149 it is used by the C library's memset function. */
13150 return ((uint64_t) 1) << 4;
13152 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
13153 /* Cache Type Register. */
13154 return 0x80008000UL;
13156 if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
13157 /* TPIDR_EL0 - thread pointer id. */
13158 return aarch64_get_thread_id (cpu);
13160 if (op1 == 3 && crm == 4 && op2 == 0)
13161 return aarch64_get_FPCR (cpu);
13163 if (op1 == 3 && crm == 4 && op2 == 1)
13164 return aarch64_get_FPSR (cpu);
13166 else if (op1 == 3 && crm == 2 && op2 == 0)
13167 return aarch64_get_CPSR (cpu);
13173 system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13174 unsigned crm, unsigned op2, uint64_t val)
13176 if (op1 == 3 && crm == 4 && op2 == 0)
13177 aarch64_set_FPCR (cpu, val);
13179 else if (op1 == 3 && crm == 4 && op2 == 1)
13180 aarch64_set_FPSR (cpu, val);
13182 else if (op1 == 3 && crm == 2 && op2 == 0)
13183 aarch64_set_CPSR (cpu, val);
13190 do_mrs (sim_cpu *cpu)
13192 /* instr[31:20] = 1101 0101 0001 1
13199 unsigned sys_op0 = INSTR (19, 19) + 2;
13200 unsigned sys_op1 = INSTR (18, 16);
13201 unsigned sys_crn = INSTR (15, 12);
13202 unsigned sys_crm = INSTR (11, 8);
13203 unsigned sys_op2 = INSTR (7, 5);
13204 unsigned rt = INSTR (4, 0);
13206 aarch64_set_reg_u64 (cpu, rt, NO_SP,
13207 system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2));
13211 do_MSR_immediate (sim_cpu *cpu)
13213 /* instr[31:19] = 1101 0101 0000 0
13215 instr[15,12] = 0100
13218 instr[4,0] = 1 1111 */
13220 unsigned op1 = INSTR (18, 16);
13221 /*unsigned crm = INSTR (11, 8);*/
13222 unsigned op2 = INSTR (7, 5);
13224 NYI_assert (31, 19, 0x1AA0);
13225 NYI_assert (15, 12, 0x4);
13226 NYI_assert (4, 0, 0x1F);
13231 HALT_NYI; /* set SPSel. */
13238 HALT_NYI; /* set DAIFset. */
13240 HALT_NYI; /* set DAIFclr. */
13249 do_MSR_reg (sim_cpu *cpu)
13251 /* instr[31:20] = 1101 0101 0001
13259 unsigned sys_op0 = INSTR (19, 19) + 2;
13260 unsigned sys_op1 = INSTR (18, 16);
13261 unsigned sys_crn = INSTR (15, 12);
13262 unsigned sys_crm = INSTR (11, 8);
13263 unsigned sys_op2 = INSTR (7, 5);
13264 unsigned rt = INSTR (4, 0);
13266 NYI_assert (31, 20, 0xD51);
13268 system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
13269 aarch64_get_reg_u64 (cpu, rt, NO_SP));
13273 do_SYS (sim_cpu *cpu)
13275 /* instr[31,19] = 1101 0101 0000 1
13281 NYI_assert (31, 19, 0x1AA1);
13283 /* FIXME: For now we just silently accept system ops. */
13287 dexSystem (sim_cpu *cpu)
13289 /* instr[31:22] = 1101 01010 0
13296 instr[4,0] = uimm5 */
13298 /* We are interested in HINT, DSB, DMB and ISB
13300 Hint #0 encodes NOOP (this is the only hint we care about)
13301 L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111,
13302 CRm op2 != 0000 000 OR CRm op2 == 0000 000 || CRm op > 0000 101
13304 DSB, DMB, ISB are data store barrier, data memory barrier and
13305 instruction store barrier, respectively, where
13307 L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
13308 op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
13309 CRm<3:2> ==> domain, CRm<1:0> ==> types,
13310 domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
13311 10 ==> InerShareable, 11 ==> FullSystem
13312 types : 01 ==> Reads, 10 ==> Writes,
13313 11 ==> All, 00 ==> All (domain == FullSystem). */
13315 unsigned rt = INSTR (4, 0);
13317 NYI_assert (31, 22, 0x354);
13319 switch (INSTR (21, 12))
13324 /* NOP has CRm != 0000 OR. */
13325 /* (CRm == 0000 AND (op2 == 000 OR op2 > 101)). */
13326 uint32_t crm = INSTR (11, 8);
13327 uint32_t op2 = INSTR (7, 5);
13329 if (crm != 0 || (op2 == 0 || op2 > 5))
13331 /* Actually call nop method so we can reimplement it later. */
13340 uint32_t op2 = INSTR (7, 5);
13345 case 4: dsb (cpu); return;
13346 case 5: dmb (cpu); return;
13347 case 6: isb (cpu); return;
13348 default: HALT_UNALLOC;
13359 do_SYS (cpu); /* DC is an alias of SYS. */
13363 if (INSTR (21, 20) == 0x1)
13365 else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
13366 do_MSR_immediate (cpu);
13374 dexBr (sim_cpu *cpu)
13376 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13377 assert group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
13378 bits [31,29] of a BrExSys are the secondary dispatch vector. */
13379 uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
13384 return dexBranchImmediate (cpu);
13386 case BR_IMMCMP_001:
13387 /* Compare has bit 25 clear while test has it set. */
13388 if (!INSTR (25, 25))
13389 dexCompareBranchImmediate (cpu);
13391 dexTestBranchImmediate (cpu);
13394 case BR_IMMCOND_010:
13395 /* This is a conditional branch if bit 25 is clear otherwise
13397 if (!INSTR (25, 25))
13398 dexCondBranchImmediate (cpu);
13403 case BR_UNALLOC_011:
13407 dexBranchImmediate (cpu);
13410 case BR_IMMCMP_101:
13411 /* Compare has bit 25 clear while test has it set. */
13412 if (!INSTR (25, 25))
13413 dexCompareBranchImmediate (cpu);
13415 dexTestBranchImmediate (cpu);
13419 /* Unconditional branch reg has bit 25 set. */
13420 if (INSTR (25, 25))
13421 dexBranchRegister (cpu);
13423 /* This includes both Excpn Gen, System and unalloc operations.
13424 We need to decode the Excpn Gen operation BRK so we can plant
13425 debugger entry points.
13426 Excpn Gen operations have instr [24] = 0.
13427 we need to decode at least one of the System operations NOP
13428 which is an alias for HINT #0.
13429 System operations have instr [24,22] = 100. */
13430 else if (INSTR (24, 24) == 0)
13433 else if (INSTR (24, 22) == 4)
13441 case BR_UNALLOC_111:
13445 /* Should never reach here. */
13451 aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
13453 /* We need to check if gdb wants an in here. */
13454 /* checkBreak (cpu);. */
13456 uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
13460 case GROUP_PSEUDO_0000: dexPseudo (cpu); break;
13461 case GROUP_LDST_0100: dexLdSt (cpu); break;
13462 case GROUP_DPREG_0101: dexDPReg (cpu); break;
13463 case GROUP_LDST_0110: dexLdSt (cpu); break;
13464 case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break;
13465 case GROUP_DPIMM_1000: dexDPImm (cpu); break;
13466 case GROUP_DPIMM_1001: dexDPImm (cpu); break;
13467 case GROUP_BREXSYS_1010: dexBr (cpu); break;
13468 case GROUP_BREXSYS_1011: dexBr (cpu); break;
13469 case GROUP_LDST_1100: dexLdSt (cpu); break;
13470 case GROUP_DPREG_1101: dexDPReg (cpu); break;
13471 case GROUP_LDST_1110: dexLdSt (cpu); break;
13472 case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break;
13474 case GROUP_UNALLOC_0001:
13475 case GROUP_UNALLOC_0010:
13476 case GROUP_UNALLOC_0011:
13480 /* Should never reach here. */
13486 aarch64_step (sim_cpu *cpu)
13488 uint64_t pc = aarch64_get_PC (cpu);
13490 if (pc == TOP_LEVEL_RETURN_PC)
13493 aarch64_set_next_PC (cpu, pc + 4);
13494 aarch64_get_instr (cpu) = aarch64_get_mem_u32 (cpu, pc);
13496 TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
13497 aarch64_get_instr (cpu));
13498 TRACE_DISASM (cpu, pc);
13500 aarch64_decode_and_execute (cpu, pc);
13506 aarch64_run (SIM_DESC sd)
13508 sim_cpu *cpu = STATE_CPU (sd, 0);
13510 while (aarch64_step (cpu))
13511 aarch64_update_PC (cpu);
13513 sim_engine_halt (sd, NULL, NULL, aarch64_get_PC (cpu),
13514 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13518 aarch64_init (sim_cpu *cpu, uint64_t pc)
13520 uint64_t sp = aarch64_get_stack_start (cpu);
13522 /* Install SP, FP and PC and set LR to -20
13523 so we can detect a top-level return. */
13524 aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
13525 aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
13526 aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
13527 aarch64_set_next_PC (cpu, pc);
13528 aarch64_update_PC (cpu);
13529 aarch64_init_LIT_table ();