/* simulator.c -- Interface for the AArch64 simulator.

   Copyright (C) 2015-2019 Free Software Foundation, Inc.

   Contributed by Red Hat.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include <sys/types.h>

#include "simulator.h"

#define TST(_flag)   (aarch64_test_CPSR_bit (cpu, _flag))
#define IS_SET(_X)   (TST (( _X )) ? 1 : 0)
#define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)

/* Space saver macro.  */
#define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))

#define HALT_UNALLOC							\
  do									\
    {									\
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));				\
      TRACE_INSN (cpu,							\
		  "Unallocated instruction detected at sim line %d,"	\
		  " exe addr %" PRIx64,					\
		  __LINE__, aarch64_get_PC (cpu));			\
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
		       sim_stopped, SIM_SIGILL);			\
    }									\
  while (0)

#define HALT_NYI							\
  do									\
    {									\
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));				\
      TRACE_INSN (cpu,							\
		  "Unimplemented instruction detected at sim line %d,"	\
		  " exe addr %" PRIx64,					\
		  __LINE__, aarch64_get_PC (cpu));			\
      if (! TRACE_ANY_P (cpu))						\
	sim_io_eprintf (CPU_STATE (cpu), "SIM Error: Unimplemented instruction: %#08x\n", \
			aarch64_get_instr (cpu));			\
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
		       sim_stopped, SIM_SIGABRT);			\
    }									\
  while (0)

#define NYI_assert(HI, LO, EXPECTED)					\
  do									\
    {									\
      if (INSTR ((HI), (LO)) != (EXPECTED))				\
	HALT_NYI;							\
    }									\
  while (0)

/* Helper functions used by expandLogicalImmediate.  */

/* for i = 1, ... N result<i-1> = 1 other bits are zero  */
static inline uint64_t
ones (int N)
{
  return (N == 64 ? (uint64_t)-1UL : ((1UL << N) - 1));
}

/* result<0> to val<N>  */
static inline uint64_t
pickbit (uint64_t val, int N)
{
  return pickbits64 (val, N, N);
}

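/* E.g. ones (3) yields 0x7, and pickbit (0x10, 4) yields 1, i.e. bit 4
   of the value moved down to bit 0.  */
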
static uint64_t
expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
{
  uint64_t mask;
  uint64_t imm;
  unsigned simd_size;

  /* The immediate value is S+1 bits to 1, left rotated by SIMDsize - R
     (in other words, right rotated by R), then replicated.  */
  if (N != 0)
    {
      simd_size = 64;
      mask = 0xffffffffffffffffull;
    }
  else
    {
      switch (S)
	{
	case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32;           break;
	case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
	case 0x30 ... 0x37: /* 110xxx */ simd_size =  8; S &= 0x7; break;
	case 0x38 ... 0x3b: /* 1110xx */ simd_size =  4; S &= 0x3; break;
	case 0x3c ... 0x3d: /* 11110x */ simd_size =  2; S &= 0x1; break;
	default: return 0;
	}
      mask = (1ull << simd_size) - 1;
      /* Top bits are IGNORED.  */
      R &= simd_size - 1;
    }

  /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected.  */
  if (S == simd_size - 1)
    return 0;

  /* S+1 consecutive bits to 1.  */
  /* NOTE: S can't be 63 due to detection above.  */
  imm = (1ull << (S + 1)) - 1;

  /* Rotate to the left by simd_size - R.  */
  if (R != 0)
    imm = ((imm << (simd_size - R)) & mask) | (imm >> R);

  /* Replicate the value according to SIMD size.  Each case deliberately
     falls through to the next, doubling the pattern up until all 64
     bits are filled.  */
  switch (simd_size)
    {
    case  2: imm = (imm <<  2) | imm;
    case  4: imm = (imm <<  4) | imm;
    case  8: imm = (imm <<  8) | imm;
    case 16: imm = (imm << 16) | imm;
    case 32: imm = (imm << 32) | imm;
    case 64: break;
    default: return 0;
    }

  return imm;
}

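/* Worked example: with N == 0, S (imms) == 0x31 and R (immr) == 2 the
   element size is 8 and S reduces to 1, so imm starts as 0b11; rotating
   right by 2 within 8 bits gives 0xc0, and replication produces
   0xc0c0c0c0c0c0c0c0.  */
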
/* Instr[22,10] encodes N, immr and imms.  We want a lookup table
   for each possible combination, i.e. 13 bits worth of int entries.  */
#define  LI_TABLE_SIZE  (1 << 13)
static uint64_t LITable[LI_TABLE_SIZE];

void
aarch64_init_LIT_table (void)
{
  unsigned index;

  for (index = 0; index < LI_TABLE_SIZE; index++)
    {
      uint32_t N    = uimm (index, 12, 12);
      uint32_t immr = uimm (index, 11,  6);
      uint32_t imms = uimm (index,  5,  0);

      LITable [index] = expand_logical_immediate (imms, immr, N);
    }
}

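/* A decoder can then recover the bit mask for a logical immediate
   instruction with a single lookup, e.g. LITable [INSTR (22, 10)];
   a zero entry marks an unallocated encoding, since
   expand_logical_immediate returns 0 for the rejected combinations.  */
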
static void
dexNotify (sim_cpu *cpu)
{
  /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
                           2 ==> exit Java, 3 ==> start next bytecode.  */
  uint32_t type = INSTR (14, 0);

  TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);

  switch (type)
    {
    case 0:
      /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
	 aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    case 1:
      /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
	 aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    case 2:
      /* aarch64_notifyMethodExit ();  */
      break;
    case 3:
      /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
	 aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    }
}

/* secondary decode within top level groups  */

static void
dexPseudo (sim_cpu *cpu)
{
  /* assert instr[28,27] = 00

     We provide 2 pseudo instructions:

     HALT stops execution of the simulator causing an immediate
     return to the x86 code which entered it.

     CALLOUT initiates recursive entry into x86 code.  A register
     argument holds the address of the x86 routine.  Immediate
     values in the instruction identify the number of general
     purpose and floating point register arguments to be passed
     and the type of any value to be returned.  */

  uint32_t PSEUDO_HALT      =  0xE0000000U;
  uint32_t PSEUDO_CALLOUT   =  0x00018000U;
  uint32_t PSEUDO_CALLOUTR  =  0x00018001U;
  uint32_t PSEUDO_NOTIFY    =  0x00014000U;
  uint32_t dispatch;

  if (aarch64_get_instr (cpu) == PSEUDO_HALT)
    {
      TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
		       sim_stopped, SIM_SIGTRAP);
    }

  dispatch = INSTR (31, 15);

  /* We do not handle callouts at the moment.  */
  if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
    {
      TRACE_EVENTS (cpu, " Callout");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
		       sim_stopped, SIM_SIGABRT);
    }

  else if (dispatch == PSEUDO_NOTIFY)
    dexNotify (cpu);

  else
    HALT_UNALLOC;
}

/* Load-store single register (unscaled offset)
   These instructions employ a base register plus an unscaled signed
   9 bit offset.

   N.B. the base register (source) can be Xn or SP.  All other
   registers may not be SP.  */

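/* E.g. ldur32 below implements LDUR Wt, [Xn|SP, #simm9]: an offset of
   -4 reads the word immediately below the base address, with no
   scaling applied to the immediate.  */
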
/* 32 bit load 32 bit unscaled signed 9 bit.  */
static void
ldur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 64 bit load 64 bit unscaled signed 9 bit.  */
static void
ldur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit.  */
static void
ldurb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 32 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 32 bit load zero-extended short unscaled signed 9 bit  */
static void
ldurh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 32 bit load sign-extended short unscaled signed 9 bit  */
static void
ldursh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 64 bit load sign-extended short unscaled signed 9 bit  */
static void
ldursh64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 64 bit load sign-extended word unscaled signed 9 bit  */
static void
ldursw (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The loaded word must be treated as signed so that it
     sign-extends into the upper 32 bits of the target register.  */
  aarch64_set_reg_s64 (cpu, rd, NO_SP, aarch64_get_mem_s32
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* N.B. with stores the value in source is written to the address
   identified by source2 modified by offset.  */

/* 32 bit store 32 bit unscaled signed 9 bit.  */
static void
stur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		       aarch64_get_reg_u32 (cpu, rd, NO_SP));
}

/* 64 bit store 64 bit unscaled signed 9 bit  */
static void
stur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		       aarch64_get_reg_u64 (cpu, rd, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit  */
static void
sturb (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu,
		      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		      aarch64_get_reg_u8 (cpu, rd, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit  */
static void
sturh (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		       aarch64_get_reg_u16 (cpu, rd, NO_SP));
}

/* Load single register pc-relative label
   Offset is a signed 19 bit immediate count in words
   rt may not be SP.  */

/* 32 bit pc-relative load  */
static void
ldr32_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_mem_u32
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* 64 bit pc-relative load  */
static void
ldr_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_mem_u64
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* sign extended 32 bit pc-relative load  */
static void
ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_mem_s32
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* float pc-relative load  */
static void
fldrs_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0,
		       aarch64_get_mem_u32
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* double pc-relative load  */
static void
fldrd_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0,
		       aarch64_get_mem_u64
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* long double pc-relative load.  */
static void
fldrq_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);
  uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
  FRegister a;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, addr, & a);
  aarch64_set_FP_long_double (cpu, st, a);
}

/* This can be used to scale an offset by applying
   the requisite shift.  The second argument is either
   16, 32, 64 or 128.  */

#define SCALE(_offset, _elementSize) \
    ((_offset) << ScaleShift ## _elementSize)

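/* E.g. SCALE (offset, 32) expands to offset << ScaleShift32, turning a
   12-bit element count into a byte displacement; with ScaleShift32
   defined elsewhere as 2, an immediate of 1 addresses byte offset 4,
   i.e. the next 32-bit element.  */
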
/* This can be used to optionally scale a register derived offset
   by applying the requisite shift as indicated by the Scaling
   argument.  The second argument is either 16, 32, 64 or 128.
   The third argument is either Scaled or Unscaled.
   N.B. when _Scaling is Scaled the full element shift is applied,
   while when it is Unscaled the shift count used is 0.  */

#define OPT_SCALE(_offset, _elementType, _Scaling) \
  ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))

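/* E.g. OPT_SCALE (extended, 16, scaling) yields extended << ScaleShift16
   when scaling is Scaled (non-zero) and leaves extended unchanged when
   it is Unscaled (zero).  */
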
/* This can be used to zero or sign extend a 32 bit register derived
   value to a 64 bit value.  The first argument must be the value as
   a uint32_t and the second must be either UXTW or SXTW.  The result
   is returned as an int64_t.  */

static inline int64_t
extend (uint32_t value, Extension extension)
{
  union
  {
    uint32_t u;
    int32_t  n;
  } x;

  /* A branchless variant of this ought to be possible.  */
  if (extension == UXTW || extension == NoExtension)
    return value;

  x.u = value;
  return x.n;
}

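/* E.g. extend (0xfffffffe, UXTW) yields 0xfffffffe, while
   extend (0xfffffffe, SXTW) yields -2, which sign-extends to
   0xfffffffffffffffe when used as a 64 bit displacement.  */
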
/* Scalar Floating Point

   FP load/store single register (4 addressing modes)

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.  */

/* Load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 8 bit with unsigned 12 bit offset.  */
static void
fldrb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Read only the single byte; fetching a full word here could fault
     at the end of accessible memory.  */
  aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u8 (cpu, addr));
}

/* Load 16 bit scaled unsigned 12 bit.  */
static void
fldrh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
}

/* Load 32 bit scaled unsigned 12 bit.  */
static void
fldrs_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
}

/* Load 64 bit scaled unsigned 12 bit.  */
static void
fldrd_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
}

/* Load 128 bit scaled unsigned 12 bit.  */
static void
fldrq_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
  aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
}

/* Load 32 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
		       (cpu, address + displacement));
}

/* Load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 64 bit scaled or unscaled zero- or sign-extended 32-bit register offset.  */
static void
fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  fldrd_wb (cpu, displacement, NoWriteBack);
}

/* Load 128 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  FRegister a;
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, address, & a);
  aarch64_set_FP_long_double (cpu, st, a);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 128 bit scaled or unscaled zero- or sign-extended 32-bit register offset  */
static void
fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 128, scaling);

  fldrq_wb (cpu, displacement, NoWriteBack);
}

/* Memory Access

   load-store single register
   There are four addressing modes available here which all employ a
   64 bit source (base) register.

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.

   Scaled, 12-bit, unsigned immediate offset, without pre- and
   post-index options.
   Unscaled, 9-bit, signed immediate offset with pre- or post-index
   writeback.
   scaled or unscaled 64-bit register offset.
   scaled or unscaled 32-bit extended register offset.

   All offsets are assumed to be raw from the decode, i.e. the
   simulator is expected to adjust scaled offsets based on the
   accessed data size.  With register or extended register offset
   versions the same applies, except that in the latter case the
   operation may also require a sign extend.

   A separate method is provided for each possible addressing mode.  */

/* 32 bit load 32 bit scaled unsigned 12 bit  */
static void
ldr32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ SCALE (offset, 32)));
}

/* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load 32 bit scaled or unscaled
   zero- or sign-extended 32-bit register offset  */
static void
ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u32 (cpu, address + displacement));
}

/* 64 bit load 64 bit scaled unsigned 12 bit  */
static void
ldr_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ SCALE (offset, 64)));
}

/* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load 64 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u64 (cpu, address + displacement));
}

/* 32 bit load zero-extended byte scaled unsigned 12 bit.  */
static void
ldrb32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be;
     there is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
				 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u8 (cpu, address + displacement));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s8 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended byte scaled unsigned 12 bit.  */
static void
ldrsb_abs (sim_cpu *cpu, uint32_t offset)
{
  ldrsb_wb (cpu, offset, NoWriteBack);
}

/* 64 bit load sign-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
				 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
		       aarch64_get_mem_s8 (cpu, address + displacement));
}

/* 32 bit load zero-extended short scaled unsigned 12 bit.  */
static void
ldrh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 16));
  aarch64_set_reg_u32 (cpu, rt, NO_SP, val);
}

/* 32 bit load zero-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP,
		       aarch64_get_mem_u16 (cpu, address + displacement));
}

/* 32 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 16));
  aarch64_set_reg_s32 (cpu, rt, NO_SP, val);
}

/* 32 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
		       (int32_t) aarch64_get_mem_s16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
		       (int32_t) aarch64_get_mem_s16
		       (cpu, address + displacement));
}

/* 64 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 16));
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  val = aarch64_get_mem_s16 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s16 (cpu, address + displacement);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit scaled unsigned 12 bit.  */
static void
ldrsw_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 32));
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended 32 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
		       aarch64_get_mem_s32 (cpu, address + displacement));
}

/* N.B. with stores the value in source is written to the
   address identified by source2 modified by source3/offset.  */

/* 32 bit store scaled unsigned 12 bit.  */
static void
str32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 32)),
		       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}

/* 32 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store scaled or unscaled zero- or
   sign-extended 32-bit register offset.  */
static void
str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Only the low 32 bits of rt are stored.  */
  aarch64_set_mem_u32 (cpu, address + displacement,
		       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}

/* 64 bit store scaled unsigned 12 bit.  */
static void
str_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK)
		       + SCALE (offset, 64),
		       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 64 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit store scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
			     extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address + displacement,
		       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 32 bit store byte scaled unsigned 12 bit.  */
static void
strb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.
     There is no scaling required for a byte store.  */
  aarch64_set_mem_u8 (cpu,
		      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback.  */
static void
strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
				 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte store.  */
  aarch64_set_mem_u8 (cpu, address + displacement,
		      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store short scaled unsigned 12 bit.  */
static void
strh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
		       + SCALE (offset, 16),
		       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit with pre- or post-writeback.  */
static void
strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address + displacement,
		       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* Prefetch unsigned 12 bit.  */
static void
prfm_abs (sim_cpu *cpu, uint32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
     + SCALE (offset, 64).  */

  /* TODO : implement prefetch of address.  */
}

/* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset.  */
static void
prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC

     rn may reference SP, rm may only reference ZR
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
     int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                extension);
     uint64_t displacement = OPT_SCALE (extended, 64, scaling);
     uint64_t address = base + displacement.  */

  /* TODO : implement prefetch of address  */
}

/* 64 bit pc-relative prefetch.  */
static void
prfm_pcrel (sim_cpu *cpu, int32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_PC (cpu) + offset.  */

  /* TODO : implement this  */
}

/* Load-store exclusive.  */

static void
ldxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  /* int ordered = INSTR (15, 15);  */
  /* int exclusive = ! INSTR (23, 23);  */

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (size)
    {
    case 0:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
      break;
    case 1:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
      break;
    case 2:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
      break;
    case 3:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
      break;
    }
}

static void
stxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  unsigned rs = INSTR (20, 16);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);

  switch (size)
    {
    case 0: aarch64_set_mem_u8 (cpu, address, data); break;
    case 1: aarch64_set_mem_u16 (cpu, address, data); break;
    case 2: aarch64_set_mem_u32 (cpu, address, data); break;
    case 3: aarch64_set_mem_u64 (cpu, address, data); break;
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rs, NO_SP, 0); /* Always exclusive...  */
}

static void
dexLoadLiteral (sim_cpu *cpu)
{
  /* instr[29,27] == 011
     instr[25,24] == 00
     instr[31,30:26] = opc: 000 ==> LDRW,  001 ==> FLDRS
                            010 ==> LDRX,  011 ==> FLDRD
                            100 ==> LDRSW, 101 ==> FLDRQ
                            110 ==> PRFM,  111 ==> UNALLOC
     instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
     instr[23, 5] == simm19  */

  /* unsigned rt = INSTR (4, 0);  */
  uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26);
  int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);

  switch (dispatch)
    {
    case 0: ldr32_pcrel (cpu, imm); break;
    case 1: fldrs_pcrel (cpu, imm); break;
    case 2: ldr_pcrel   (cpu, imm); break;
    case 3: fldrd_pcrel (cpu, imm); break;
    case 4: ldrsw_pcrel (cpu, imm); break;
    case 5: fldrq_pcrel (cpu, imm); break;
    case 6: prfm_pcrel  (cpu, imm); break;

    default:
      HALT_UNALLOC;
    }
}

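/* E.g. an instruction with opc (instr[31,30]) == 01 and V (instr[26])
   == 1 yields dispatch == 3 and is routed to fldrd_pcrel, matching the
   FLDRD row of the table above.  */
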
/* Immediate arithmetic
   The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
   value left shifted by 12 bits (done at decode).

   N.B. the register args (dest, source) can normally be Xn or SP.
   The exception occurs for flag setting instructions which may
   only use Xn for the output (dest).  */

/* 32 bit add immediate.  */
static void
add32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
}

/* 64 bit add immediate.  */
static void
add64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
}

static void
set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
{
  int32_t result = value1 + value2;
  int64_t sresult = (int64_t) value1 + (int64_t) value2;
  uint64_t uresult = (uint64_t)(uint32_t) value1
    + (uint64_t)(uint32_t) value2;
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;

  if (result & (1 << 31))
    flags |= N;

  /* C is set when the unsigned sum no longer fits in 32 bits.  */
  if (uresult != (uint32_t)result)
    flags |= C;

  /* V is set when the signed sum no longer fits in 32 bits.  */
  if (sresult != result)
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

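/* E.g. value1 = 0x7fffffff, value2 = 1: the 32-bit result is
   0x80000000, so N and V are set (signed overflow) while C and Z stay
   clear because the unsigned sum still fits in 32 bits.  */
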
#define NEG(a) (((a) & signbit) == signbit)
#define POS(a) (((a) & signbit) == 0)

static void
set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 + value2;
  uint32_t flags = 0;
  uint64_t signbit = 1ULL << 63;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && NEG (value2))
      || (NEG (value1) && POS (result))
      || (NEG (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && NEG (value2) && POS (result))
      || (POS (value1) && POS (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
{
  uint32_t result = value1 - value2;
  uint32_t flags = 0;
  uint32_t signbit = 1U << 31;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 - value2;
  uint32_t flags = 0;
  uint64_t signbit = 1ULL << 63;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

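/* In these subtraction helpers C follows the AArch64 convention: it is
   set when no borrow occurs.  E.g. value1 = 1, value2 = 1 sets Z and C
   (no borrow), whereas value1 = 0, value2 = 1 leaves C clear and sets
   N.  */
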
static void
set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
{
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;
  else
    flags &= ~ Z;

  if (result & (1 << 31))
    flags |= N;
  else
    flags &= ~ N;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
{
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;
  else
    flags &= ~ Z;

  if (result & (1ULL << 63))
    flags |= N;
  else
    flags &= ~ N;

  aarch64_set_CPSR (cpu, flags);
}

/* 32 bit add immediate set flags.  */
static void
adds32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  /* TODO : do we need to worry about signs here?  */
  int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
  set_flags_for_add32 (cpu, value1, aimm);
}

/* 64 bit add immediate set flags.  */
static void
adds64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  uint64_t value2 = aimm;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
  set_flags_for_add64 (cpu, value1, value2);
}

/* 32 bit sub immediate.  */
static void
sub32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
}

/* 64 bit sub immediate.  */
static void
sub64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
}

/* 32 bit sub immediate set flags.  */
static void
subs32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
  uint32_t value2 = aimm;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
  set_flags_for_sub32 (cpu, value1, value2);
}

/* 64 bit sub immediate set flags.  */
static void
subs64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  uint64_t value2 = aimm;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
  set_flags_for_sub64 (cpu, value1, value2);
}

/* Data Processing Register.  */

/* First two helpers to perform the shift operations.  */

static inline uint32_t
shifted32 (uint32_t value, Shift shift, uint32_t count)
{
  switch (shift)
    {
    default:
    case LSL:
      return (value << count);
    case LSR:
      return (value >> count);
    case ASR:
      {
	int32_t svalue = value;
	return (svalue >> count);
      }
    case ROR:
      {
	uint32_t top = value >> count;
	uint32_t bottom = value << (32 - count);
	return (bottom | top);
      }
    }
}

static inline uint64_t
shifted64 (uint64_t value, Shift shift, uint32_t count)
{
  switch (shift)
    {
    default:
    case LSL:
      return (value << count);
    case LSR:
      return (value >> count);
    case ASR:
      {
	int64_t svalue = value;
	return (svalue >> count);
      }
    case ROR:
      {
	uint64_t top = value >> count;
	uint64_t bottom = value << (64 - count);
	return (bottom | top);
      }
    }
}

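/* E.g. shifted32 (0x80000001, ROR, 4) rotates the low four bits round
   to the top: top = 0x08000000, bottom = 0x10000000, giving 0x18000000;
   ASR of 0x80000000 by 4 instead smears the sign bit, giving
   0xf8000000.  */
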
/* Arithmetic shifted register.
   These allow an optional LSL, ASR or LSR to the second source
   register with a count up to the register bit count.

   N.B. register args may not be SP.  */

/* 32 bit ADD shifted register.  */
static void
add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_reg_u32 (cpu, rn, NO_SP)
		       + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
				    shift, count));
}

/* 64 bit ADD shifted register.  */
static void
add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_reg_u64 (cpu, rn, NO_SP)
		       + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
				    shift, count));
}

/* 32 bit ADD shifted register setting flags.  */
static void
adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
  uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
			       shift, count);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
  set_flags_for_add32 (cpu, value1, value2);
}

/* 64 bit ADD shifted register setting flags.  */
static void
adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
			       shift, count);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
  set_flags_for_add64 (cpu, value1, value2);
}

/* 32 bit SUB shifted register.  */
static void
sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_reg_u32 (cpu, rn, NO_SP)
		       - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
				    shift, count));
}

/* 64 bit SUB shifted register.  */
static void
sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_reg_u64 (cpu, rn, NO_SP)
		       - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
				    shift, count));
}

/* 32 bit SUB shifted register setting flags.  */
static void
subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
  uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
			       shift, count);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
  set_flags_for_sub32 (cpu, value1, value2);
}

/* 64 bit SUB shifted register setting flags.  */
static void
subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
			       shift, count);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
  set_flags_for_sub64 (cpu, value1, value2);
}

/* First a couple more helpers to fetch the
   relevant source register element either
   sign or zero extended as required by the
   extension value.  */

static uint32_t
extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
{
  switch (extension)
    {
    case UXTB: return aarch64_get_reg_u8  (cpu, lo, NO_SP);
    case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
    case UXTW: /* Fall through.  */
    case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
    case SXTB: return aarch64_get_reg_s8  (cpu, lo, NO_SP);
    case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
    case SXTW: /* Fall through.  */
    case SXTX: /* Fall through.  */
    default:   return aarch64_get_reg_s32 (cpu, lo, NO_SP);
    }
}

static uint64_t
extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
{
  switch (extension)
    {
    case UXTB: return aarch64_get_reg_u8  (cpu, lo, NO_SP);
    case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
    case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
    case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
    case SXTB: return aarch64_get_reg_s8  (cpu, lo, NO_SP);
    case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
    case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
    case SXTX: /* Fall through.  */
    default:   return aarch64_get_reg_s64 (cpu, lo, NO_SP);
    }
}

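/* E.g. with the low byte of Wrm holding 0x80, extreg32 (cpu, rm, UXTB)
   yields 0x00000080, while extreg32 (cpu, rm, SXTB) yields 0xffffff80,
   the byte sign-extended to 32 bits.  */
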
/* Arithmetic extending register
   These allow an optional sign extension of some portion of the
   second source register followed by an optional left shift of
   between 0 and 4 bits.

   N.B output (dest) and first input arg (source) may normally be Xn
   or SP.  However, for flag setting operations dest can only be
   Xn.  Second input registers are always Xn.  */

/* 32 bit ADD extending register.  */
static void
add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u32 (cpu, rn, SP_OK)
		       + (extreg32 (cpu, rm, extension) << shift));
}

/* 64 bit ADD extending register.
   N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0.  */
static void
add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK)
		       + (extreg64 (cpu, rm, extension) << shift));
}

/* 32 bit ADD extending register setting flags.  */
static void
adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
  uint32_t value2 = extreg32 (cpu, rm, extension) << shift;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
  set_flags_for_add32 (cpu, value1, value2);
}

/* 64 bit ADD extending register setting flags  */
/* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0  */
static void
adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  uint64_t value2 = extreg64 (cpu, rm, extension) << shift;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
  set_flags_for_add64 (cpu, value1, value2);
}

/* 32 bit SUB extending register.  */
static void
sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u32 (cpu, rn, SP_OK)
		       - (extreg32 (cpu, rm, extension) << shift));
}

/* 64 bit SUB extending register.  */
/* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0.  */
static void
sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK)
		       - (extreg64 (cpu, rm, extension) << shift));
}

2181 /* 32 bit SUB extending register setting flags. */
2183 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2185 unsigned rm = INSTR (20, 16);
2186 unsigned rn = INSTR (9, 5);
2187 unsigned rd = INSTR (4, 0);
2189 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2190 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2192 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2193 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2194 set_flags_for_sub32 (cpu, value1, value2);
2197 /* 64 bit SUB extending register setting flags */
2198 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2200 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2202 unsigned rm = INSTR (20, 16);
2203 unsigned rn = INSTR (9, 5);
2204 unsigned rd = INSTR (4, 0);
2206 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2207 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2209 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2210 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2211 set_flags_for_sub64 (cpu, value1, value2);
2215 dexAddSubtractImmediate (sim_cpu *cpu)
2217 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2218 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2219 instr[29] = set : 0 ==> no flags, 1 ==> set flags
2220 instr[28,24] = 10001
2221 instr[23,22] = shift : 00 ==> LSL#0, 01 ==> LSL#12, 1x ==> UNALLOC
2222 instr[21,10] = uimm12
2226 /* N.B. the shift is applied at decode before calling the add/sub routine. */
2227 uint32_t shift = INSTR (23, 22);
2228 uint32_t imm = INSTR (21, 10);
2229 uint32_t dispatch = INSTR (31, 29);
2231 NYI_assert (28, 24, 0x11);
2241 case 0: add32 (cpu, imm); break;
2242 case 1: adds32 (cpu, imm); break;
2243 case 2: sub32 (cpu, imm); break;
2244 case 3: subs32 (cpu, imm); break;
2245 case 4: add64 (cpu, imm); break;
2246 case 5: adds64 (cpu, imm); break;
2247 case 6: sub64 (cpu, imm); break;
2248 case 7: subs64 (cpu, imm); break;
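/* Worked example (hand decoded, for illustration only): the word
   0x91000821 has instr[28,24] = 10001, dispatch = instr[31,29] = 100,
   shift = 0 and uimm12 = 2, so case 4 runs add64 with imm = 2,
   i.e. ADD X1, X1, #2.  */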
2253 dexAddSubtractShiftedRegister (sim_cpu *cpu)
2255 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2256 instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
2257 instr[28,24] = 01011
2258 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
2261 instr[15,10] = count : must be 0xxxxx for 32 bit
2265 uint32_t size = INSTR (31, 31);
2266 uint32_t count = INSTR (15, 10);
2267 Shift shiftType = INSTR (23, 22);
2269 NYI_assert (28, 24, 0x0B);
2270 NYI_assert (21, 21, 0);
2272 /* Shift encoded as ROR is unallocated. */
2273 if (shiftType == ROR)
2276 /* 32 bit operations must have count[5] = 0
2277 or else we have an UNALLOC. */
2278 if (size == 0 && uimm (count, 5, 5))
2281 /* Dispatch on size:op i.e instr [31,29]. */
2282 switch (INSTR (31, 29))
2284 case 0: add32_shift (cpu, shiftType, count); break;
2285 case 1: adds32_shift (cpu, shiftType, count); break;
2286 case 2: sub32_shift (cpu, shiftType, count); break;
2287 case 3: subs32_shift (cpu, shiftType, count); break;
2288 case 4: add64_shift (cpu, shiftType, count); break;
2289 case 5: adds64_shift (cpu, shiftType, count); break;
2290 case 6: sub64_shift (cpu, shiftType, count); break;
2291 case 7: subs64_shift (cpu, shiftType, count); break;
2296 dexAddSubtractExtendedRegister (sim_cpu *cpu)
2298 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2299 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2300 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2301 instr[28,24] = 01011
2302 instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
2305 instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
2306 010 ==> UXTW|LSL, 011 ==> UXTX,
2307 100 ==> SXTB, 101 ==> SXTH,
2308 110 ==> SXTW, 111 ==> SXTX,
2309 instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
2313 Extension extensionType = INSTR (15, 13);
2314 uint32_t shift = INSTR (12, 10);
2316 NYI_assert (28, 24, 0x0B);
2317 NYI_assert (21, 21, 1);
2319 /* Shift may not exceed 4. */
2323 /* Dispatch on size:op:set?. */
2324 switch (INSTR (31, 29))
2326 case 0: add32_ext (cpu, extensionType, shift); break;
2327 case 1: adds32_ext (cpu, extensionType, shift); break;
2328 case 2: sub32_ext (cpu, extensionType, shift); break;
2329 case 3: subs32_ext (cpu, extensionType, shift); break;
2330 case 4: add64_ext (cpu, extensionType, shift); break;
2331 case 5: adds64_ext (cpu, extensionType, shift); break;
2332 case 6: sub64_ext (cpu, extensionType, shift); break;
2333 case 7: subs64_ext (cpu, extensionType, shift); break;
2337 /* Conditional data processing
2338 Condition register is implicit 3rd source. */
2340 /* 32 bit add with carry. */
2341 /* N.B register args may not be SP. */
2344 adc32 (sim_cpu *cpu)
2346 unsigned rm = INSTR (20, 16);
2347 unsigned rn = INSTR (9, 5);
2348 unsigned rd = INSTR (4, 0);
2350 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2351 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2352 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2353 + aarch64_get_reg_u32 (cpu, rm, NO_SP)
2354 + IS_SET (C));
2357 /* 64 bit add with carry */
2359 adc64 (sim_cpu *cpu)
2361 unsigned rm = INSTR (20, 16);
2362 unsigned rn = INSTR (9, 5);
2363 unsigned rd = INSTR (4, 0);
2365 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2366 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2367 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2368 + aarch64_get_reg_u64 (cpu, rm, NO_SP)
2369 + IS_SET (C));
2372 /* 32 bit add with carry setting flags. */
2374 adcs32 (sim_cpu *cpu)
2376 unsigned rm = INSTR (20, 16);
2377 unsigned rn = INSTR (9, 5);
2378 unsigned rd = INSTR (4, 0);
2380 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2381 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2382 uint32_t carry = IS_SET (C);
2384 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2385 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2386 set_flags_for_add32 (cpu, value1, value2 + carry);
2389 /* 64 bit add with carry setting flags. */
2391 adcs64 (sim_cpu *cpu)
2393 unsigned rm = INSTR (20, 16);
2394 unsigned rn = INSTR (9, 5);
2395 unsigned rd = INSTR (4, 0);
2397 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2398 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2399 uint64_t carry = IS_SET (C);
2401 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2402 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2403 set_flags_for_add64 (cpu, value1, value2 + carry);
2406 /* 32 bit sub with carry. */
2408 sbc32 (sim_cpu *cpu)
2410 unsigned rm = INSTR (20, 16);
2411 unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. */
2412 unsigned rd = INSTR (4, 0);
2414 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2415 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2416 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2417 - aarch64_get_reg_u32 (cpu, rm, NO_SP)
2418 - 1 + IS_SET (C));
2421 /* 64 bit sub with carry */
2423 sbc64 (sim_cpu *cpu)
2425 unsigned rm = INSTR (20, 16);
2426 unsigned rn = INSTR (9, 5);
2427 unsigned rd = INSTR (4, 0);
2429 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2430 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2431 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2432 - aarch64_get_reg_u64 (cpu, rm, NO_SP)
2433 - 1 + IS_SET (C));
2436 /* 32 bit sub with carry setting flags */
2438 sbcs32 (sim_cpu *cpu)
2440 unsigned rm = INSTR (20, 16);
2441 unsigned rn = INSTR (9, 5);
2442 unsigned rd = INSTR (4, 0);
2444 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2445 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2446 uint32_t carry = IS_SET (C);
2447 uint32_t result = value1 - value2 - 1 + carry;
2449 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2450 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2451 set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
2454 /* 64 bit sub with carry setting flags */
2456 sbcs64 (sim_cpu *cpu)
2458 unsigned rm = INSTR (20, 16);
2459 unsigned rn = INSTR (9, 5);
2460 unsigned rd = INSTR (4, 0);
2462 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2463 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2464 uint64_t carry = IS_SET (C);
2465 uint64_t result = value1 - value2 - 1 + carry;
2467 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2468 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2469 set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
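/* A minimal sketch of the borrow convention used above (hypothetical
   helper, not simulator code): AArch64 SBC computes
   Rn - Rm - (1 - C), so with the carry flag set SBCS behaves exactly
   like SUBS.  */
static uint64_t
example_sbc64 (uint64_t rn_val, uint64_t rm_val, unsigned carry_set)
{
  /* E.g. example_sbc64 (10, 3, 1) == 7 and
     example_sbc64 (10, 3, 0) == 6.  */
  return rn_val - rm_val - 1 + carry_set;
}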
2473 dexAddSubtractWithCarry (sim_cpu *cpu)
2475 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2476 instr[30] = op : 0 ==> ADC, 1 ==> SBC
2477 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2478 instr[28,21] = 1 1010 000
2480 instr[15,10] = op2 : 000000 ==> ok, ow ==> UNALLOC
2484 uint32_t op2 = INSTR (15, 10);
2486 NYI_assert (28, 21, 0xD0);
2491 /* Dispatch on size:op:set?. */
2492 switch (INSTR (31, 29))
2494 case 0: adc32 (cpu); break;
2495 case 1: adcs32 (cpu); break;
2496 case 2: sbc32 (cpu); break;
2497 case 3: sbcs32 (cpu); break;
2498 case 4: adc64 (cpu); break;
2499 case 5: adcs64 (cpu); break;
2500 case 6: sbc64 (cpu); break;
2501 case 7: sbcs64 (cpu); break;
2506 testConditionCode (sim_cpu *cpu, CondCode cc)
2508 /* This should be reducible to branchless logic
2509 by some careful testing of bits in CC followed
2510 by the requisite masking and combining of bits
2511 from the flag register.
2513 For now we do it with a switch. */
2518 case EQ: res = IS_SET (Z); break;
2519 case NE: res = IS_CLEAR (Z); break;
2520 case CS: res = IS_SET (C); break;
2521 case CC: res = IS_CLEAR (C); break;
2522 case MI: res = IS_SET (N); break;
2523 case PL: res = IS_CLEAR (N); break;
2524 case VS: res = IS_SET (V); break;
2525 case VC: res = IS_CLEAR (V); break;
2526 case HI: res = IS_SET (C) && IS_CLEAR (Z); break;
2527 case LS: res = IS_CLEAR (C) || IS_SET (Z); break;
2528 case GE: res = IS_SET (N) == IS_SET (V); break;
2529 case LT: res = IS_SET (N) != IS_SET (V); break;
2530 case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break;
2531 case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break;
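/* As the comment above suggests, several of these tests reduce to
   branchless bit logic; a hedged sketch for GE/LT only (hypothetical
   helper, not used by the dispatch code): GE holds when N == V.  */
static int
example_cond_ge (unsigned n_flag, unsigned v_flag)
{
  return ! (n_flag ^ v_flag);   /* LT is simply (n_flag ^ v_flag).  */
}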
2542 CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */
2544 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2545 instr[30] = compare with positive (1) or negative value (0)
2546 instr[29,21] = 1 1101 0010
2547 instr[20,16] = Rm or const
2549 instr[11] = compare reg (0) or const (1)
2553 instr[3,0] = value for CPSR bits if the comparison does not take place. */
2558 NYI_assert (29, 21, 0x1d2);
2559 NYI_assert (10, 10, 0);
2560 NYI_assert (4, 4, 0);
2562 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2563 if (! testConditionCode (cpu, INSTR (15, 12)))
2565 aarch64_set_CPSR (cpu, INSTR (3, 0));
2569 negate = INSTR (30, 30) ? 1 : -1;
2570 rm = INSTR (20, 16);
2576 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2577 negate * (uint64_t) rm);
2579 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2580 negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
2585 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2588 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2589 negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
2594 do_vec_MOV_whole_vector (sim_cpu *cpu)
2596 /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)
2599 instr[30] = half(0)/full(1)
2600 instr[29,21] = 001110101
2602 instr[15,10] = 000111
2606 unsigned vs = INSTR (9, 5);
2607 unsigned vd = INSTR (4, 0);
2609 NYI_assert (29, 21, 0x075);
2610 NYI_assert (15, 10, 0x07);
2612 if (INSTR (20, 16) != vs)
2615 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2617 aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));
2619 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
2623 do_vec_SMOV_into_scalar (sim_cpu *cpu)
2626 instr[30] = word(0)/long(1)
2627 instr[29,21] = 00 1110 000
2628 instr[20,16] = element size and index
2629 instr[15,10] = 00 0010 11
2630 instr[9,5] = V source
2631 instr[4,0] = R dest */
2633 unsigned vs = INSTR (9, 5);
2634 unsigned rd = INSTR (4, 0);
2635 unsigned imm5 = INSTR (20, 16);
2636 unsigned full = INSTR (30, 30);
2639 NYI_assert (29, 21, 0x070);
2640 NYI_assert (15, 10, 0x0B);
2642 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2647 index = (imm5 >> 1) & 0xF;
2649 else if (imm5 & 0x2)
2652 index = (imm5 >> 2) & 0x7;
2654 else if (full && (imm5 & 0x4))
2657 index = (imm5 >> 3) & 0x3;
2666 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2667 aarch64_get_vec_s8 (cpu, vs, index));
2669 aarch64_set_reg_s32 (cpu, rd, NO_SP,
2670 aarch64_get_vec_s8 (cpu, vs, index));
2675 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2676 aarch64_get_vec_s16 (cpu, vs, index));
2678 aarch64_set_reg_s32 (cpu, rd, NO_SP,
2679 aarch64_get_vec_s16 (cpu, vs, index));
2683 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2684 aarch64_get_vec_s32 (cpu, vs, index));
2693 do_vec_UMOV_into_scalar (sim_cpu *cpu)
2696 instr[30] = word(0)/long(1)
2697 instr[29,21] = 00 1110 000
2698 instr[20,16] = element size and index
2699 instr[15,10] = 00 0011 11
2700 instr[9,5] = V source
2701 instr[4,0] = R dest */
2703 unsigned vs = INSTR (9, 5);
2704 unsigned rd = INSTR (4, 0);
2705 unsigned imm5 = INSTR (20, 16);
2706 unsigned full = INSTR (30, 30);
2709 NYI_assert (29, 21, 0x070);
2710 NYI_assert (15, 10, 0x0F);
2712 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2719 index = (imm5 >> 1) & 0xF;
2721 else if (imm5 & 0x2)
2724 index = (imm5 >> 2) & 0x7;
2726 else if (imm5 & 0x4)
2729 index = (imm5 >> 3) & 0x3;
2734 else if (imm5 & 0x8)
2737 index = (imm5 >> 4) & 0x1;
2745 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2746 aarch64_get_vec_u8 (cpu, vs, index));
2750 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2751 aarch64_get_vec_u16 (cpu, vs, index));
2755 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2756 aarch64_get_vec_u32 (cpu, vs, index));
2760 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2761 aarch64_get_vec_u64 (cpu, vs, index));
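/* Worked example of the imm5 (instr[20,16]) element encoding used by
   SMOV/UMOV above and by INS/DUP below: the lowest set bit selects
   the element size and the bits above it form the index.  So
   imm5 = 0b01011 has bit 0 set ==> byte elements, index 0b0101 = 5,
   while imm5 = 0b01100 has bit 2 set ==> word elements, index 0b01 = 1.  */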
2770 do_vec_INS (sim_cpu *cpu)
2772 /* instr[31,21] = 01001110000
2773 instr[20,16] = element size and index
2774 instr[15,10] = 000111
2775 instr[9,5] = W source
2776 instr[4,0] = V dest */
2779 unsigned rs = INSTR (9, 5);
2780 unsigned vd = INSTR (4, 0);
2782 NYI_assert (31, 21, 0x270);
2783 NYI_assert (15, 10, 0x07);
2785 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2788 index = INSTR (20, 17);
2789 aarch64_set_vec_u8 (cpu, vd, index,
2790 aarch64_get_reg_u8 (cpu, rs, NO_SP));
2792 else if (INSTR (17, 17))
2794 index = INSTR (20, 18);
2795 aarch64_set_vec_u16 (cpu, vd, index,
2796 aarch64_get_reg_u16 (cpu, rs, NO_SP));
2798 else if (INSTR (18, 18))
2800 index = INSTR (20, 19);
2801 aarch64_set_vec_u32 (cpu, vd, index,
2802 aarch64_get_reg_u32 (cpu, rs, NO_SP));
2804 else if (INSTR (19, 19))
2806 index = INSTR (20, 20);
2807 aarch64_set_vec_u64 (cpu, vd, index,
2808 aarch64_get_reg_u64 (cpu, rs, NO_SP));
2815 do_vec_DUP_vector_into_vector (sim_cpu *cpu)
2818 instr[30] = half(0)/full(1)
2819 instr[29,21] = 00 1110 000
2820 instr[20,16] = element size and index
2821 instr[15,10] = 0000 01
2822 instr[9,5] = V source
2823 instr[4,0] = V dest. */
2825 unsigned full = INSTR (30, 30);
2826 unsigned vs = INSTR (9, 5);
2827 unsigned vd = INSTR (4, 0);
2830 NYI_assert (29, 21, 0x070);
2831 NYI_assert (15, 10, 0x01);
2833 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2836 index = INSTR (20, 17);
2838 for (i = 0; i < (full ? 16 : 8); i++)
2839 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index));
2841 else if (INSTR (17, 17))
2843 index = INSTR (20, 18);
2845 for (i = 0; i < (full ? 8 : 4); i++)
2846 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index));
2848 else if (INSTR (18, 18))
2850 index = INSTR (20, 19);
2852 for (i = 0; i < (full ? 4 : 2); i++)
2853 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index));
2857 if (INSTR (19, 19) == 0)
2863 index = INSTR (20, 20);
2865 for (i = 0; i < 2; i++)
2866 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index));
2871 do_vec_TBL (sim_cpu *cpu)
2874 instr[30] = half(0)/full(1)
2875 instr[29,21] = 00 1110 000
2878 instr[14,13] = vec length
2880 instr[9,5] = V start
2881 instr[4,0] = V dest */
2883 int full = INSTR (30, 30);
2884 int len = INSTR (14, 13) + 1;
2885 unsigned vm = INSTR (20, 16);
2886 unsigned vn = INSTR (9, 5);
2887 unsigned vd = INSTR (4, 0);
2890 NYI_assert (29, 21, 0x070);
2891 NYI_assert (12, 10, 0);
2893 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2894 for (i = 0; i < (full ? 16 : 8); i++)
2896 unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i);
2900 val = aarch64_get_vec_u8 (cpu, vn, selector);
2901 else if (selector < 32)
2902 val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16);
2903 else if (selector < 48)
2904 val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32);
2905 else if (selector < 64)
2906 val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48);
2910 aarch64_set_vec_u8 (cpu, vd, i, val);
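/* Worked example (illustrative): with a single-register table
   (len == 1) a selector byte of 17 is out of range and produces 0,
   whereas with a two-register table (len == 2) the same selector
   reads byte 17 - 16 = 1 of register vn + 1.  */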
2915 do_vec_TRN (sim_cpu *cpu)
2918 instr[30] = half(0)/full(1)
2919 instr[29,24] = 00 1110
2924 instr[14] = TRN1 (0) / TRN2 (1)
2926 instr[9,5] = V source
2927 instr[4,0] = V dest. */
2929 int full = INSTR (30, 30);
2930 int second = INSTR (14, 14);
2931 unsigned vm = INSTR (20, 16);
2932 unsigned vn = INSTR (9, 5);
2933 unsigned vd = INSTR (4, 0);
2936 NYI_assert (29, 24, 0x0E);
2937 NYI_assert (13, 10, 0xA);
2939 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2940 switch (INSTR (23, 22))
2943 for (i = 0; i < (full ? 8 : 4); i++)
2947 aarch64_get_vec_u8 (cpu, second ? vm : vn, i * 2));
2949 (cpu, vd, i * 2 + 1,
2950 aarch64_get_vec_u8 (cpu, second ? vn : vm, i * 2 + 1));
2955 for (i = 0; i < (full ? 4 : 2); i++)
2959 aarch64_get_vec_u16 (cpu, second ? vm : vn, i * 2));
2961 (cpu, vd, i * 2 + 1,
2962 aarch64_get_vec_u16 (cpu, second ? vn : vm, i * 2 + 1));
2968 (cpu, vd, 0, aarch64_get_vec_u32 (cpu, second ? vm : vn, 0));
2970 (cpu, vd, 1, aarch64_get_vec_u32 (cpu, second ? vn : vm, 1));
2972 (cpu, vd, 2, aarch64_get_vec_u32 (cpu, second ? vm : vn, 2));
2974 (cpu, vd, 3, aarch64_get_vec_u32 (cpu, second ? vn : vm, 3));
2981 aarch64_set_vec_u64 (cpu, vd, 0,
2982 aarch64_get_vec_u64 (cpu, second ? vm : vn, 0));
2983 aarch64_set_vec_u64 (cpu, vd, 1,
2984 aarch64_get_vec_u64 (cpu, second ? vn : vm, 1));
2990 do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
2993 instr[30] = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
2994 [must be 1 for 64-bit xfer]
2995 instr[29,20] = 00 1110 0000
2996 instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
2997 0100=> 32-bits, 1000=> 64-bits
2998 instr[15,10] = 0000 11
2999 instr[9,5] = W source
3000 instr[4,0] = V dest. */
3003 unsigned Vd = INSTR (4, 0);
3004 unsigned Rs = INSTR (9, 5);
3005 int both = INSTR (30, 30);
3007 NYI_assert (29, 20, 0x0E0);
3008 NYI_assert (15, 10, 0x03);
3010 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3011 switch (INSTR (19, 16))
3014 for (i = 0; i < (both ? 16 : 8); i++)
3015 aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
3019 for (i = 0; i < (both ? 8 : 4); i++)
3020 aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
3024 for (i = 0; i < (both ? 4 : 2); i++)
3025 aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
3031 aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
3032 aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
3041 do_vec_UZP (sim_cpu *cpu)
3044 instr[30] = half(0)/full(1)
3045 instr[29,24] = 00 1110
3046 instr[23,22] = size: byte(00), half(01), word (10), long (11)
3050 instr[14] = lower (0) / upper (1)
3055 int full = INSTR (30, 30);
3056 int upper = INSTR (14, 14);
3058 unsigned vm = INSTR (20, 16);
3059 unsigned vn = INSTR (9, 5);
3060 unsigned vd = INSTR (4, 0);
3062 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3063 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3064 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3065 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3070 uint64_t input2 = full ? val_n2 : val_m1;
3072 NYI_assert (29, 24, 0x0E);
3073 NYI_assert (21, 21, 0);
3074 NYI_assert (15, 15, 0);
3075 NYI_assert (13, 10, 6);
3077 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3078 switch (INSTR (23, 22))
3081 val1 = (val_n1 >> (upper * 8)) & 0xFFULL;
3082 val1 |= (val_n1 >> ((upper * 8) + 8)) & 0xFF00ULL;
3083 val1 |= (val_n1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
3084 val1 |= (val_n1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
3086 val1 |= (input2 << (32 - (upper * 8))) & 0xFF00000000ULL;
3087 val1 |= (input2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
3088 val1 |= (input2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
3089 val1 |= (input2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
3093 val2 = (val_m1 >> (upper * 8)) & 0xFFULL;
3094 val2 |= (val_m1 >> ((upper * 8) + 8)) & 0xFF00ULL;
3095 val2 |= (val_m1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
3096 val2 |= (val_m1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
3098 val2 |= (val_m2 << (32 - (upper * 8))) & 0xFF00000000ULL;
3099 val2 |= (val_m2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
3100 val2 |= (val_m2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
3101 val2 |= (val_m2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
3106 val1 = (val_n1 >> (upper * 16)) & 0xFFFFULL;
3107 val1 |= (val_n1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3109 val1 |= (input2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3110 val1 |= (input2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3114 val2 = (val_m1 >> (upper * 16)) & 0xFFFFULL;
3115 val2 |= (val_m1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3117 val2 |= (val_m2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3118 val2 |= (val_m2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3123 val1 = (val_n1 >> (upper * 32)) & 0xFFFFFFFF;
3124 val1 |= (input2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3128 val2 = (val_m1 >> (upper * 32)) & 0xFFFFFFFF;
3129 val2 |= (val_m2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3137 val1 = upper ? val_n2 : val_n1;
3138 val2 = upper ? val_m2 : val_m1;
3142 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3144 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3148 do_vec_ZIP (sim_cpu *cpu)
3151 instr[30] = half(0)/full(1)
3152 instr[29,24] = 00 1110
3153 instr[23,22] = size: byte(00), half(01), word (10), long (11)
3157 instr[14] = lower (0) / upper (1)
3162 int full = INSTR (30, 30);
3163 int upper = INSTR (14, 14);
3165 unsigned vm = INSTR (20, 16);
3166 unsigned vn = INSTR (9, 5);
3167 unsigned vd = INSTR (4, 0);
3169 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3170 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3171 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3172 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3177 uint64_t input1 = upper ? val_n1 : val_m1;
3178 uint64_t input2 = upper ? val_n2 : val_m2;
3180 NYI_assert (29, 24, 0x0E);
3181 NYI_assert (21, 21, 0);
3182 NYI_assert (15, 15, 0);
3183 NYI_assert (13, 10, 0xE);
3185 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3186 switch (INSTR (23, 22))
3190 ((input1 << 0) & (0xFF << 0))
3191 | ((input2 << 8) & (0xFF << 8))
3192 | ((input1 << 8) & (0xFF << 16))
3193 | ((input2 << 16) & (0xFF << 24))
3194 | ((input1 << 16) & (0xFFULL << 32))
3195 | ((input2 << 24) & (0xFFULL << 40))
3196 | ((input1 << 24) & (0xFFULL << 48))
3197 | ((input2 << 32) & (0xFFULL << 56));
3200 ((input1 >> 32) & (0xFF << 0))
3201 | ((input2 >> 24) & (0xFF << 8))
3202 | ((input1 >> 24) & (0xFF << 16))
3203 | ((input2 >> 16) & (0xFF << 24))
3204 | ((input1 >> 16) & (0xFFULL << 32))
3205 | ((input2 >> 8) & (0xFFULL << 40))
3206 | ((input1 >> 8) & (0xFFULL << 48))
3207 | ((input2 >> 0) & (0xFFULL << 56));
3212 ((input1 << 0) & (0xFFFF << 0))
3213 | ((input2 << 16) & (0xFFFF << 16))
3214 | ((input1 << 16) & (0xFFFFULL << 32))
3215 | ((input2 << 32) & (0xFFFFULL << 48));
3218 ((input1 >> 32) & (0xFFFF << 0))
3219 | ((input2 >> 16) & (0xFFFF << 16))
3220 | ((input1 >> 16) & (0xFFFFULL << 32))
3221 | ((input2 >> 0) & (0xFFFFULL << 48));
3225 val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
3226 val2 = (input2 & 0xFFFFFFFFULL) | (input1 << 32);
3235 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3237 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3240 /* Floating point immediates are encoded in 8 bits.
3241 fpimm[7] = sign bit.
3242 fpimm[6:4] = signed exponent.
3243 fpimm[3:0] = fraction (assuming leading 1).
3244 i.e. F = s * 1.f * 2^(e - b). */
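/* A closed-form restatement of the expansion below (a sketch under
   the same encoding, not simulator code): with s = fpimm[7],
   e = fpimm[6:4] and f = fpimm[3:0],

     value = (-1)^s * ((16 + f) / 16) * (e < 4 ? 2^(e+1) : 2^-(7-e))

   so imm8 = 0x70 (s=0, e=7, f=0) encodes 1.0 and imm8 = 0x00
   encodes 2.0.  */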
3247 fp_immediate_for_encoding_32 (uint32_t imm8)
3250 uint32_t s, e, f, i;
3252 s = (imm8 >> 7) & 0x1;
3253 e = (imm8 >> 4) & 0x7;
3256 /* The fp value is s * n/16 * 2^r where n is 16+f. */
3257 u = (16.0 + f) / 16.0;
3259 /* N.B. exponent is signed. */
3264 for (i = 0; i <= epos; i++)
3271 for (i = 0; i < eneg; i++)
3282 fp_immediate_for_encoding_64 (uint32_t imm8)
3285 uint32_t s, e, f, i;
3287 s = (imm8 >> 7) & 0x1;
3288 e = (imm8 >> 4) & 0x7;
3291 /* The fp value is s * n/16 * 2^r where n is 16+f. */
3292 u = (16.0 + f) / 16.0;
3294 /* N.B. exponent is signed. */
3299 for (i = 0; i <= epos; i++)
3306 for (i = 0; i < eneg; i++)
3317 do_vec_MOV_immediate (sim_cpu *cpu)
3320 instr[30] = full/half selector
3321 instr[29,19] = 00111100000
3322 instr[18,16] = high 3 bits of uimm8
3323 instr[15,12] = size & shift:
3325 0010 => 32-bit + LSL#8
3326 0100 => 32-bit + LSL#16
3327 0110 => 32-bit + LSL#24
3328 1010 => 16-bit + LSL#8
3330 1101 => 32-bit + MSL#16
3331 1100 => 32-bit + MSL#8
3335 instr[9,5] = low 5-bits of uimm8
3338 int full = INSTR (30, 30);
3339 unsigned vd = INSTR (4, 0);
3340 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3343 NYI_assert (29, 19, 0x1E0);
3344 NYI_assert (11, 10, 1);
3346 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3347 switch (INSTR (15, 12))
3349 case 0x0: /* 32-bit, no shift. */
3350 case 0x2: /* 32-bit, shift by 8. */
3351 case 0x4: /* 32-bit, shift by 16. */
3352 case 0x6: /* 32-bit, shift by 24. */
3353 val <<= (8 * INSTR (14, 13));
3354 for (i = 0; i < (full ? 4 : 2); i++)
3355 aarch64_set_vec_u32 (cpu, vd, i, val);
3358 case 0xa: /* 16-bit, shift by 8. */
3361 case 0x8: /* 16-bit, no shift. */
3362 for (i = 0; i < (full ? 8 : 4); i++)
3363 aarch64_set_vec_u16 (cpu, vd, i, val);
3366 case 0xd: /* 32-bit, mask shift by 16. */
3370 case 0xc: /* 32-bit, mask shift by 8. */
3373 for (i = 0; i < (full ? 4 : 2); i++)
3374 aarch64_set_vec_u32 (cpu, vd, i, val);
3377 case 0xe: /* 8-bit, no shift. */
3378 for (i = 0; i < (full ? 16 : 8); i++)
3379 aarch64_set_vec_u8 (cpu, vd, i, val);
3382 case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */
3384 float u = fp_immediate_for_encoding_32 (val);
3385 for (i = 0; i < (full ? 4 : 2); i++)
3386 aarch64_set_vec_float (cpu, vd, i, u);
3396 do_vec_MVNI (sim_cpu *cpu)
3399 instr[30] = full/half selector
3400 instr[29,19] = 10111100000
3401 instr[18,16] = high 3 bits of uimm8
3402 instr[15,12] = selector
3404 instr[9,5] = low 5-bits of uimm8
3407 int full = INSTR (30, 30);
3408 unsigned vd = INSTR (4, 0);
3409 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3412 NYI_assert (29, 19, 0x5E0);
3413 NYI_assert (11, 10, 1);
3415 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3416 switch (INSTR (15, 12))
3418 case 0x0: /* 32-bit, no shift. */
3419 case 0x2: /* 32-bit, shift by 8. */
3420 case 0x4: /* 32-bit, shift by 16. */
3421 case 0x6: /* 32-bit, shift by 24. */
3422 val <<= (8 * INSTR (14, 13));
3424 for (i = 0; i < (full ? 4 : 2); i++)
3425 aarch64_set_vec_u32 (cpu, vd, i, val);
3428 case 0xa: /* 16-bit, 8 bit shift. */
3430 case 0x8: /* 16-bit, no shift. */
3432 for (i = 0; i < (full ? 8 : 4); i++)
3433 aarch64_set_vec_u16 (cpu, vd, i, val);
3436 case 0xd: /* 32-bit, mask shift by 16. */
3439 case 0xc: /* 32-bit, mask shift by 8. */
3443 for (i = 0; i < (full ? 4 : 2); i++)
3444 aarch64_set_vec_u32 (cpu, vd, i, val);
3447 case 0xE: /* MOVI Dn, #mask64 */
3451 for (i = 0; i < 8; i++)
3452 if (val & (1 << i))
3453 mask |= (0xFFUL << (i * 8));
3454 aarch64_set_vec_u64 (cpu, vd, 0, mask);
3455 aarch64_set_vec_u64 (cpu, vd, 1, mask);
3459 case 0xf: /* FMOV Vd.2D, #fpimm. */
3461 double u = fp_immediate_for_encoding_64 (val);
3466 aarch64_set_vec_double (cpu, vd, 0, u);
3467 aarch64_set_vec_double (cpu, vd, 1, u);
3476 #define ABS(A) ((A) < 0 ? - (A) : (A))
3479 do_vec_ABS (sim_cpu *cpu)
3482 instr[30] = half(0)/full(1)
3483 instr[29,24] = 00 1110
3484 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3485 instr[21,10] = 10 0000 1011 10
3489 unsigned vn = INSTR (9, 5);
3490 unsigned vd = INSTR (4, 0);
3491 unsigned full = INSTR (30, 30);
3494 NYI_assert (29, 24, 0x0E);
3495 NYI_assert (21, 10, 0x82E);
3497 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3498 switch (INSTR (23, 22))
3501 for (i = 0; i < (full ? 16 : 8); i++)
3502 aarch64_set_vec_s8 (cpu, vd, i,
3503 ABS (aarch64_get_vec_s8 (cpu, vn, i)));
3507 for (i = 0; i < (full ? 8 : 4); i++)
3508 aarch64_set_vec_s16 (cpu, vd, i,
3509 ABS (aarch64_get_vec_s16 (cpu, vn, i)));
3513 for (i = 0; i < (full ? 4 : 2); i++)
3514 aarch64_set_vec_s32 (cpu, vd, i,
3515 ABS (aarch64_get_vec_s32 (cpu, vn, i)));
3521 for (i = 0; i < 2; i++)
3522 aarch64_set_vec_s64 (cpu, vd, i,
3523 ABS (aarch64_get_vec_s64 (cpu, vn, i)));
3529 do_vec_ADDV (sim_cpu *cpu)
3532 instr[30] = full/half selector
3533 instr[29,24] = 00 1110
3534 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3535 instr[21,10] = 11 0001 1011 10
3539 unsigned vm = INSTR (9, 5);
3540 unsigned rd = INSTR (4, 0);
3542 int full = INSTR (30, 30);
3544 NYI_assert (29, 24, 0x0E);
3545 NYI_assert (21, 10, 0xC6E);
3547 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3548 switch (INSTR (23, 22))
3553 for (i = 0; i < (full ? 16 : 8); i++)
3554 val += aarch64_get_vec_u8 (cpu, vm, i);
3555 aarch64_set_vec_u64 (cpu, rd, 0, val);
3562 for (i = 0; i < (full ? 8 : 4); i++)
3563 val += aarch64_get_vec_u16 (cpu, vm, i);
3564 aarch64_set_vec_u64 (cpu, rd, 0, val);
3573 for (i = 0; i < 4; i++)
3574 val += aarch64_get_vec_u32 (cpu, vm, i);
3575 aarch64_set_vec_u64 (cpu, rd, 0, val);
3585 do_vec_ins_2 (sim_cpu *cpu)
3587 /* instr[31,21] = 01001110000
3588 instr[20,18] = size & element selector
3590 instr[13] = direction: to vec(0), from vec (1)
3596 unsigned vm = INSTR (9, 5);
3597 unsigned vd = INSTR (4, 0);
3599 NYI_assert (31, 21, 0x270);
3600 NYI_assert (17, 14, 0);
3601 NYI_assert (12, 10, 7);
3603 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3604 if (INSTR (13, 13) == 1)
3606 if (INSTR (18, 18) == 1)
3609 elem = INSTR (20, 19);
3610 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3611 aarch64_get_vec_u32 (cpu, vm, elem));
3616 if (INSTR (19, 19) != 1)
3619 elem = INSTR (20, 20);
3620 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3621 aarch64_get_vec_u64 (cpu, vm, elem));
3626 if (INSTR (18, 18) == 1)
3629 elem = INSTR (20, 19);
3630 aarch64_set_vec_u32 (cpu, vd, elem,
3631 aarch64_get_reg_u32 (cpu, vm, NO_SP));
3636 if (INSTR (19, 19) != 1)
3639 elem = INSTR (20, 20);
3640 aarch64_set_vec_u64 (cpu, vd, elem,
3641 aarch64_get_reg_u64 (cpu, vm, NO_SP));
3646 #define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE) \
3649 DST_TYPE a[N], b[N]; \
3651 for (i = 0; i < (N); i++) \
3653 a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias); \
3654 b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias); \
3656 for (i = 0; i < (N); i++) \
3657 aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]); \
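/* The temporary arrays above are deliberate: vd may alias vn or vm,
   so all source elements are read before any result is written.  A
   sketch of the broken form this avoids (wrong when vd == vn, shown
   for the widening u8 -> u16 case):

     for (i = 0; i < N; i++)
       aarch64_set_vec_u16 (cpu, vd, i,
                            aarch64_get_vec_u8 (cpu, vn, i + bias)
                            * aarch64_get_vec_u8 (cpu, vm, i + bias));  */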
3662 do_vec_mull (sim_cpu *cpu)
3665 instr[30] = lower(0)/upper(1) selector
3666 instr[29] = signed(0)/unsigned(1)
3667 instr[28,24] = 0 1110
3668 instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
3671 instr[15,10] = 11 0000
3675 int unsign = INSTR (29, 29);
3676 int bias = INSTR (30, 30);
3677 unsigned vm = INSTR (20, 16);
3678 unsigned vn = INSTR ( 9, 5);
3679 unsigned vd = INSTR ( 4, 0);
3682 NYI_assert (28, 24, 0x0E);
3683 NYI_assert (15, 10, 0x30);
3685 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3686 /* NB: Read source values before writing results, in case
3687 the source and destination vectors are the same. */
3688 switch (INSTR (23, 22))
3694 DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16);
3696 DO_VEC_WIDENING_MUL (8, int16_t, s8, s16);
3703 DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32);
3705 DO_VEC_WIDENING_MUL (4, int32_t, s16, s32);
3712 DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64);
3714 DO_VEC_WIDENING_MUL (2, int64_t, s32, s64);
3723 do_vec_fadd (sim_cpu *cpu)
3726 instr[30] = half(0)/full(1)
3727 instr[29,24] = 001110
3728 instr[23] = FADD(0)/FSUB(1)
3729 instr[22] = float (0)/double(1)
3732 instr[15,10] = 110101
3736 unsigned vm = INSTR (20, 16);
3737 unsigned vn = INSTR (9, 5);
3738 unsigned vd = INSTR (4, 0);
3740 int full = INSTR (30, 30);
3742 NYI_assert (29, 24, 0x0E);
3743 NYI_assert (21, 21, 1);
3744 NYI_assert (15, 10, 0x35);
3746 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3754 for (i = 0; i < 2; i++)
3755 aarch64_set_vec_double (cpu, vd, i,
3756 aarch64_get_vec_double (cpu, vn, i)
3757 - aarch64_get_vec_double (cpu, vm, i));
3761 for (i = 0; i < (full ? 4 : 2); i++)
3762 aarch64_set_vec_float (cpu, vd, i,
3763 aarch64_get_vec_float (cpu, vn, i)
3764 - aarch64_get_vec_float (cpu, vm, i));
3774 for (i = 0; i < 2; i++)
3775 aarch64_set_vec_double (cpu, vd, i,
3776 aarch64_get_vec_double (cpu, vm, i)
3777 + aarch64_get_vec_double (cpu, vn, i));
3781 for (i = 0; i < (full ? 4 : 2); i++)
3782 aarch64_set_vec_float (cpu, vd, i,
3783 aarch64_get_vec_float (cpu, vm, i)
3784 + aarch64_get_vec_float (cpu, vn, i));
3790 do_vec_add (sim_cpu *cpu)
3793 instr[30] = full/half selector
3794 instr[29,24] = 001110
3795 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3798 instr[15,10] = 100001
3802 unsigned vm = INSTR (20, 16);
3803 unsigned vn = INSTR (9, 5);
3804 unsigned vd = INSTR (4, 0);
3806 int full = INSTR (30, 30);
3808 NYI_assert (29, 24, 0x0E);
3809 NYI_assert (21, 21, 1);
3810 NYI_assert (15, 10, 0x21);
3812 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3813 switch (INSTR (23, 22))
3816 for (i = 0; i < (full ? 16 : 8); i++)
3817 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
3818 + aarch64_get_vec_u8 (cpu, vm, i));
3822 for (i = 0; i < (full ? 8 : 4); i++)
3823 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
3824 + aarch64_get_vec_u16 (cpu, vm, i));
3828 for (i = 0; i < (full ? 4 : 2); i++)
3829 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
3830 + aarch64_get_vec_u32 (cpu, vm, i));
3836 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0)
3837 + aarch64_get_vec_u64 (cpu, vm, 0));
3838 aarch64_set_vec_u64 (cpu, vd, 1,
3839 aarch64_get_vec_u64 (cpu, vn, 1)
3840 + aarch64_get_vec_u64 (cpu, vm, 1));
3846 do_vec_mul (sim_cpu *cpu)
3849 instr[30] = full/half selector
3850 instr[29,24] = 00 1110
3851 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3854 instr[15,10] = 10 0111
3858 unsigned vm = INSTR (20, 16);
3859 unsigned vn = INSTR (9, 5);
3860 unsigned vd = INSTR (4, 0);
3862 int full = INSTR (30, 30);
3865 NYI_assert (29, 24, 0x0E);
3866 NYI_assert (21, 21, 1);
3867 NYI_assert (15, 10, 0x27);
3869 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3870 switch (INSTR (23, 22))
3873 DO_VEC_WIDENING_MUL (full ? 16 : 8, uint8_t, u8, u8);
3877 DO_VEC_WIDENING_MUL (full ? 8 : 4, uint16_t, u16, u16);
3881 DO_VEC_WIDENING_MUL (full ? 4 : 2, uint32_t, u32, u32);
3890 do_vec_MLA (sim_cpu *cpu)
3893 instr[30] = full/half selector
3894 instr[29,24] = 00 1110
3895 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3898 instr[15,10] = 1001 01
3902 unsigned vm = INSTR (20, 16);
3903 unsigned vn = INSTR (9, 5);
3904 unsigned vd = INSTR (4, 0);
3906 int full = INSTR (30, 30);
3908 NYI_assert (29, 24, 0x0E);
3909 NYI_assert (21, 21, 1);
3910 NYI_assert (15, 10, 0x25);
3912 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3913 switch (INSTR (23, 22))
3916 for (i = 0; i < (full ? 16 : 8); i++)
3917 aarch64_set_vec_u8 (cpu, vd, i,
3918 aarch64_get_vec_u8 (cpu, vd, i)
3919 + (aarch64_get_vec_u8 (cpu, vn, i)
3920 * aarch64_get_vec_u8 (cpu, vm, i)));
3924 for (i = 0; i < (full ? 8 : 4); i++)
3925 aarch64_set_vec_u16 (cpu, vd, i,
3926 aarch64_get_vec_u16 (cpu, vd, i)
3927 + (aarch64_get_vec_u16 (cpu, vn, i)
3928 * aarch64_get_vec_u16 (cpu, vm, i)));
3932 for (i = 0; i < (full ? 4 : 2); i++)
3933 aarch64_set_vec_u32 (cpu, vd, i,
3934 aarch64_get_vec_u32 (cpu, vd, i)
3935 + (aarch64_get_vec_u32 (cpu, vn, i)
3936 * aarch64_get_vec_u32 (cpu, vm, i)));
3945 fmaxnm (float a, float b)
3950 return a > b ? a : b;
3953 else if (! isnan (b))
3959 fminnm (float a, float b)
3964 return a < b ? a : b;
3967 else if (! isnan (b))
3973 dmaxnm (double a, double b)
3978 return a > b ? a : b;
3981 else if (! isnan (b))
3987 dminnm (double a, double b)
3992 return a < b ? a : b;
3995 else if (! isnan (b))
4001 do_vec_FminmaxNMP (sim_cpu *cpu)
4004 instr [30] = half (0)/full (1)
4005 instr [29,24] = 10 1110
4006 instr [23] = max(0)/min(1)
4007 instr [22] = float (0)/double (1)
4010 instr [15,10] = 1100 01
4012 instr [4,0] = Vd. */
4014 unsigned vm = INSTR (20, 16);
4015 unsigned vn = INSTR (9, 5);
4016 unsigned vd = INSTR (4, 0);
4017 int full = INSTR (30, 30);
4019 NYI_assert (29, 24, 0x2E);
4020 NYI_assert (21, 21, 1);
4021 NYI_assert (15, 10, 0x31);
4023 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4026 double (* fn)(double, double) = INSTR (23, 23)
4031 aarch64_set_vec_double (cpu, vd, 0,
4032 fn (aarch64_get_vec_double (cpu, vn, 0),
4033 aarch64_get_vec_double (cpu, vn, 1)));
4034 aarch64_set_vec_double (cpu, vd, 1,
4035 fn (aarch64_get_vec_double (cpu, vm, 0),
4036 aarch64_get_vec_double (cpu, vm, 1)));
4040 float (* fn)(float, float) = INSTR (23, 23)
4043 aarch64_set_vec_float (cpu, vd, 0,
4044 fn (aarch64_get_vec_float (cpu, vn, 0),
4045 aarch64_get_vec_float (cpu, vn, 1)));
4047 aarch64_set_vec_float (cpu, vd, 1,
4048 fn (aarch64_get_vec_float (cpu, vn, 2),
4049 aarch64_get_vec_float (cpu, vn, 3)));
4051 aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
4052 fn (aarch64_get_vec_float (cpu, vm, 0),
4053 aarch64_get_vec_float (cpu, vm, 1)));
4055 aarch64_set_vec_float (cpu, vd, 3,
4056 fn (aarch64_get_vec_float (cpu, vm, 2),
4057 aarch64_get_vec_float (cpu, vm, 3)));
4062 do_vec_AND (sim_cpu *cpu)
4065 instr[30] = half (0)/full (1)
4066 instr[29,21] = 001110001
4068 instr[15,10] = 000111
4072 unsigned vm = INSTR (20, 16);
4073 unsigned vn = INSTR (9, 5);
4074 unsigned vd = INSTR (4, 0);
4076 int full = INSTR (30, 30);
4078 NYI_assert (29, 21, 0x071);
4079 NYI_assert (15, 10, 0x07);
4081 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4082 for (i = 0; i < (full ? 4 : 2); i++)
4083 aarch64_set_vec_u32 (cpu, vd, i,
4084 aarch64_get_vec_u32 (cpu, vn, i)
4085 & aarch64_get_vec_u32 (cpu, vm, i));
4089 do_vec_BSL (sim_cpu *cpu)
4092 instr[30] = half (0)/full (1)
4093 instr[29,21] = 101110011
4095 instr[15,10] = 000111
4099 unsigned vm = INSTR (20, 16);
4100 unsigned vn = INSTR (9, 5);
4101 unsigned vd = INSTR (4, 0);
4103 int full = INSTR (30, 30);
4105 NYI_assert (29, 21, 0x173);
4106 NYI_assert (15, 10, 0x07);
4108 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4109 for (i = 0; i < (full ? 16 : 8); i++)
4110 aarch64_set_vec_u8 (cpu, vd, i,
4111 ( aarch64_get_vec_u8 (cpu, vd, i)
4112 & aarch64_get_vec_u8 (cpu, vn, i))
4113 | ((~ aarch64_get_vec_u8 (cpu, vd, i))
4114 & aarch64_get_vec_u8 (cpu, vm, i)));
4118 do_vec_EOR (sim_cpu *cpu)
4121 instr[30] = half (0)/full (1)
4122 instr[29,21] = 10 1110 001
4124 instr[15,10] = 000111
4128 unsigned vm = INSTR (20, 16);
4129 unsigned vn = INSTR (9, 5);
4130 unsigned vd = INSTR (4, 0);
4132 int full = INSTR (30, 30);
4134 NYI_assert (29, 21, 0x171);
4135 NYI_assert (15, 10, 0x07);
4137 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4138 for (i = 0; i < (full ? 4 : 2); i++)
4139 aarch64_set_vec_u32 (cpu, vd, i,
4140 aarch64_get_vec_u32 (cpu, vn, i)
4141 ^ aarch64_get_vec_u32 (cpu, vm, i));
4145 do_vec_bit (sim_cpu *cpu)
4148 instr[30] = half (0)/full (1)
4149 instr[29,23] = 10 1110 1
4150 instr[22] = BIT (0) / BIF (1)
4153 instr[15,10] = 0001 11
4157 unsigned vm = INSTR (20, 16);
4158 unsigned vn = INSTR (9, 5);
4159 unsigned vd = INSTR (4, 0);
4160 unsigned full = INSTR (30, 30);
4161 unsigned test_false = INSTR (22, 22);
4164 NYI_assert (29, 23, 0x5D);
4165 NYI_assert (21, 21, 1);
4166 NYI_assert (15, 10, 0x07);
4168 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4169 for (i = 0; i < (full ? 4 : 2); i++)
4171 uint32_t vd_val = aarch64_get_vec_u32 (cpu, vd, i);
4172 uint32_t vn_val = aarch64_get_vec_u32 (cpu, vn, i);
4173 uint32_t vm_val = aarch64_get_vec_u32 (cpu, vm, i);
4175 aarch64_set_vec_u32 (cpu, vd, i,
4176 (vd_val & vm_val) | (vn_val & ~vm_val));
4178 aarch64_set_vec_u32 (cpu, vd, i,
4179 (vd_val & ~vm_val) | (vn_val & vm_val));
4184 do_vec_ORN (sim_cpu *cpu)
4187 instr[30] = half (0)/full (1)
4188 instr[29,21] = 00 1110 111
4190 instr[15,10] = 00 0111
4194 unsigned vm = INSTR (20, 16);
4195 unsigned vn = INSTR (9, 5);
4196 unsigned vd = INSTR (4, 0);
4198 int full = INSTR (30, 30);
4200 NYI_assert (29, 21, 0x077);
4201 NYI_assert (15, 10, 0x07);
4203 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4204 for (i = 0; i < (full ? 16 : 8); i++)
4205 aarch64_set_vec_u8 (cpu, vd, i,
4206 aarch64_get_vec_u8 (cpu, vn, i)
4207 | ~ aarch64_get_vec_u8 (cpu, vm, i));
4211 do_vec_ORR (sim_cpu *cpu)
4214 instr[30] = half (0)/full (1)
4215 instr[29,21] = 00 1110 101
4217 instr[15,10] = 0001 11
4221 unsigned vm = INSTR (20, 16);
4222 unsigned vn = INSTR (9, 5);
4223 unsigned vd = INSTR (4, 0);
4225 int full = INSTR (30, 30);
4227 NYI_assert (29, 21, 0x075);
4228 NYI_assert (15, 10, 0x07);
4230 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4231 for (i = 0; i < (full ? 16 : 8); i++)
4232 aarch64_set_vec_u8 (cpu, vd, i,
4233 aarch64_get_vec_u8 (cpu, vn, i)
4234 | aarch64_get_vec_u8 (cpu, vm, i));
4238 do_vec_BIC (sim_cpu *cpu)
4241 instr[30] = half (0)/full (1)
4242 instr[29,21] = 00 1110 011
4244 instr[15,10] = 00 0111
4248 unsigned vm = INSTR (20, 16);
4249 unsigned vn = INSTR (9, 5);
4250 unsigned vd = INSTR (4, 0);
4252 int full = INSTR (30, 30);
4254 NYI_assert (29, 21, 0x073);
4255 NYI_assert (15, 10, 0x07);
4257 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4258 for (i = 0; i < (full ? 16 : 8); i++)
4259 aarch64_set_vec_u8 (cpu, vd, i,
4260 aarch64_get_vec_u8 (cpu, vn, i)
4261 & ~ aarch64_get_vec_u8 (cpu, vm, i));
4265 do_vec_XTN (sim_cpu *cpu)
4268 instr[30] = first part (0)/ second part (1)
4269 instr[29,24] = 00 1110
4270 instr[23,22] = size: byte(00), half(01), word (10)
4271 instr[21,10] = 1000 0100 1010
4275 unsigned vs = INSTR (9, 5);
4276 unsigned vd = INSTR (4, 0);
4277 unsigned bias = INSTR (30, 30);
4280 NYI_assert (29, 24, 0x0E);
4281 NYI_assert (21, 10, 0x84A);
4283 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4284 switch (INSTR (23, 22))
4287 for (i = 0; i < 8; i++)
4288 aarch64_set_vec_u8 (cpu, vd, i + (bias * 8),
4289 aarch64_get_vec_u16 (cpu, vs, i));
4293 for (i = 0; i < 4; i++)
4294 aarch64_set_vec_u16 (cpu, vd, i + (bias * 4),
4295 aarch64_get_vec_u32 (cpu, vs, i));
4299 for (i = 0; i < 2; i++)
4300 aarch64_set_vec_u32 (cpu, vd, i + (bias * 2),
4301 aarch64_get_vec_u64 (cpu, vs, i));
4306 /* Return the number of bits set in the input value. */
4307 #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
4308 # define popcount __builtin_popcount
4311 popcount (unsigned char x)
4313 static const unsigned char popcnt[16] =
4321 /* Only counts the low 8 bits of the input as that is all we need. */
4322 return popcnt[x % 16] + popcnt[x / 16];
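/* Worked example: popcount (0xA5) == popcnt[0x5] + popcnt[0xA]
   == 2 + 2 == 4, matching __builtin_popcount on the same value.  */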
4327 do_vec_CNT (sim_cpu *cpu)
4330 instr[30] = half (0)/ full (1)
4331 instr[29,24] = 00 1110
4332 instr[23,22] = size: byte(00)
4333 instr[21,10] = 1000 0001 0110
4337 unsigned vs = INSTR (9, 5);
4338 unsigned vd = INSTR (4, 0);
4339 int full = INSTR (30, 30);
4340 int size = INSTR (23, 22);
4343 NYI_assert (29, 24, 0x0E);
4344 NYI_assert (21, 10, 0x816);
4349 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4351 for (i = 0; i < (full ? 16 : 8); i++)
4352 aarch64_set_vec_u8 (cpu, vd, i,
4353 popcount (aarch64_get_vec_u8 (cpu, vs, i)));
4357 do_vec_maxv (sim_cpu *cpu)
4360 instr[30] = half(0)/full(1)
4361 instr[29] = signed (0)/unsigned(1)
4362 instr[28,24] = 0 1110
4363 instr[23,22] = size: byte(00), half(01), word (10)
4365 instr[20,17] = 1 000
4366 instr[16] = max(0)/min(1)
4367 instr[15,10] = 1010 10
4368 instr[9,5] = V source
4369 instr[4,0] = R dest. */
4371 unsigned vs = INSTR (9, 5);
4372 unsigned rd = INSTR (4, 0);
4373 unsigned full = INSTR (30, 30);
4376 NYI_assert (28, 24, 0x0E);
4377 NYI_assert (21, 21, 1);
4378 NYI_assert (20, 17, 8);
4379 NYI_assert (15, 10, 0x2A);
4381 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4382 switch ((INSTR (29, 29) << 1) | INSTR (16, 16))
4384 case 0: /* SMAXV. */
4387 switch (INSTR (23, 22))
4390 smax = aarch64_get_vec_s8 (cpu, vs, 0);
4391 for (i = 1; i < (full ? 16 : 8); i++)
4392 smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i));
4395 smax = aarch64_get_vec_s16 (cpu, vs, 0);
4396 for (i = 1; i < (full ? 8 : 4); i++)
4397 smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i));
4400 smax = aarch64_get_vec_s32 (cpu, vs, 0);
4401 for (i = 1; i < (full ? 4 : 2); i++)
4402 smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i));
4407 aarch64_set_reg_s64 (cpu, rd, NO_SP, smax);
4411 case 1: /* SMINV. */
4414 switch (INSTR (23, 22))
4417 smin = aarch64_get_vec_s8 (cpu, vs, 0);
4418 for (i = 1; i < (full ? 16 : 8); i++)
4419 smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i));
4422 smin = aarch64_get_vec_s16 (cpu, vs, 0);
4423 for (i = 1; i < (full ? 8 : 4); i++)
4424 smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i));
4427 smin = aarch64_get_vec_s32 (cpu, vs, 0);
4428 for (i = 1; i < (full ? 4 : 2); i++)
4429 smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i));
4435 aarch64_set_reg_s64 (cpu, rd, NO_SP, smin);
4439 case 2: /* UMAXV. */
4442 switch (INSTR (23, 22))
4445 umax = aarch64_get_vec_u8 (cpu, vs, 0);
4446 for (i = 1; i < (full ? 16 : 8); i++)
4447 umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i));
4450 umax = aarch64_get_vec_u16 (cpu, vs, 0);
4451 for (i = 1; i < (full ? 8 : 4); i++)
4452 umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i));
4455 umax = aarch64_get_vec_u32 (cpu, vs, 0);
4456 for (i = 1; i < (full ? 4 : 2); i++)
4457 umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i));
4463 aarch64_set_reg_u64 (cpu, rd, NO_SP, umax);
4467 case 3: /* UMINV. */
4470 switch (INSTR (23, 22))
4473 umin = aarch64_get_vec_u8 (cpu, vs, 0);
4474 for (i = 1; i < (full ? 16 : 8); i++)
4475 umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i));
4478 umin = aarch64_get_vec_u16 (cpu, vs, 0);
4479 for (i = 1; i < (full ? 8 : 4); i++)
4480 umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i));
4483 umin = aarch64_get_vec_u32 (cpu, vs, 0);
4484 for (i = 1; i < (full ? 4 : 2); i++)
4485 umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i));
4491 aarch64_set_reg_u64 (cpu, rd, NO_SP, umin);
4498 do_vec_fminmaxV (sim_cpu *cpu)
4500 /* instr[31,24] = 0110 1110
4501 instr[23] = max(0)/min(1)
4502 instr[22,14] = 011 0000 11
4503 instr[13,12] = nm(00)/normal(11)
4505 instr[9,5] = V source
4506 instr[4,0] = R dest. */
4508 unsigned vs = INSTR (9, 5);
4509 unsigned rd = INSTR (4, 0);
4511 float res = aarch64_get_vec_float (cpu, vs, 0);
4513 NYI_assert (31, 24, 0x6E);
4514 NYI_assert (22, 14, 0x0C3);
4515 NYI_assert (11, 10, 2);
4517 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4520 switch (INSTR (13, 12))
4522 case 0: /* FMINNMV. */
4523 for (i = 1; i < 4; i++)
4524 res = fminnm (res, aarch64_get_vec_float (cpu, vs, i));
4527 case 3: /* FMINV. */
4528 for (i = 1; i < 4; i++)
4529 res = min (res, aarch64_get_vec_float (cpu, vs, i));
4538 switch (INSTR (13, 12))
4540 case 0: /* FMAXNMV. */
4541 for (i = 1; i < 4; i++)
4542 res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
4545 case 3: /* FMAXV. */
4546 for (i = 1; i < 4; i++)
4547 res = max (res, aarch64_get_vec_float (cpu, vs, i));
4555 aarch64_set_FP_float (cpu, rd, res);
4559 do_vec_Fminmax (sim_cpu *cpu)
4562 instr[30] = half(0)/full(1)
4563 instr[29,24] = 00 1110
4564 instr[23] = max(0)/min(1)
4565 instr[22] = float(0)/double(1)
4569 instr[13,12] = nm(00)/normal(11)
4574 unsigned vm = INSTR (20, 16);
4575 unsigned vn = INSTR (9, 5);
4576 unsigned vd = INSTR (4, 0);
4577 unsigned full = INSTR (30, 30);
4578 unsigned min = INSTR (23, 23);
4581 NYI_assert (29, 24, 0x0E);
4582 NYI_assert (21, 21, 1);
4583 NYI_assert (15, 14, 3);
4584 NYI_assert (11, 10, 1);
4586 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4589 double (* func)(double, double);
4594 if (INSTR (13, 12) == 0)
4595 func = min ? dminnm : dmaxnm;
4596 else if (INSTR (13, 12) == 3)
4597 func = min ? fmin : fmax;
4601 for (i = 0; i < 2; i++)
4602 aarch64_set_vec_double (cpu, vd, i,
4603 func (aarch64_get_vec_double (cpu, vn, i),
4604 aarch64_get_vec_double (cpu, vm, i)));
4608 float (* func)(float, float);
4610 if (INSTR (13, 12) == 0)
4611 func = min ? fminnm : fmaxnm;
4612 else if (INSTR (13, 12) == 3)
4613 func = min ? fminf : fmaxf;
4617 for (i = 0; i < (full ? 4 : 2); i++)
4618 aarch64_set_vec_float (cpu, vd, i,
4619 func (aarch64_get_vec_float (cpu, vn, i),
4620 aarch64_get_vec_float (cpu, vm, i)));
4625 do_vec_SCVTF (sim_cpu *cpu)
4629 instr[29,23] = 00 1110 0
4630 instr[22] = float(0)/double(1)
4631 instr[21,10] = 10 0001 1101 10
4635 unsigned vn = INSTR (9, 5);
4636 unsigned vd = INSTR (4, 0);
4637 unsigned full = INSTR (30, 30);
4638 unsigned size = INSTR (22, 22);
4641 NYI_assert (29, 23, 0x1C);
4642 NYI_assert (21, 10, 0x876);
4644 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4650 for (i = 0; i < 2; i++)
4652 double val = (double) aarch64_get_vec_u64 (cpu, vn, i);
4653 aarch64_set_vec_double (cpu, vd, i, val);
4658 for (i = 0; i < (full ? 4 : 2); i++)
4660 float val = (float) aarch64_get_vec_u32 (cpu, vn, i);
4661 aarch64_set_vec_float (cpu, vd, i, val);
4666 #define VEC_CMP(SOURCE, CMP) \
4672 for (i = 0; i < (full ? 16 : 8); i++) \
4673 aarch64_set_vec_u8 (cpu, vd, i, \
4674 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4676 aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \
4680 for (i = 0; i < (full ? 8 : 4); i++) \
4681 aarch64_set_vec_u16 (cpu, vd, i, \
4682 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4684 aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \
4688 for (i = 0; i < (full ? 4 : 2); i++) \
4689 aarch64_set_vec_u32 (cpu, vd, i, \
4690 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4692 aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \
4698 for (i = 0; i < 2; i++) \
4699 aarch64_set_vec_u64 (cpu, vd, i, \
4700 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4702 aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \
4709 #define VEC_CMP0(SOURCE, CMP) \
4715 for (i = 0; i < (full ? 16 : 8); i++) \
4716 aarch64_set_vec_u8 (cpu, vd, i, \
4717 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4721 for (i = 0; i < (full ? 8 : 4); i++) \
4722 aarch64_set_vec_u16 (cpu, vd, i, \
4723 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4727 for (i = 0; i < (full ? 4 : 2); i++) \
4728 aarch64_set_vec_u32 (cpu, vd, i, \
4729 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4735 for (i = 0; i < 2; i++) \
4736 aarch64_set_vec_u64 (cpu, vd, i, \
4737 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4738 CMP 0 ? -1ULL : 0); \
4744 #define VEC_FCMP0(CMP) \
4749 if (INSTR (22, 22)) \
4753 for (i = 0; i < 2; i++) \
4754 aarch64_set_vec_u64 (cpu, vd, i, \
4755 aarch64_get_vec_double (cpu, vn, i) \
4756 CMP 0.0 ? -1 : 0); \
4760 for (i = 0; i < (full ? 4 : 2); i++) \
4761 aarch64_set_vec_u32 (cpu, vd, i, \
4762 aarch64_get_vec_float (cpu, vn, i) \
4763 CMP 0.0 ? -1 : 0); \
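/* All of these compare forms produce element-wide masks rather than
   booleans: a lane becomes all ones (-1) when the comparison holds
   and all zeros otherwise.  E.g. a 32 bit lane comparing 2.0f > 0.0f
   is written as 0xFFFFFFFF.  */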
4769 #define VEC_FCMP(CMP) \
4772 if (INSTR (22, 22)) \
4776 for (i = 0; i < 2; i++) \
4777 aarch64_set_vec_u64 (cpu, vd, i, \
4778 aarch64_get_vec_double (cpu, vn, i) \
4780 aarch64_get_vec_double (cpu, vm, i) \
4785 for (i = 0; i < (full ? 4 : 2); i++) \
4786 aarch64_set_vec_u32 (cpu, vd, i, \
4787 aarch64_get_vec_float (cpu, vn, i) \
4789 aarch64_get_vec_float (cpu, vm, i) \
4797 do_vec_compare (sim_cpu *cpu)
4800 instr[30] = half(0)/full(1)
4801 instr[29] = part-of-comparison-type
4802 instr[28,24] = 0 1110
4803 instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
4804 type of float compares: single (-0) / double (-1)
4806 instr[20,16] = Vm or 00000 (compare vs 0)
4807 instr[15,10] = part-of-comparison-type
4811 int full = INSTR (30, 30);
4812 int size = INSTR (23, 22);
4813 unsigned vm = INSTR (20, 16);
4814 unsigned vn = INSTR (9, 5);
4815 unsigned vd = INSTR (4, 0);
4818 NYI_assert (28, 24, 0x0E);
4819 NYI_assert (21, 21, 1);
4821 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4824 || ((INSTR (11, 11) == 0
4825 && INSTR (10, 10) == 0)))
4827 /* A compare vs 0. */
4830 if (INSTR (15, 10) == 0x2A)
4832 else if (INSTR (15, 10) == 0x32
4833 || INSTR (15, 10) == 0x3E)
4834 do_vec_fminmaxV (cpu);
4835 else if (INSTR (29, 23) == 0x1C
4836 && INSTR (21, 10) == 0x876)
4846 /* A floating point compare. */
4847 unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4)
4850 NYI_assert (15, 15, 1);
4854 case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
4855 case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
4856 case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
4857 case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
4858 case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
4859 case /* 0b111001: GT */ 0x39: VEC_FCMP (>);
4860 case /* 0b101001: GE */ 0x29: VEC_FCMP (>=);
4861 case /* 0b001001: EQ */ 0x09: VEC_FCMP (==);
4869 unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10);
4873 case 0x0D: /* 0001101 GT */ VEC_CMP (s, > );
4874 case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= );
4875 case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > );
4876 case 0x23: /* 0100011 TST */ VEC_CMP (u, & );
4877 case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == );
4878 case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < );
4879 case 0x4D: /* 1001101 HI */ VEC_CMP (u, > );
4880 case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= );
4881 case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= );
4882 case 0x63: /* 1100011 EQ */ VEC_CMP (u, == );
4883 case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= );
4893 do_vec_SSHL (sim_cpu *cpu)
4896 instr[30] = half (0)/full (1)
4897 instr[29,24] = 00 1110
4898 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4901 instr[15,10] = 0100 01
4905 unsigned full = INSTR (30, 30);
4906 unsigned vm = INSTR (20, 16);
4907 unsigned vn = INSTR (9, 5);
4908 unsigned vd = INSTR (4, 0);
4912 NYI_assert (29, 24, 0x0E);
4913 NYI_assert (21, 21, 1);
4914 NYI_assert (15, 10, 0x11);
4916 /* N.B. a negative shift count in a Vm element selects a signed right shift, hence the signed reads of Vm below. */
4918 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4919 switch (INSTR (23, 22))
4922 for (i = 0; i < (full ? 16 : 8); i++)
4924 shift = aarch64_get_vec_s8 (cpu, vm, i);
4926 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4929 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4935 for (i = 0; i < (full ? 8 : 4); i++)
4937 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4939 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4942 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4948 for (i = 0; i < (full ? 4 : 2); i++)
4950 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4952 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4955 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4963 for (i = 0; i < 2; i++)
4965 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4967 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4970 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4978 do_vec_USHL (sim_cpu *cpu)
4981 instr[30] = half (0)/full (1)
4982 instr[29,24] = 10 1110
4983 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4986 instr[15,10] = 0100 01
4990 unsigned full = INSTR (30, 30);
4991 unsigned vm = INSTR (20, 16);
4992 unsigned vn = INSTR (9, 5);
4993 unsigned vd = INSTR (4, 0);
4997 NYI_assert (29, 24, 0x2E);
4998 NYI_assert (15, 10, 0x11);
5000 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5001 switch (INSTR (23, 22))
5004 for (i = 0; i < (full ? 16 : 8); i++)
5006 shift = aarch64_get_vec_s8 (cpu, vm, i);
5008 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
5011 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
5017 for (i = 0; i < (full ? 8 : 4); i++)
5019 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
5021 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
5024 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
5030 for (i = 0; i < (full ? 4 : 2); i++)
5032 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
5034 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
5037 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
5045 for (i = 0; i < 2; i++)
5047 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
5049 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
5052 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
5060 do_vec_FMLA (sim_cpu *cpu)
5063 instr[30] = full/half selector
5064 instr[29,23] = 0011100
5065 instr[22] = size: 0=>float, 1=>double
5068 instr[15,10] = 1100 11
5072 unsigned vm = INSTR (20, 16);
5073 unsigned vn = INSTR (9, 5);
5074 unsigned vd = INSTR (4, 0);
5076 int full = INSTR (30, 30);
5078 NYI_assert (29, 23, 0x1C);
5079 NYI_assert (21, 21, 1);
5080 NYI_assert (15, 10, 0x33);
5082 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5087 for (i = 0; i < 2; i++)
5088 aarch64_set_vec_double (cpu, vd, i,
5089 aarch64_get_vec_double (cpu, vn, i) *
5090 aarch64_get_vec_double (cpu, vm, i) +
5091 aarch64_get_vec_double (cpu, vd, i));
5095 for (i = 0; i < (full ? 4 : 2); i++)
5096 aarch64_set_vec_float (cpu, vd, i,
5097 aarch64_get_vec_float (cpu, vn, i) *
5098 aarch64_get_vec_float (cpu, vm, i) +
5099 aarch64_get_vec_float (cpu, vd, i));
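/* Note: the loops above round twice, once after the multiply and once
   after the add.  A hardware FMLA fuses the two with a single rounding,
   so a closer emulation (a sketch, assuming C99 fmaf from <math.h>)
   would be:

     aarch64_set_vec_float (cpu, vd, i,
			    fmaf (aarch64_get_vec_float (cpu, vn, i),
				  aarch64_get_vec_float (cpu, vm, i),
				  aarch64_get_vec_float (cpu, vd, i)));  */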
5104 do_vec_max (sim_cpu *cpu)
5107 instr[30] = full/half selector
5108 instr[29] = SMAX (0) / UMAX (1)
5109 instr[28,24] = 0 1110
5110 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5113 instr[15,10] = 0110 01
5117 unsigned vm = INSTR (20, 16);
5118 unsigned vn = INSTR (9, 5);
5119 unsigned vd = INSTR (4, 0);
5121 int full = INSTR (30, 30);
5123 NYI_assert (28, 24, 0x0E);
5124 NYI_assert (21, 21, 1);
5125 NYI_assert (15, 10, 0x19);
5127 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5130 switch (INSTR (23, 22))
5133 for (i = 0; i < (full ? 16 : 8); i++)
5134 aarch64_set_vec_u8 (cpu, vd, i,
5135 aarch64_get_vec_u8 (cpu, vn, i)
5136 > aarch64_get_vec_u8 (cpu, vm, i)
5137 ? aarch64_get_vec_u8 (cpu, vn, i)
5138 : aarch64_get_vec_u8 (cpu, vm, i));
5142 for (i = 0; i < (full ? 8 : 4); i++)
5143 aarch64_set_vec_u16 (cpu, vd, i,
5144 aarch64_get_vec_u16 (cpu, vn, i)
5145 > aarch64_get_vec_u16 (cpu, vm, i)
5146 ? aarch64_get_vec_u16 (cpu, vn, i)
5147 : aarch64_get_vec_u16 (cpu, vm, i));
5151 for (i = 0; i < (full ? 4 : 2); i++)
5152 aarch64_set_vec_u32 (cpu, vd, i,
5153 aarch64_get_vec_u32 (cpu, vn, i)
5154 > aarch64_get_vec_u32 (cpu, vm, i)
5155 ? aarch64_get_vec_u32 (cpu, vn, i)
5156 : aarch64_get_vec_u32 (cpu, vm, i));
5165 switch (INSTR (23, 22))
5168 for (i = 0; i < (full ? 16 : 8); i++)
5169 aarch64_set_vec_s8 (cpu, vd, i,
5170 aarch64_get_vec_s8 (cpu, vn, i)
5171 > aarch64_get_vec_s8 (cpu, vm, i)
5172 ? aarch64_get_vec_s8 (cpu, vn, i)
5173 : aarch64_get_vec_s8 (cpu, vm, i));
5177 for (i = 0; i < (full ? 8 : 4); i++)
5178 aarch64_set_vec_s16 (cpu, vd, i,
5179 aarch64_get_vec_s16 (cpu, vn, i)
5180 > aarch64_get_vec_s16 (cpu, vm, i)
5181 ? aarch64_get_vec_s16 (cpu, vn, i)
5182 : aarch64_get_vec_s16 (cpu, vm, i));
5186 for (i = 0; i < (full ? 4 : 2); i++)
5187 aarch64_set_vec_s32 (cpu, vd, i,
5188 aarch64_get_vec_s32 (cpu, vn, i)
5189 > aarch64_get_vec_s32 (cpu, vm, i)
5190 ? aarch64_get_vec_s32 (cpu, vn, i)
5191 : aarch64_get_vec_s32 (cpu, vm, i));
5201 do_vec_min (sim_cpu *cpu)
5204 instr[30] = full/half selector
5205 instr[29] = SMIN (0) / UMIN (1)
5206 instr[28,24] = 0 1110
5207 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5210 instr[15,10] = 0110 11
5214 unsigned vm = INSTR (20, 16);
5215 unsigned vn = INSTR (9, 5);
5216 unsigned vd = INSTR (4, 0);
5218 int full = INSTR (30, 30);
5220 NYI_assert (28, 24, 0x0E);
5221 NYI_assert (21, 21, 1);
5222 NYI_assert (15, 10, 0x1B);
5224 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5227 switch (INSTR (23, 22))
5230 for (i = 0; i < (full ? 16 : 8); i++)
5231 aarch64_set_vec_u8 (cpu, vd, i,
5232 aarch64_get_vec_u8 (cpu, vn, i)
5233 < aarch64_get_vec_u8 (cpu, vm, i)
5234 ? aarch64_get_vec_u8 (cpu, vn, i)
5235 : aarch64_get_vec_u8 (cpu, vm, i));
5239 for (i = 0; i < (full ? 8 : 4); i++)
5240 aarch64_set_vec_u16 (cpu, vd, i,
5241 aarch64_get_vec_u16 (cpu, vn, i)
5242 < aarch64_get_vec_u16 (cpu, vm, i)
5243 ? aarch64_get_vec_u16 (cpu, vn, i)
5244 : aarch64_get_vec_u16 (cpu, vm, i));
5248 for (i = 0; i < (full ? 4 : 2); i++)
5249 aarch64_set_vec_u32 (cpu, vd, i,
5250 aarch64_get_vec_u32 (cpu, vn, i)
5251 < aarch64_get_vec_u32 (cpu, vm, i)
5252 ? aarch64_get_vec_u32 (cpu, vn, i)
5253 : aarch64_get_vec_u32 (cpu, vm, i));
5262 switch (INSTR (23, 22))
5265 for (i = 0; i < (full ? 16 : 8); i++)
5266 aarch64_set_vec_s8 (cpu, vd, i,
5267 aarch64_get_vec_s8 (cpu, vn, i)
5268 < aarch64_get_vec_s8 (cpu, vm, i)
5269 ? aarch64_get_vec_s8 (cpu, vn, i)
5270 : aarch64_get_vec_s8 (cpu, vm, i));
5274 for (i = 0; i < (full ? 8 : 4); i++)
5275 aarch64_set_vec_s16 (cpu, vd, i,
5276 aarch64_get_vec_s16 (cpu, vn, i)
5277 < aarch64_get_vec_s16 (cpu, vm, i)
5278 ? aarch64_get_vec_s16 (cpu, vn, i)
5279 : aarch64_get_vec_s16 (cpu, vm, i));
5283 for (i = 0; i < (full ? 4 : 2); i++)
5284 aarch64_set_vec_s32 (cpu, vd, i,
5285 aarch64_get_vec_s32 (cpu, vn, i)
5286 < aarch64_get_vec_s32 (cpu, vm, i)
5287 ? aarch64_get_vec_s32 (cpu, vn, i)
5288 : aarch64_get_vec_s32 (cpu, vm, i));
5298 do_vec_sub_long (sim_cpu *cpu)
5301 instr[30] = lower (0) / upper (1)
5302 instr[29] = signed (0) / unsigned (1)
5303 instr[28,24] = 0 1110
5304 instr[23,22] = size: bytes (00), half (01), word (10)
5307 instr[15,10] = 0010 00
5309 instr[4,0] = V dest. */
5311 unsigned size = INSTR (23, 22);
5312 unsigned vm = INSTR (20, 16);
5313 unsigned vn = INSTR (9, 5);
5314 unsigned vd = INSTR (4, 0);
5318 NYI_assert (28, 24, 0x0E);
5319 NYI_assert (21, 21, 1);
5320 NYI_assert (15, 10, 0x08);
5325 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5326 switch (INSTR (30, 29))
5328 case 2: /* SSUBL2. */
5330 case 0: /* SSUBL. */
5335 for (i = 0; i < 8; i++)
5336 aarch64_set_vec_s16 (cpu, vd, i,
5337 aarch64_get_vec_s8 (cpu, vn, i + bias)
5338 - aarch64_get_vec_s8 (cpu, vm, i + bias));
5343 for (i = 0; i < 4; i++)
5344 aarch64_set_vec_s32 (cpu, vd, i,
5345 aarch64_get_vec_s16 (cpu, vn, i + bias)
5346 - aarch64_get_vec_s16 (cpu, vm, i + bias));
5350 for (i = 0; i < 2; i++)
5351 aarch64_set_vec_s64 (cpu, vd, i,
5352 aarch64_get_vec_s32 (cpu, vn, i + bias)
5353 - aarch64_get_vec_s32 (cpu, vm, i + bias));
5361 case 3: /* USUBL2. */
5363 case 1: /* USUBL. */
5368 for (i = 0; i < 8; i++)
5369 aarch64_set_vec_u16 (cpu, vd, i,
5370 aarch64_get_vec_u8 (cpu, vn, i + bias)
5371 - aarch64_get_vec_u8 (cpu, vm, i + bias));
5376 for (i = 0; i < 4; i++)
5377 aarch64_set_vec_u32 (cpu, vd, i,
5378 aarch64_get_vec_u16 (cpu, vn, i + bias)
5379 - aarch64_get_vec_u16 (cpu, vm, i + bias));
5383 for (i = 0; i < 2; i++)
5384 aarch64_set_vec_u64 (cpu, vd, i,
5385 aarch64_get_vec_u32 (cpu, vn, i + bias)
5386 - aarch64_get_vec_u32 (cpu, vm, i + bias));
5397 do_vec_ADDP (sim_cpu *cpu)
5400 instr[30] = half(0)/full(1)
5401 instr[29,24] = 00 1110
5402 instr[23,22] = size: bytes (00), half (01), word (10), long (11)
5405 instr[15,10] = 1011 11
5407 instr[4,0] = V dest. */
5411 unsigned full = INSTR (30, 30);
5412 unsigned size = INSTR (23, 22);
5413 unsigned vm = INSTR (20, 16);
5414 unsigned vn = INSTR (9, 5);
5415 unsigned vd = INSTR (4, 0);
5418 NYI_assert (29, 24, 0x0E);
5419 NYI_assert (21, 21, 1);
5420 NYI_assert (15, 10, 0x2F);
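  /* ADDP adds adjacent element pairs: the low half of Vd receives the
     pairwise sums from Vn and the high half those from Vm.  */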
5422 /* Make copies of the source registers in case vd == vn/vm. */
5423 copy_vn = cpu->fr[vn];
5424 copy_vm = cpu->fr[vm];
5426 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5430 range = full ? 8 : 4;
5431 for (i = 0; i < range; i++)
5433 aarch64_set_vec_u8 (cpu, vd, i,
5434 copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
5435 aarch64_set_vec_u8 (cpu, vd, i + range,
5436 copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);
5441 range = full ? 4 : 2;
5442 for (i = 0; i < range; i++)
5444 aarch64_set_vec_u16 (cpu, vd, i,
5445 copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
5446 aarch64_set_vec_u16 (cpu, vd, i + range,
5447 copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);
5452 range = full ? 2 : 1;
5453 for (i = 0; i < range; i++)
5455 aarch64_set_vec_u32 (cpu, vd, i,
5456 copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
5457 aarch64_set_vec_u32 (cpu, vd, i + range,
5458 copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);
5465 aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
5466 aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
/* Floating point vector convert to longer precision.  */
5473 do_vec_FCVTL (sim_cpu *cpu)
5476 instr[30] = half (0) / all (1)
5477 instr[29,23] = 00 1110 0
5478 instr[22] = single (0) / double (1)
5479 instr[21,10] = 10 0001 0111 10
5483 unsigned rn = INSTR (9, 5);
5484 unsigned rd = INSTR (4, 0);
5485 unsigned full = INSTR (30, 30);
5488 NYI_assert (31, 31, 0);
5489 NYI_assert (29, 23, 0x1C);
5490 NYI_assert (21, 10, 0x85E);
5492 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
      {
	/* Read the source lanes before writing in case rd == rn.  */
	float lo = aarch64_get_vec_float (cpu, rn, 2 * full);
	float hi = aarch64_get_vec_float (cpu, rn, 2 * full + 1);

	aarch64_set_vec_double (cpu, rd, 0, (double) lo);
	aarch64_set_vec_double (cpu, rd, 1, (double) hi);
      }
5504 /* TODO: Implement missing half-float support. */
5505 for (i = 0; i < 4; i++)
5506 aarch64_set_vec_float (cpu, rd, i,
5507 aarch64_get_vec_halffloat (cpu, rn, i + 4*full));
5513 do_vec_FABS (sim_cpu *cpu)
5516 instr[30] = half(0)/full(1)
5517 instr[29,23] = 00 1110 1
5518 instr[22] = float(0)/double(1)
5519 instr[21,16] = 10 0000
5520 instr[15,10] = 1111 10
5524 unsigned vn = INSTR (9, 5);
5525 unsigned vd = INSTR (4, 0);
5526 unsigned full = INSTR (30, 30);
5529 NYI_assert (29, 23, 0x1D);
5530 NYI_assert (21, 10, 0x83E);
5532 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5538 for (i = 0; i < 2; i++)
5539 aarch64_set_vec_double (cpu, vd, i,
5540 fabs (aarch64_get_vec_double (cpu, vn, i)));
5544 for (i = 0; i < (full ? 4 : 2); i++)
5545 aarch64_set_vec_float (cpu, vd, i,
5546 fabsf (aarch64_get_vec_float (cpu, vn, i)));
5551 do_vec_FCVTZS (sim_cpu *cpu)
5554 instr[30] = half (0) / all (1)
5555 instr[29,23] = 00 1110 1
5556 instr[22] = single (0) / double (1)
5557 instr[21,10] = 10 0001 1011 10
5561 unsigned rn = INSTR (9, 5);
5562 unsigned rd = INSTR (4, 0);
5563 unsigned full = INSTR (30, 30);
5566 NYI_assert (31, 31, 0);
5567 NYI_assert (29, 23, 0x1D);
5568 NYI_assert (21, 10, 0x86E);
5570 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5576 for (i = 0; i < 2; i++)
5577 aarch64_set_vec_s64 (cpu, rd, i,
5578 (int64_t) aarch64_get_vec_double (cpu, rn, i));
5581 for (i = 0; i < (full ? 4 : 2); i++)
5582 aarch64_set_vec_s32 (cpu, rd, i,
5583 (int32_t) aarch64_get_vec_float (cpu, rn, i));
5587 do_vec_REV64 (sim_cpu *cpu)
5590 instr[30] = full/half
5591 instr[29,24] = 00 1110
5593 instr[21,10] = 10 0000 0000 10
5597 unsigned rn = INSTR (9, 5);
5598 unsigned rd = INSTR (4, 0);
5599 unsigned size = INSTR (23, 22);
5600 unsigned full = INSTR (30, 30);
5604 NYI_assert (29, 24, 0x0E);
5605 NYI_assert (21, 10, 0x802);
5607 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5611 for (i = 0; i < (full ? 16 : 8); i++)
5612 val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i);
5616 for (i = 0; i < (full ? 8 : 4); i++)
5617 val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i);
5621 for (i = 0; i < (full ? 4 : 2); i++)
5622 val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i);
5629 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5631 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5635 do_vec_REV16 (sim_cpu *cpu)
5638 instr[30] = full/half
5639 instr[29,24] = 00 1110
5641 instr[21,10] = 10 0000 0001 10
5645 unsigned rn = INSTR (9, 5);
5646 unsigned rd = INSTR (4, 0);
5647 unsigned size = INSTR (23, 22);
5648 unsigned full = INSTR (30, 30);
5652 NYI_assert (29, 24, 0x0E);
5653 NYI_assert (21, 10, 0x806);
5655 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5659 for (i = 0; i < (full ? 16 : 8); i++)
5660 val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i);
5667 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5669 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5673 do_vec_op1 (sim_cpu *cpu)
5676 instr[30] = half/full
5677 instr[29,24] = 00 1110
5680 instr[15,10] = sub-opcode
5683 NYI_assert (29, 24, 0x0E);
5685 if (INSTR (21, 21) == 0)
5687 if (INSTR (23, 22) == 0)
5689 if (INSTR (30, 30) == 1
5690 && INSTR (17, 14) == 0
5691 && INSTR (12, 10) == 7)
5692 return do_vec_ins_2 (cpu);
5694 switch (INSTR (15, 10))
5696 case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
5697 case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
5698 case 0x07: do_vec_INS (cpu); return;
5699 case 0x0B: do_vec_SMOV_into_scalar (cpu); return;
5700 case 0x0F: do_vec_UMOV_into_scalar (cpu); return;
5706 do_vec_TBL (cpu); return;
5710 do_vec_UZP (cpu); return;
5712 case 0x0A: do_vec_TRN (cpu); return;
5716 do_vec_ZIP (cpu); return;
5723 switch (INSTR (13, 10))
5725 case 0x6: do_vec_UZP (cpu); return;
5726 case 0xE: do_vec_ZIP (cpu); return;
5727 case 0xA: do_vec_TRN (cpu); return;
5732 switch (INSTR (15, 10))
5734 case 0x02: do_vec_REV64 (cpu); return;
5735 case 0x06: do_vec_REV16 (cpu); return;
5738 switch (INSTR (23, 21))
5740 case 1: do_vec_AND (cpu); return;
5741 case 3: do_vec_BIC (cpu); return;
5742 case 5: do_vec_ORR (cpu); return;
5743 case 7: do_vec_ORN (cpu); return;
5747 case 0x08: do_vec_sub_long (cpu); return;
5748 case 0x0a: do_vec_XTN (cpu); return;
5749 case 0x11: do_vec_SSHL (cpu); return;
5750 case 0x16: do_vec_CNT (cpu); return;
5751 case 0x19: do_vec_max (cpu); return;
5752 case 0x1B: do_vec_min (cpu); return;
5753 case 0x21: do_vec_add (cpu); return;
5754 case 0x25: do_vec_MLA (cpu); return;
5755 case 0x27: do_vec_mul (cpu); return;
5756 case 0x2F: do_vec_ADDP (cpu); return;
5757 case 0x30: do_vec_mull (cpu); return;
5758 case 0x33: do_vec_FMLA (cpu); return;
5759 case 0x35: do_vec_fadd (cpu); return;
5762 switch (INSTR (20, 16))
5764 case 0x01: do_vec_FCVTL (cpu); return;
5769 switch (INSTR (20, 16))
5771 case 0x00: do_vec_ABS (cpu); return;
5772 case 0x01: do_vec_FCVTZS (cpu); return;
5773 case 0x11: do_vec_ADDV (cpu); return;
5779 do_vec_Fminmax (cpu); return;
5791 do_vec_compare (cpu); return;
5794 do_vec_FABS (cpu); return;
5802 do_vec_xtl (sim_cpu *cpu)
5805 instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
5806 instr[28,22] = 0 1111 00
5807 instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
5808 instr[15,10] = 1010 01
5809 instr[9,5] = V source
5810 instr[4,0] = V dest. */
5812 unsigned vs = INSTR (9, 5);
5813 unsigned vd = INSTR (4, 0);
5814 unsigned i, shift, bias = 0;
5816 NYI_assert (28, 22, 0x3C);
5817 NYI_assert (15, 10, 0x29);
5819 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5820 switch (INSTR (30, 29))
5822 case 2: /* SXTL2, SSHLL2. */
5824 case 0: /* SXTL, SSHLL. */
5829 shift = INSTR (20, 16);
5830 /* Get the source values before setting the destination values
5831 in case the source and destination are the same. */
5832 val1 = aarch64_get_vec_s32 (cpu, vs, bias) << shift;
5833 val2 = aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift;
5834 aarch64_set_vec_s64 (cpu, vd, 0, val1);
5835 aarch64_set_vec_s64 (cpu, vd, 1, val2);
5837 else if (INSTR (20, 20))
	  int32_t v[4];
5842 shift = INSTR (19, 16);
5844 for (i = 0; i < 4; i++)
5845 v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift;
5846 for (i = 0; i < 4; i++)
5847 aarch64_set_vec_s32 (cpu, vd, i, v[i]);
5852 NYI_assert (19, 19, 1);
5854 shift = INSTR (18, 16);
5856 for (i = 0; i < 8; i++)
5857 v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
5858 for (i = 0; i < 8; i++)
5859 aarch64_set_vec_s16 (cpu, vd, i, v[i]);
5863 case 3: /* UXTL2, USHLL2. */
5865 case 1: /* UXTL, USHLL. */
5869 shift = INSTR (20, 16);
5870 v1 = aarch64_get_vec_u32 (cpu, vs, bias) << shift;
5871 v2 = aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift;
5872 aarch64_set_vec_u64 (cpu, vd, 0, v1);
5873 aarch64_set_vec_u64 (cpu, vd, 1, v2);
5875 else if (INSTR (20, 20))
5878 shift = INSTR (19, 16);
5880 for (i = 0; i < 4; i++)
5881 v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift;
5882 for (i = 0; i < 4; i++)
5883 aarch64_set_vec_u32 (cpu, vd, i, v[i]);
5888 NYI_assert (19, 19, 1);
5890 shift = INSTR (18, 16);
5892 for (i = 0; i < 8; i++)
5893 v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
5894 for (i = 0; i < 8; i++)
5895 aarch64_set_vec_u16 (cpu, vd, i, v[i]);
5902 do_vec_SHL (sim_cpu *cpu)
5905 instr [30] = half(0)/full(1)
5906 instr [29,23] = 001 1110
5907 instr [22,16] = size and shift amount
5908 instr [15,10] = 01 0101
5910 instr [4, 0] = Vd. */
5913 int full = INSTR (30, 30);
5914 unsigned vs = INSTR (9, 5);
5915 unsigned vd = INSTR (4, 0);
5918 NYI_assert (29, 23, 0x1E);
5919 NYI_assert (15, 10, 0x15);
5921 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
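  /* SHL encodes the shift amount as element-size + shift in immh:immb,
     so masking off the leading immh bit recovers the shift directly.  */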
5924 shift = INSTR (21, 16);
5929 for (i = 0; i < 2; i++)
5931 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5932 aarch64_set_vec_u64 (cpu, vd, i, val << shift);
5940 shift = INSTR (20, 16);
5942 for (i = 0; i < (full ? 4 : 2); i++)
5944 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5945 aarch64_set_vec_u32 (cpu, vd, i, val << shift);
5953 shift = INSTR (19, 16);
5955 for (i = 0; i < (full ? 8 : 4); i++)
5957 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5958 aarch64_set_vec_u16 (cpu, vd, i, val << shift);
5964 if (INSTR (19, 19) == 0)
5967 shift = INSTR (18, 16);
5969 for (i = 0; i < (full ? 16 : 8); i++)
5971 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5972 aarch64_set_vec_u8 (cpu, vd, i, val << shift);
5977 do_vec_SSHR_USHR (sim_cpu *cpu)
5980 instr [30] = half(0)/full(1)
5981 instr [29] = signed(0)/unsigned(1)
5982 instr [28,23] = 0 1111 0
5983 instr [22,16] = size and shift amount
5984 instr [15,10] = 0000 01
5986 instr [4, 0] = Vd. */
5988 int full = INSTR (30, 30);
5989 int sign = ! INSTR (29, 29);
5990 unsigned shift = INSTR (22, 16);
5991 unsigned vs = INSTR (9, 5);
5992 unsigned vd = INSTR (4, 0);
5995 NYI_assert (28, 23, 0x1E);
5996 NYI_assert (15, 10, 0x01);
5998 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
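  /* The immediate encodes the right-shift amount as (2 * element-size)
     - shift, so for the 64-bit element case below it is 128 - shift.  */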
6001 shift = 128 - shift;
6007 for (i = 0; i < 2; i++)
6009 int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
6010 aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
6013 for (i = 0; i < 2; i++)
6015 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
6016 aarch64_set_vec_u64 (cpu, vd, i, val >> shift);
6027 for (i = 0; i < (full ? 4 : 2); i++)
6029 int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
6030 aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
6033 for (i = 0; i < (full ? 4 : 2); i++)
6035 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
6036 aarch64_set_vec_u32 (cpu, vd, i, val >> shift);
6047 for (i = 0; i < (full ? 8 : 4); i++)
6049 int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
6050 aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
6053 for (i = 0; i < (full ? 8 : 4); i++)
6055 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
6056 aarch64_set_vec_u16 (cpu, vd, i, val >> shift);
6062 if (INSTR (19, 19) == 0)
6068 for (i = 0; i < (full ? 16 : 8); i++)
6070 int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
6071 aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
6074 for (i = 0; i < (full ? 16 : 8); i++)
6076 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
6077 aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
6082 do_vec_MUL_by_element (sim_cpu *cpu)
6085 instr[30] = half/full
6086 instr[29,24] = 00 1111
6097 unsigned full = INSTR (30, 30);
6098 unsigned L = INSTR (21, 21);
6099 unsigned H = INSTR (11, 11);
6100 unsigned vn = INSTR (9, 5);
6101 unsigned vd = INSTR (4, 0);
6102 unsigned size = INSTR (23, 22);
6107 NYI_assert (29, 24, 0x0F);
6108 NYI_assert (15, 12, 0x8);
6109 NYI_assert (10, 10, 0);
6111 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
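  /* For 16-bit elements the index is H:L:M and only a 4-bit Vm field
     remains; for 32-bit elements the index is H:L and Vm keeps all five
     bits.  */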
6116 /* 16 bit products. */
6121 index = (H << 2) | (L << 1) | INSTR (20, 20);
6122 vm = INSTR (19, 16);
6123 element2 = aarch64_get_vec_u16 (cpu, vm, index);
6125 for (e = 0; e < (full ? 8 : 4); e ++)
6127 element1 = aarch64_get_vec_u16 (cpu, vn, e);
6128 product = element1 * element2;
6129 aarch64_set_vec_u16 (cpu, vd, e, product);
6136 /* 32 bit products. */
6141 index = (H << 1) | L;
6142 vm = INSTR (20, 16);
6143 element2 = aarch64_get_vec_u32 (cpu, vm, index);
6145 for (e = 0; e < (full ? 4 : 2); e ++)
6147 element1 = aarch64_get_vec_u32 (cpu, vn, e);
6148 product = element1 * element2;
6149 aarch64_set_vec_u32 (cpu, vd, e, product);
6160 do_FMLA_by_element (sim_cpu *cpu)
6163 instr[30] = half/full
6164 instr[29,23] = 00 1111 1
6174 unsigned full = INSTR (30, 30);
6175 unsigned size = INSTR (22, 22);
6176 unsigned L = INSTR (21, 21);
6177 unsigned vm = INSTR (20, 16);
6178 unsigned H = INSTR (11, 11);
6179 unsigned vn = INSTR (9, 5);
6180 unsigned vd = INSTR (4, 0);
6183 NYI_assert (29, 23, 0x1F);
6184 NYI_assert (15, 12, 0x1);
6185 NYI_assert (10, 10, 0);
6187 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6190 double element1, element2;
6195 element2 = aarch64_get_vec_double (cpu, vm, H);
6197 for (e = 0; e < 2; e++)
6199 element1 = aarch64_get_vec_double (cpu, vn, e);
6200 element1 *= element2;
6201 element1 += aarch64_get_vec_double (cpu, vd, e);
6202 aarch64_set_vec_double (cpu, vd, e, element1);
6208 float element2 = aarch64_get_vec_float (cpu, vm, (H << 1) | L);
6210 for (e = 0; e < (full ? 4 : 2); e++)
6212 element1 = aarch64_get_vec_float (cpu, vn, e);
6213 element1 *= element2;
6214 element1 += aarch64_get_vec_float (cpu, vd, e);
6215 aarch64_set_vec_float (cpu, vd, e, element1);
6221 do_vec_op2 (sim_cpu *cpu)
6224 instr[30] = half/full
6225 instr[29,24] = 00 1111
6227 instr[22,16] = element size & index
6228 instr[15,10] = sub-opcode
6232 NYI_assert (29, 24, 0x0F);
6234 if (INSTR (23, 23) != 0)
6236 switch (INSTR (15, 10))
6240 do_FMLA_by_element (cpu);
6245 do_vec_MUL_by_element (cpu);
6254 switch (INSTR (15, 10))
6256 case 0x01: do_vec_SSHR_USHR (cpu); return;
6257 case 0x15: do_vec_SHL (cpu); return;
6259 case 0x22: do_vec_MUL_by_element (cpu); return;
6260 case 0x29: do_vec_xtl (cpu); return;
6267 do_vec_neg (sim_cpu *cpu)
6270 instr[30] = full(1)/half(0)
6271 instr[29,24] = 10 1110
6272 instr[23,22] = size: byte(00), half (01), word (10), long (11)
6273 instr[21,10] = 1000 0010 1110
6277 int full = INSTR (30, 30);
6278 unsigned vs = INSTR (9, 5);
6279 unsigned vd = INSTR (4, 0);
6282 NYI_assert (29, 24, 0x2E);
6283 NYI_assert (21, 10, 0x82E);
6285 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6286 switch (INSTR (23, 22))
6289 for (i = 0; i < (full ? 16 : 8); i++)
6290 aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));
6294 for (i = 0; i < (full ? 8 : 4); i++)
6295 aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));
6299 for (i = 0; i < (full ? 4 : 2); i++)
6300 aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));
6306 for (i = 0; i < 2; i++)
6307 aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
6313 do_vec_sqrt (sim_cpu *cpu)
6316 instr[30] = full(1)/half(0)
6317 instr[29,23] = 101 1101
6318 instr[22] = single(0)/double(1)
6319 instr[21,10] = 1000 0111 1110
6323 int full = INSTR (30, 30);
6324 unsigned vs = INSTR (9, 5);
6325 unsigned vd = INSTR (4, 0);
  NYI_assert (29, 23, 0x5D);
6329 NYI_assert (21, 10, 0x87E);
6331 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6332 if (INSTR (22, 22) == 0)
6333 for (i = 0; i < (full ? 4 : 2); i++)
6334 aarch64_set_vec_float (cpu, vd, i,
6335 sqrtf (aarch64_get_vec_float (cpu, vs, i)));
6337 for (i = 0; i < 2; i++)
6338 aarch64_set_vec_double (cpu, vd, i,
6339 sqrt (aarch64_get_vec_double (cpu, vs, i)));
6343 do_vec_mls_indexed (sim_cpu *cpu)
6346 instr[30] = half(0)/full(1)
6347 instr[29,24] = 10 1111
6348 instr[23,22] = 16-bit(01)/32-bit(10)
     instr[11,21,20] = index H:L:M (if 16-bit)
     instr[11,21] = index H:L (if 32-bit)
6353 instr[11] = part of index
6358 int full = INSTR (30, 30);
6359 unsigned vs = INSTR (9, 5);
6360 unsigned vd = INSTR (4, 0);
6361 unsigned vm = INSTR (20, 16);
6364 NYI_assert (15, 12, 4);
6365 NYI_assert (10, 10, 0);
6367 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6368 switch (INSTR (23, 22))
      /* The element index is encoded as H:L:M (instr[11], instr[21],
	 instr[20]).  */
      elem = (INSTR (11, 11) << 2) | (INSTR (21, 21) << 1) | INSTR (20, 20);
      val = aarch64_get_vec_u16 (cpu, vm, elem);

      for (i = 0; i < (full ? 8 : 4); i++)
	aarch64_set_vec_u16 (cpu, vd, i,
			     aarch64_get_vec_u16 (cpu, vd, i)
			     - (aarch64_get_vec_u16 (cpu, vs, i) * val));
	/* The element index is encoded as H:L (instr[11], instr[21]).  */
	unsigned elem = (INSTR (11, 11) << 1) | INSTR (21, 21);
	uint32_t val = aarch64_get_vec_u32 (cpu, vm, elem);

	for (i = 0; i < (full ? 4 : 2); i++)
	  aarch64_set_vec_u32 (cpu, vd, i,
			       aarch64_get_vec_u32 (cpu, vd, i)
			       - (aarch64_get_vec_u32 (cpu, vs, i) * val));
6408 do_vec_SUB (sim_cpu *cpu)
6411 instr [30] = half(0)/full(1)
6412 instr [29,24] = 10 1110
     instr [23,22] = size: byte (00), half (01), word (10), long (11)
6416 instr [15,10] = 10 0001
6418 instr [4, 0] = Vd. */
6420 unsigned full = INSTR (30, 30);
6421 unsigned vm = INSTR (20, 16);
6422 unsigned vn = INSTR (9, 5);
6423 unsigned vd = INSTR (4, 0);
6426 NYI_assert (29, 24, 0x2E);
6427 NYI_assert (21, 21, 1);
6428 NYI_assert (15, 10, 0x21);
6430 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6431 switch (INSTR (23, 22))
6434 for (i = 0; i < (full ? 16 : 8); i++)
6435 aarch64_set_vec_s8 (cpu, vd, i,
6436 aarch64_get_vec_s8 (cpu, vn, i)
6437 - aarch64_get_vec_s8 (cpu, vm, i));
6441 for (i = 0; i < (full ? 8 : 4); i++)
6442 aarch64_set_vec_s16 (cpu, vd, i,
6443 aarch64_get_vec_s16 (cpu, vn, i)
6444 - aarch64_get_vec_s16 (cpu, vm, i));
6448 for (i = 0; i < (full ? 4 : 2); i++)
6449 aarch64_set_vec_s32 (cpu, vd, i,
6450 aarch64_get_vec_s32 (cpu, vn, i)
6451 - aarch64_get_vec_s32 (cpu, vm, i));
6458 for (i = 0; i < 2; i++)
6459 aarch64_set_vec_s64 (cpu, vd, i,
6460 aarch64_get_vec_s64 (cpu, vn, i)
6461 - aarch64_get_vec_s64 (cpu, vm, i));
6467 do_vec_MLS (sim_cpu *cpu)
6470 instr [30] = half(0)/full(1)
6471 instr [29,24] = 10 1110
     instr [23,22] = size: byte (00), half (01), word (10)
6475 instr [15,10] = 10 0101
6477 instr [4, 0] = Vd. */
6479 unsigned full = INSTR (30, 30);
6480 unsigned vm = INSTR (20, 16);
6481 unsigned vn = INSTR (9, 5);
6482 unsigned vd = INSTR (4, 0);
6485 NYI_assert (29, 24, 0x2E);
6486 NYI_assert (21, 21, 1);
6487 NYI_assert (15, 10, 0x25);
6489 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6490 switch (INSTR (23, 22))
6493 for (i = 0; i < (full ? 16 : 8); i++)
6494 aarch64_set_vec_u8 (cpu, vd, i,
6495 aarch64_get_vec_u8 (cpu, vd, i)
6496 - (aarch64_get_vec_u8 (cpu, vn, i)
6497 * aarch64_get_vec_u8 (cpu, vm, i)));
6501 for (i = 0; i < (full ? 8 : 4); i++)
6502 aarch64_set_vec_u16 (cpu, vd, i,
6503 aarch64_get_vec_u16 (cpu, vd, i)
6504 - (aarch64_get_vec_u16 (cpu, vn, i)
6505 * aarch64_get_vec_u16 (cpu, vm, i)));
6509 for (i = 0; i < (full ? 4 : 2); i++)
6510 aarch64_set_vec_u32 (cpu, vd, i,
6511 aarch64_get_vec_u32 (cpu, vd, i)
6512 - (aarch64_get_vec_u32 (cpu, vn, i)
6513 * aarch64_get_vec_u32 (cpu, vm, i)));
6522 do_vec_FDIV (sim_cpu *cpu)
6525 instr [30] = half(0)/full(1)
6526 instr [29,23] = 10 1110 0
     instr [22]    = float (0)/double (1)
6530 instr [15,10] = 1111 11
6532 instr [4, 0] = Vd. */
6534 unsigned full = INSTR (30, 30);
6535 unsigned vm = INSTR (20, 16);
6536 unsigned vn = INSTR (9, 5);
6537 unsigned vd = INSTR (4, 0);
6540 NYI_assert (29, 23, 0x5C);
6541 NYI_assert (21, 21, 1);
6542 NYI_assert (15, 10, 0x3F);
6544 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6550 for (i = 0; i < 2; i++)
6551 aarch64_set_vec_double (cpu, vd, i,
6552 aarch64_get_vec_double (cpu, vn, i)
6553 / aarch64_get_vec_double (cpu, vm, i));
6556 for (i = 0; i < (full ? 4 : 2); i++)
6557 aarch64_set_vec_float (cpu, vd, i,
6558 aarch64_get_vec_float (cpu, vn, i)
6559 / aarch64_get_vec_float (cpu, vm, i));
6563 do_vec_FMUL (sim_cpu *cpu)
6566 instr [30] = half(0)/full(1)
6567 instr [29,23] = 10 1110 0
6568 instr [22] = float(0)/double(1)
6571 instr [15,10] = 1101 11
6573 instr [4, 0] = Vd. */
6575 unsigned full = INSTR (30, 30);
6576 unsigned vm = INSTR (20, 16);
6577 unsigned vn = INSTR (9, 5);
6578 unsigned vd = INSTR (4, 0);
6581 NYI_assert (29, 23, 0x5C);
6582 NYI_assert (21, 21, 1);
6583 NYI_assert (15, 10, 0x37);
6585 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6591 for (i = 0; i < 2; i++)
6592 aarch64_set_vec_double (cpu, vd, i,
6593 aarch64_get_vec_double (cpu, vn, i)
6594 * aarch64_get_vec_double (cpu, vm, i));
6597 for (i = 0; i < (full ? 4 : 2); i++)
6598 aarch64_set_vec_float (cpu, vd, i,
6599 aarch64_get_vec_float (cpu, vn, i)
6600 * aarch64_get_vec_float (cpu, vm, i));
6604 do_vec_FADDP (sim_cpu *cpu)
6607 instr [30] = half(0)/full(1)
6608 instr [29,23] = 10 1110 0
6609 instr [22] = float(0)/double(1)
6612 instr [15,10] = 1101 01
6614 instr [4, 0] = Vd. */
6616 unsigned full = INSTR (30, 30);
6617 unsigned vm = INSTR (20, 16);
6618 unsigned vn = INSTR (9, 5);
6619 unsigned vd = INSTR (4, 0);
6621 NYI_assert (29, 23, 0x5C);
6622 NYI_assert (21, 21, 1);
6623 NYI_assert (15, 10, 0x35);
6625 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
      /* Extract values before adding them in case vd == vn/vm.  */
6629 double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
6630 double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
6631 double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
6632 double tmp4 = aarch64_get_vec_double (cpu, vm, 1);
6637 aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
6638 aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
      /* Extract values before adding them in case vd == vn/vm.  */
6643 float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
6644 float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
6645 float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
6646 float tmp6 = aarch64_get_vec_float (cpu, vm, 1);
6650 float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
6651 float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
6652 float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
6653 float tmp8 = aarch64_get_vec_float (cpu, vm, 3);
6655 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6656 aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
6657 aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
6658 aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
6662 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6663 aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
6669 do_vec_FSQRT (sim_cpu *cpu)
6672 instr[30] = half(0)/full(1)
6673 instr[29,23] = 10 1110 1
6674 instr[22] = single(0)/double(1)
6675 instr[21,10] = 10 0001 1111 10
6677 instr[4,0] = Vdest. */
6679 unsigned vn = INSTR (9, 5);
6680 unsigned vd = INSTR (4, 0);
6681 unsigned full = INSTR (30, 30);
6684 NYI_assert (29, 23, 0x5D);
6685 NYI_assert (21, 10, 0x87E);
6687 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6693 for (i = 0; i < 2; i++)
6694 aarch64_set_vec_double (cpu, vd, i,
6695 sqrt (aarch64_get_vec_double (cpu, vn, i)));
6699 for (i = 0; i < (full ? 4 : 2); i++)
6700 aarch64_set_vec_float (cpu, vd, i,
6701 sqrtf (aarch64_get_vec_float (cpu, vn, i)));
6706 do_vec_FNEG (sim_cpu *cpu)
6709 instr[30] = half (0)/full (1)
6710 instr[29,23] = 10 1110 1
6711 instr[22] = single (0)/double (1)
6712 instr[21,10] = 10 0000 1111 10
6714 instr[4,0] = Vdest. */
6716 unsigned vn = INSTR (9, 5);
6717 unsigned vd = INSTR (4, 0);
6718 unsigned full = INSTR (30, 30);
6721 NYI_assert (29, 23, 0x5D);
6722 NYI_assert (21, 10, 0x83E);
6724 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6730 for (i = 0; i < 2; i++)
6731 aarch64_set_vec_double (cpu, vd, i,
6732 - aarch64_get_vec_double (cpu, vn, i));
6736 for (i = 0; i < (full ? 4 : 2); i++)
6737 aarch64_set_vec_float (cpu, vd, i,
6738 - aarch64_get_vec_float (cpu, vn, i));
6743 do_vec_NOT (sim_cpu *cpu)
6746 instr[30] = half (0)/full (1)
6747 instr[29,10] = 10 1110 0010 0000 0101 10
6751 unsigned vn = INSTR (9, 5);
6752 unsigned vd = INSTR (4, 0);
6754 int full = INSTR (30, 30);
6756 NYI_assert (29, 10, 0xB8816);
6758 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6759 for (i = 0; i < (full ? 16 : 8); i++)
6760 aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
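/* Count the leading zero bits in the least significant SIZE bits of VAL.  */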
6764 clz (uint64_t val, unsigned size)
6769 mask <<= (size - 1);
6784 do_vec_CLZ (sim_cpu *cpu)
6787 instr[30] = half (0)/full (1)
6788 instr[29,24] = 10 1110
6790 instr[21,10] = 10 0000 0100 10
6794 unsigned vn = INSTR (9, 5);
6795 unsigned vd = INSTR (4, 0);
6797 int full = INSTR (30,30);
6799 NYI_assert (29, 24, 0x2E);
6800 NYI_assert (21, 10, 0x812);
6802 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6803 switch (INSTR (23, 22))
6806 for (i = 0; i < (full ? 16 : 8); i++)
6807 aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8));
6810 for (i = 0; i < (full ? 8 : 4); i++)
6811 aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16));
6814 for (i = 0; i < (full ? 4 : 2); i++)
6815 aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32));
6820 aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
6821 aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
6827 do_vec_MOV_element (sim_cpu *cpu)
6829 /* instr[31,21] = 0110 1110 000
6830 instr[20,16] = size & dest index
6832 instr[14,11] = source index
6837 unsigned vs = INSTR (9, 5);
6838 unsigned vd = INSTR (4, 0);
6842 NYI_assert (31, 21, 0x370);
6843 NYI_assert (15, 15, 0);
6844 NYI_assert (10, 10, 1);
6846 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
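  /* The element size is selected by the lowest set bit of instr[20,16]:
     bit 16 => byte, bit 17 => half, bit 18 => word, bit 19 => long.  */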
6850 src_index = INSTR (14, 11);
6851 dst_index = INSTR (20, 17);
6852 aarch64_set_vec_u8 (cpu, vd, dst_index,
6853 aarch64_get_vec_u8 (cpu, vs, src_index));
6855 else if (INSTR (17, 17))
6858 NYI_assert (11, 11, 0);
6859 src_index = INSTR (14, 12);
6860 dst_index = INSTR (20, 18);
6861 aarch64_set_vec_u16 (cpu, vd, dst_index,
6862 aarch64_get_vec_u16 (cpu, vs, src_index));
6864 else if (INSTR (18, 18))
6867 NYI_assert (12, 11, 0);
6868 src_index = INSTR (14, 13);
6869 dst_index = INSTR (20, 19);
6870 aarch64_set_vec_u32 (cpu, vd, dst_index,
6871 aarch64_get_vec_u32 (cpu, vs, src_index));
6875 NYI_assert (19, 19, 1);
6876 NYI_assert (13, 11, 0);
6877 src_index = INSTR (14, 14);
6878 dst_index = INSTR (20, 20);
6879 aarch64_set_vec_u64 (cpu, vd, dst_index,
6880 aarch64_get_vec_u64 (cpu, vs, src_index));
6885 do_vec_REV32 (sim_cpu *cpu)
6888 instr[30] = full/half
6889 instr[29,24] = 10 1110
6891 instr[21,10] = 10 0000 0000 10
6895 unsigned rn = INSTR (9, 5);
6896 unsigned rd = INSTR (4, 0);
6897 unsigned size = INSTR (23, 22);
6898 unsigned full = INSTR (30, 30);
6902 NYI_assert (29, 24, 0x2E);
6903 NYI_assert (21, 10, 0x802);
6905 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6909 for (i = 0; i < (full ? 16 : 8); i++)
6910 val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i);
6914 for (i = 0; i < (full ? 8 : 4); i++)
6915 val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i);
6922 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
6924 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
6928 do_vec_EXT (sim_cpu *cpu)
6931 instr[30] = full/half
6932 instr[29,21] = 10 1110 000
6935 instr[14,11] = source index
6940 unsigned vm = INSTR (20, 16);
6941 unsigned vn = INSTR (9, 5);
6942 unsigned vd = INSTR (4, 0);
6943 unsigned src_index = INSTR (14, 11);
6944 unsigned full = INSTR (30, 30);
6949 NYI_assert (31, 21, 0x370);
6950 NYI_assert (15, 15, 0);
6951 NYI_assert (10, 10, 0);
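  /* EXT extracts a contiguous run of bytes from the concatenation of
     the two sources: bytes src_index..top of Vn followed by the leading
     bytes of Vm.  */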
6953 if (!full && (src_index & 0x8))
6958 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6959 for (i = src_index; i < (full ? 16 : 8); i++)
6960 val.b[j ++] = aarch64_get_vec_u8 (cpu, vn, i);
6961 for (i = 0; i < src_index; i++)
6962 val.b[j ++] = aarch64_get_vec_u8 (cpu, vm, i);
6964 aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]);
6966 aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]);
6970 dexAdvSIMD0 (sim_cpu *cpu)
6972 /* instr [28,25] = 0 111. */
6973 if ( INSTR (15, 10) == 0x07
6977 if (INSTR (31, 21) == 0x075
6978 || INSTR (31, 21) == 0x275)
6980 do_vec_MOV_whole_vector (cpu);
6985 if (INSTR (29, 19) == 0x1E0)
6987 do_vec_MOV_immediate (cpu);
6991 if (INSTR (29, 19) == 0x5E0)
6997 if (INSTR (29, 19) == 0x1C0
6998 || INSTR (29, 19) == 0x1C1)
7000 if (INSTR (15, 10) == 0x03)
7002 do_vec_DUP_scalar_into_vector (cpu);
7007 switch (INSTR (29, 24))
7009 case 0x0E: do_vec_op1 (cpu); return;
7010 case 0x0F: do_vec_op2 (cpu); return;
7013 if (INSTR (21, 21) == 1)
7015 switch (INSTR (15, 10))
7022 switch (INSTR (23, 22))
7024 case 0: do_vec_EOR (cpu); return;
7025 case 1: do_vec_BSL (cpu); return;
7027 case 3: do_vec_bit (cpu); return;
7031 case 0x08: do_vec_sub_long (cpu); return;
7032 case 0x11: do_vec_USHL (cpu); return;
7033 case 0x12: do_vec_CLZ (cpu); return;
7034 case 0x16: do_vec_NOT (cpu); return;
7035 case 0x19: do_vec_max (cpu); return;
7036 case 0x1B: do_vec_min (cpu); return;
7037 case 0x21: do_vec_SUB (cpu); return;
7038 case 0x25: do_vec_MLS (cpu); return;
7039 case 0x31: do_vec_FminmaxNMP (cpu); return;
7040 case 0x35: do_vec_FADDP (cpu); return;
7041 case 0x37: do_vec_FMUL (cpu); return;
7042 case 0x3F: do_vec_FDIV (cpu); return;
7045 switch (INSTR (20, 16))
7047 case 0x00: do_vec_FNEG (cpu); return;
7048 case 0x01: do_vec_FSQRT (cpu); return;
7062 do_vec_compare (cpu); return;
7069 if (INSTR (31, 21) == 0x370)
7072 do_vec_MOV_element (cpu);
7078 switch (INSTR (21, 10))
7080 case 0x82E: do_vec_neg (cpu); return;
7081 case 0x87E: do_vec_sqrt (cpu); return;
7083 if (INSTR (15, 10) == 0x30)
7093 switch (INSTR (15, 10))
7095 case 0x01: do_vec_SSHR_USHR (cpu); return;
7097 case 0x12: do_vec_mls_indexed (cpu); return;
7098 case 0x29: do_vec_xtl (cpu); return;
7112 /* Float multiply add. */
7114 fmadds (sim_cpu *cpu)
7116 unsigned sa = INSTR (14, 10);
7117 unsigned sm = INSTR (20, 16);
7118 unsigned sn = INSTR ( 9, 5);
7119 unsigned sd = INSTR ( 4, 0);
7121 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7122 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7123 + aarch64_get_FP_float (cpu, sn)
7124 * aarch64_get_FP_float (cpu, sm));
7127 /* Double multiply add. */
7129 fmaddd (sim_cpu *cpu)
7131 unsigned sa = INSTR (14, 10);
7132 unsigned sm = INSTR (20, 16);
7133 unsigned sn = INSTR ( 9, 5);
7134 unsigned sd = INSTR ( 4, 0);
7136 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7137 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7138 + aarch64_get_FP_double (cpu, sn)
7139 * aarch64_get_FP_double (cpu, sm));
7142 /* Float multiply subtract. */
7144 fmsubs (sim_cpu *cpu)
7146 unsigned sa = INSTR (14, 10);
7147 unsigned sm = INSTR (20, 16);
7148 unsigned sn = INSTR ( 9, 5);
7149 unsigned sd = INSTR ( 4, 0);
7151 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7152 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7153 - aarch64_get_FP_float (cpu, sn)
7154 * aarch64_get_FP_float (cpu, sm));
7157 /* Double multiply subtract. */
7159 fmsubd (sim_cpu *cpu)
7161 unsigned sa = INSTR (14, 10);
7162 unsigned sm = INSTR (20, 16);
7163 unsigned sn = INSTR ( 9, 5);
7164 unsigned sd = INSTR ( 4, 0);
7166 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7167 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7168 - aarch64_get_FP_double (cpu, sn)
7169 * aarch64_get_FP_double (cpu, sm));
7172 /* Float negative multiply add. */
7174 fnmadds (sim_cpu *cpu)
7176 unsigned sa = INSTR (14, 10);
7177 unsigned sm = INSTR (20, 16);
7178 unsigned sn = INSTR ( 9, 5);
7179 unsigned sd = INSTR ( 4, 0);
7181 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7182 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7183 + (- aarch64_get_FP_float (cpu, sn))
7184 * aarch64_get_FP_float (cpu, sm));
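  /* I.e. sd = - sa - sn * sm, the architected FNMADD result.  */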
7187 /* Double negative multiply add. */
7189 fnmaddd (sim_cpu *cpu)
7191 unsigned sa = INSTR (14, 10);
7192 unsigned sm = INSTR (20, 16);
7193 unsigned sn = INSTR ( 9, 5);
7194 unsigned sd = INSTR ( 4, 0);
7196 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7197 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7198 + (- aarch64_get_FP_double (cpu, sn))
7199 * aarch64_get_FP_double (cpu, sm));
7202 /* Float negative multiply subtract. */
7204 fnmsubs (sim_cpu *cpu)
7206 unsigned sa = INSTR (14, 10);
7207 unsigned sm = INSTR (20, 16);
7208 unsigned sn = INSTR ( 9, 5);
7209 unsigned sd = INSTR ( 4, 0);
7211 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7212 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7213 + aarch64_get_FP_float (cpu, sn)
7214 * aarch64_get_FP_float (cpu, sm));
7217 /* Double negative multiply subtract. */
7219 fnmsubd (sim_cpu *cpu)
7221 unsigned sa = INSTR (14, 10);
7222 unsigned sm = INSTR (20, 16);
7223 unsigned sn = INSTR ( 9, 5);
7224 unsigned sd = INSTR ( 4, 0);
7226 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7227 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7228 + aarch64_get_FP_double (cpu, sn)
7229 * aarch64_get_FP_double (cpu, sm));
7233 dexSimpleFPDataProc3Source (sim_cpu *cpu)
7235 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7237 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
     instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
7241 instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated
7242 instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */
7244 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7245 /* dispatch on combined type:o1:o2. */
7246 uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15);
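  /* Dispatch values 0-3 select the single precision FMADD/FMSUB/
     FNMADD/FNMSUB, 4-7 the double precision variants; type 1x
     (dispatch 8-15) is unallocated.  */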
7253 case 0: fmadds (cpu); return;
7254 case 1: fmsubs (cpu); return;
7255 case 2: fnmadds (cpu); return;
7256 case 3: fnmsubs (cpu); return;
7257 case 4: fmaddd (cpu); return;
7258 case 5: fmsubd (cpu); return;
7259 case 6: fnmaddd (cpu); return;
7260 case 7: fnmsubd (cpu); return;
7262 /* type > 1 is currently unallocated. */
7268 dexSimpleFPFixedConvert (sim_cpu *cpu)
7274 dexSimpleFPCondCompare (sim_cpu *cpu)
7276 /* instr [31,23] = 0001 1110 0
7280 instr [15,12] = condition
7284 instr [3,0] = nzcv */
7286 unsigned rm = INSTR (20, 16);
7287 unsigned rn = INSTR (9, 5);
7289 NYI_assert (31, 23, 0x3C);
7290 NYI_assert (11, 10, 0x1);
7291 NYI_assert (4, 4, 0);
7293 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7294 if (! testConditionCode (cpu, INSTR (15, 12)))
7296 aarch64_set_CPSR (cpu, INSTR (3, 0));
7302 /* Double precision. */
7303 double val1 = aarch64_get_vec_double (cpu, rn, 0);
7304 double val2 = aarch64_get_vec_double (cpu, rm, 0);
      /* FIXME: Check for NaNs; an unordered compare should set
	 NZCV to 0011 (C | V).  */
7308 aarch64_set_CPSR (cpu, (Z | C));
7309 else if (val1 < val2)
7310 aarch64_set_CPSR (cpu, N);
7311 else /* val1 > val2 */
7312 aarch64_set_CPSR (cpu, C);
7316 /* Single precision. */
7317 float val1 = aarch64_get_vec_float (cpu, rn, 0);
7318 float val2 = aarch64_get_vec_float (cpu, rm, 0);
      /* FIXME: Check for NaNs; an unordered compare should set
	 NZCV to 0011 (C | V).  */
7322 aarch64_set_CPSR (cpu, (Z | C));
7323 else if (val1 < val2)
7324 aarch64_set_CPSR (cpu, N);
7325 else /* val1 > val2 */
7326 aarch64_set_CPSR (cpu, C);
7334 fadds (sim_cpu *cpu)
7336 unsigned sm = INSTR (20, 16);
7337 unsigned sn = INSTR ( 9, 5);
7338 unsigned sd = INSTR ( 4, 0);
7340 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7341 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7342 + aarch64_get_FP_float (cpu, sm));
7347 faddd (sim_cpu *cpu)
7349 unsigned sm = INSTR (20, 16);
7350 unsigned sn = INSTR ( 9, 5);
7351 unsigned sd = INSTR ( 4, 0);
7353 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7354 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7355 + aarch64_get_FP_double (cpu, sm));
7360 fdivs (sim_cpu *cpu)
7362 unsigned sm = INSTR (20, 16);
7363 unsigned sn = INSTR ( 9, 5);
7364 unsigned sd = INSTR ( 4, 0);
7366 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7367 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7368 / aarch64_get_FP_float (cpu, sm));
7371 /* Double divide. */
7373 fdivd (sim_cpu *cpu)
7375 unsigned sm = INSTR (20, 16);
7376 unsigned sn = INSTR ( 9, 5);
7377 unsigned sd = INSTR ( 4, 0);
7379 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7380 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7381 / aarch64_get_FP_double (cpu, sm));
7384 /* Float multiply. */
7386 fmuls (sim_cpu *cpu)
7388 unsigned sm = INSTR (20, 16);
7389 unsigned sn = INSTR ( 9, 5);
7390 unsigned sd = INSTR ( 4, 0);
7392 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7393 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7394 * aarch64_get_FP_float (cpu, sm));
7397 /* Double multiply. */
7399 fmuld (sim_cpu *cpu)
7401 unsigned sm = INSTR (20, 16);
7402 unsigned sn = INSTR ( 9, 5);
7403 unsigned sd = INSTR ( 4, 0);
7405 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7406 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7407 * aarch64_get_FP_double (cpu, sm));
7410 /* Float negate and multiply. */
7412 fnmuls (sim_cpu *cpu)
7414 unsigned sm = INSTR (20, 16);
7415 unsigned sn = INSTR ( 9, 5);
7416 unsigned sd = INSTR ( 4, 0);
7418 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7419 aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
7420 * aarch64_get_FP_float (cpu, sm)));
7423 /* Double negate and multiply. */
7425 fnmuld (sim_cpu *cpu)
7427 unsigned sm = INSTR (20, 16);
7428 unsigned sn = INSTR ( 9, 5);
7429 unsigned sd = INSTR ( 4, 0);
7431 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7432 aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
7433 * aarch64_get_FP_double (cpu, sm)));
7436 /* Float subtract. */
7438 fsubs (sim_cpu *cpu)
7440 unsigned sm = INSTR (20, 16);
7441 unsigned sn = INSTR ( 9, 5);
7442 unsigned sd = INSTR ( 4, 0);
7444 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7445 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7446 - aarch64_get_FP_float (cpu, sm));
7449 /* Double subtract. */
7451 fsubd (sim_cpu *cpu)
7453 unsigned sm = INSTR (20, 16);
7454 unsigned sn = INSTR ( 9, 5);
7455 unsigned sd = INSTR ( 4, 0);
7457 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7458 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7459 - aarch64_get_FP_double (cpu, sm));
7463 do_FMINNM (sim_cpu *cpu)
7465 /* instr[31,23] = 0 0011 1100
7466 instr[22] = float(0)/double(1)
7469 instr[15,10] = 01 1110
7473 unsigned sm = INSTR (20, 16);
7474 unsigned sn = INSTR ( 9, 5);
7475 unsigned sd = INSTR ( 4, 0);
7477 NYI_assert (31, 23, 0x03C);
7478 NYI_assert (15, 10, 0x1E);
7480 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7482 aarch64_set_FP_double (cpu, sd,
7483 dminnm (aarch64_get_FP_double (cpu, sn),
7484 aarch64_get_FP_double (cpu, sm)));
7486 aarch64_set_FP_float (cpu, sd,
7487 fminnm (aarch64_get_FP_float (cpu, sn),
7488 aarch64_get_FP_float (cpu, sm)));
7492 do_FMAXNM (sim_cpu *cpu)
7494 /* instr[31,23] = 0 0011 1100
7495 instr[22] = float(0)/double(1)
7498 instr[15,10] = 01 1010
7502 unsigned sm = INSTR (20, 16);
7503 unsigned sn = INSTR ( 9, 5);
7504 unsigned sd = INSTR ( 4, 0);
7506 NYI_assert (31, 23, 0x03C);
7507 NYI_assert (15, 10, 0x1A);
7509 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7511 aarch64_set_FP_double (cpu, sd,
7512 dmaxnm (aarch64_get_FP_double (cpu, sn),
7513 aarch64_get_FP_double (cpu, sm)));
7515 aarch64_set_FP_float (cpu, sd,
7516 fmaxnm (aarch64_get_FP_float (cpu, sn),
7517 aarch64_get_FP_float (cpu, sm)));
7521 dexSimpleFPDataProc2Source (sim_cpu *cpu)
7523 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7525 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
     instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
7531 instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
7532 0010 ==> FADD, 0011 ==> FSUB,
7533 0100 ==> FMAX, 0101 ==> FMIN
7534 0110 ==> FMAXNM, 0111 ==> FMINNM
7535 1000 ==> FNMUL, ow ==> UNALLOC
7540 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7541 uint32_t type = INSTR (23, 22);
7542 /* Dispatch on opcode. */
7543 uint32_t dispatch = INSTR (15, 12);
7554 case 0: fmuld (cpu); return;
7555 case 1: fdivd (cpu); return;
7556 case 2: faddd (cpu); return;
7557 case 3: fsubd (cpu); return;
7558 case 6: do_FMAXNM (cpu); return;
7559 case 7: do_FMINNM (cpu); return;
7560 case 8: fnmuld (cpu); return;
7562 /* Have not yet implemented fmax and fmin. */
7570 else /* type == 0 => floats. */
7573 case 0: fmuls (cpu); return;
7574 case 1: fdivs (cpu); return;
7575 case 2: fadds (cpu); return;
7576 case 3: fsubs (cpu); return;
7577 case 6: do_FMAXNM (cpu); return;
7578 case 7: do_FMINNM (cpu); return;
7579 case 8: fnmuls (cpu); return;
7591 dexSimpleFPCondSelect (sim_cpu *cpu)
7594 instr[31,23] = 0 0011 1100
7595 instr[22] = 0=>single 1=>double
7602 unsigned sm = INSTR (20, 16);
7603 unsigned sn = INSTR ( 9, 5);
7604 unsigned sd = INSTR ( 4, 0);
7605 uint32_t set = testConditionCode (cpu, INSTR (15, 12));
7607 NYI_assert (31, 23, 0x03C);
7608 NYI_assert (11, 10, 0x3);
7610 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7612 aarch64_set_FP_double (cpu, sd, (set ? aarch64_get_FP_double (cpu, sn)
7613 : aarch64_get_FP_double (cpu, sm)));
7615 aarch64_set_FP_float (cpu, sd, (set ? aarch64_get_FP_float (cpu, sn)
7616 : aarch64_get_FP_float (cpu, sm)));
7619 /* Store 32 bit unscaled signed 9 bit. */
7621 fsturs (sim_cpu *cpu, int32_t offset)
7623 unsigned int rn = INSTR (9, 5);
7624 unsigned int st = INSTR (4, 0);
7626 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7628 aarch64_get_vec_u32 (cpu, st, 0));
7631 /* Store 64 bit unscaled signed 9 bit. */
7633 fsturd (sim_cpu *cpu, int32_t offset)
7635 unsigned int rn = INSTR (9, 5);
7636 unsigned int st = INSTR (4, 0);
7638 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7640 aarch64_get_vec_u64 (cpu, st, 0));
7643 /* Store 128 bit unscaled signed 9 bit. */
7645 fsturq (sim_cpu *cpu, int32_t offset)
7647 unsigned int rn = INSTR (9, 5);
7648 unsigned int st = INSTR (4, 0);
7651 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7652 aarch64_get_FP_long_double (cpu, st, & a);
7653 aarch64_set_mem_long_double (cpu,
			       aarch64_get_reg_u64 (cpu, rn, SP_OK)
7658 /* TODO FP move register. */
7660 /* 32 bit fp to fp move register. */
7662 ffmovs (sim_cpu *cpu)
7664 unsigned int rn = INSTR (9, 5);
7665 unsigned int st = INSTR (4, 0);
7667 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7668 aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
7671 /* 64 bit fp to fp move register. */
7673 ffmovd (sim_cpu *cpu)
7675 unsigned int rn = INSTR (9, 5);
7676 unsigned int st = INSTR (4, 0);
7678 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7679 aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
7682 /* 32 bit GReg to Vec move register. */
7684 fgmovs (sim_cpu *cpu)
7686 unsigned int rn = INSTR (9, 5);
7687 unsigned int st = INSTR (4, 0);
7689 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7690 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
7693 /* 64 bit g to fp move register. */
7695 fgmovd (sim_cpu *cpu)
7697 unsigned int rn = INSTR (9, 5);
7698 unsigned int st = INSTR (4, 0);
7700 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7701 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7704 /* 32 bit fp to g move register. */
7706 gfmovs (sim_cpu *cpu)
7708 unsigned int rn = INSTR (9, 5);
7709 unsigned int st = INSTR (4, 0);
7711 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7712 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
7715 /* 64 bit fp to g move register. */
7717 gfmovd (sim_cpu *cpu)
7719 unsigned int rn = INSTR (9, 5);
7720 unsigned int st = INSTR (4, 0);
7722 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7723 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
7726 /* FP move immediate
7728 These install an immediate 8 bit value in the target register
   where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
   bit exponent.  */
7733 fmovs (sim_cpu *cpu)
7735 unsigned int sd = INSTR (4, 0);
7736 uint32_t imm = INSTR (20, 13);
7737 float f = fp_immediate_for_encoding_32 (imm);
7739 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7740 aarch64_set_FP_float (cpu, sd, f);
7744 fmovd (sim_cpu *cpu)
7746 unsigned int sd = INSTR (4, 0);
7747 uint32_t imm = INSTR (20, 13);
7748 double d = fp_immediate_for_encoding_64 (imm);
7750 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7751 aarch64_set_FP_double (cpu, sd, d);
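/* For reference, a minimal sketch of the expansion performed by
   fp_immediate_for_encoding_32 above, assuming the standard Arm
   VFPExpandImm scheme (this helper is an illustration only and is not
   used by the simulator): imm8 = a:b:c:d:e:f:g:h becomes sign a,
   exponent NOT(b):b:b:b:b:b:c:d and fraction e:f:g:h padded with
   zeros.  */
static float
vfp_expand_imm8_sketch (uint32_t imm8)
{
  uint32_t sign = (imm8 >> 7) & 1;
  uint32_t b    = (imm8 >> 6) & 1;
  uint32_t cd   = (imm8 >> 4) & 3;
  uint32_t frac = imm8 & 0xF;
  /* Single precision: exp<7> = NOT (b), exp<6:2> = b replicated,
     exp<1:0> = c:d.  */
  uint32_t exp  = ((b ^ 1) << 7) | ((b ? 0x1F : 0) << 2) | cd;
  union { uint32_t u; float f; } cvt;

  cvt.u = (sign << 31) | (exp << 23) | (frac << 19);
  return cvt.f;   /* E.g. imm8 == 0x70 yields 1.0f.  */
}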
7755 dexSimpleFPImmediate (sim_cpu *cpu)
  /* instr[31,23] == 0 0011 1100
7758 instr[22] == type : single(0)/double(1)
7760 instr[20,13] == imm8
     instr[9,5] == imm5 : 00000 ==> OK, ow ==> UNALLOC
7764 uint32_t imm5 = INSTR (9, 5);
7766 NYI_assert (31, 23, 0x3C);
7777 /* TODO specific decode and execute for group Load Store. */
7779 /* TODO FP load/store single register (unscaled offset). */
7781 /* TODO load 8 bit unscaled signed 9 bit. */
7782 /* TODO load 16 bit unscaled signed 9 bit. */
7784 /* Load 32 bit unscaled signed 9 bit. */
7786 fldurs (sim_cpu *cpu, int32_t offset)
7788 unsigned int rn = INSTR (9, 5);
7789 unsigned int st = INSTR (4, 0);
7791 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7792 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
7793 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7796 /* Load 64 bit unscaled signed 9 bit. */
7798 fldurd (sim_cpu *cpu, int32_t offset)
7800 unsigned int rn = INSTR (9, 5);
7801 unsigned int st = INSTR (4, 0);
7803 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7804 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
7805 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7808 /* Load 128 bit unscaled signed 9 bit. */
7810 fldurq (sim_cpu *cpu, int32_t offset)
7812 unsigned int rn = INSTR (9, 5);
7813 unsigned int st = INSTR (4, 0);
7815 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
7817 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7818 aarch64_get_mem_long_double (cpu, addr, & a);
7819 aarch64_set_FP_long_double (cpu, st, a);
7822 /* TODO store 8 bit unscaled signed 9 bit. */
7823 /* TODO store 16 bit unscaled signed 9 bit. */
7828 /* Float absolute value. */
7830 fabss (sim_cpu *cpu)
7832 unsigned sn = INSTR (9, 5);
7833 unsigned sd = INSTR (4, 0);
7834 float value = aarch64_get_FP_float (cpu, sn);
7836 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7837 aarch64_set_FP_float (cpu, sd, fabsf (value));
7840 /* Double absolute value. */
7842 fabcpu (sim_cpu *cpu)
7844 unsigned sn = INSTR (9, 5);
7845 unsigned sd = INSTR (4, 0);
7846 double value = aarch64_get_FP_double (cpu, sn);
7848 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7849 aarch64_set_FP_double (cpu, sd, fabs (value));
7852 /* Float negative value. */
7854 fnegs (sim_cpu *cpu)
7856 unsigned sn = INSTR (9, 5);
7857 unsigned sd = INSTR (4, 0);
7859 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7860 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
7863 /* Double negative value. */
7865 fnegd (sim_cpu *cpu)
7867 unsigned sn = INSTR (9, 5);
7868 unsigned sd = INSTR (4, 0);
7870 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7871 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
7874 /* Float square root. */
7876 fsqrts (sim_cpu *cpu)
7878 unsigned sn = INSTR (9, 5);
7879 unsigned sd = INSTR (4, 0);
7881 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7882 aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn)));
7885 /* Double square root. */
7887 fsqrtd (sim_cpu *cpu)
7889 unsigned sn = INSTR (9, 5);
7890 unsigned sd = INSTR (4, 0);
7892 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7893 aarch64_set_FP_double (cpu, sd,
7894 sqrt (aarch64_get_FP_double (cpu, sn)));
7897 /* Convert double to float. */
7899 fcvtds (sim_cpu *cpu)
7901 unsigned sn = INSTR (9, 5);
7902 unsigned sd = INSTR (4, 0);
7904 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7905 aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
7908 /* Convert float to double. */
7910 fcvtcpu (sim_cpu *cpu)
7912 unsigned sn = INSTR (9, 5);
7913 unsigned sd = INSTR (4, 0);
7915 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7916 aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
7920 do_FRINT (sim_cpu *cpu)
7922 /* instr[31,23] = 0001 1110 0
7923 instr[22] = single(0)/double(1)
7925 instr[17,15] = rounding mode
7926 instr[14,10] = 10000
7928 instr[4,0] = dest */
7931 unsigned rs = INSTR (9, 5);
7932 unsigned rd = INSTR (4, 0);
7933 unsigned int rmode = INSTR (17, 15);
7935 NYI_assert (31, 23, 0x03C);
7936 NYI_assert (21, 18, 0x9);
7937 NYI_assert (14, 10, 0x10);
7939 if (rmode == 6 || rmode == 7)
7940 /* FIXME: Add support for rmode == 6 exactness check. */
7941 rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
7943 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7946 double val = aarch64_get_FP_double (cpu, rs);
	case 0: /* mode N: nearest, ties to even.  */
	    double rval = round (val);

	    /* round () breaks ties away from zero; nudge exact halfway
	       cases onto the even neighbour.  */
	    if (fabs (val - rval) == 0.5 && fmod (rval, 2.0) != 0.0)
	      rval -= copysign (1.0, rval);

	    aarch64_set_FP_double (cpu, rd, rval);
	case 1: /* mode P: towards +inf.  */
	  aarch64_set_FP_double (cpu, rd, ceil (val));
	case 2: /* mode M: towards -inf.  */
	  aarch64_set_FP_double (cpu, rd, floor (val));
7978 case 3: /* mode Z: towards 0. */
7979 aarch64_set_FP_double (cpu, rd, trunc (val));
7982 case 4: /* mode A: away from 0. */
7983 aarch64_set_FP_double (cpu, rd, round (val));
7986 case 6: /* mode X: use FPCR with exactness check. */
7987 case 7: /* mode I: use FPCR mode. */
7995 val = aarch64_get_FP_float (cpu, rs);
	case 0: /* mode N: nearest, ties to even.  */
	    float rval = roundf (val);

	    /* roundf () breaks ties away from zero; nudge exact halfway
	       cases onto the even neighbour.  */
	    if (fabsf (val - rval) == 0.5f && fmodf (rval, 2.0f) != 0.0f)
	      rval -= copysignf (1.0f, rval);

	    aarch64_set_FP_float (cpu, rd, rval);
case 1: /* mode P: towards +inf. */
  aarch64_set_FP_float (cpu, rd, ceilf (val));
  return;
case 2: /* mode M: towards -inf. */
  aarch64_set_FP_float (cpu, rd, floorf (val));
  return;
8027 case 3: /* mode Z: towards 0. */
8028 aarch64_set_FP_float (cpu, rd, truncf (val));
8031 case 4: /* mode A: away from 0. */
8032 aarch64_set_FP_float (cpu, rd, roundf (val));
8035 case 6: /* mode X: use FPCR with exactness check. */
8036 case 7: /* mode I: use FPCR mode. */
8044 /* Convert half to float. */
8046 do_FCVT_half_to_single (sim_cpu *cpu)
8048 unsigned rn = INSTR (9, 5);
8049 unsigned rd = INSTR (4, 0);
8051 NYI_assert (31, 10, 0x7B890);
8053 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8054 aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn));
8057 /* Convert half to double. */
8059 do_FCVT_half_to_double (sim_cpu *cpu)
8061 unsigned rn = INSTR (9, 5);
8062 unsigned rd = INSTR (4, 0);
8064 NYI_assert (31, 10, 0x7B8B0);
8066 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8067 aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn));
8071 do_FCVT_single_to_half (sim_cpu *cpu)
8073 unsigned rn = INSTR (9, 5);
8074 unsigned rd = INSTR (4, 0);
8076 NYI_assert (31, 10, 0x788F0);
8078 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8079 aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn));
8082 /* Convert double to half. */
8084 do_FCVT_double_to_half (sim_cpu *cpu)
8086 unsigned rn = INSTR (9, 5);
8087 unsigned rd = INSTR (4, 0);
8089 NYI_assert (31, 10, 0x798F0);
8091 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8092 aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn));
8096 dexSimpleFPDataProc1Source (sim_cpu *cpu)
8098 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
8100 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8103 instr[23,22] ==> type : 00 ==> source is single,
8104 01 ==> source is double
8106 11 ==> UNALLOC or source is half
8108 instr[20,15] ==> opcode : with type 00 or 01
8109 000000 ==> FMOV, 000001 ==> FABS,
8110 000010 ==> FNEG, 000011 ==> FSQRT,
8111 000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double)
8112 000110 ==> UNALLOC, 000111 ==> FCVT (to half)
8113 001000 ==> FRINTN, 001001 ==> FRINTP,
8114 001010 ==> FRINTM, 001011 ==> FRINTZ,
8115 001100 ==> FRINTA, 001101 ==> UNALLOC
8116 001110 ==> FRINTX, 001111 ==> FRINTI
with type 11
000100 ==> FCVT (half-to-single)
000101 ==> FCVT (half-to-double)
8120 instr[14,10] = 10000. */
8122 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8123 uint32_t type = INSTR (23, 22);
8124 uint32_t opcode = INSTR (20, 15);
8132 do_FCVT_half_to_single (cpu);
8133 else if (opcode == 5)
8134 do_FCVT_half_to_double (cpu);
8186 case 8: /* FRINTN etc. */
8198 do_FCVT_double_to_half (cpu);
8200 do_FCVT_single_to_half (cpu);
8211 /* 32 bit signed int to float. */
8213 scvtf32 (sim_cpu *cpu)
8215 unsigned rn = INSTR (9, 5);
8216 unsigned sd = INSTR (4, 0);
8218 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8219 aarch64_set_FP_float
8220 (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
/* 64 bit signed int to float. */
8225 scvtf (sim_cpu *cpu)
8227 unsigned rn = INSTR (9, 5);
8228 unsigned sd = INSTR (4, 0);
8230 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8231 aarch64_set_FP_float
8232 (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8235 /* 32 bit signed int to double. */
8237 scvtd32 (sim_cpu *cpu)
8239 unsigned rn = INSTR (9, 5);
8240 unsigned sd = INSTR (4, 0);
8242 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8243 aarch64_set_FP_double
8244 (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
/* 64 bit signed int to double. */
8249 scvtd (sim_cpu *cpu)
8251 unsigned rn = INSTR (9, 5);
8252 unsigned sd = INSTR (4, 0);
8254 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8255 aarch64_set_FP_double
8256 (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8259 static const float FLOAT_INT_MAX = (float) INT_MAX;
8260 static const float FLOAT_INT_MIN = (float) INT_MIN;
8261 static const double DOUBLE_INT_MAX = (double) INT_MAX;
8262 static const double DOUBLE_INT_MIN = (double) INT_MIN;
8263 static const float FLOAT_LONG_MAX = (float) LONG_MAX;
8264 static const float FLOAT_LONG_MIN = (float) LONG_MIN;
8265 static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
8266 static const double DOUBLE_LONG_MIN = (double) LONG_MIN;
/* UINT_MIN and ULONG_MIN are not defined by <limits.h>; both are zero. */
#define UINT_MIN 0
#define ULONG_MIN 0
static const float FLOAT_UINT_MAX = (float) UINT_MAX;
8271 static const float FLOAT_UINT_MIN = (float) UINT_MIN;
8272 static const double DOUBLE_UINT_MAX = (double) UINT_MAX;
8273 static const double DOUBLE_UINT_MIN = (double) UINT_MIN;
8274 static const float FLOAT_ULONG_MAX = (float) ULONG_MAX;
8275 static const float FLOAT_ULONG_MIN = (float) ULONG_MIN;
8276 static const double DOUBLE_ULONG_MAX = (double) ULONG_MAX;
8277 static const double DOUBLE_ULONG_MIN = (double) ULONG_MIN;
/* Check for FP exception conditions:
     NaN raises IO
     Out of Range raises IO and IX and saturates value
     Denormal raises ID and IX and sets to zero. */
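/* Illustrative use of the macro below: converting f = 1.0e10f to a
   32 bit signed integer falls in the out-of-range arm, so
   RAISE_EXCEPTIONS (f, value, FLOAT, INT) sets IO and IX in the FPSR
   and saturates value to INT_MAX.  */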
8284 #define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE) \
8287 switch (fpclassify (F)) \
8291 aarch64_set_FPSR (cpu, IO); \
8293 VALUE = ITYPE##_MAX; \
8295 VALUE = ITYPE##_MIN; \
8299 if (F >= FTYPE##_##ITYPE##_MAX) \
8301 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8302 VALUE = ITYPE##_MAX; \
8304 else if (F <= FTYPE##_##ITYPE##_MIN) \
8306 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8307 VALUE = ITYPE##_MIN; \
8311 case FP_SUBNORMAL: \
8312 aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID); \
8324 /* 32 bit convert float to signed int truncate towards zero. */
8326 fcvtszs32 (sim_cpu *cpu)
8328 unsigned sn = INSTR (9, 5);
8329 unsigned rd = INSTR (4, 0);
/* The cast truncates toward zero (C99 6.3.1.4), as FCVTZS requires. */
8331 float f = aarch64_get_FP_float (cpu, sn);
8332 int32_t value = (int32_t) f;
8334 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
8336 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8337 /* Avoid sign extension to 64 bit. */
8338 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8341 /* 64 bit convert float to signed int truncate towards zero. */
8343 fcvtszs (sim_cpu *cpu)
8345 unsigned sn = INSTR (9, 5);
8346 unsigned rd = INSTR (4, 0);
8347 float f = aarch64_get_FP_float (cpu, sn);
8348 int64_t value = (int64_t) f;
8350 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
8352 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8353 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8356 /* 32 bit convert double to signed int truncate towards zero. */
8358 fcvtszd32 (sim_cpu *cpu)
8360 unsigned sn = INSTR (9, 5);
8361 unsigned rd = INSTR (4, 0);
/* The cast truncates toward zero (C99 6.3.1.4), as FCVTZS requires. */
8363 double d = aarch64_get_FP_double (cpu, sn);
8364 int32_t value = (int32_t) d;
8366 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
8368 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8369 /* Avoid sign extension to 64 bit. */
8370 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8373 /* 64 bit convert double to signed int truncate towards zero. */
8375 fcvtszd (sim_cpu *cpu)
8377 unsigned sn = INSTR (9, 5);
8378 unsigned rd = INSTR (4, 0);
/* The cast truncates toward zero (C99 6.3.1.4), as FCVTZS requires. */
8380 double d = aarch64_get_FP_double (cpu, sn);
int64_t value = (int64_t) d;
8385 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
8387 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8388 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8392 do_fcvtzu (sim_cpu *cpu)
8394 /* instr[31] = size: 32-bit (0), 64-bit (1)
8395 instr[30,23] = 00111100
instr[22] = type: single (0)/ double (1)
instr[21] = 0 ==> fixed point (use precision field), 1 ==> integer
instr[20,16] = 11001
instr[15,10] = precision
instr[9,5] = Rs
instr[4,0] = Rd. */
8403 unsigned rs = INSTR (9, 5);
8404 unsigned rd = INSTR (4, 0);
8406 NYI_assert (30, 23, 0x3C);
8407 NYI_assert (20, 16, 0x19);
8409 if (INSTR (21, 21) != 1)
8410 /* Convert to fixed point. */
8413 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8416 /* Convert to unsigned 64-bit integer. */
8419 double d = aarch64_get_FP_double (cpu, rs);
8420 uint64_t value = (uint64_t) d;
8422 /* Do not raise an exception if we have reached ULONG_MAX. */
if (value != ((uint64_t) 1 << 63))
8424 RAISE_EXCEPTIONS (d, value, DOUBLE, ULONG);
8426 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8430 float f = aarch64_get_FP_float (cpu, rs);
8431 uint64_t value = (uint64_t) f;
8433 /* Do not raise an exception if we have reached ULONG_MAX. */
if (value != ((uint64_t) 1 << 63))
8435 RAISE_EXCEPTIONS (f, value, FLOAT, ULONG);
8437 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8444 /* Convert to unsigned 32-bit integer. */
8447 double d = aarch64_get_FP_double (cpu, rs);
8449 value = (uint32_t) d;
8450 /* Do not raise an exception if we have reached UINT_MAX. */
8451 if (value != (1UL << 31))
8452 RAISE_EXCEPTIONS (d, value, DOUBLE, UINT);
8456 float f = aarch64_get_FP_float (cpu, rs);
8458 value = (uint32_t) f;
8459 /* Do not raise an exception if we have reached UINT_MAX. */
8460 if (value != (1UL << 31))
8461 RAISE_EXCEPTIONS (f, value, FLOAT, UINT);
8464 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8469 do_UCVTF (sim_cpu *cpu)
8471 /* instr[31] = size: 32-bit (0), 64-bit (1)
8472 instr[30,23] = 001 1110 0
instr[22] = type: single (0)/ double (1)
instr[21] = 0 ==> fixed point (use precision field), 1 ==> integer
instr[20,16] = 0 0011
instr[15,10] = precision
instr[9,5] = Rs
instr[4,0] = Rd. */
8480 unsigned rs = INSTR (9, 5);
8481 unsigned rd = INSTR (4, 0);
8483 NYI_assert (30, 23, 0x3C);
8484 NYI_assert (20, 16, 0x03);
8486 if (INSTR (21, 21) != 1)
8489 /* FIXME: Add exception raising. */
8490 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8493 uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);
8496 aarch64_set_FP_double (cpu, rd, (double) value);
8498 aarch64_set_FP_float (cpu, rd, (float) value);
8502 uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP);
8505 aarch64_set_FP_double (cpu, rd, (double) value);
8507 aarch64_set_FP_float (cpu, rd, (float) value);
8512 float_vector_move (sim_cpu *cpu)
8514 /* instr[31,17] == 100 1111 0101 0111
8515 instr[16] ==> direction 0=> to GR, 1=> from GR
8517 instr[9,5] ==> source
8518 instr[4,0] ==> dest. */
8520 unsigned rn = INSTR (9, 5);
8521 unsigned rd = INSTR (4, 0);
8523 NYI_assert (31, 17, 0x4F57);
8525 if (INSTR (15, 10) != 0)
8528 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8530 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
8532 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
8536 dexSimpleFPIntegerConvert (sim_cpu *cpu)
8538 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
8540 instr[29] = S : 0 ==> OK, 1 ==> UNALLOC
8543 instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8545 instr[20,19] = rmode
8546 instr[18,16] = opcode
8547 instr[15,10] = 10 0000 */
8549 uint32_t rmode_opcode;
8555 if (INSTR (31, 17) == 0x4F57)
8557 float_vector_move (cpu);
8561 size = INSTR (31, 31);
8566 type = INSTR (23, 22);
8570 rmode_opcode = INSTR (20, 16);
8571 size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */
8573 switch (rmode_opcode)
8575 case 2: /* SCVTF. */
8578 case 0: scvtf32 (cpu); return;
8579 case 1: scvtd32 (cpu); return;
8580 case 2: scvtf (cpu); return;
8581 case 3: scvtd (cpu); return;
8584 case 6: /* FMOV GR, Vec. */
8587 case 0: gfmovs (cpu); return;
8588 case 3: gfmovd (cpu); return;
8589 default: HALT_UNALLOC;
8592 case 7: /* FMOV vec, GR. */
8595 case 0: fgmovs (cpu); return;
8596 case 3: fgmovd (cpu); return;
8597 default: HALT_UNALLOC;
8600 case 24: /* FCVTZS. */
8603 case 0: fcvtszs32 (cpu); return;
8604 case 1: fcvtszd32 (cpu); return;
8605 case 2: fcvtszs (cpu); return;
8606 case 3: fcvtszd (cpu); return;
8609 case 25: do_fcvtzu (cpu); return;
8610 case 3: do_UCVTF (cpu); return;
8612 case 0: /* FCVTNS. */
8613 case 1: /* FCVTNU. */
8614 case 4: /* FCVTAS. */
8615 case 5: /* FCVTAU. */
case 8: /* FCVTPS. */
8617 case 9: /* FCVTPU. */
8618 case 16: /* FCVTMS. */
8619 case 17: /* FCVTMU. */
8626 set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
8630 /* FIXME: Add exception raising. */
8631 if (isnan (fvalue1) || isnan (fvalue2))
8633 else if (isinf (fvalue1) && isinf (fvalue2))
/* Subtracting two infinities may give a NaN. We only need to
   compare the signs; isinf's signed return value is a glibc
   extension, so use signbit to get the signs portably.  */
int result = !signbit (fvalue1) - !signbit (fvalue2);
8641 else if (result < 0)
8643 else /* (result > 0). */
8648 float result = fvalue1 - fvalue2;
8652 else if (result < 0)
8654 else /* (result > 0). */
8658 aarch64_set_CPSR (cpu, flags);
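/* For reference, the architectural NZCV results of FCMP are:
   equal -> 0110, less than -> 1000, greater than -> 0010,
   unordered -> 0011.  */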
8662 fcmps (sim_cpu *cpu)
8664 unsigned sm = INSTR (20, 16);
8665 unsigned sn = INSTR ( 9, 5);
8667 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8668 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8670 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8671 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8674 /* Float compare to zero -- Invalid Operation exception
8675 only on signaling NaNs. */
8677 fcmpzs (sim_cpu *cpu)
8679 unsigned sn = INSTR ( 9, 5);
8680 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8682 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8683 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8686 /* Float compare -- Invalid Operation exception on all NaNs. */
8688 fcmpes (sim_cpu *cpu)
8690 unsigned sm = INSTR (20, 16);
8691 unsigned sn = INSTR ( 9, 5);
8693 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8694 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8696 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8697 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8700 /* Float compare to zero -- Invalid Operation exception on all NaNs. */
8702 fcmpzes (sim_cpu *cpu)
8704 unsigned sn = INSTR ( 9, 5);
8705 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8707 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8708 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8712 set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
8716 /* FIXME: Add exception raising. */
8717 if (isnan (dval1) || isnan (dval2))
8719 else if (isinf (dval1) && isinf (dval2))
/* Subtracting two infinities may give a NaN. We only need to
   compare the signs; isinf's signed return value is a glibc
   extension, so use signbit to get the signs portably.  */
int result = !signbit (dval1) - !signbit (dval2);
8727 else if (result < 0)
8729 else /* (result > 0). */
8734 double result = dval1 - dval2;
8738 else if (result < 0)
8740 else /* (result > 0). */
8744 aarch64_set_CPSR (cpu, flags);
8747 /* Double compare -- Invalid Operation exception only on signaling NaNs. */
8749 fcmpd (sim_cpu *cpu)
8751 unsigned sm = INSTR (20, 16);
8752 unsigned sn = INSTR ( 9, 5);
8754 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8755 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8757 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8758 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8761 /* Double compare to zero -- Invalid Operation exception
8762 only on signaling NaNs. */
8764 fcmpzd (sim_cpu *cpu)
8766 unsigned sn = INSTR ( 9, 5);
8767 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8769 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8770 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8773 /* Double compare -- Invalid Operation exception on all NaNs. */
8775 fcmped (sim_cpu *cpu)
8777 unsigned sm = INSTR (20, 16);
8778 unsigned sn = INSTR ( 9, 5);
8780 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8781 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8783 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8784 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8787 /* Double compare to zero -- Invalid Operation exception on all NaNs. */
8789 fcmpzed (sim_cpu *cpu)
8791 unsigned sn = INSTR ( 9, 5);
8792 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8794 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8795 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8799 dexSimpleFPCompare (sim_cpu *cpu)
8801 /* assert instr[28,25] == 1111
8802 instr[30:24:21:13,10] = 0011000
8803 instr[31] = M : 0 ==> OK, 1 ==> UNALLOC
8804 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
instr[15,14] ==> op : 00 ==> OK, otherwise ==> UNALLOC
8807 instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
8808 01000 ==> FCMPZ, 11000 ==> FCMPEZ,
8811 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8812 uint32_t type = INSTR (23, 22);
8813 uint32_t op = INSTR (15, 14);
8814 uint32_t op2_2_0 = INSTR (2, 0);
8828 /* dispatch on type and top 2 bits of opcode. */
8829 dispatch = (type << 2) | INSTR (4, 3);
8833 case 0: fcmps (cpu); return;
8834 case 1: fcmpzs (cpu); return;
8835 case 2: fcmpes (cpu); return;
8836 case 3: fcmpzes (cpu); return;
8837 case 4: fcmpd (cpu); return;
8838 case 5: fcmpzd (cpu); return;
8839 case 6: fcmped (cpu); return;
8840 case 7: fcmpzed (cpu); return;
8845 do_scalar_FADDP (sim_cpu *cpu)
8847 /* instr [31,23] = 0111 1110 0
8848 instr [22] = single(0)/double(1)
8849 instr [21,10] = 11 0000 1101 10
8851 instr [4,0] = Fd. */
8853 unsigned Fn = INSTR (9, 5);
8854 unsigned Fd = INSTR (4, 0);
8856 NYI_assert (31, 23, 0x0FC);
8857 NYI_assert (21, 10, 0xC36);
8859 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8862 double val1 = aarch64_get_vec_double (cpu, Fn, 0);
8863 double val2 = aarch64_get_vec_double (cpu, Fn, 1);
8865 aarch64_set_FP_double (cpu, Fd, val1 + val2);
8869 float val1 = aarch64_get_vec_float (cpu, Fn, 0);
8870 float val2 = aarch64_get_vec_float (cpu, Fn, 1);
8872 aarch64_set_FP_float (cpu, Fd, val1 + val2);
8876 /* Floating point absolute difference. */
8879 do_scalar_FABD (sim_cpu *cpu)
8881 /* instr [31,23] = 0111 1110 1
8882 instr [22] = float(0)/double(1)
8885 instr [15,10] = 1101 01
8887 instr [4, 0] = Rd. */
8889 unsigned rm = INSTR (20, 16);
8890 unsigned rn = INSTR (9, 5);
8891 unsigned rd = INSTR (4, 0);
8893 NYI_assert (31, 23, 0x0FD);
8894 NYI_assert (21, 21, 1);
8895 NYI_assert (15, 10, 0x35);
8897 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8899 aarch64_set_FP_double (cpu, rd,
8900 fabs (aarch64_get_FP_double (cpu, rn)
8901 - aarch64_get_FP_double (cpu, rm)));
8903 aarch64_set_FP_float (cpu, rd,
8904 fabsf (aarch64_get_FP_float (cpu, rn)
8905 - aarch64_get_FP_float (cpu, rm)));
8909 do_scalar_CMGT (sim_cpu *cpu)
8911 /* instr [31,21] = 0101 1110 111
8913 instr [15,10] = 00 1101
8915 instr [4, 0] = Rd. */
8917 unsigned rm = INSTR (20, 16);
8918 unsigned rn = INSTR (9, 5);
8919 unsigned rd = INSTR (4, 0);
8921 NYI_assert (31, 21, 0x2F7);
8922 NYI_assert (15, 10, 0x0D);
8924 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8925 aarch64_set_vec_u64 (cpu, rd, 0,
8926 aarch64_get_vec_u64 (cpu, rn, 0) >
8927 aarch64_get_vec_u64 (cpu, rm, 0) ? -1L : 0L);
8931 do_scalar_USHR (sim_cpu *cpu)
8933 /* instr [31,23] = 0111 1111 0
8934 instr [22,16] = shift amount
8935 instr [15,10] = 0000 01
8937 instr [4, 0] = Rd. */
8939 unsigned amount = 128 - INSTR (22, 16);
8940 unsigned rn = INSTR (9, 5);
8941 unsigned rd = INSTR (4, 0);
8943 NYI_assert (31, 23, 0x0FE);
8944 NYI_assert (15, 10, 0x01);
8946 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8947 aarch64_set_vec_u64 (cpu, rd, 0,
8948 aarch64_get_vec_u64 (cpu, rn, 0) >> amount);
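/* Worked example for do_scalar_USHR (illustrative operands):
   USHR D0, D1, #8 encodes immh:immb (instr[22,16]) as 0x78,
   giving amount = 128 - 0x78 = 8.  */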
8952 do_scalar_SSHL (sim_cpu *cpu)
8954 /* instr [31,21] = 0101 1110 111
8956 instr [15,10] = 0100 01
8958 instr [4, 0] = Rd. */
8960 unsigned rm = INSTR (20, 16);
8961 unsigned rn = INSTR (9, 5);
8962 unsigned rd = INSTR (4, 0);
8963 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8965 NYI_assert (31, 21, 0x2F7);
8966 NYI_assert (15, 10, 0x11);
8968 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8970 aarch64_set_vec_s64 (cpu, rd, 0,
8971 aarch64_get_vec_s64 (cpu, rn, 0) << shift);
8973 aarch64_set_vec_s64 (cpu, rd, 0,
8974 aarch64_get_vec_s64 (cpu, rn, 0) >> - shift);
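/* Worked example for do_scalar_SSHL (illustrative values): if the
   low byte of Rm holds -4, the shift is negative, so Rn.D[0] is
   arithmetically shifted right by 4, e.g. 0x100 -> 0x10.  */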
8977 /* Floating point scalar compare greater than or equal to 0. */
8979 do_scalar_FCMGE_zero (sim_cpu *cpu)
8981 /* instr [31,23] = 0111 1110 1
8982 instr [22,22] = size
8983 instr [21,16] = 1000 00
8984 instr [15,10] = 1100 10
8986 instr [4, 0] = Rd. */
8988 unsigned size = INSTR (22, 22);
8989 unsigned rn = INSTR (9, 5);
8990 unsigned rd = INSTR (4, 0);
8992 NYI_assert (31, 23, 0x0FD);
8993 NYI_assert (21, 16, 0x20);
8994 NYI_assert (15, 10, 0x32);
8996 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8998 aarch64_set_vec_u64 (cpu, rd, 0,
8999 aarch64_get_vec_double (cpu, rn, 0) >= 0.0 ? -1 : 0);
9001 aarch64_set_vec_u32 (cpu, rd, 0,
9002 aarch64_get_vec_float (cpu, rn, 0) >= 0.0 ? -1 : 0);
9005 /* Floating point scalar compare less than or equal to 0. */
9007 do_scalar_FCMLE_zero (sim_cpu *cpu)
9009 /* instr [31,23] = 0111 1110 1
9010 instr [22,22] = size
9011 instr [21,16] = 1000 00
9012 instr [15,10] = 1101 10
9014 instr [4, 0] = Rd. */
9016 unsigned size = INSTR (22, 22);
9017 unsigned rn = INSTR (9, 5);
9018 unsigned rd = INSTR (4, 0);
9020 NYI_assert (31, 23, 0x0FD);
9021 NYI_assert (21, 16, 0x20);
9022 NYI_assert (15, 10, 0x36);
9024 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9026 aarch64_set_vec_u64 (cpu, rd, 0,
9027 aarch64_get_vec_double (cpu, rn, 0) <= 0.0 ? -1 : 0);
9029 aarch64_set_vec_u32 (cpu, rd, 0,
9030 aarch64_get_vec_float (cpu, rn, 0) <= 0.0 ? -1 : 0);
9033 /* Floating point scalar compare greater than 0. */
9035 do_scalar_FCMGT_zero (sim_cpu *cpu)
9037 /* instr [31,23] = 0101 1110 1
9038 instr [22,22] = size
9039 instr [21,16] = 1000 00
9040 instr [15,10] = 1100 10
9042 instr [4, 0] = Rd. */
9044 unsigned size = INSTR (22, 22);
9045 unsigned rn = INSTR (9, 5);
9046 unsigned rd = INSTR (4, 0);
9048 NYI_assert (31, 23, 0x0BD);
9049 NYI_assert (21, 16, 0x20);
9050 NYI_assert (15, 10, 0x32);
9052 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9054 aarch64_set_vec_u64 (cpu, rd, 0,
9055 aarch64_get_vec_double (cpu, rn, 0) > 0.0 ? -1 : 0);
9057 aarch64_set_vec_u32 (cpu, rd, 0,
9058 aarch64_get_vec_float (cpu, rn, 0) > 0.0 ? -1 : 0);
9061 /* Floating point scalar compare equal to 0. */
9063 do_scalar_FCMEQ_zero (sim_cpu *cpu)
9065 /* instr [31,23] = 0101 1110 1
9066 instr [22,22] = size
9067 instr [21,16] = 1000 00
9068 instr [15,10] = 1101 10
9070 instr [4, 0] = Rd. */
9072 unsigned size = INSTR (22, 22);
9073 unsigned rn = INSTR (9, 5);
9074 unsigned rd = INSTR (4, 0);
9076 NYI_assert (31, 23, 0x0BD);
9077 NYI_assert (21, 16, 0x20);
9078 NYI_assert (15, 10, 0x36);
9080 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9082 aarch64_set_vec_u64 (cpu, rd, 0,
9083 aarch64_get_vec_double (cpu, rn, 0) == 0.0 ? -1 : 0);
9085 aarch64_set_vec_u32 (cpu, rd, 0,
9086 aarch64_get_vec_float (cpu, rn, 0) == 0.0 ? -1 : 0);
9089 /* Floating point scalar compare less than 0. */
9091 do_scalar_FCMLT_zero (sim_cpu *cpu)
9093 /* instr [31,23] = 0101 1110 1
9094 instr [22,22] = size
9095 instr [21,16] = 1000 00
9096 instr [15,10] = 1110 10
9098 instr [4, 0] = Rd. */
9100 unsigned size = INSTR (22, 22);
9101 unsigned rn = INSTR (9, 5);
9102 unsigned rd = INSTR (4, 0);
9104 NYI_assert (31, 23, 0x0BD);
9105 NYI_assert (21, 16, 0x20);
9106 NYI_assert (15, 10, 0x3A);
9108 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9110 aarch64_set_vec_u64 (cpu, rd, 0,
9111 aarch64_get_vec_double (cpu, rn, 0) < 0.0 ? -1 : 0);
9113 aarch64_set_vec_u32 (cpu, rd, 0,
9114 aarch64_get_vec_float (cpu, rn, 0) < 0.0 ? -1 : 0);
9118 do_scalar_shift (sim_cpu *cpu)
9120 /* instr [31,23] = 0101 1111 0
9121 instr [22,16] = shift amount
9122 instr [15,10] = 0101 01 [SHL]
9123 instr [15,10] = 0000 01 [SSHR]
9125 instr [4, 0] = Rd. */
9127 unsigned rn = INSTR (9, 5);
9128 unsigned rd = INSTR (4, 0);
9131 NYI_assert (31, 23, 0x0BE);
9133 if (INSTR (22, 22) == 0)
9136 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9137 switch (INSTR (15, 10))
9139 case 0x01: /* SSHR */
9140 amount = 128 - INSTR (22, 16);
9141 aarch64_set_vec_s64 (cpu, rd, 0,
9142 aarch64_get_vec_s64 (cpu, rn, 0) >> amount);
9144 case 0x15: /* SHL */
9145 amount = INSTR (22, 16) - 64;
9146 aarch64_set_vec_u64 (cpu, rd, 0,
9147 aarch64_get_vec_u64 (cpu, rn, 0) << amount);
9154 /* FCMEQ FCMGT FCMGE. */
9156 do_scalar_FCM (sim_cpu *cpu)
9158 /* instr [31,30] = 01
9160 instr [28,24] = 1 1110
9165 instr [15,12] = 1110
9169 instr [4, 0] = Rd. */
9171 unsigned rm = INSTR (20, 16);
9172 unsigned rn = INSTR (9, 5);
9173 unsigned rd = INSTR (4, 0);
9174 unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11);
9179 NYI_assert (31, 30, 1);
9180 NYI_assert (28, 24, 0x1E);
9181 NYI_assert (21, 21, 1);
9182 NYI_assert (15, 12, 0xE);
9183 NYI_assert (10, 10, 1);
9185 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9188 double val1 = aarch64_get_FP_double (cpu, rn);
9189 double val2 = aarch64_get_FP_double (cpu, rm);
9194 result = val1 == val2;
9202 result = val1 >= val2;
9210 result = val1 > val2;
9217 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
9221 val1 = aarch64_get_FP_float (cpu, rn);
9222 val2 = aarch64_get_FP_float (cpu, rm);
9227 result = val1 == val2;
9231 val1 = fabsf (val1);
9232 val2 = fabsf (val2);
9235 result = val1 >= val2;
9239 val1 = fabsf (val1);
9240 val2 = fabsf (val2);
9243 result = val1 > val2;
9250 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
9253 /* An alias of DUP. */
9255 do_scalar_MOV (sim_cpu *cpu)
9257 /* instr [31,21] = 0101 1110 000
9258 instr [20,16] = imm5
9259 instr [15,10] = 0000 01
9261 instr [4, 0] = Rd. */
9263 unsigned rn = INSTR (9, 5);
9264 unsigned rd = INSTR (4, 0);
9267 NYI_assert (31, 21, 0x2F0);
9268 NYI_assert (15, 10, 0x01);
9270 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9274 index = INSTR (20, 17);
9276 (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index));
9278 else if (INSTR (17, 17))
9281 index = INSTR (20, 18);
9283 (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index));
9285 else if (INSTR (18, 18))
9288 index = INSTR (20, 19);
9290 (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index));
9292 else if (INSTR (19, 19))
9295 index = INSTR (20, 20);
9297 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index));
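/* Worked example (illustrative encoding): imm5 (instr[20,16]) =
   0b00111 has bit 16 set, so this is a byte move with index =
   instr[20,17] = 3, copying element Vn.B[3] to the scalar
   destination.  */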
9304 do_scalar_NEG (sim_cpu *cpu)
9306 /* instr [31,10] = 0111 1110 1110 0000 1011 10
9308 instr [4, 0] = Rd. */
9310 unsigned rn = INSTR (9, 5);
9311 unsigned rd = INSTR (4, 0);
9313 NYI_assert (31, 10, 0x1FB82E);
9315 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9316 aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0));
9320 do_scalar_USHL (sim_cpu *cpu)
9322 /* instr [31,21] = 0111 1110 111
9324 instr [15,10] = 0100 01
9326 instr [4, 0] = Rd. */
9328 unsigned rm = INSTR (20, 16);
9329 unsigned rn = INSTR (9, 5);
9330 unsigned rd = INSTR (4, 0);
9331 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
9333 NYI_assert (31, 21, 0x3F7);
9334 NYI_assert (15, 10, 0x11);
9336 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9338 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift);
9340 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift);
9344 do_double_add (sim_cpu *cpu)
9346 /* instr [31,21] = 0101 1110 111
9348 instr [15,10] = 1000 01
9350 instr [4,0] = Fd. */
9357 NYI_assert (31, 21, 0x2F7);
9358 NYI_assert (15, 10, 0x21);
9362 Fn = INSTR (20, 16);
9364 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9365 val1 = aarch64_get_FP_double (cpu, Fm);
9366 val2 = aarch64_get_FP_double (cpu, Fn);
9368 aarch64_set_FP_double (cpu, Fd, val1 + val2);
9372 do_scalar_UCVTF (sim_cpu *cpu)
9374 /* instr [31,23] = 0111 1110 0
9375 instr [22] = single(0)/double(1)
9376 instr [21,10] = 10 0001 1101 10
9378 instr [4,0] = rd. */
9380 unsigned rn = INSTR (9, 5);
9381 unsigned rd = INSTR (4, 0);
9383 NYI_assert (31, 23, 0x0FC);
9384 NYI_assert (21, 10, 0x876);
9386 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9389 uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0);
9391 aarch64_set_vec_double (cpu, rd, 0, (double) val);
9395 uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0);
9397 aarch64_set_vec_float (cpu, rd, 0, (float) val);
9402 do_scalar_vec (sim_cpu *cpu)
9404 /* instr [30] = 1. */
9405 /* instr [28,25] = 1111. */
9406 switch (INSTR (31, 23))
9409 switch (INSTR (15, 10))
9411 case 0x01: do_scalar_MOV (cpu); return;
9412 case 0x39: do_scalar_FCM (cpu); return;
9413 case 0x3B: do_scalar_FCM (cpu); return;
9417 case 0xBE: do_scalar_shift (cpu); return;
9420 switch (INSTR (15, 10))
9423 switch (INSTR (21, 16))
9425 case 0x30: do_scalar_FADDP (cpu); return;
9426 case 0x21: do_scalar_UCVTF (cpu); return;
9429 case 0x39: do_scalar_FCM (cpu); return;
9430 case 0x3B: do_scalar_FCM (cpu); return;
9435 switch (INSTR (15, 10))
9437 case 0x0D: do_scalar_CMGT (cpu); return;
9438 case 0x11: do_scalar_USHL (cpu); return;
9439 case 0x2E: do_scalar_NEG (cpu); return;
9440 case 0x32: do_scalar_FCMGE_zero (cpu); return;
9441 case 0x35: do_scalar_FABD (cpu); return;
9442 case 0x36: do_scalar_FCMLE_zero (cpu); return;
9443 case 0x39: do_scalar_FCM (cpu); return;
9444 case 0x3B: do_scalar_FCM (cpu); return;
9449 case 0xFE: do_scalar_USHR (cpu); return;
9452 switch (INSTR (15, 10))
9454 case 0x21: do_double_add (cpu); return;
9455 case 0x11: do_scalar_SSHL (cpu); return;
9456 case 0x32: do_scalar_FCMGT_zero (cpu); return;
9457 case 0x36: do_scalar_FCMEQ_zero (cpu); return;
9458 case 0x3A: do_scalar_FCMLT_zero (cpu); return;
9469 dexAdvSIMD1 (sim_cpu *cpu)
9471 /* instr [28,25] = 1 111. */
9473 /* We are currently only interested in the basic
9474 scalar fp routines which all have bit 30 = 0. */
9476 do_scalar_vec (cpu);
9478 /* instr[24] is set for FP data processing 3-source and clear for
9479 all other basic scalar fp instruction groups. */
9480 else if (INSTR (24, 24))
9481 dexSimpleFPDataProc3Source (cpu);
9483 /* instr[21] is clear for floating <-> fixed conversions and set for
9484 all other basic scalar fp instruction groups. */
9485 else if (!INSTR (21, 21))
9486 dexSimpleFPFixedConvert (cpu);
9488 /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
9489 11 ==> cond select, 00 ==> other. */
9491 switch (INSTR (11, 10))
9493 case 1: dexSimpleFPCondCompare (cpu); return;
9494 case 2: dexSimpleFPDataProc2Source (cpu); return;
9495 case 3: dexSimpleFPCondSelect (cpu); return;
9498 /* Now an ordered cascade of tests.
9499 FP immediate has instr [12] == 1.
9500 FP compare has instr [13] == 1.
9501 FP Data Proc 1 Source has instr [14] == 1.
9502 FP floating <--> integer conversions has instr [15] == 0. */
9504 dexSimpleFPImmediate (cpu);
9506 else if (INSTR (13, 13))
9507 dexSimpleFPCompare (cpu);
9509 else if (INSTR (14, 14))
9510 dexSimpleFPDataProc1Source (cpu);
9512 else if (!INSTR (15, 15))
9513 dexSimpleFPIntegerConvert (cpu);
9516 /* If we get here then instr[15] == 1 which means UNALLOC. */
9521 /* PC relative addressing. */
9524 pcadr (sim_cpu *cpu)
9526 /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP
9527 instr[30,29] = immlo
9528 instr[23,5] = immhi. */
9530 unsigned rd = INSTR (4, 0);
9531 uint32_t isPage = INSTR (31, 31);
union { uint64_t u64; int64_t s64; } imm;
9535 imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
9537 offset = (offset << 2) | INSTR (30, 29);
9539 address = aarch64_get_PC (cpu);
9547 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9548 aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
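/* Worked example (illustrative register and offset): ADR X0, .+12
   encodes immlo (instr[30,29]) = 0 and immhi (instr[23,5]) = 3, so
   offset = (3 << 2) | 0 = 12 and X0 = PC + 12.  */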
9551 /* Specific decode and execute for group Data Processing Immediate. */
9554 dexPCRelAddressing (sim_cpu *cpu)
9556 /* assert instr[28,24] = 10000. */
9560 /* Immediate logical.
9561 The bimm32/64 argument is constructed by replicating a 2, 4, 8,
9562 16, 32 or 64 bit sequence pulled out at decode and possibly
N.B. the output register (dest) can normally be Xn or SP;
the exception occurs for flag setting instructions, which may
only use Xn for the output (dest). The input register can
never be SP. */
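/* For illustration: replicating the 2 bit pattern 01 across 64 bits
   gives bimm = 0x5555555555555555, and replicating the 16 bit
   pattern 0xff00 gives bimm = 0xff00ff00ff00ff00.  */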
9570 /* 32 bit and immediate. */
9572 and32 (sim_cpu *cpu, uint32_t bimm)
9574 unsigned rn = INSTR (9, 5);
9575 unsigned rd = INSTR (4, 0);
9577 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9578 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9579 aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
9582 /* 64 bit and immediate. */
9584 and64 (sim_cpu *cpu, uint64_t bimm)
9586 unsigned rn = INSTR (9, 5);
9587 unsigned rd = INSTR (4, 0);
9589 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9590 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9591 aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
9594 /* 32 bit and immediate set flags. */
9596 ands32 (sim_cpu *cpu, uint32_t bimm)
9598 unsigned rn = INSTR (9, 5);
9599 unsigned rd = INSTR (4, 0);
9601 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9602 uint32_t value2 = bimm;
9604 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9605 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9606 set_flags_for_binop32 (cpu, value1 & value2);
9609 /* 64 bit and immediate set flags. */
9611 ands64 (sim_cpu *cpu, uint64_t bimm)
9613 unsigned rn = INSTR (9, 5);
9614 unsigned rd = INSTR (4, 0);
9616 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9617 uint64_t value2 = bimm;
9619 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9620 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9621 set_flags_for_binop64 (cpu, value1 & value2);
9624 /* 32 bit exclusive or immediate. */
9626 eor32 (sim_cpu *cpu, uint32_t bimm)
9628 unsigned rn = INSTR (9, 5);
9629 unsigned rd = INSTR (4, 0);
9631 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9632 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9633 aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
9636 /* 64 bit exclusive or immediate. */
9638 eor64 (sim_cpu *cpu, uint64_t bimm)
9640 unsigned rn = INSTR (9, 5);
9641 unsigned rd = INSTR (4, 0);
9643 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9644 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9645 aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
9648 /* 32 bit or immediate. */
9650 orr32 (sim_cpu *cpu, uint32_t bimm)
9652 unsigned rn = INSTR (9, 5);
9653 unsigned rd = INSTR (4, 0);
9655 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9656 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9657 aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm);
9660 /* 64 bit or immediate. */
9662 orr64 (sim_cpu *cpu, uint64_t bimm)
9664 unsigned rn = INSTR (9, 5);
9665 unsigned rd = INSTR (4, 0);
9667 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9668 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9669 aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm);
9672 /* Logical shifted register.
9673 These allow an optional LSL, ASR, LSR or ROR to the second source
9674 register with a count up to the register bit count.
9675 N.B register args may not be SP. */
9677 /* 32 bit AND shifted register. */
9679 and32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9681 unsigned rm = INSTR (20, 16);
9682 unsigned rn = INSTR (9, 5);
9683 unsigned rd = INSTR (4, 0);
9685 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9687 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9688 & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9691 /* 64 bit AND shifted register. */
9693 and64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9695 unsigned rm = INSTR (20, 16);
9696 unsigned rn = INSTR (9, 5);
9697 unsigned rd = INSTR (4, 0);
9699 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9701 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9702 & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9705 /* 32 bit AND shifted register setting flags. */
9707 ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9709 unsigned rm = INSTR (20, 16);
9710 unsigned rn = INSTR (9, 5);
9711 unsigned rd = INSTR (4, 0);
9713 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9714 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9717 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9718 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9719 set_flags_for_binop32 (cpu, value1 & value2);
9722 /* 64 bit AND shifted register setting flags. */
9724 ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9726 unsigned rm = INSTR (20, 16);
9727 unsigned rn = INSTR (9, 5);
9728 unsigned rd = INSTR (4, 0);
9730 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9731 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9734 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9735 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9736 set_flags_for_binop64 (cpu, value1 & value2);
9739 /* 32 bit BIC shifted register. */
9741 bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9743 unsigned rm = INSTR (20, 16);
9744 unsigned rn = INSTR (9, 5);
9745 unsigned rd = INSTR (4, 0);
9747 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9749 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9750 & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9753 /* 64 bit BIC shifted register. */
9755 bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9757 unsigned rm = INSTR (20, 16);
9758 unsigned rn = INSTR (9, 5);
9759 unsigned rd = INSTR (4, 0);
9761 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9763 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9764 & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9767 /* 32 bit BIC shifted register setting flags. */
9769 bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9771 unsigned rm = INSTR (20, 16);
9772 unsigned rn = INSTR (9, 5);
9773 unsigned rd = INSTR (4, 0);
9775 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9776 uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9779 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9780 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9781 set_flags_for_binop32 (cpu, value1 & value2);
9784 /* 64 bit BIC shifted register setting flags. */
9786 bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9788 unsigned rm = INSTR (20, 16);
9789 unsigned rn = INSTR (9, 5);
9790 unsigned rd = INSTR (4, 0);
9792 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9793 uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9796 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9797 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9798 set_flags_for_binop64 (cpu, value1 & value2);
9801 /* 32 bit EON shifted register. */
9803 eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9805 unsigned rm = INSTR (20, 16);
9806 unsigned rn = INSTR (9, 5);
9807 unsigned rd = INSTR (4, 0);
9809 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9811 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9812 ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9815 /* 64 bit EON shifted register. */
9817 eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9819 unsigned rm = INSTR (20, 16);
9820 unsigned rn = INSTR (9, 5);
9821 unsigned rd = INSTR (4, 0);
9823 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9825 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9826 ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9829 /* 32 bit EOR shifted register. */
9831 eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9833 unsigned rm = INSTR (20, 16);
9834 unsigned rn = INSTR (9, 5);
9835 unsigned rd = INSTR (4, 0);
9837 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9839 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9840 ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9843 /* 64 bit EOR shifted register. */
9845 eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9847 unsigned rm = INSTR (20, 16);
9848 unsigned rn = INSTR (9, 5);
9849 unsigned rd = INSTR (4, 0);
9851 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9853 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9854 ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9857 /* 32 bit ORR shifted register. */
9859 orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9861 unsigned rm = INSTR (20, 16);
9862 unsigned rn = INSTR (9, 5);
9863 unsigned rd = INSTR (4, 0);
9865 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9867 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9868 | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9871 /* 64 bit ORR shifted register. */
9873 orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9875 unsigned rm = INSTR (20, 16);
9876 unsigned rn = INSTR (9, 5);
9877 unsigned rd = INSTR (4, 0);
9879 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9881 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9882 | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9885 /* 32 bit ORN shifted register. */
9887 orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9889 unsigned rm = INSTR (20, 16);
9890 unsigned rn = INSTR (9, 5);
9891 unsigned rd = INSTR (4, 0);
9893 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9895 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9896 | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9899 /* 64 bit ORN shifted register. */
9901 orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9903 unsigned rm = INSTR (20, 16);
9904 unsigned rn = INSTR (9, 5);
9905 unsigned rd = INSTR (4, 0);
9907 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9909 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9910 | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9914 dexLogicalImmediate (sim_cpu *cpu)
/* assert instr[28,23] = 100100
9917 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9918 instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
9919 instr[22] = N : used to construct immediate mask
9925 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9926 uint32_t size = INSTR (31, 31);
9927 uint32_t N = INSTR (22, 22);
9928 /* uint32_t immr = INSTR (21, 16);. */
9929 /* uint32_t imms = INSTR (15, 10);. */
9930 uint32_t index = INSTR (22, 10);
9931 uint64_t bimm64 = LITable [index];
9932 uint32_t dispatch = INSTR (30, 29);
9942 uint32_t bimm = (uint32_t) bimm64;
9946 case 0: and32 (cpu, bimm); return;
9947 case 1: orr32 (cpu, bimm); return;
9948 case 2: eor32 (cpu, bimm); return;
9949 case 3: ands32 (cpu, bimm); return;
9956 case 0: and64 (cpu, bimm64); return;
9957 case 1: orr64 (cpu, bimm64); return;
9958 case 2: eor64 (cpu, bimm64); return;
9959 case 3: ands64 (cpu, bimm64); return;
/* Move wide immediate.
   The uimm argument is a 16 bit value to be inserted into the
   target register; the pos argument locates the 16 bit word in the
   dest register, i.e. it is in {0, 1} for 32 bit and {0, 1, 2, 3}
   for 64 bit.
   N.B. the register argument may not be SP, so it should be
   accessed using the setGZRegisterXXX accessors. */
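/* Worked example (illustrative operands): MOVZ X0, #0x1234, LSL #16
   arrives here as movz64 (cpu, 0x1234, 1) and writes
   0x0000000012340000.  */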
9973 /* 32 bit move 16 bit immediate zero remaining shorts. */
9975 movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9977 unsigned rd = INSTR (4, 0);
9979 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9980 aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
9983 /* 64 bit move 16 bit immediate zero remaining shorts. */
9985 movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9987 unsigned rd = INSTR (4, 0);
9989 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9990 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
/* 32 bit move 16 bit immediate inverted (MOVN). */
9995 movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9997 unsigned rd = INSTR (4, 0);
9999 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10000 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
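/* Worked example (illustrative operands): MOVN W0, #0 arrives here
   as movn32 (cpu, 0, 0) and writes 0xffffffff, i.e. W0 = -1.  */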
/* 64 bit move 16 bit immediate inverted (MOVN). */
10005 movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
10007 unsigned rd = INSTR (4, 0);
10009 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10010 aarch64_set_reg_u64
10011 (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
10012 ^ 0xffffffffffffffffULL));
10015 /* 32 bit move 16 bit immediate keep remaining shorts. */
10017 movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
10019 unsigned rd = INSTR (4, 0);
10020 uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
10021 uint32_t value = val << (pos * 16);
10022 uint32_t mask = ~(0xffffU << (pos * 16));
10024 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10025 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
/* 64 bit move 16 bit immediate keep remaining shorts. */
10030 movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
10032 unsigned rd = INSTR (4, 0);
10033 uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
10034 uint64_t value = (uint64_t) val << (pos * 16);
10035 uint64_t mask = ~(0xffffULL << (pos * 16));
10037 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10038 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
10042 dexMoveWideImmediate (sim_cpu *cpu)
10044 /* assert instr[28:23] = 100101
10045 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10046 instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
10047 instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
10048 instr[20,5] = uimm16
10051 /* N.B. the (multiple of 16) shift is applied by the called routine,
10052 we just pass the multiplier. */
10055 uint32_t size = INSTR (31, 31);
10056 uint32_t op = INSTR (30, 29);
10057 uint32_t shift = INSTR (22, 21);
/* A 32 bit operation can only shift by 0 or 16 (shift field 0 or 1);
   anything else is an unallocated instruction. */
10061 if (size == 0 && (shift > 1))
10067 imm = INSTR (20, 5);
10072 movn32 (cpu, imm, shift);
10074 movz32 (cpu, imm, shift);
10076 movk32 (cpu, imm, shift);
10081 movn64 (cpu, imm, shift);
10083 movz64 (cpu, imm, shift);
10085 movk64 (cpu, imm, shift);
10089 /* Bitfield operations.
10090 These take a pair of bit positions r and s which are in {0..31}
10091 or {0..63} depending on the instruction word size.
10092 N.B register args may not be SP. */
/* We start with UBFM, which just needs to pick some bits out of
   the source, zero the rest, and write the result to the dest:
   two logical shifts suffice. */
10098 /* 32 bit bitfield move, left and right of affected zeroed
10099 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10101 ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10104 unsigned rn = INSTR (9, 5);
10105 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10107 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
10110 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
10111 We want only bits s:xxx:r at the bottom of the word
10112 so we LSL bit s up to bit 31 i.e. by 31 - s
10113 and then we LSR to bring bit 31 down to bit s - r
10114 i.e. by 31 + r - s. */
10116 value >>= 31 + r - s;
10120 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
We want only bits s:xxx:0 starting at bit 31-(r-1)
10122 so we LSL bit s up to bit 31 i.e. by 31 - s
10123 and then we LSL to bring bit 31 down to 31-(r-1)+s
10124 i.e. by r - (s + 1). */
10126 value >>= r - (s + 1);
10129 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10131 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
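/* Worked example (illustrative registers): UBFX W0, W1, #4, #4
   decodes to ubfm32 (cpu, 4, 7). Here r = 4 <= s = 7, so
   value <<= 24 then value >>= 28, leaving Wn<7:4> in Wd<3:0>.  */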
10134 /* 64 bit bitfield move, left and right of affected zeroed
10135 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10137 ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10140 unsigned rn = INSTR (9, 5);
10141 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
10145 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
10146 We want only bits s:xxx:r at the bottom of the word.
10147 So we LSL bit s up to bit 63 i.e. by 63 - s
10148 and then we LSR to bring bit 63 down to bit s - r
10149 i.e. by 63 + r - s. */
10151 value >>= 63 + r - s;
10155 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
We want only bits s:xxx:0 starting at bit 63-(r-1).
10157 So we LSL bit s up to bit 63 i.e. by 63 - s
10158 and then we LSL to bring bit 63 down to 63-(r-1)+s
10159 i.e. by r - (s + 1). */
10161 value >>= r - (s + 1);
10164 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10166 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
10169 /* The signed versions need to insert sign bits
10170 on the left of the inserted bit field. so we do
10171 much the same as the unsigned version except we
10172 use an arithmetic shift right -- this just means
10173 we need to operate on signed values. */
10175 /* 32 bit bitfield move, left of affected sign-extended, right zeroed. */
10176 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10178 sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10181 unsigned rn = INSTR (9, 5);
/* As per ubfm32 but use an ASR instead of an LSR. */
10183 int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);
10188 value >>= 31 + r - s;
10193 value >>= r - (s + 1);
10196 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10198 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
10201 /* 64 bit bitfield move, left of affected sign-extended, right zeroed. */
10202 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10204 sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10207 unsigned rn = INSTR (9, 5);
/* As per ubfm but use an ASR instead of an LSR. */
10209 int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);
10214 value >>= 63 + r - s;
10219 value >>= r - (s + 1);
10222 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10224 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
10227 /* Finally, these versions leave non-affected bits
10228 as is. so we need to generate the bits as per
10229 ubfm and also generate a mask to pick the
10230 bits from the original and computed values. */
10232 /* 32 bit bitfield move, non-affected bits left as is.
10233 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10235 bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10237 unsigned rn = INSTR (9, 5);
10238 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
uint32_t mask = 0xffffffffU;
10243 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
10246 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
10247 We want only bits s:xxx:r at the bottom of the word
10248 so we LSL bit s up to bit 31 i.e. by 31 - s
10249 and then we LSR to bring bit 31 down to bit s - r
10250 i.e. by 31 + r - s. */
10252 value >>= 31 + r - s;
10253 /* the mask must include the same bits. */
10255 mask >>= 31 + r - s;
10259 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
We want only bits s:xxx:0 starting at bit 31-(r-1)
10261 so we LSL bit s up to bit 31 i.e. by 31 - s
10262 and then we LSL to bring bit 31 down to 31-(r-1)+s
10263 i.e. by r - (s + 1). */
10265 value >>= r - (s + 1);
10266 /* The mask must include the same bits. */
10268 mask >>= r - (s + 1);
10272 value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);
10277 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10278 aarch64_set_reg_u64
10279 (cpu, rd, NO_SP, (aarch64_get_reg_u32 (cpu, rd, NO_SP) & ~mask) | value);
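/* Worked example (illustrative registers): BFI W0, W1, #8, #4
   decodes to bfm32 (cpu, 24, 3). Here r = 24 > s = 3, so
   value <<= 28 then value >>= 20, placing Wn<3:0> at Wd<11:8>;
   the mask tracks the same shifts, so all other bits of Wd are
   preserved.  */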
10282 /* 64 bit bitfield move, non-affected bits left as is.
10283 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10285 bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10288 unsigned rn = INSTR (9, 5);
10289 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
10290 uint64_t mask = 0xffffffffffffffffULL;
10294 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
10295 We want only bits s:xxx:r at the bottom of the word
10296 so we LSL bit s up to bit 63 i.e. by 63 - s
10297 and then we LSR to bring bit 63 down to bit s - r
10298 i.e. by 63 + r - s. */
10300 value >>= 63 + r - s;
10301 /* The mask must include the same bits. */
10303 mask >>= 63 + r - s;
10307 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
We want only bits s:xxx:0 starting at bit 63-(r-1)
10309 so we LSL bit s up to bit 63 i.e. by 63 - s
10310 and then we LSL to bring bit 63 down to 63-(r-1)+s
10311 i.e. by r - (s + 1). */
10313 value >>= r - (s + 1);
10314 /* The mask must include the same bits. */
10316 mask >>= r - (s + 1);
10319 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10321 aarch64_set_reg_u64
10322 (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
10326 dexBitfieldImmediate (sim_cpu *cpu)
10328 /* assert instr[28:23] = 100110
10329 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10330 instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
instr[22] = N : must be 0 for 32 bit, 1 for 64 bit, otherwise UNALLOC
10332 instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
10333 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10337 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10340 uint32_t size = INSTR (31, 31);
10341 uint32_t N = INSTR (22, 22);
10342 /* 32 bit operations must have immr[5] = 0 and imms[5] = 0. */
10343 /* or else we have an UNALLOC. */
10344 uint32_t immr = INSTR (21, 16);
10349 if (!size && uimm (immr, 5, 5))
10352 imms = INSTR (15, 10);
10353 if (!size && uimm (imms, 5, 5))
10356 /* Switch on combined size and op. */
10357 dispatch = INSTR (31, 29);
10360 case 0: sbfm32 (cpu, immr, imms); return;
10361 case 1: bfm32 (cpu, immr, imms); return;
10362 case 2: ubfm32 (cpu, immr, imms); return;
10363 case 4: sbfm (cpu, immr, imms); return;
10364 case 5: bfm (cpu, immr, imms); return;
10365 case 6: ubfm (cpu, immr, imms); return;
10366 default: HALT_UNALLOC;
10371 do_EXTR_32 (sim_cpu *cpu)
10373 /* instr[31:21] = 00010011100
10375 instr[15,10] = imms : 0xxxxx for 32 bit
10378 unsigned rm = INSTR (20, 16);
10379 unsigned imms = INSTR (15, 10) & 31;
10380 unsigned rn = INSTR ( 9, 5);
10381 unsigned rd = INSTR ( 4, 0);
val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
val1 >>= imms;
val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
/* A shift by 32 would be undefined behaviour; for imms == 0 the
   result is simply Rm.  */
val2 = imms ? (val2 << (32 - imms)) : 0;
10390 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10391 aarch64_set_reg_u64 (cpu, rd, NO_SP, val1 | val2);
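/* Worked example (illustrative registers): EXTR W0, W1, W2, #8
   computes W0 = (W2 >> 8) | (W1 << 24), i.e. bits [39,8] of the
   64 bit concatenation W1:W2.  */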
10395 do_EXTR_64 (sim_cpu *cpu)
10397 /* instr[31:21] = 10010011100
10399 instr[15,10] = imms
10402 unsigned rm = INSTR (20, 16);
10403 unsigned imms = INSTR (15, 10) & 63;
10404 unsigned rn = INSTR ( 9, 5);
10405 unsigned rd = INSTR ( 4, 0);
val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
val >>= imms;
/* A shift by 64 would be undefined behaviour; for imms == 0 the
   result is simply Rm.  */
if (imms)
  val |= (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms));
10412 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
10416 dexExtractImmediate (sim_cpu *cpu)
10418 /* assert instr[28:23] = 100111
10419 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10420 instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
instr[22] = N : must be 0 for 32 bit, 1 for 64 bit, otherwise UNALLOC
10422 instr[21] = op0 : must be 0 or UNALLOC
10424 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10428 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10429 /* 64 bit operations must have N = 1 or else we have an UNALLOC. */
10431 uint32_t size = INSTR (31, 31);
10432 uint32_t N = INSTR (22, 22);
10433 /* 32 bit operations must have imms[5] = 0
10434 or else we have an UNALLOC. */
10435 uint32_t imms = INSTR (15, 10);
10440 if (!size && uimm (imms, 5, 5))
10443 /* Switch on combined size and op. */
10444 dispatch = INSTR (31, 29);
10449 else if (dispatch == 4)
10452 else if (dispatch == 1)
10459 dexDPImm (sim_cpu *cpu)
10461 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
assert group == GROUP_DPIMM_1000 || group == GROUP_DPIMM_1001
10463 bits [25,23] of a DPImm are the secondary dispatch vector. */
10464 uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));
10468 case DPIMM_PCADR_000:
10469 case DPIMM_PCADR_001:
10470 dexPCRelAddressing (cpu);
10473 case DPIMM_ADDSUB_010:
10474 case DPIMM_ADDSUB_011:
10475 dexAddSubtractImmediate (cpu);
10478 case DPIMM_LOG_100:
10479 dexLogicalImmediate (cpu);
10482 case DPIMM_MOV_101:
10483 dexMoveWideImmediate (cpu);
10486 case DPIMM_BITF_110:
10487 dexBitfieldImmediate (cpu);
10490 case DPIMM_EXTR_111:
10491 dexExtractImmediate (cpu);
10495 /* Should never reach here. */
10501 dexLoadUnscaledImmediate (sim_cpu *cpu)
10503 /* instr[29,24] == 111_00
10506 instr[31,30] = size
10509 instr[20,12] = simm9
10510 instr[9,5] = rn may be SP. */
10511 /* unsigned rt = INSTR (4, 0); */
10512 uint32_t V = INSTR (26, 26);
10513 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10514 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10518 /* GReg operations. */
10521 case 0: sturb (cpu, imm); return;
10522 case 1: ldurb32 (cpu, imm); return;
10523 case 2: ldursb64 (cpu, imm); return;
10524 case 3: ldursb32 (cpu, imm); return;
10525 case 4: sturh (cpu, imm); return;
10526 case 5: ldurh32 (cpu, imm); return;
10527 case 6: ldursh64 (cpu, imm); return;
10528 case 7: ldursh32 (cpu, imm); return;
10529 case 8: stur32 (cpu, imm); return;
10530 case 9: ldur32 (cpu, imm); return;
10531 case 10: ldursw (cpu, imm); return;
10532 case 12: stur64 (cpu, imm); return;
10533 case 13: ldur64 (cpu, imm); return;
10546 /* FReg operations. */
10549 case 2: fsturq (cpu, imm); return;
10550 case 3: fldurq (cpu, imm); return;
10551 case 8: fsturs (cpu, imm); return;
10552 case 9: fldurs (cpu, imm); return;
10553 case 12: fsturd (cpu, imm); return;
10554 case 13: fldurd (cpu, imm); return;
10556 case 0: /* STUR 8 bit FP. */
10557 case 1: /* LDUR 8 bit FP. */
10558 case 4: /* STUR 16 bit FP. */
10559 case 5: /* LDUR 16 bit FP. */
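/* Illustrative restatement (hypothetical helper) of the dispatch
   index used above: size from instr[31,30] and opc from instr[23,22]
   are packed into a 4-bit selector.  */
static inline uint32_t
ldst_unscaled_dispatch_sketch (uint32_t instr)
{
  return (((instr >> 30) & 3) << 2) | ((instr >> 22) & 3);
}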
10573 /* N.B. A preliminary note regarding all the ldrs<x>32
10576 The signed value loaded by these instructions is cast to unsigned
10577 before being assigned to aarch64_get_reg_u64 (cpu, N) i.e. to the
10578 64 bit element of the GReg union. This performs a 32 bit sign extension
10579 (as required) but avoids 64 bit sign extension, thus ensuring that the
10580 top half of the register word is zero. This is what the spec demands
10581 when a 32 bit load occurs. */
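/* Minimal sketch (hypothetical helper) of the trick described above:
   sign-extend the byte to 32 bits, then zero-extend into the 64-bit
   register image so the top half stays clear.  */
static inline uint64_t
w_reg_from_s8_sketch (int8_t byte)
{
  uint32_t w = (uint32_t) (int32_t) byte; /* 32 bit sign extension.  */
  return (uint64_t) w;                    /* Top half is zero.  */
}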
10583 /* 32 bit load sign-extended byte scaled unsigned 12 bit. */
10585 ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
10587 unsigned int rn = INSTR (9, 5);
10588 unsigned int rt = INSTR (4, 0);
10590 /* The target register may not be SP but the source register may be;
10591 there is no scaling required for a byte load. */
10592 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
10593 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10594 (int64_t) aarch64_get_mem_s8 (cpu, address));
10597 /* 32 bit load sign-extended byte scaled or unscaled zero-
10598 or sign-extended 32-bit register offset. */
10600 ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10602 unsigned int rm = INSTR (20, 16);
10603 unsigned int rn = INSTR (9, 5);
10604 unsigned int rt = INSTR (4, 0);
10606 /* rn may reference SP, rm and rt must reference ZR. */
10608 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10609 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10612 /* There is no scaling required for a byte load. */
10613 aarch64_set_reg_u64
10614 (cpu, rt, NO_SP, (int64_t) aarch64_get_mem_s8 (cpu, address
10618 /* 32 bit load sign-extended byte unscaled signed 9 bit with
10619 pre- or post-writeback. */
10621 ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10624 unsigned int rn = INSTR (9, 5);
10625 unsigned int rt = INSTR (4, 0);
10627 if (rn == rt && wb != NoWriteBack)
10630 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10635 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10636 (int64_t) aarch64_get_mem_s8 (cpu, address));
10641 if (wb != NoWriteBack)
10642 aarch64_set_reg_u64 (cpu, rn, NO_SP, address);
10645 /* 8 bit store scaled. */
10647 fstrb_abs (sim_cpu *cpu, uint32_t offset)
10649 unsigned st = INSTR (4, 0);
10650 unsigned rn = INSTR (9, 5);
10652 aarch64_set_mem_u8 (cpu,
10653 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
10654 aarch64_get_vec_u8 (cpu, st, 0));
10657 /* 8 bit store scaled or unscaled zero- or
10658 sign-extended 8-bit register offset. */
10660 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10662 unsigned rm = INSTR (20, 16);
10663 unsigned rn = INSTR (9, 5);
10664 unsigned st = INSTR (4, 0);
10666 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10667 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10669 uint64_t displacement = scaling == Scaled ? extended : 0;
10672 (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0));
10675 /* 16 bit store scaled. */
10677 fstrh_abs (sim_cpu *cpu, uint32_t offset)
10679 unsigned st = INSTR (4, 0);
10680 unsigned rn = INSTR (9, 5);
10682 aarch64_set_mem_u16
10684 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16),
10685 aarch64_get_vec_u16 (cpu, st, 0));
10688 /* 16 bit store scaled or unscaled zero-
10689 or sign-extended 16-bit register offset. */
10691 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10693 unsigned rm = INSTR (20, 16);
10694 unsigned rn = INSTR (9, 5);
10695 unsigned st = INSTR (4, 0);
10697 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10698 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10700 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
10702 aarch64_set_mem_u16
10703 (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0));
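/* Hedged sketch of the OPT_SCALE step used above (assumed semantics:
   the extended register offset is multiplied by the element size in
   bytes only when the instruction requests scaling).  */
static inline uint64_t
opt_scale_sketch (int64_t extended, unsigned element_bits, Scaling scaling)
{
  return scaling == Scaled
    ? (uint64_t) extended * (element_bits / 8)
    : (uint64_t) extended;
}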
10706 /* 32 bit store scaled unsigned 12 bit. */
10708 fstrs_abs (sim_cpu *cpu, uint32_t offset)
10710 unsigned st = INSTR (4, 0);
10711 unsigned rn = INSTR (9, 5);
10713 aarch64_set_mem_u32
10715 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32),
10716 aarch64_get_vec_u32 (cpu, st, 0));
10719 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
10721 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10723 unsigned rn = INSTR (9, 5);
10724 unsigned st = INSTR (4, 0);
10726 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10731 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0));
10736 if (wb != NoWriteBack)
10737 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
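/* Hedged sketch of the pre-/post-index sequencing that is elided in
   the writeback handlers above: Pre applies the offset before the
   access, Post applies it afterwards, and both write the updated
   address back to Rn.  */
static inline uint64_t
wb_access_address_sketch (uint64_t base, int32_t offset, WriteBack wb)
{
  return wb == Post ? base : base + (int64_t) offset;
}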
10740 /* 32 bit store scaled or unscaled zero-
10741 or sign-extended 32-bit register offset. */
10743 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10745 unsigned rm = INSTR (20, 16);
10746 unsigned rn = INSTR (9, 5);
10747 unsigned st = INSTR (4, 0);
10749 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10750 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10752 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
10754 aarch64_set_mem_u32
10755 (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0));
10758 /* 64 bit store scaled unsigned 12 bit. */
10760 fstrd_abs (sim_cpu *cpu, uint32_t offset)
10762 unsigned st = INSTR (4, 0);
10763 unsigned rn = INSTR (9, 5);
10765 aarch64_set_mem_u64
10767 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64),
10768 aarch64_get_vec_u64 (cpu, st, 0));
10771 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
10773 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10775 unsigned rn = INSTR (9, 5);
10776 unsigned st = INSTR (4, 0);
10778 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10783 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0));
10788 if (wb != NoWriteBack)
10789 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10792 /* 64 bit store scaled or unscaled zero-
10793 or sign-extended 32-bit register offset. */
10795 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10797 unsigned rm = INSTR (20, 16);
10798 unsigned rn = INSTR (9, 5);
10799 unsigned st = INSTR (4, 0);
10801 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10802 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10804 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
10806 aarch64_set_mem_u64
10807 (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0));
10810 /* 128 bit store scaled unsigned 12 bit. */
10812 fstrq_abs (sim_cpu *cpu, uint32_t offset)
10815 unsigned st = INSTR (4, 0);
10816 unsigned rn = INSTR (9, 5);
10819 aarch64_get_FP_long_double (cpu, st, & a);
10821 addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
10822 aarch64_set_mem_long_double (cpu, addr, a);
10825 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */
10827 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10830 unsigned rn = INSTR (9, 5);
10831 unsigned st = INSTR (4, 0);
10832 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10837 aarch64_get_FP_long_double (cpu, st, & a);
10838 aarch64_set_mem_long_double (cpu, address, a);
10843 if (wb != NoWriteBack)
10844 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10847 /* 128 bit store scaled or unscaled zero-
10848 or sign-extended 32-bit register offset. */
10850 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10852 unsigned rm = INSTR (20, 16);
10853 unsigned rn = INSTR (9, 5);
10854 unsigned st = INSTR (4, 0);
10856 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10857 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10859 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
10863 aarch64_get_FP_long_double (cpu, st, & a);
10864 aarch64_set_mem_long_double (cpu, address + displacement, a);
10868 dexLoadImmediatePrePost (sim_cpu *cpu)
10870 /* instr[31,30] = size
10876 instr[20,12] = simm9
10877 instr[11] = wb : 0 ==> Post, 1 ==> Pre
10879 instr[9,5] = Rn may be SP.
10882 uint32_t V = INSTR (26, 26);
10883 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10884 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10885 WriteBack wb = INSTR (11, 11);
10889 /* GReg operations. */
10892 case 0: strb_wb (cpu, imm, wb); return;
10893 case 1: ldrb32_wb (cpu, imm, wb); return;
10894 case 2: ldrsb_wb (cpu, imm, wb); return;
10895 case 3: ldrsb32_wb (cpu, imm, wb); return;
10896 case 4: strh_wb (cpu, imm, wb); return;
10897 case 5: ldrh32_wb (cpu, imm, wb); return;
10898 case 6: ldrsh64_wb (cpu, imm, wb); return;
10899 case 7: ldrsh32_wb (cpu, imm, wb); return;
10900 case 8: str32_wb (cpu, imm, wb); return;
10901 case 9: ldr32_wb (cpu, imm, wb); return;
10902 case 10: ldrsw_wb (cpu, imm, wb); return;
10903 case 12: str_wb (cpu, imm, wb); return;
10904 case 13: ldr_wb (cpu, imm, wb); return;
10914 /* FReg operations. */
10917 case 2: fstrq_wb (cpu, imm, wb); return;
10918 case 3: fldrq_wb (cpu, imm, wb); return;
10919 case 8: fstrs_wb (cpu, imm, wb); return;
10920 case 9: fldrs_wb (cpu, imm, wb); return;
10921 case 12: fstrd_wb (cpu, imm, wb); return;
10922 case 13: fldrd_wb (cpu, imm, wb); return;
10924 case 0: /* STUR 8 bit FP. */
10925 case 1: /* LDUR 8 bit FP. */
10926 case 4: /* STUR 16 bit FP. */
10927 case 5: /* LDUR 16 bit FP. */
10942 dexLoadRegisterOffset (sim_cpu *cpu)
10944 /* instr[31,30] = size
10951 instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10952 110 ==> SXTW, 111 ==> SXTX,
10957 instr[4,0] = rt. */
10959 uint32_t V = INSTR (26, 26);
10960 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10961 Scaling scale = INSTR (12, 12);
10962 Extension extensionType = INSTR (15, 13);
10964 /* Check for illegal extension types. */
10965 if (uimm (extensionType, 1, 1) == 0)
10968 if (extensionType == UXTX || extensionType == SXTX)
10969 extensionType = NoExtension;
10973 /* GReg operations. */
10976 case 0: strb_scale_ext (cpu, scale, extensionType); return;
10977 case 1: ldrb32_scale_ext (cpu, scale, extensionType); return;
10978 case 2: ldrsb_scale_ext (cpu, scale, extensionType); return;
10979 case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return;
10980 case 4: strh_scale_ext (cpu, scale, extensionType); return;
10981 case 5: ldrh32_scale_ext (cpu, scale, extensionType); return;
10982 case 6: ldrsh_scale_ext (cpu, scale, extensionType); return;
10983 case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return;
10984 case 8: str32_scale_ext (cpu, scale, extensionType); return;
10985 case 9: ldr32_scale_ext (cpu, scale, extensionType); return;
10986 case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10987 case 12: str_scale_ext (cpu, scale, extensionType); return;
10988 case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10989 case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10998 /* FReg operations. */
11001 case 1: /* LDUR 8 bit FP. */
11003 case 3: fldrq_scale_ext (cpu, scale, extensionType); return;
11004 case 5: /* LDUR 16 bit FP. */
11006 case 9: fldrs_scale_ext (cpu, scale, extensionType); return;
11007 case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
11009 case 0: fstrb_scale_ext (cpu, scale, extensionType); return;
11010 case 2: fstrq_scale_ext (cpu, scale, extensionType); return;
11011 case 4: fstrh_scale_ext (cpu, scale, extensionType); return;
11012 case 8: fstrs_scale_ext (cpu, scale, extensionType); return;
11013 case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
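/* Hedged sketch of the extension step used by the *_scale_ext
   handlers (assuming the UXTW/SXTW enumerators listed in the comment
   above): UXTW zero-extends the 32-bit index, SXTW sign-extends it,
   and the 64-bit forms pass the value through.  */
static inline int64_t
extend_sketch (uint32_t value, Extension ext)
{
  switch (ext)
    {
    case UXTW: return (int64_t) (uint64_t) value;
    case SXTW: return (int64_t) (int32_t) value;
    default:   return (int64_t) value; /* UXTX/SXTX/NoExtension.  */
    }
}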
11027 dexLoadUnsignedImmediate (sim_cpu *cpu)
11029 /* instr[29,24] == 111_01
11030 instr[31,30] = size
11033 instr[21,10] = uimm12 : unsigned immediate offset
11034 instr[9,5] = rn may be SP.
11035 instr[4,0] = rt. */
11037 uint32_t V = INSTR (26,26);
11038 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
11039 uint32_t imm = INSTR (21, 10);
11043 /* GReg operations. */
11046 case 0: strb_abs (cpu, imm); return;
11047 case 1: ldrb32_abs (cpu, imm); return;
11048 case 2: ldrsb_abs (cpu, imm); return;
11049 case 3: ldrsb32_abs (cpu, imm); return;
11050 case 4: strh_abs (cpu, imm); return;
11051 case 5: ldrh32_abs (cpu, imm); return;
11052 case 6: ldrsh_abs (cpu, imm); return;
11053 case 7: ldrsh32_abs (cpu, imm); return;
11054 case 8: str32_abs (cpu, imm); return;
11055 case 9: ldr32_abs (cpu, imm); return;
11056 case 10: ldrsw_abs (cpu, imm); return;
11057 case 12: str_abs (cpu, imm); return;
11058 case 13: ldr_abs (cpu, imm); return;
11059 case 14: prfm_abs (cpu, imm); return;
11068 /* FReg operations. */
11071 case 0: fstrb_abs (cpu, imm); return;
11072 case 4: fstrh_abs (cpu, imm); return;
11073 case 8: fstrs_abs (cpu, imm); return;
11074 case 12: fstrd_abs (cpu, imm); return;
11075 case 2: fstrq_abs (cpu, imm); return;
11077 case 1: fldrb_abs (cpu, imm); return;
11078 case 5: fldrh_abs (cpu, imm); return;
11079 case 9: fldrs_abs (cpu, imm); return;
11080 case 13: fldrd_abs (cpu, imm); return;
11081 case 3: fldrq_abs (cpu, imm); return;
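/* Illustrative sketch of the address formed by the *_abs handlers:
   the unsigned 12-bit immediate counts elements, so it is shifted
   left by log2 of the element size in bytes (what SCALE (offset, N)
   is assumed to expand to).  */
static inline uint64_t
abs_address_sketch (uint64_t base, uint32_t uimm12, unsigned element_bits)
{
  unsigned shift = element_bits == 8 ? 0
    : element_bits == 16 ? 1
    : element_bits == 32 ? 2
    : element_bits == 64 ? 3 : 4;
  return base + ((uint64_t) uimm12 << shift);
}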
11095 dexLoadExclusive (sim_cpu *cpu)
11097 /* assert instr[29:24] = 001000;
11098 instr[31,30] = size
11099 instr[23] = 0 if exclusive
11100 instr[22] = L : 1 if load, 0 if store
11101 instr[21] = 1 if pair
11103 instr[15] = o0 : 1 if ordered
11106 instr[4,0] = Rt. */
11108 switch (INSTR (22, 21))
11110 case 2: ldxr (cpu); return;
11111 case 0: stxr (cpu); return;
11117 dexLoadOther (sim_cpu *cpu)
11121 /* instr[29,25] = 111_0
11122 instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
11123 instr[21] and instr[11,10] form the secondary dispatch vector. */
11124 if (INSTR (24, 24))
11126 dexLoadUnsignedImmediate (cpu);
11130 dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10));
11133 case 0: dexLoadUnscaledImmediate (cpu); return;
11134 case 1: dexLoadImmediatePrePost (cpu); return;
11135 case 3: dexLoadImmediatePrePost (cpu); return;
11136 case 6: dexLoadRegisterOffset (cpu); return;
11148 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11150 unsigned rn = INSTR (14, 10);
11151 unsigned rd = INSTR (9, 5);
11152 unsigned rm = INSTR (4, 0);
11153 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11155 if ((rn == rd || rm == rd) && wb != NoWriteBack)
11156 HALT_UNALLOC; /* ??? */
11163 aarch64_set_mem_u32 (cpu, address,
11164 aarch64_get_reg_u32 (cpu, rm, NO_SP));
11165 aarch64_set_mem_u32 (cpu, address + 4,
11166 aarch64_get_reg_u32 (cpu, rn, NO_SP));
11171 if (wb != NoWriteBack)
11172 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11176 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11178 unsigned rn = INSTR (14, 10);
11179 unsigned rd = INSTR (9, 5);
11180 unsigned rm = INSTR (4, 0);
11181 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11183 if ((rn == rd || rm == rd) && wb != NoWriteBack)
11184 HALT_UNALLOC; /* ??? */
11191 aarch64_set_mem_u64 (cpu, address,
11192 aarch64_get_reg_u64 (cpu, rm, NO_SP));
11193 aarch64_set_mem_u64 (cpu, address + 8,
11194 aarch64_get_reg_u64 (cpu, rn, NO_SP));
11199 if (wb != NoWriteBack)
11200 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11204 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11206 unsigned rn = INSTR (14, 10);
11207 unsigned rd = INSTR (9, 5);
11208 unsigned rm = INSTR (4, 0);
11209 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11211 /* Treat this as unalloc to make sure we don't do it. */
11220 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address));
11221 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4));
11226 if (wb != NoWriteBack)
11227 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11231 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11233 unsigned rn = INSTR (14, 10);
11234 unsigned rd = INSTR (9, 5);
11235 unsigned rm = INSTR (4, 0);
11236 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11238 /* Treat this as unalloc to make sure we don't do it. */
11247 aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address));
11248 aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4));
11253 if (wb != NoWriteBack)
11254 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11258 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11260 unsigned rn = INSTR (14, 10);
11261 unsigned rd = INSTR (9, 5);
11262 unsigned rm = INSTR (4, 0);
11263 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11265 /* Treat this as unalloc to make sure we don't do it. */
11274 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address));
11275 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8));
11280 if (wb != NoWriteBack)
11281 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11285 dex_load_store_pair_gr (sim_cpu *cpu)
11287 /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
11288 instr[29,25] = instruction encoding: 101_0
11289 instr[26] = V : 1 if fp 0 if gp
11290 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11291 instr[22] = load/store (1=> load)
11292 instr[21,15] = signed, scaled, offset
11295 instr[ 4, 0] = Rm. */
11297 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11298 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11302 case 2: store_pair_u32 (cpu, offset, Post); return;
11303 case 3: load_pair_u32 (cpu, offset, Post); return;
11304 case 4: store_pair_u32 (cpu, offset, NoWriteBack); return;
11305 case 5: load_pair_u32 (cpu, offset, NoWriteBack); return;
11306 case 6: store_pair_u32 (cpu, offset, Pre); return;
11307 case 7: load_pair_u32 (cpu, offset, Pre); return;
11309 case 11: load_pair_s32 (cpu, offset, Post); return;
11310 case 13: load_pair_s32 (cpu, offset, NoWriteBack); return;
11311 case 15: load_pair_s32 (cpu, offset, Pre); return;
11313 case 18: store_pair_u64 (cpu, offset, Post); return;
11314 case 19: load_pair_u64 (cpu, offset, Post); return;
11315 case 20: store_pair_u64 (cpu, offset, NoWriteBack); return;
11316 case 21: load_pair_u64 (cpu, offset, NoWriteBack); return;
11317 case 22: store_pair_u64 (cpu, offset, Pre); return;
11318 case 23: load_pair_u64 (cpu, offset, Pre); return;
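/* Hedged sketch of the offset scaling the pair handlers are assumed
   to apply (the shift itself is elided above): the signed 7-bit
   field instr[21,15] counts register-sized units, not bytes.  */
static inline int64_t
pair_offset_bytes_sketch (int32_t imm7, unsigned reg_size_bits)
{
  return (int64_t) imm7 * (int64_t) (reg_size_bits / 8);
}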
11326 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11328 unsigned rn = INSTR (14, 10);
11329 unsigned rd = INSTR (9, 5);
11330 unsigned rm = INSTR (4, 0);
11331 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11338 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0));
11339 aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0));
11344 if (wb != NoWriteBack)
11345 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11349 store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11351 unsigned rn = INSTR (14, 10);
11352 unsigned rd = INSTR (9, 5);
11353 unsigned rm = INSTR (4, 0);
11354 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11361 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0));
11362 aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0));
11367 if (wb != NoWriteBack)
11368 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11372 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11375 unsigned rn = INSTR (14, 10);
11376 unsigned rd = INSTR (9, 5);
11377 unsigned rm = INSTR (4, 0);
11378 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11385 aarch64_get_FP_long_double (cpu, rm, & a);
11386 aarch64_set_mem_long_double (cpu, address, a);
11387 aarch64_get_FP_long_double (cpu, rn, & a);
11388 aarch64_set_mem_long_double (cpu, address + 16, a);
11393 if (wb != NoWriteBack)
11394 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11398 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11400 unsigned rn = INSTR (14, 10);
11401 unsigned rd = INSTR (9, 5);
11402 unsigned rm = INSTR (4, 0);
11403 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11413 aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address));
11414 aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4));
11419 if (wb != NoWriteBack)
11420 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11424 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11426 unsigned rn = INSTR (14, 10);
11427 unsigned rd = INSTR (9, 5);
11428 unsigned rm = INSTR (4, 0);
11429 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11439 aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address));
11440 aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8));
11445 if (wb != NoWriteBack)
11446 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11450 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11453 unsigned rn = INSTR (14, 10);
11454 unsigned rd = INSTR (9, 5);
11455 unsigned rm = INSTR (4, 0);
11456 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11466 aarch64_get_mem_long_double (cpu, address, & a);
11467 aarch64_set_FP_long_double (cpu, rm, a);
11468 aarch64_get_mem_long_double (cpu, address + 16, & a);
11469 aarch64_set_FP_long_double (cpu, rn, a);
11474 if (wb != NoWriteBack)
11475 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11479 dex_load_store_pair_fp (sim_cpu *cpu)
11481 /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
11482 instr[29,25] = instruction encoding
11483 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11484 instr[22] = load/store (1=> load)
11485 instr[21,15] = signed, scaled, offset
11488 instr[ 4, 0] = Rm */
11490 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11491 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11495 case 2: store_pair_float (cpu, offset, Post); return;
11496 case 3: load_pair_float (cpu, offset, Post); return;
11497 case 4: store_pair_float (cpu, offset, NoWriteBack); return;
11498 case 5: load_pair_float (cpu, offset, NoWriteBack); return;
11499 case 6: store_pair_float (cpu, offset, Pre); return;
11500 case 7: load_pair_float (cpu, offset, Pre); return;
11502 case 10: store_pair_double (cpu, offset, Post); return;
11503 case 11: load_pair_double (cpu, offset, Post); return;
11504 case 12: store_pair_double (cpu, offset, NoWriteBack); return;
11505 case 13: load_pair_double (cpu, offset, NoWriteBack); return;
11506 case 14: store_pair_double (cpu, offset, Pre); return;
11507 case 15: load_pair_double (cpu, offset, Pre); return;
11509 case 18: store_pair_long_double (cpu, offset, Post); return;
11510 case 19: load_pair_long_double (cpu, offset, Post); return;
11511 case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
11512 case 21: load_pair_long_double (cpu, offset, NoWriteBack); return;
11513 case 22: store_pair_long_double (cpu, offset, Pre); return;
11514 case 23: load_pair_long_double (cpu, offset, Pre); return;
11521 static inline unsigned
11522 vec_reg (unsigned v, unsigned o)
11524 return (v + o) & 0x1F; /* Wrap modulo the 32 vector registers. */
11527 /* Load multiple N-element structures to M consecutive registers. */
11529 vec_load (sim_cpu *cpu, uint64_t address, unsigned N, unsigned M)
11531 int all = INSTR (30, 30);
11532 unsigned size = INSTR (11, 10);
11533 unsigned vd = INSTR (4, 0);
11534 unsigned rpt = (N == M) ? 1 : M;
11535 unsigned selem = N;
11540 case 0: /* 8-bit operations. */
11541 for (i = 0; i < rpt; i++)
11542 for (j = 0; j < (8 + (8 * all)); j++)
11543 for (k = 0; k < selem; k++)
11545 aarch64_set_vec_u8 (cpu, vec_reg (vd, i + k), j,
11546 aarch64_get_mem_u8 (cpu, address));
11551 case 1: /* 16-bit operations. */
11552 for (i = 0; i < rpt; i++)
11553 for (j = 0; j < (4 + (4 * all)); j++)
11554 for (k = 0; k < selem; k++)
11556 aarch64_set_vec_u16 (cpu, vec_reg (vd, i + k), j,
11557 aarch64_get_mem_u16 (cpu, address));
11562 case 2: /* 32-bit operations. */
11563 for (i = 0; i < rpt; i++)
11564 for (j = 0; j < (2 + (2 * all)); j++)
11565 for (k = 0; k < selem; k++)
11567 aarch64_set_vec_u32 (cpu, vec_reg (vd, i + k), j,
11568 aarch64_get_mem_u32 (cpu, address));
11573 case 3: /* 64-bit operations. */
11574 for (i = 0; i < rpt; i++)
11575 for (j = 0; j < (1 + all); j++)
11576 for (k = 0; k < selem; k++)
11578 aarch64_set_vec_u64 (cpu, vec_reg (vd, i + k), j,
11579 aarch64_get_mem_u64 (cpu, address));
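/* Hedged sketch of one pass of the 16-bit case above, with the
   elided address increment restored (assumed to advance by the
   element size after each element copied):  */
static void
vec_load_u16_sketch (sim_cpu *cpu, uint64_t address, unsigned vd,
                     unsigned rpt, unsigned selem, int all)
{
  unsigned i, j, k;
  for (i = 0; i < rpt; i++)
    for (j = 0; j < (4u + (4u * all)); j++)
      for (k = 0; k < selem; k++)
        {
          aarch64_set_vec_u16 (cpu, vec_reg (vd, i + k), j,
                               aarch64_get_mem_u16 (cpu, address));
          address += 2;
        }
}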
11586 /* Load multiple 4-element structures into four consecutive registers. */
11588 LD4 (sim_cpu *cpu, uint64_t address)
11590 vec_load (cpu, address, 4, 4);
11593 /* Load multiple 3-element structures into three consecutive registers. */
11595 LD3 (sim_cpu *cpu, uint64_t address)
11597 vec_load (cpu, address, 3, 3);
11600 /* Load multiple 2-element structures into two consecutive registers. */
11602 LD2 (sim_cpu *cpu, uint64_t address)
11604 vec_load (cpu, address, 2, 2);
11607 /* Load multiple 1-element structures into one register. */
11609 LD1_1 (sim_cpu *cpu, uint64_t address)
11611 vec_load (cpu, address, 1, 1);
11614 /* Load multiple 1-element structures into two registers. */
11616 LD1_2 (sim_cpu *cpu, uint64_t address)
11618 vec_load (cpu, address, 1, 2);
11621 /* Load multiple 1-element structures into three registers. */
11623 LD1_3 (sim_cpu *cpu, uint64_t address)
11625 vec_load (cpu, address, 1, 3);
11628 /* Load multiple 1-element structures into four registers. */
11630 LD1_4 (sim_cpu *cpu, uint64_t address)
11632 vec_load (cpu, address, 1, 4);
11635 /* Store multiple N-element structures from M consecutive registers. */
11637 vec_store (sim_cpu *cpu, uint64_t address, unsigned N, unsigned M)
11639 int all = INSTR (30, 30);
11640 unsigned size = INSTR (11, 10);
11641 unsigned vd = INSTR (4, 0);
11642 unsigned rpt = (N == M) ? 1 : M;
11643 unsigned selem = N;
11648 case 0: /* 8-bit operations. */
11649 for (i = 0; i < rpt; i++)
11650 for (j = 0; j < (8 + (8 * all)); j++)
11651 for (k = 0; k < selem; k++)
11655 aarch64_get_vec_u8 (cpu, vec_reg (vd, i + k), j));
11660 case 1: /* 16-bit operations. */
11661 for (i = 0; i < rpt; i++)
11662 for (j = 0; j < (4 + (4 * all)); j++)
11663 for (k = 0; k < selem; k++)
11665 aarch64_set_mem_u16
11667 aarch64_get_vec_u16 (cpu, vec_reg (vd, i + k), j));
11672 case 2: /* 32-bit operations. */
11673 for (i = 0; i < rpt; i++)
11674 for (j = 0; j < (2 + (2 * all)); j++)
11675 for (k = 0; k < selem; k++)
11677 aarch64_set_mem_u32
11679 aarch64_get_vec_u32 (cpu, vec_reg (vd, i + k), j));
11684 case 3: /* 64-bit operations. */
11685 for (i = 0; i < rpt; i++)
11686 for (j = 0; j < (1 + all); j++)
11687 for (k = 0; k < selem; k++)
11689 aarch64_set_mem_u64
11691 aarch64_get_vec_u64 (cpu, vec_reg (vd, i + k), j));
11698 /* Store multiple 4-element structure from four consecutive registers. */
11700 ST4 (sim_cpu *cpu, uint64_t address)
11702 vec_store (cpu, address, 4, 4);
11705 /* Store multiple 3-element structures from three consecutive registers. */
11707 ST3 (sim_cpu *cpu, uint64_t address)
11709 vec_store (cpu, address, 3, 3);
11712 /* Store multiple 2-element structures from two consecutive registers. */
11714 ST2 (sim_cpu *cpu, uint64_t address)
11716 vec_store (cpu, address, 2, 2);
11719 /* Store multiple 1-element structures from one register. */
11721 ST1_1 (sim_cpu *cpu, uint64_t address)
11723 vec_store (cpu, address, 1, 1);
11726 /* Store multiple 1-element structures from two registers. */
11728 ST1_2 (sim_cpu *cpu, uint64_t address)
11730 vec_store (cpu, address, 1, 2);
11733 /* Store multiple 1-element structures from three registers. */
11735 ST1_3 (sim_cpu *cpu, uint64_t address)
11737 vec_store (cpu, address, 1, 3);
11740 /* Store multiple 1-element structures from four registers. */
11742 ST1_4 (sim_cpu *cpu, uint64_t address)
11744 vec_store (cpu, address, 1, 4);
11747 #define LDn_STn_SINGLE_LANE_AND_SIZE() \
11750 switch (INSTR (15, 14)) \
11753 lane = (full << 3) | (s << 2) | size; \
11758 if ((size & 1) == 1) \
11760 lane = (full << 2) | (s << 1) | (size >> 1); \
11765 if ((size & 2) == 2) \
11768 if ((size & 1) == 0) \
11770 lane = (full << 1) | s; \
11788 /* Load single structure into one lane of N registers. */
11790 do_vec_LDn_single (sim_cpu *cpu, uint64_t address)
11793 instr[30] = element selector 0=>half, 1=>all elements
11794 instr[29,24] = 00 1101
11795 instr[23] = 0=>simple, 1=>post
11797 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
11798 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11799 11111 (immediate post inc)
11800 instr[15,13] = opcode
11801 instr[12] = S, used for lane number
11802 instr[11,10] = size, also used for lane number
11803 instr[9,5] = address
11806 unsigned full = INSTR (30, 30);
11807 unsigned vd = INSTR (4, 0);
11808 unsigned size = INSTR (11, 10);
11809 unsigned s = INSTR (12, 12);
11810 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11814 NYI_assert (29, 24, 0x0D);
11815 NYI_assert (22, 22, 1);
11817 /* Compute the lane number first (using size), and then compute size. */
11818 LDn_STn_SINGLE_LANE_AND_SIZE ();
11820 for (i = 0; i < nregs; i++)
11825 uint8_t val = aarch64_get_mem_u8 (cpu, address + i);
11826 aarch64_set_vec_u8 (cpu, vd + i, lane, val);
11832 uint16_t val = aarch64_get_mem_u16 (cpu, address + (i * 2));
11833 aarch64_set_vec_u16 (cpu, vd + i, lane, val);
11839 uint32_t val = aarch64_get_mem_u32 (cpu, address + (i * 4));
11840 aarch64_set_vec_u32 (cpu, vd + i, lane, val);
11846 uint64_t val = aarch64_get_mem_u64 (cpu, address + (i * 8));
11847 aarch64_set_vec_u64 (cpu, vd + i, lane, val);
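/* Hedged sketch of the lane/size computation done by the
   LDn_STn_SINGLE_LANE_AND_SIZE macro used above (the
   unallocated-encoding checks are elided here): instr[15,14] selects
   the element size, and the lane number is assembled from the
   remaining full/s/size bits.  */
static inline int
ldn_stn_lane_sketch (unsigned opc /* INSTR (15, 14) */, unsigned full,
                     unsigned s, unsigned size)
{
  switch (opc)
    {
    case 0: return (full << 3) | (s << 2) | size;        /* Byte lanes.  */
    case 1: return (full << 2) | (s << 1) | (size >> 1); /* Half lanes.  */
    case 2: return (size & 2) ? (int) full               /* Double lane. */
                              : (int) ((full << 1) | s); /* Word lane.   */
    default: return -1;                                  /* Unallocated. */
    }
}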
11853 /* Store single structure from one lane from N registers. */
11855 do_vec_STn_single (sim_cpu *cpu, uint64_t address)
11858 instr[30] = element selector 0=>half, 1=>all elements
11859 instr[29,24] = 00 1101
11860 instr[23] = 0=>simple, 1=>post
11862 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
11863 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11864 11111 (immediate post inc)
11865 instr[15,13] = opcode
11866 instr[12] = S, used for lane number
11867 instr[11,10] = size, also used for lane number
11868 instr[9,5] = address
11871 unsigned full = INSTR (30, 30);
11872 unsigned vd = INSTR (4, 0);
11873 unsigned size = INSTR (11, 10);
11874 unsigned s = INSTR (12, 12);
11875 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11879 NYI_assert (29, 24, 0x0D);
11880 NYI_assert (22, 22, 0);
11882 /* Compute the lane number first (using size), and then compute size. */
11883 LDn_STn_SINGLE_LANE_AND_SIZE ();
11885 for (i = 0; i < nregs; i++)
11890 uint8_t val = aarch64_get_vec_u8 (cpu, vd + i, lane);
11891 aarch64_set_mem_u8 (cpu, address + i, val);
11897 uint16_t val = aarch64_get_vec_u16 (cpu, vd + i, lane);
11898 aarch64_set_mem_u16 (cpu, address + (i * 2), val);
11904 uint32_t val = aarch64_get_vec_u32 (cpu, vd + i, lane);
11905 aarch64_set_mem_u32 (cpu, address + (i * 4), val);
11911 uint64_t val = aarch64_get_vec_u64 (cpu, vd + i, lane);
11912 aarch64_set_mem_u64 (cpu, address + (i * 8), val);
11918 /* Load single structure into all lanes of N registers. */
11920 do_vec_LDnR (sim_cpu *cpu, uint64_t address)
11923 instr[30] = element selector 0=>half, 1=>all elements
11924 instr[29,24] = 00 1101
11925 instr[23] = 0=>simple, 1=>post
11927 instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
11928 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11929 11111 (immediate post inc)
11931 instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
11933 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11934 10=> word(s), 11=> double(d)
11935 instr[9,5] = address
11938 unsigned full = INSTR (30, 30);
11939 unsigned vd = INSTR (4, 0);
11940 unsigned size = INSTR (11, 10);
11941 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11944 NYI_assert (29, 24, 0x0D);
11945 NYI_assert (22, 22, 1);
11946 NYI_assert (15, 14, 3);
11947 NYI_assert (12, 12, 0);
11949 for (n = 0; n < nregs; n++)
11954 uint8_t val = aarch64_get_mem_u8 (cpu, address + n);
11955 for (i = 0; i < (full ? 16 : 8); i++)
11956 aarch64_set_vec_u8 (cpu, vd + n, i, val);
11962 uint16_t val = aarch64_get_mem_u16 (cpu, address + (n * 2));
11963 for (i = 0; i < (full ? 8 : 4); i++)
11964 aarch64_set_vec_u16 (cpu, vd + n, i, val);
11970 uint32_t val = aarch64_get_mem_u32 (cpu, address + (n * 4));
11971 for (i = 0; i < (full ? 4 : 2); i++)
11972 aarch64_set_vec_u32 (cpu, vd + n, i, val);
11978 uint64_t val = aarch64_get_mem_u64 (cpu, address + (n * 8));
11979 for (i = 0; i < (full ? 2 : 1); i++)
11980 aarch64_set_vec_u64 (cpu, vd + n, i, val);
11990 do_vec_load_store (sim_cpu *cpu)
11992 /* {LD|ST}<N> {Vd..Vd+N}, vaddr
11995 instr[30] = element selector 0=>half, 1=>all elements
11996 instr[29,25] = 00110
11997 instr[24] = 0=>multiple struct, 1=>single struct
11998 instr[23] = 0=>simple, 1=>post
11999 instr[22] = 0=>store, 1=>load
12000 instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR)
12001 instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
12002 11111 (immediate post inc)
12003 instr[15,12] = elements and destinations. eg for load:
12004 0000=>LD4 => load multiple 4-element to
12005 four consecutive registers
12006 0100=>LD3 => load multiple 3-element to
12007 three consecutive registers
12008 1000=>LD2 => load multiple 2-element to
12009 two consecutive registers
12010 0010=>LD1 => load multiple 1-element to
12011 four consecutive registers
12012 0110=>LD1 => load multiple 1-element to
12013 three consecutive registers
12014 1010=>LD1 => load multiple 1-element to
12015 two consecutive registers
12016 0111=>LD1 => load multiple 1-element to
12020 instr[11,10] = element size 00=> byte(b), 01=> half(h),
12021 10=> word(s), 11=> double(d)
12022 instr[9,5] = Vn, can be SP
12032 if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
12035 single = INSTR (24, 24);
12036 post = INSTR (23, 23);
12037 load = INSTR (22, 22);
12038 type = INSTR (15, 12);
12040 address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
12042 if (! single && INSTR (21, 21) != 0)
12047 unsigned vm = INSTR (20, 16);
12051 unsigned sizeof_operation;
12055 if ((type >= 0) && (type <= 11))
12057 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
12058 switch (INSTR (15, 14))
12061 sizeof_operation = nregs * 1;
12064 sizeof_operation = nregs * 2;
12067 if (INSTR (10, 10) == 0)
12068 sizeof_operation = nregs * 4;
12070 sizeof_operation = nregs * 8;
12076 else if (type == 0xC)
12078 sizeof_operation = INSTR (21, 21) ? 2 : 1;
12079 sizeof_operation <<= INSTR (11, 10);
12081 else if (type == 0xE)
12083 sizeof_operation = INSTR (21, 21) ? 4 : 3;
12084 sizeof_operation <<= INSTR (11, 10);
12093 case 0: sizeof_operation = 32; break;
12094 case 4: sizeof_operation = 24; break;
12095 case 8: sizeof_operation = 16; break;
12098 /* One register, immediate offset variant. */
12099 sizeof_operation = 8;
12103 /* Two registers, immediate offset variant. */
12104 sizeof_operation = 16;
12108 /* Three registers, immediate offset variant. */
12109 sizeof_operation = 24;
12113 /* Four registers, immediate offset variant. */
12114 sizeof_operation = 32;
12121 if (INSTR (30, 30))
12122 sizeof_operation *= 2;
12125 aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
12128 aarch64_set_reg_u64 (cpu, vn, SP_OK,
12129 address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
12133 NYI_assert (20, 16, 0);
12140 if ((type >= 0) && (type <= 11))
12141 do_vec_LDn_single (cpu, address);
12142 else if ((type == 0xC) || (type == 0xE))
12143 do_vec_LDnR (cpu, address);
12150 if ((type >= 0) && (type <= 11))
12152 do_vec_STn_single (cpu, address);
12163 case 0: LD4 (cpu, address); return;
12164 case 4: LD3 (cpu, address); return;
12165 case 8: LD2 (cpu, address); return;
12166 case 2: LD1_4 (cpu, address); return;
12167 case 6: LD1_3 (cpu, address); return;
12168 case 10: LD1_2 (cpu, address); return;
12169 case 7: LD1_1 (cpu, address); return;
12179 case 0: ST4 (cpu, address); return;
12180 case 4: ST3 (cpu, address); return;
12181 case 8: ST2 (cpu, address); return;
12182 case 2: ST1_4 (cpu, address); return;
12183 case 6: ST1_3 (cpu, address); return;
12184 case 10: ST1_2 (cpu, address); return;
12185 case 7: ST1_1 (cpu, address); return;
12192 dexLdSt (sim_cpu *cpu)
12194 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
12195 assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
12196 group == GROUP_LDST_1100 || group == GROUP_LDST_1110
12197 bits [29,28:26] of a LS are the secondary dispatch vector. */
12198 uint32_t group2 = dispatchLS (aarch64_get_instr (cpu));
12203 dexLoadExclusive (cpu); return;
12207 dexLoadLiteral (cpu); return;
12211 dexLoadOther (cpu); return;
12213 case LS_ADVSIMD_001:
12214 do_vec_load_store (cpu); return;
12217 dex_load_store_pair_gr (cpu); return;
12220 dex_load_store_pair_fp (cpu); return;
12223 /* Should never reach here. */
12228 /* Specific decode and execute for group Data Processing Register. */
12231 dexLogicalShiftedRegister (sim_cpu *cpu)
12233 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12235 instr[28:24] = 01010
12236 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
12239 instr[15,10] = count : must be 0xxxxx for 32 bit
12243 uint32_t size = INSTR (31, 31);
12244 Shift shiftType = INSTR (23, 22);
12245 uint32_t count = INSTR (15, 10);
12247 /* 32 bit operations must have count[5] = 0
12248 or else we have an UNALLOC. */
12249 if (size == 0 && uimm (count, 5, 5))
12252 /* Dispatch on size:op:N. */
12253 switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
12255 case 0: and32_shift (cpu, shiftType, count); return;
12256 case 1: bic32_shift (cpu, shiftType, count); return;
12257 case 2: orr32_shift (cpu, shiftType, count); return;
12258 case 3: orn32_shift (cpu, shiftType, count); return;
12259 case 4: eor32_shift (cpu, shiftType, count); return;
12260 case 5: eon32_shift (cpu, shiftType, count); return;
12261 case 6: ands32_shift (cpu, shiftType, count); return;
12262 case 7: bics32_shift (cpu, shiftType, count); return;
12263 case 8: and64_shift (cpu, shiftType, count); return;
12264 case 9: bic64_shift (cpu, shiftType, count); return;
12265 case 10: orr64_shift (cpu, shiftType, count); return;
12266 case 11: orn64_shift (cpu, shiftType, count); return;
12267 case 12: eor64_shift (cpu, shiftType, count); return;
12268 case 13: eon64_shift (cpu, shiftType, count); return;
12269 case 14: ands64_shift (cpu, shiftType, count); return;
12270 case 15: bics64_shift (cpu, shiftType, count); return;
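/* Illustrative restatement (hypothetical helper) of the dispatch
   computed above; e.g. BICS x0, x1, x2 has size:opc = 0b111 and
   N = 1, giving 15.  */
static inline uint32_t
logical_shifted_dispatch_sketch (uint32_t instr)
{
  /* size:opc come from instr[31,29], N from instr[21].  */
  return (((instr >> 29) & 7) << 1) | ((instr >> 21) & 1);
}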
12274 /* 32 bit conditional select. */
12276 csel32 (sim_cpu *cpu, CondCode cc)
12278 unsigned rm = INSTR (20, 16);
12279 unsigned rn = INSTR (9, 5);
12280 unsigned rd = INSTR (4, 0);
12282 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12283 testConditionCode (cpu, cc)
12284 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12285 : aarch64_get_reg_u32 (cpu, rm, NO_SP));
12288 /* 64 bit conditional select. */
12290 csel64 (sim_cpu *cpu, CondCode cc)
12292 unsigned rm = INSTR (20, 16);
12293 unsigned rn = INSTR (9, 5);
12294 unsigned rd = INSTR (4, 0);
12296 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12297 testConditionCode (cpu, cc)
12298 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12299 : aarch64_get_reg_u64 (cpu, rm, NO_SP));
12302 /* 32 bit conditional increment. */
12304 csinc32 (sim_cpu *cpu, CondCode cc)
12306 unsigned rm = INSTR (20, 16);
12307 unsigned rn = INSTR (9, 5);
12308 unsigned rd = INSTR (4, 0);
12310 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12311 testConditionCode (cpu, cc)
12312 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12313 : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
12316 /* 64 bit conditional increment. */
12318 csinc64 (sim_cpu *cpu, CondCode cc)
12320 unsigned rm = INSTR (20, 16);
12321 unsigned rn = INSTR (9, 5);
12322 unsigned rd = INSTR (4, 0);
12324 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12325 testConditionCode (cpu, cc)
12326 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12327 : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
12330 /* 32 bit conditional invert. */
12332 csinv32 (sim_cpu *cpu, CondCode cc)
12334 unsigned rm = INSTR (20, 16);
12335 unsigned rn = INSTR (9, 5);
12336 unsigned rd = INSTR (4, 0);
12338 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12339 testConditionCode (cpu, cc)
12340 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12341 : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
12344 /* 64 bit conditional invert. */
12346 csinv64 (sim_cpu *cpu, CondCode cc)
12348 unsigned rm = INSTR (20, 16);
12349 unsigned rn = INSTR (9, 5);
12350 unsigned rd = INSTR (4, 0);
12352 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12353 testConditionCode (cpu, cc)
12354 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12355 : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
12358 /* 32 bit conditional negate. */
12360 csneg32 (sim_cpu *cpu, CondCode cc)
12362 unsigned rm = INSTR (20, 16);
12363 unsigned rn = INSTR (9, 5);
12364 unsigned rd = INSTR (4, 0);
12366 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12367 testConditionCode (cpu, cc)
12368 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12369 : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
12372 /* 64 bit conditional negate. */
12374 csneg64 (sim_cpu *cpu, CondCode cc)
12376 unsigned rm = INSTR (20, 16);
12377 unsigned rn = INSTR (9, 5);
12378 unsigned rd = INSTR (4, 0);
12380 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12381 testConditionCode (cpu, cc)
12382 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12383 : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
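/* Hedged sketch of the shared pattern in the eight handlers above:
   all four operations return Rn when the condition holds and differ
   only in the transform applied to Rm when it does not.  */
static uint64_t
cond_select_sketch (uint64_t rn_val, uint64_t rm_val, int cond_holds,
                    unsigned op /* 0 CSEL, 1 CSINC, 2 CSINV, 3 CSNEG */)
{
  if (cond_holds)
    return rn_val;
  switch (op)
    {
    case 1:  return rm_val + 1;
    case 2:  return ~rm_val;
    case 3:  return - rm_val;
    default: return rm_val;
    }
}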
12387 dexCondSelect (sim_cpu *cpu)
12389 /* instr[28,21] = 11011011
12390 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12391 instr[30:11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
12392 100 ==> CSINV, 101 ==> CSNEG,
12394 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12395 instr[15,12] = cond
12396 instr[11,10] = op2. */
12398 CondCode cc = INSTR (15, 12);
12399 uint32_t S = INSTR (29, 29);
12400 uint32_t op2 = INSTR (11, 10);
12408 switch ((INSTR (31, 30) << 1) | op2)
12410 case 0: csel32 (cpu, cc); return;
12411 case 1: csinc32 (cpu, cc); return;
12412 case 2: csinv32 (cpu, cc); return;
12413 case 3: csneg32 (cpu, cc); return;
12414 case 4: csel64 (cpu, cc); return;
12415 case 5: csinc64 (cpu, cc); return;
12416 case 6: csinv64 (cpu, cc); return;
12417 case 7: csneg64 (cpu, cc); return;
12421 /* Some helpers for counting leading 1 or 0 bits. */
12423 /* Counts the number of leading bits which are the same
12424 in a 32 bit value in the range 1 to 32. */
12426 leading32 (uint32_t value)
12428 int32_t mask = 0xffff0000;
12429 uint32_t count = 16; /* Counts number of bits set in mask. */
12430 uint32_t lo = 1; /* Lower bound for number of sign bits. */
12431 uint32_t hi = 32; /* Upper bound for number of sign bits. */
12433 while (lo + 1 < hi)
12435 int32_t test = (value & mask);
12437 if (test == 0 || test == mask)
12440 count = (lo + hi) / 2;
12441 mask >>= (count - lo);
12446 count = (lo + hi) / 2;
12447 mask <<= hi - count;
12456 test = (value & mask);
12458 if (test == 0 || test == mask)
12467 /* Counts the number of leading bits which are the same
12468 in a 64 bit value in the range 1 to 64. */
12470 leading64 (uint64_t value)
12472 int64_t mask = 0xffffffff00000000LL;
12473 uint64_t count = 32; /* Counts number of bits set in mask. */
12474 uint64_t lo = 1; /* Lower bound for number of sign bits. */
12475 uint64_t hi = 64; /* Upper bound for number of sign bits. */
12477 while (lo + 1 < hi)
12479 int64_t test = (value & mask);
12481 if (test == 0 || test == mask)
12484 count = (lo + hi) / 2;
12485 mask >>= (count - lo);
12490 count = (lo + hi) / 2;
12491 mask <<= hi - count;
12500 test = (value & mask);
12502 if (test == 0 || test == mask)
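/* Illustrative checks (hypothetical, never called): the helpers
   count leading bits equal to the sign bit, so all-zero and all-one
   prefixes count alike.  */
static void
leading_examples_sketch (void)
{
  uint32_t ones  = leading32 (0xFFFFFFFE); /* 31 leading one bits.   */
  uint32_t zeros = leading32 (0x00000001); /* 31 leading zero bits.  */
  (void) ones;
  (void) zeros;
}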
12511 /* Bit operations. */
12512 /* N.B. register args may not be SP. */
12514 /* 32 bit count leading sign bits. */
12516 cls32 (sim_cpu *cpu)
12518 unsigned rn = INSTR (9, 5);
12519 unsigned rd = INSTR (4, 0);
12521 /* N.B. the result needs to exclude the leading bit. */
12522 aarch64_set_reg_u64
12523 (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
12526 /* 64 bit count leading sign bits. */
12528 cls64 (sim_cpu *cpu)
12530 unsigned rn = INSTR (9, 5);
12531 unsigned rd = INSTR (4, 0);
12533 /* N.B. the result needs to exclude the leading bit. */
12534 aarch64_set_reg_u64
12535 (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
12538 /* 32 bit count leading zero bits. */
12540 clz32 (sim_cpu *cpu)
12542 unsigned rn = INSTR (9, 5);
12543 unsigned rd = INSTR (4, 0);
12544 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12546 /* If the sign (top) bit is set then the count is 0. */
12547 if (pick32 (value, 31, 31))
12548 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12550 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
12553 /* 64 bit count leading zero bits. */
12555 clz64 (sim_cpu *cpu)
12557 unsigned rn = INSTR (9, 5);
12558 unsigned rd = INSTR (4, 0);
12559 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12561 /* If the sign (top) bit is set then the count is 0. */
12562 if (pick64 (value, 63, 63))
12563 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12565 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
12568 /* 32 bit reverse bits. */
12570 rbit32 (sim_cpu *cpu)
12572 unsigned rn = INSTR (9, 5);
12573 unsigned rd = INSTR (4, 0);
12574 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12575 uint32_t result = 0;
12578 for (i = 0; i < 32; i++)
12581 result |= (value & 1);
12584 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12587 /* 64 bit reverse bits. */
12589 rbit64 (sim_cpu *cpu)
12591 unsigned rn = INSTR (9, 5);
12592 unsigned rd = INSTR (4, 0);
12593 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12594 uint64_t result = 0;
12597 for (i = 0; i < 64; i++)
12600 result |= (value & 1UL);
12603 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12606 /* 32 bit reverse bytes. */
12608 rev32 (sim_cpu *cpu)
12610 unsigned rn = INSTR (9, 5);
12611 unsigned rd = INSTR (4, 0);
12612 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12613 uint32_t result = 0;
12616 for (i = 0; i < 4; i++)
12619 result |= (value & 0xff);
12622 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12625 /* 64 bit reverse bytes. */
12627 rev64 (sim_cpu *cpu)
12629 unsigned rn = INSTR (9, 5);
12630 unsigned rd = INSTR (4, 0);
12631 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12632 uint64_t result = 0;
12635 for (i = 0; i < 8; i++)
12638 result |= (value & 0xffULL);
12641 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
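/* Hedged sketch of the intended loop body above (the shift steps are
   elided in this excerpt): each pass shifts the result up one unit,
   copies in the low unit of VALUE, then shifts VALUE down.  */
static uint64_t
rev64_sketch (uint64_t value)
{
  uint64_t result = 0;
  int i;
  for (i = 0; i < 8; i++)
    {
      result <<= 8;
      result |= (value & 0xffULL);
      value >>= 8;
    }
  return result; /* 0x0102030405060708 -> 0x0807060504030201.  */
}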
12644 /* 32 bit reverse shorts. */
12645 /* N.B. this reverses the order of the bytes in each half word. */
12647 revh32 (sim_cpu *cpu)
12649 unsigned rn = INSTR (9, 5);
12650 unsigned rd = INSTR (4, 0);
12651 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12652 uint32_t result = 0;
12655 for (i = 0; i < 2; i++)
12658 result |= (value & 0x00ff00ff);
12661 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12664 /* 64 bit reverse shorts. */
12665 /* N.B. this reverses the order of the bytes in each half word. */
12667 revh64 (sim_cpu *cpu)
12669 unsigned rn = INSTR (9, 5);
12670 unsigned rd = INSTR (4, 0);
12671 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12672 uint64_t result = 0;
12675 for (i = 0; i < 2; i++)
12678 result |= (value & 0x00ff00ff00ff00ffULL);
12681 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12685 dexDataProc1Source (sim_cpu *cpu)
12688 instr[28,21] = 111010110
12689 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12690 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12691 instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
12692 instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
12693 000010 ==> REV, 000011 ==> UNALLOC
12694 000100 ==> CLZ, 000101 ==> CLS
12696 instr[9,5] = rn : may not be SP
12697 instr[4,0] = rd : may not be SP. */
12699 uint32_t S = INSTR (29, 29);
12700 uint32_t opcode2 = INSTR (20, 16);
12701 uint32_t opcode = INSTR (15, 10);
12702 uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
12715 case 0: rbit32 (cpu); return;
12716 case 1: revh32 (cpu); return;
12717 case 2: rev32 (cpu); return;
12718 case 4: clz32 (cpu); return;
12719 case 5: cls32 (cpu); return;
12720 case 8: rbit64 (cpu); return;
12721 case 9: revh64 (cpu); return;
12722 case 10: rev32 (cpu); return;
12723 case 11: rev64 (cpu); return;
12724 case 12: clz64 (cpu); return;
12725 case 13: cls64 (cpu); return;
12726 default: HALT_UNALLOC;
12731 Shifts by count supplied in register.
12732 N.B. register args may not be SP.
12733 These all use the shifted auxiliary function for
12734 simplicity and clarity. Writing the actual shift
12735 inline would avoid a branch and so be faster but
12736 would also necessitate getting signs right. */
12738 /* 32 bit arithmetic shift right. */
12740 asrv32 (sim_cpu *cpu)
12742 unsigned rm = INSTR (20, 16);
12743 unsigned rn = INSTR (9, 5);
12744 unsigned rd = INSTR (4, 0);
12746 aarch64_set_reg_u64
12748 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
12749 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12752 /* 64 bit arithmetic shift right. */
12754 asrv64 (sim_cpu *cpu)
12756 unsigned rm = INSTR (20, 16);
12757 unsigned rn = INSTR (9, 5);
12758 unsigned rd = INSTR (4, 0);
12760 aarch64_set_reg_u64
12762 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
12763 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12766 /* 32 bit logical shift left. */
12768 lslv32 (sim_cpu *cpu)
12770 unsigned rm = INSTR (20, 16);
12771 unsigned rn = INSTR (9, 5);
12772 unsigned rd = INSTR (4, 0);
12774 aarch64_set_reg_u64
12776 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
12777 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12780 /* 64 bit logical shift left. */
12782 lslv64 (sim_cpu *cpu)
12784 unsigned rm = INSTR (20, 16);
12785 unsigned rn = INSTR (9, 5);
12786 unsigned rd = INSTR (4, 0);
12788 aarch64_set_reg_u64
12790 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
12791 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12794 /* 32 bit logical shift right. */
12796 lsrv32 (sim_cpu *cpu)
12798 unsigned rm = INSTR (20, 16);
12799 unsigned rn = INSTR (9, 5);
12800 unsigned rd = INSTR (4, 0);
12802 aarch64_set_reg_u64
12804 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
12805 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12808 /* 64 bit logical shift right. */
12810 lsrv64 (sim_cpu *cpu)
12812 unsigned rm = INSTR (20, 16);
12813 unsigned rn = INSTR (9, 5);
12814 unsigned rd = INSTR (4, 0);
12816 aarch64_set_reg_u64
12818 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
12819 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12822 /* 32 bit rotate right. */
12824 rorv32 (sim_cpu *cpu)
12826 unsigned rm = INSTR (20, 16);
12827 unsigned rn = INSTR (9, 5);
12828 unsigned rd = INSTR (4, 0);
12830 aarch64_set_reg_u64
12832 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
12833 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12836 /* 64 bit rotate right. */
12838 rorv64 (sim_cpu *cpu)
12840 unsigned rm = INSTR (20, 16);
12841 unsigned rn = INSTR (9, 5);
12842 unsigned rd = INSTR (4, 0);
12844 aarch64_set_reg_u64
12846 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
12847 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12853 /* 32 bit signed divide. */
12855 cpuiv32 (sim_cpu *cpu)
12857 unsigned rm = INSTR (20, 16);
12858 unsigned rn = INSTR (9, 5);
12859 unsigned rd = INSTR (4, 0);
12860 /* N.B. the pseudo-code does the divide using 64 bit data. */
12861 /* TODO : check that this rounds towards zero as required. */
12862 int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
12863 int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);
12865 aarch64_set_reg_s64 (cpu, rd, NO_SP,
12866 divisor ? ((int32_t) (dividend / divisor)) : 0);
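/* Hedged sketch of why the divide above uses 64 bit data (SDIV is
   the architectural name for this operation): promoting first makes
   INT32_MIN / -1 well defined in C, and the truncation back to 32
   bits yields 0x80000000 as the architecture requires.  */
static int32_t
sdiv32_sketch (int32_t n, int32_t d)
{
  int64_t q = d ? ((int64_t) n / (int64_t) d) : 0; /* Divide by 0 -> 0.  */
  return (int32_t) q;
}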
12869 /* 64 bit signed divide. */
12871 cpuiv64 (sim_cpu *cpu)
12873 unsigned rm = INSTR (20, 16);
12874 unsigned rn = INSTR (9, 5);
12875 unsigned rd = INSTR (4, 0);
12877 /* TODO : check that this rounds towards zero as required. */
12878 int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
12880 aarch64_set_reg_s64
12882 divisor ? (aarch64_get_reg_s64 (cpu, rn, NO_SP) / divisor) : 0);
12885 /* 32 bit unsigned divide. */
12887 udiv32 (sim_cpu *cpu)
12889 unsigned rm = INSTR (20, 16);
12890 unsigned rn = INSTR (9, 5);
12891 unsigned rd = INSTR (4, 0);
12893 /* N.B. the pseudo-code does the divide using 64 bit data. */
12894 uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12895 uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);
12897 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12898 divisor ? (uint32_t) (dividend / divisor) : 0);
12901 /* 64 bit unsigned divide. */
12903 udiv64 (sim_cpu *cpu)
12905 unsigned rm = INSTR (20, 16);
12906 unsigned rn = INSTR (9, 5);
12907 unsigned rd = INSTR (4, 0);
12909 /* TODO : check that this rounds towards zero as required. */
12910 uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12912 aarch64_set_reg_u64
12914 divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
12918 dexDataProc2Source (sim_cpu *cpu)
12920 /* assert instr[30] == 0
12921 instr[28,21] == 11010110
12922 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12923 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12924 instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> CPUIV,
12925 001000 ==> LSLV, 001001 ==> LSRV
12926 001010 ==> ASRV, 001011 ==> RORV
12930 uint32_t S = INSTR (29, 29);
12931 uint32_t opcode = INSTR (15, 10);
12939 dispatch = ( (INSTR (31, 31) << 3)
12940 | (uimm (opcode, 3, 3) << 2)
12941 | uimm (opcode, 1, 0));
12944 case 2: udiv32 (cpu); return;
12945 case 3: cpuiv32 (cpu); return;
12946 case 4: lslv32 (cpu); return;
12947 case 5: lsrv32 (cpu); return;
12948 case 6: asrv32 (cpu); return;
12949 case 7: rorv32 (cpu); return;
12950 case 10: udiv64 (cpu); return;
12951 case 11: cpuiv64 (cpu); return;
12952 case 12: lslv64 (cpu); return;
12953 case 13: lsrv64 (cpu); return;
12954 case 14: asrv64 (cpu); return;
12955 case 15: rorv64 (cpu); return;
12956 default: HALT_UNALLOC;
12963 /* 32 bit multiply and add. */
12965 madd32 (sim_cpu *cpu)
12967 unsigned rm = INSTR (20, 16);
12968 unsigned ra = INSTR (14, 10);
12969 unsigned rn = INSTR (9, 5);
12970 unsigned rd = INSTR (4, 0);
12972 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12973 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12974 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12975 + aarch64_get_reg_u32 (cpu, rn, NO_SP)
12976 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12979 /* 64 bit multiply and add. */
12981 madd64 (sim_cpu *cpu)
12983 unsigned rm = INSTR (20, 16);
12984 unsigned ra = INSTR (14, 10);
12985 unsigned rn = INSTR (9, 5);
12986 unsigned rd = INSTR (4, 0);
12988 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12989 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12990 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12991 + (aarch64_get_reg_u64 (cpu, rn, NO_SP)
12992 * aarch64_get_reg_u64 (cpu, rm, NO_SP)));
12995 /* 32 bit multiply and sub. */
12997 msub32 (sim_cpu *cpu)
12999 unsigned rm = INSTR (20, 16);
13000 unsigned ra = INSTR (14, 10);
13001 unsigned rn = INSTR (9, 5);
13002 unsigned rd = INSTR (4, 0);
13004 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13005 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13006 aarch64_get_reg_u32 (cpu, ra, NO_SP)
13007 - aarch64_get_reg_u32 (cpu, rn, NO_SP)
13008 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
13011 /* 64 bit multiply and sub. */
13013 msub64 (sim_cpu *cpu)
13015 unsigned rm = INSTR (20, 16);
13016 unsigned ra = INSTR (14, 10);
13017 unsigned rn = INSTR (9, 5);
13018 unsigned rd = INSTR (4, 0);
13020 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13021 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13022 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13023 - aarch64_get_reg_u64 (cpu, rn, NO_SP)
13024 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
13027 /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit. */
13029 smaddl (sim_cpu *cpu)
13031 unsigned rm = INSTR (20, 16);
13032 unsigned ra = INSTR (14, 10);
13033 unsigned rn = INSTR (9, 5);
13034 unsigned rd = INSTR (4, 0);
13036 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
13037 obtain a 64 bit product. */
13038 aarch64_set_reg_s64
13040 aarch64_get_reg_s64 (cpu, ra, NO_SP)
13041 + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
13042 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
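/* Worked example (illustrative): with rn = -3, rm = 5 and ra = 100
   the instruction computes 100 + (-3 * 5) = 85; the casts above
   widen before multiplying so the product cannot wrap at 32 bits.  */
static int64_t
smaddl_sketch (int32_t wn, int32_t wm, int64_t xa)
{
  return xa + (int64_t) wn * (int64_t) wm;
}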
13045 /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
13047 smsubl (sim_cpu *cpu)
13049 unsigned rm = INSTR (20, 16);
13050 unsigned ra = INSTR (14, 10);
13051 unsigned rn = INSTR (9, 5);
13052 unsigned rd = INSTR (4, 0);
13054 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
13055 obtain a 64 bit product. */
13056 aarch64_set_reg_s64
13058 aarch64_get_reg_s64 (cpu, ra, NO_SP)
13059 - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
13060 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
13063 /* Integer Multiply/Divide. */
13065 /* First some macros and a helper function. */
13066 /* Macros to test or access elements of 64 bit words. */
13068 /* Mask used to access lo 32 bits of 64 bit unsigned int. */
13069 #define LOW_WORD_MASK ((1ULL << 32) - 1)
13070 /* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
13071 #define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
13072 /* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
13073 #define highWordToU64(_value_u64) ((_value_u64) >> 32)
13075 /* Offset of sign bit in 64 bit signed integer. */
13076 #define SIGN_SHIFT_U64 63
13077 /* The sign bit itself -- also identifies the minimum negative int value. */
13078 #define SIGN_BIT_U64 (1UL << SIGN_SHIFT_U64)
13079 /* Return true if a 64 bit signed int presented as an unsigned int is the
13080 most negative value. */
13081 #define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
13082 /* Return true (non-zero) if a 64 bit signed int presented as an unsigned
13083 int has its sign bit set. */
13084 #define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
13085 /* Return -1L if a 64 bit signed int presented as an unsigned int has
13086 its sign bit set, or 1L if it is clear. */
13087 #define signOfU64(_value_u64) (1L + (((_value_u64) >> SIGN_SHIFT_U64) * -2L))
13088 /* Clear the sign bit of a 64 bit signed int presented as an unsigned int. */
13089 #define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
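/* Editor's note: a quick worked check of signOfU64 as corrected above:
   for v = 1, v >> 63 == 0, giving 1L + 0 * -2L == 1L;
   for v = 1UL << 63, v >> 63 == 1, giving 1L + 1 * -2L == -1L.  */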
13091 /* Multiply two 64 bit ints and return
13092 the hi 64 bits of the 128 bit product. */
13095 mul64hi (uint64_t value1, uint64_t value2)
13097 uint64_t resultmid1;
13099 uint64_t value1_lo = lowWordToU64 (value1);
13100 uint64_t value1_hi = highWordToU64 (value1);
13101 uint64_t value2_lo = lowWordToU64 (value2);
13102 uint64_t value2_hi = highWordToU64 (value2);
13104 /* Cross-multiply and collect results. */
13105 uint64_t xproductlo = value1_lo * value2_lo;
13106 uint64_t xproductmid1 = value1_lo * value2_hi;
13107 uint64_t xproductmid2 = value1_hi * value2_lo;
13108 uint64_t xproducthi = value1_hi * value2_hi;
13109 uint64_t carry = 0;
13110 /* Start accumulating 64 bit results. */
13111 /* Drop bottom half of lowest cross-product. */
13112 uint64_t resultmid = xproductlo >> 32;
13113 /* Add in middle products. */
13114 resultmid = resultmid + xproductmid1;
13116 /* Check for overflow. */
13117 if (resultmid < xproductmid1)
13118 /* Carry over 1 into top cross-product. */
13121 resultmid1 = resultmid + xproductmid2;
13123 /* Check for overflow. */
13124 if (resultmid1 < xproductmid2)
13125 /* Carry over 1 into top cross-product. */
13128 /* Drop lowest 32 bits of middle cross-product. */
13129 result = resultmid1 >> 32;
13130 /* Move carry bit to just above middle cross-product highest bit. */
13131 carry = carry << 32;
13133 /* Add in the top cross-product and any carry. */
13134 result += xproducthi + carry;
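/* Editor's note: on compilers that provide the non-standard unsigned
   __int128 type, the schoolbook cross-product computation above can be
   cross-checked against a one-liner; a sketch only, not used by the
   simulator.  */

#ifdef __SIZEOF_INT128__
static inline uint64_t
mul64hi_ref (uint64_t value1, uint64_t value2)
{
  return (uint64_t) (((unsigned __int128) value1
		      * (unsigned __int128) value2) >> 64);
}
#endif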
13139 /* Signed multiply high, source, source2 :
13140 64 bit, dest <-- high 64-bit of result. */
13142 smulh (sim_cpu *cpu)
13146 unsigned rm = INSTR (20, 16);
13147 unsigned rn = INSTR (9, 5);
13148 unsigned rd = INSTR (4, 0);
13149 GReg ra = INSTR (14, 10);
13150 int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
13151 int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
13159 /* Convert to unsigned and use the unsigned mul64hi routine
13160 then fix the sign up afterwards. */
13181 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13183 uresult = mul64hi (uvalue1, uvalue2);
13188 /* Multiplying the 128-bit result by -1 inverts the high part, with a
13189 carry added in only if the low part is 0. */
13191 if ((uvalue1 * uvalue2) == 0)
13195 aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
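/* Editor's note: a hedged sketch of the sign fix-up above.  Negating
   the 128 bit pair {hi,lo} gives {~hi + (lo == 0), -lo}: the high half
   is inverted and takes a carry in only when the low half is zero,
   which is what the uvalue1 * uvalue2 test (the low 64 bits of the
   product) checks.  */

static inline int64_t
smulh_ref (int64_t a, int64_t b)
{
  uint64_t ua = (uint64_t) a;
  uint64_t ub = (uint64_t) b;
  uint64_t hi, lo;

  if (a < 0)
    ua = - ua;	/* Unsigned negate: well defined even for INT64_MIN.  */
  if (b < 0)
    ub = - ub;

  hi = mul64hi (ua, ub);
  lo = ua * ub;
  if ((a < 0) != (b < 0))
    hi = ~hi + (lo == 0);	/* Negate {hi,lo}; keep the high half.  */
  return (int64_t) hi;
}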
13198 /* Unsigned multiply add long -- source, source2 :
13199 32 bit, source3 : 64 bit. */
13201 umaddl (sim_cpu *cpu)
13203 unsigned rm = INSTR (20, 16);
13204 unsigned ra = INSTR (14, 10);
13205 unsigned rn = INSTR (9, 5);
13206 unsigned rd = INSTR (4, 0);
13208 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13209 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13210 obtain a 64 bit product. */
13211 aarch64_set_reg_u64
13213 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13214 + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13215 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13218 /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
13220 umsubl (sim_cpu *cpu)
13222 unsigned rm = INSTR (20, 16);
13223 unsigned ra = INSTR (14, 10);
13224 unsigned rn = INSTR (9, 5);
13225 unsigned rd = INSTR (4, 0);
13227 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13228 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13229 obtain a 64 bit product. */
13230 aarch64_set_reg_u64
13232 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13233 - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13234 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13237 /* Unsigned multiply high, source, source2 :
13238 64 bit, dest <-- high 64-bit of result. */
13240 umulh (sim_cpu *cpu)
13242 unsigned rm = INSTR (20, 16);
13243 unsigned rn = INSTR (9, 5);
13244 unsigned rd = INSTR (4, 0);
13245 GReg ra = INSTR (14, 10);
13250 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13251 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13252 mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
13253 aarch64_get_reg_u64 (cpu, rm, NO_SP)));
13257 dexDataProc3Source (sim_cpu *cpu)
13259 /* assert instr[28,24] == 11011. */
13260 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
13261 instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
13262 instr[23,21] = op31 : 111 ==> UNALLOC, ow ==> ok
13263 instr[15] = o0 : 0/1 ==> ok
13264 instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB, (32/64 bit)
13265 0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
13266 0100 ==> SMULH, (64 bit only)
13267 1010 ==> UMADDL, 1011 ==> UMSUBL, (64 bit only)
13268 1100 ==> UMULH (64 bit only)
13272 uint32_t size = INSTR (31, 31);
13273 uint32_t op54 = INSTR (30, 29);
13274 uint32_t op31 = INSTR (23, 21);
13275 uint32_t o0 = INSTR (15, 15);
13292 dispatch = (op31 << 1) | o0;
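/* Editor's note: worked example of the dispatch value: UMADDL has
   op31 = 101 and o0 = 0, so dispatch = (0b101 << 1) | 0 = 10, which
   selects "case 10" below.  */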
13296 case 0: madd64 (cpu); return;
13297 case 1: msub64 (cpu); return;
13298 case 2: smaddl (cpu); return;
13299 case 3: smsubl (cpu); return;
13300 case 4: smulh (cpu); return;
13301 case 10: umaddl (cpu); return;
13302 case 11: umsubl (cpu); return;
13303 case 12: umulh (cpu); return;
13304 default: HALT_UNALLOC;
13309 dexDPReg (sim_cpu *cpu)
13311 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13312 assert group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
13313 bits [28,24,21] of a DPReg are the secondary dispatch vector. */
13314 uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
13318 case DPREG_LOG_000:
13319 case DPREG_LOG_001:
13320 dexLogicalShiftedRegister (cpu); return;
13322 case DPREG_ADDSHF_010:
13323 dexAddSubtractShiftedRegister (cpu); return;
13325 case DPREG_ADDEXT_011:
13326 dexAddSubtractExtendedRegister (cpu); return;
13328 case DPREG_ADDCOND_100:
13330 /* This set bundles a variety of different operations. */
13332 /* 1) add/sub w carry. */
13333 uint32_t mask1 = 0x1FE00000U;
13334 uint32_t val1 = 0x1A000000U;
13335 /* 2) cond compare register/immediate. */
13336 uint32_t mask2 = 0x1FE00000U;
13337 uint32_t val2 = 0x1A400000U;
13338 /* 3) cond select. */
13339 uint32_t mask3 = 0x1FE00000U;
13340 uint32_t val3 = 0x1A800000U;
13341 /* 4) data proc 1/2 source. */
13342 uint32_t mask4 = 0x1FE00000U;
13343 uint32_t val4 = 0x1AC00000U;
13345 if ((aarch64_get_instr (cpu) & mask1) == val1)
13346 dexAddSubtractWithCarry (cpu);
13348 else if ((aarch64_get_instr (cpu) & mask2) == val2)
13351 else if ((aarch64_get_instr (cpu) & mask3) == val3)
13352 dexCondSelect (cpu);
13354 else if ((aarch64_get_instr (cpu) & mask4) == val4)
13356 /* Bit 30 is clear for data proc 2 source
13357 and set for data proc 1 source. */
13358 if (aarch64_get_instr (cpu) & (1U << 30))
13359 dexDataProc1Source (cpu);
13361 dexDataProc2Source (cpu);
13365 /* Should not reach here. */
13371 case DPREG_3SRC_110:
13372 dexDataProc3Source (cpu); return;
13374 case DPREG_UNALLOC_101:
13377 case DPREG_3SRC_111:
13378 dexDataProc3Source (cpu); return;
13381 /* Should never reach here. */
13386 /* Unconditional Branch immediate.
13387 Offset is a PC-relative byte offset in the range +/- 128MiB.
13388 The offset is assumed to arrive already scaled, i.e. the decode
13389 routine converts the instruction's word offset to a byte offset. */
13391 /* Unconditional branch. */
13393 buc (sim_cpu *cpu, int32_t offset)
13395 aarch64_set_next_PC_by_offset (cpu, offset);
13398 static unsigned stack_depth = 0;
13400 /* Unconditional branch and link -- writes return PC to LR. */
13402 bl (sim_cpu *cpu, int32_t offset)
13404 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13405 aarch64_save_LR (cpu);
13406 aarch64_set_next_PC_by_offset (cpu, offset);
13408 if (TRACE_BRANCH_P (cpu))
13412 " %*scall %" PRIx64 " [%s]"
13413 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13414 stack_depth, " ", aarch64_get_next_PC (cpu),
13415 aarch64_get_func (CPU_STATE (cpu),
13416 aarch64_get_next_PC (cpu)),
13417 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13418 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13419 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13424 /* Unconditional Branch register.
13425 Branch/return address is in source register. */
13427 /* Unconditional branch. */
13431 unsigned rn = INSTR (9, 5);
13432 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13433 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13436 /* Unconditional branch and link -- writes return PC to LR. */
13440 unsigned rn = INSTR (9, 5);
13442 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13443 /* The pseudo code in the spec says we update LR before fetching
13444 the value from rn. */
13445 aarch64_save_LR (cpu);
13446 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13448 if (TRACE_BRANCH_P (cpu))
13452 " %*scall %" PRIx64 " [%s]"
13453 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13454 stack_depth, " ", aarch64_get_next_PC (cpu),
13455 aarch64_get_func (CPU_STATE (cpu),
13456 aarch64_get_next_PC (cpu)),
13457 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13458 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13459 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13464 /* Return -- the assembler defaults the source register to LR. This is
13465 functionally equivalent to br but, presumably, unlike br it side-effects
13466 the branch predictor. */
13470 unsigned rn = INSTR (9, 5);
13471 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13473 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13474 if (TRACE_BRANCH_P (cpu))
13477 " %*sreturn [result: %" PRIx64 "]",
13478 stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
13483 /* NOP -- we implement this and call it from the decode in case we
13484 want to intercept it later. */
13489 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13492 /* Data synchronization barrier. */
13497 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13500 /* Data memory barrier. */
13505 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13508 /* Instruction synchronization barrier. */
13513 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13517 dexBranchImmediate (sim_cpu *cpu)
13519 /* assert instr[30,26] == 00101
13520 instr[31] ==> 0 == B, 1 == BL
13521 instr[25,0] == imm26 branch offset counted in words. */
13523 uint32_t top = INSTR (31, 31);
13524 /* We have a 26 bit signed word offset which we need to pass to the
13525 execute routine as a signed byte offset. */
13526 int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
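/* Editor's note: simm32 is defined elsewhere in the simulator.  A
   hedged sketch of such a sign-extending field extractor, assuming
   <stdint.h> and an arithmetic right shift of signed ints (true of
   the compilers GDB supports):  */

static inline int32_t
simm32_ref (uint32_t insn, int hi, int lo)
{
  /* Move the field's top bit up to bit 31, then shift back down so
     that the sign bit is replicated.  */
  return ((int32_t) (insn << (31 - hi))) >> (31 - hi + lo);
}

/* E.g. for B/BL above: simm32_ref (insn, 25, 0) << 2 yields a byte
   offset in the range +/- 128MiB.  */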
13534 /* Control Flow. */
13536 /* Conditional branch.
13538 Offset is a PC-relative byte offset in the range +/- 1MiB. Pos is
13539 a bit position in the range 0 .. 63.
13541 cc is a CondCode enum value as pulled out of the decode.
13543 N.B. any offset register (source) can only be Xn or Wn. */
13546 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
13548 /* The test returns TRUE if CC is met. */
13549 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13550 if (testConditionCode (cpu, cc))
13551 aarch64_set_next_PC_by_offset (cpu, offset);
13554 /* 32 bit branch on register non-zero. */
13556 cbnz32 (sim_cpu *cpu, int32_t offset)
13558 unsigned rt = INSTR (4, 0);
13560 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13561 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
13562 aarch64_set_next_PC_by_offset (cpu, offset);
13565 /* 64 bit branch on register non-zero. */
13567 cbnz (sim_cpu *cpu, int32_t offset)
13569 unsigned rt = INSTR (4, 0);
13571 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13572 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
13573 aarch64_set_next_PC_by_offset (cpu, offset);
13576 /* 32 bit branch on register zero. */
13578 cbz32 (sim_cpu *cpu, int32_t offset)
13580 unsigned rt = INSTR (4, 0);
13582 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13583 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
13584 aarch64_set_next_PC_by_offset (cpu, offset);
13587 /* 64 bit branch on register zero. */
13589 cbz (sim_cpu *cpu, int32_t offset)
13591 unsigned rt = INSTR (4, 0);
13593 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13594 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
13595 aarch64_set_next_PC_by_offset (cpu, offset);
13598 /* Branch on register bit test non-zero -- one size fits all. */
13600 tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13602 unsigned rt = INSTR (4, 0);
13604 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13605 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
13606 aarch64_set_next_PC_by_offset (cpu, offset);
13609 /* Branch on register bit test zero -- one size fits all. */
13611 tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13613 unsigned rt = INSTR (4, 0);
13615 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13616 if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
13617 aarch64_set_next_PC_by_offset (cpu, offset);
13621 dexCompareBranchImmediate (sim_cpu *cpu)
13623 /* instr[30,25] = 01 1010
13624 instr[31] = size : 0 ==> 32, 1 ==> 64
13625 instr[24] = op : 0 ==> CBZ, 1 ==> CBNZ
13626 instr[23,5] = simm19 branch offset counted in words
13629 uint32_t size = INSTR (31, 31);
13630 uint32_t op = INSTR (24, 24);
13631 int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13636 cbz32 (cpu, offset);
13638 cbnz32 (cpu, offset);
13645 cbnz (cpu, offset);
13650 dexTestBranchImmediate (sim_cpu *cpu)
13652 /* instr[31] = b5 : bit 5 of test bit idx
13653 instr[30,25] = 01 1011
13654 instr[24] = op : 0 ==> TBZ, 1 ==> TBNZ
13655 instr[23,19] = b40 : bits 4 to 0 of test bit idx
13656 instr[18,5] = simm14 : signed offset counted in words
13657 instr[4,0] = uimm5 */
13659 uint32_t pos = ((INSTR (31, 31) << 5) | INSTR (23, 19));
13660 int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
13662 NYI_assert (30, 25, 0x1b);
13664 if (INSTR (24, 24) == 0)
13665 tbz (cpu, pos, offset);
13667 tbnz (cpu, pos, offset);
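/* Editor's note: a worked example of the b5:b40 reconstruction above.
   For "tbnz x3, #37, label", 37 = 0b100101, so b5 (instr[31]) = 1,
   b40 (instr[23,19]) = 0b00101, and pos = (1 << 5) | 5 = 37.  */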
13671 dexCondBranchImmediate (sim_cpu *cpu)
13673 /* instr[31,25] = 010 1010
13674 instr[24] = op1; op => 00 ==> B.cond
13675 instr[23,5] = simm19 : signed offset counted in words
13676 instr[4] = op0
13677 instr[3,0] = cond */
13680 uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
13682 NYI_assert (31, 25, 0x2a);
13687 offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13689 bcc (cpu, offset, INSTR (3, 0));
13693 dexBranchRegister (sim_cpu *cpu)
13695 /* instr[31,25] = 110 1011
13696 instr[24,21] = op : 0 ==> BR, 1 => BLR, 2 => RET, 3 => ERET, 4 => DRPS
13697 instr[20,16] = op2 : must be 11111
13698 instr[15,10] = op3 : must be 000000
13699 instr[4,0] = op4 : must be 00000. */
13701 uint32_t op = INSTR (24, 21);
13702 uint32_t op2 = INSTR (20, 16);
13703 uint32_t op3 = INSTR (15, 10);
13704 uint32_t op4 = INSTR (4, 0);
13706 NYI_assert (31, 25, 0x6b);
13708 if (op2 != 0x1F || op3 != 0 || op4 != 0)
13722 /* ERET and DRPS accept 0b11111 for rn = instr [4,0];
13723 anything else is unallocated. */
13724 uint32_t rn = INSTR (4, 0);
13729 if (op == 4 || op == 5)
13736 /* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
13737 but this may not be available. So instead we define the values we need
13738 here. */
13739 #define AngelSVC_Reason_Open 0x01
13740 #define AngelSVC_Reason_Close 0x02
13741 #define AngelSVC_Reason_Write 0x05
13742 #define AngelSVC_Reason_Read 0x06
13743 #define AngelSVC_Reason_IsTTY 0x09
13744 #define AngelSVC_Reason_Seek 0x0A
13745 #define AngelSVC_Reason_FLen 0x0C
13746 #define AngelSVC_Reason_Remove 0x0E
13747 #define AngelSVC_Reason_Rename 0x0F
13748 #define AngelSVC_Reason_Clock 0x10
13749 #define AngelSVC_Reason_Time 0x11
13750 #define AngelSVC_Reason_System 0x12
13751 #define AngelSVC_Reason_Errno 0x13
13752 #define AngelSVC_Reason_GetCmdLine 0x15
13753 #define AngelSVC_Reason_HeapInfo 0x16
13754 #define AngelSVC_Reason_ReportException 0x18
13755 #define AngelSVC_Reason_Elapsed 0x30
13759 handle_halt (sim_cpu *cpu, uint32_t val)
13761 uint64_t result = 0;
13763 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13766 TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
13767 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13768 sim_stopped, SIM_SIGTRAP);
13771 /* We have encountered an Angel SVC call. See if we can process it. */
13772 switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
13774 case AngelSVC_Reason_HeapInfo:
13776 /* Get the values. */
13777 uint64_t stack_top = aarch64_get_stack_start (cpu);
13778 uint64_t heap_base = aarch64_get_heap_start (cpu);
13780 /* Get the pointer. */
13781 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13782 ptr = aarch64_get_mem_u64 (cpu, ptr);
13784 /* Fill in the memory block. */
13785 /* Start addr of heap. */
13786 aarch64_set_mem_u64 (cpu, ptr + 0, heap_base);
13787 /* End addr of heap. */
13788 aarch64_set_mem_u64 (cpu, ptr + 8, stack_top);
13789 /* Lowest stack addr. */
13790 aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
13791 /* Initial stack addr. */
13792 aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);
13794 TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
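	/* Editor's note: a sketch of the 4 x 8 byte block filled in above
	   (field names ours).  The stack grows down towards the heap, so
	   its lowest usable address is the heap base:
	     +0   heap start    = heap_base
	     +8   heap end      = stack_top
	     +16  stack lowest  = heap_base
	     +24  stack initial = stack_top  */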
13798 case AngelSVC_Reason_Open:
13800 /* Get the pointer. */
13801 /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK); */
13802 /* FIXME: For now we just assume that we will only be asked
13803 to open the standard file descriptors. */
13807 TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
13811 case AngelSVC_Reason_Close:
13813 uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13814 TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
13819 case AngelSVC_Reason_Errno:
13821 TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
13824 case AngelSVC_Reason_Clock:
13826 #ifdef CLOCKS_PER_SEC
13827 (CLOCKS_PER_SEC >= 100)
13828 ? (clock () / (CLOCKS_PER_SEC / 100))
13829 : ((clock () * 100) / CLOCKS_PER_SEC)
13831 /* Presume unix... clock() returns microseconds. */
13835 TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
13838 case AngelSVC_Reason_GetCmdLine:
13840 /* Get the pointer. */
13841 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13842 ptr = aarch64_get_mem_u64 (cpu, ptr);
13844 /* FIXME: No command line for now. */
13845 aarch64_set_mem_u64 (cpu, ptr, 0);
13846 TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
13850 case AngelSVC_Reason_IsTTY:
13852 TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
13855 case AngelSVC_Reason_Write:
13857 /* Get the pointer. */
13858 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13859 /* Get the write control block. */
13860 uint64_t fd = aarch64_get_mem_u64 (cpu, ptr);
13861 uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
13862 uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);
13864 TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
13865 PRIx64 " on descriptor %" PRIx64,
13870 TRACE_SYSCALL (cpu,
13871 " AngelSVC: Write: Suspiciously long write: %ld",
13873 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13874 sim_stopped, SIM_SIGBUS);
13878 printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
13882 TRACE (cpu, 0, "\n");
13883 sim_io_eprintf (CPU_STATE (cpu), "%.*s",
13884 (int) len, aarch64_get_mem_ptr (cpu, buf));
13885 TRACE (cpu, 0, "\n");
13889 TRACE_SYSCALL (cpu,
13890 " AngelSVC: Write: Unexpected file handle: %d",
13892 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13893 sim_stopped, SIM_SIGABRT);
13898 case AngelSVC_Reason_ReportException:
13900 /* Get the pointer. */
13901 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13902 /* ptr = aarch64_get_mem_u64 (cpu, ptr); */
13903 uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
13904 uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);
13906 TRACE_SYSCALL (cpu,
13907 "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
13910 if (type == 0x20026)
13911 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13912 sim_exited, state);
13914 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13915 sim_stopped, SIM_SIGINT);
13919 case AngelSVC_Reason_Read:
13920 case AngelSVC_Reason_FLen:
13921 case AngelSVC_Reason_Seek:
13922 case AngelSVC_Reason_Remove:
13923 case AngelSVC_Reason_Time:
13924 case AngelSVC_Reason_System:
13925 case AngelSVC_Reason_Rename:
13926 case AngelSVC_Reason_Elapsed:
13928 TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
13929 aarch64_get_reg_u32 (cpu, 0, NO_SP));
13930 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13931 sim_stopped, SIM_SIGTRAP);
13934 aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
13938 dexExcpnGen (sim_cpu *cpu)
13940 /* instr[31:24] = 11010100
13941 instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
13942 010 ==> HLT, 101 ==> DBG GEN EXCPN
13943 instr[20,5] = imm16
13944 instr[4,2] = opc2 000 ==> OK, ow ==> UNALLOC
13945 instr[1,0] = LL : discriminates opc */
13947 uint32_t opc = INSTR (23, 21);
13948 uint32_t imm16 = INSTR (20, 5);
13949 uint32_t opc2 = INSTR (4, 2);
13952 NYI_assert (31, 24, 0xd4);
13959 /* We only implement HLT and BRK for now. */
13960 if (opc == 1 && LL == 0)
13962 TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
13963 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13964 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13967 if (opc == 2 && LL == 0)
13968 handle_halt (cpu, imm16);
13970 else if (opc == 0 || opc == 5)
13977 /* Stub for accessing system registers. */
13980 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13981 unsigned crm, unsigned op2)
13983 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
13984 /* DCZID_EL0 - the Data Cache Zero ID register.
13985 We do not support DC ZVA at the moment, so
13986 we return a value with the disable bit set.
13987 We implement support for the DCZID register since
13988 it is used by the C library's memset function. */
13989 return ((uint64_t) 1) << 4;
13991 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
13992 /* Cache Type Register. */
13993 return 0x80008000UL;
13995 if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
13996 /* TPIDR_EL0 - thread pointer id. */
13997 return aarch64_get_thread_id (cpu);
13999 if (op1 == 3 && crm == 4 && op2 == 0)
14000 return aarch64_get_FPCR (cpu);
14002 if (op1 == 3 && crm == 4 && op2 == 1)
14003 return aarch64_get_FPSR (cpu);
14005 else if (op1 == 3 && crm == 2 && op2 == 0)
14006 return aarch64_get_CPSR (cpu);
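/* Editor's note: a worked example, assuming the architected encoding of
   DCZID_EL0 (op0 = 3, op1 = 3, CRn = 0, CRm = 0, op2 = 7).  The C
   library's memset probes it with "mrs xN, dczid_el0"; do_mrs below
   routes that here, and the 1 << 4 return sets the DZP bit, telling
   the library that DC ZVA must not be used.  */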
14012 system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
14013 unsigned crm, unsigned op2, uint64_t val)
14015 if (op1 == 3 && crm == 4 && op2 == 0)
14016 aarch64_set_FPCR (cpu, val);
14018 else if (op1 == 3 && crm == 4 && op2 == 1)
14019 aarch64_set_FPSR (cpu, val);
14021 else if (op1 == 3 && crm == 2 && op2 == 0)
14022 aarch64_set_CPSR (cpu, val);
14029 do_mrs (sim_cpu *cpu)
14031 /* instr[31:20] = 1101 0101 0011
14038 unsigned sys_op0 = INSTR (19, 19) + 2;
14039 unsigned sys_op1 = INSTR (18, 16);
14040 unsigned sys_crn = INSTR (15, 12);
14041 unsigned sys_crm = INSTR (11, 8);
14042 unsigned sys_op2 = INSTR (7, 5);
14043 unsigned rt = INSTR (4, 0);
14045 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
14046 aarch64_set_reg_u64 (cpu, rt, NO_SP,
14047 system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2));
14051 do_MSR_immediate (sim_cpu *cpu)
14053 /* instr[31:19] = 1101 0101 0000 0
14055 instr[15,12] = 0100
14058 instr[4,0] = 1 1111 */
14060 unsigned op1 = INSTR (18, 16);
14061 /*unsigned crm = INSTR (11, 8);*/
14062 unsigned op2 = INSTR (7, 5);
14064 NYI_assert (31, 19, 0x1AA0);
14065 NYI_assert (15, 12, 0x4);
14066 NYI_assert (4, 0, 0x1F);
14071 HALT_NYI; /* set SPSel. */
14078 HALT_NYI; /* set DAIFset. */
14080 HALT_NYI; /* set DAIFclr. */
14089 do_MSR_reg (sim_cpu *cpu)
14091 /* instr[31:20] = 1101 0101 0001
14099 unsigned sys_op0 = INSTR (19, 19) + 2;
14100 unsigned sys_op1 = INSTR (18, 16);
14101 unsigned sys_crn = INSTR (15, 12);
14102 unsigned sys_crm = INSTR (11, 8);
14103 unsigned sys_op2 = INSTR (7, 5);
14104 unsigned rt = INSTR (4, 0);
14106 NYI_assert (31, 20, 0xD51);
14108 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
14109 system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
14110 aarch64_get_reg_u64 (cpu, rt, NO_SP));
14114 do_SYS (sim_cpu *cpu)
14116 /* instr[31,19] = 1101 0101 0000 1
14122 NYI_assert (31, 19, 0x1AA1);
14124 /* FIXME: For now we just silently accept system ops. */
14128 dexSystem (sim_cpu *cpu)
14130 /* instr[31:22] = 1101 0101 00
14137 instr[4,0] = uimm5 */
14139 /* We are interested in HINT, DSB, DMB and ISB
14141 Hint #0 encodes NOOP (this is the only hint we care about)
14142 L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111,
14143 CRm:op2 == 0000 000 ==> NOP; we also execute any other hint as a NOP
14145 DSB, DMB, ISB are data synchronization barrier, data memory
14146 barrier and instruction synchronization barrier, respectively, where
14148 L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
14149 op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
14150 CRm<3:2> ==> domain, CRm<1:0> ==> types,
14151 domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
14152 10 ==> InnerShareable, 11 ==> FullSystem
14153 types : 01 ==> Reads, 10 ==> Writes,
14154 11 ==> All, 00 ==> All (domain == FullSystem). */
14156 unsigned rt = INSTR (4, 0);
14158 NYI_assert (31, 22, 0x354);
14160 switch (INSTR (21, 12))
14165 /* Treat as NOP when CRm != 0000, or when
14166 CRm == 0000 AND (op2 == 000 OR op2 > 101). */
14167 uint32_t crm = INSTR (11, 8);
14168 uint32_t op2 = INSTR (7, 5);
14170 if (crm != 0 || (op2 == 0 || op2 > 5))
14172 /* Actually call nop method so we can reimplement it later. */
14181 uint32_t op2 = INSTR (7, 5);
14186 case 4: dsb (cpu); return;
14187 case 5: dmb (cpu); return;
14188 case 6: isb (cpu); return;
14189 default: HALT_UNALLOC;
14200 do_SYS (cpu); /* DC is an alias of SYS. */
14204 if (INSTR (21, 20) == 0x1)
14206 else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
14207 do_MSR_immediate (cpu);
14215 dexBr (sim_cpu *cpu)
14217 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
14218 assert group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
14219 bits [31,29] of a BrExSys are the secondary dispatch vector. */
14220 uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
14225 return dexBranchImmediate (cpu);
14227 case BR_IMMCMP_001:
14228 /* Compare has bit 25 clear while test has it set. */
14229 if (!INSTR (25, 25))
14230 dexCompareBranchImmediate (cpu);
14232 dexTestBranchImmediate (cpu);
14235 case BR_IMMCOND_010:
14236 /* This is a conditional branch if bit 25 is clear otherwise
14237 unallocated. */
14238 if (!INSTR (25, 25))
14239 dexCondBranchImmediate (cpu);
14244 case BR_UNALLOC_011:
14248 dexBranchImmediate (cpu);
14251 case BR_IMMCMP_101:
14252 /* Compare has bit 25 clear while test has it set. */
14253 if (!INSTR (25, 25))
14254 dexCompareBranchImmediate (cpu);
14256 dexTestBranchImmediate (cpu);
14260 /* Unconditional branch reg has bit 25 set. */
14261 if (INSTR (25, 25))
14262 dexBranchRegister (cpu);
14264 /* This includes Excpn Gen, System and unalloc operations.
14265 We need to decode the Excpn Gen operation BRK so we can plant
14266 debugger entry points.
14267 Excpn Gen operations have instr [24] = 0.
14268 We also need to decode at least one of the System operations, NOP,
14269 which is an alias for HINT #0.
14270 System operations have instr [24,22] = 100. */
14271 else if (INSTR (24, 24) == 0)
14274 else if (INSTR (24, 22) == 4)
14282 case BR_UNALLOC_111:
14286 /* Should never reach here. */
14292 aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
14294 /* We need to check if gdb wants to break in here. */
14295 /* checkBreak (cpu); */
14297 uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
14301 case GROUP_PSEUDO_0000: dexPseudo (cpu); break;
14302 case GROUP_LDST_0100: dexLdSt (cpu); break;
14303 case GROUP_DPREG_0101: dexDPReg (cpu); break;
14304 case GROUP_LDST_0110: dexLdSt (cpu); break;
14305 case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break;
14306 case GROUP_DPIMM_1000: dexDPImm (cpu); break;
14307 case GROUP_DPIMM_1001: dexDPImm (cpu); break;
14308 case GROUP_BREXSYS_1010: dexBr (cpu); break;
14309 case GROUP_BREXSYS_1011: dexBr (cpu); break;
14310 case GROUP_LDST_1100: dexLdSt (cpu); break;
14311 case GROUP_DPREG_1101: dexDPReg (cpu); break;
14312 case GROUP_LDST_1110: dexLdSt (cpu); break;
14313 case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break;
14315 case GROUP_UNALLOC_0001:
14316 case GROUP_UNALLOC_0010:
14317 case GROUP_UNALLOC_0011:
14321 /* Should never reach here. */
14327 aarch64_step (sim_cpu *cpu)
14329 uint64_t pc = aarch64_get_PC (cpu);
14331 if (pc == TOP_LEVEL_RETURN_PC)
14334 aarch64_set_next_PC (cpu, pc + 4);
14336 /* Code is always little-endian. */
14337 sim_core_read_buffer (CPU_STATE (cpu), cpu, read_map,
14338 & aarch64_get_instr (cpu), pc, 4);
14339 aarch64_get_instr (cpu) = endian_le2h_4 (aarch64_get_instr (cpu));
14341 TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
14342 aarch64_get_instr (cpu));
14343 TRACE_DISASM (cpu, pc);
14345 aarch64_decode_and_execute (cpu, pc);
14351 aarch64_run (SIM_DESC sd)
14353 sim_cpu *cpu = STATE_CPU (sd, 0);
14355 while (aarch64_step (cpu))
14357 aarch64_update_PC (cpu);
14359 if (sim_events_tick (sd))
14360 sim_events_process (sd);
14363 sim_engine_halt (sd, cpu, NULL, aarch64_get_PC (cpu),
14364 sim_exited, aarch64_get_reg_s32 (cpu, R0, NO_SP));
14368 aarch64_init (sim_cpu *cpu, uint64_t pc)
14370 uint64_t sp = aarch64_get_stack_start (cpu);
14372 /* Install SP, FP and PC and set LR to -20 (TOP_LEVEL_RETURN_PC)
14373 so we can detect a top-level return. */
14374 aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
14375 aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
14376 aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
14377 aarch64_set_next_PC (cpu, pc);
14378 aarch64_update_PC (cpu);
14379 aarch64_init_LIT_table ();