1 /* simulator.c -- Interface for the AArch64 simulator.
3 Copyright (C) 2015-2017 Free Software Foundation, Inc.
5 Contributed by Red Hat.
7 This file is part of GDB.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>. */
26 #include <sys/types.h>
31 #include "simulator.h"
/* Convenience wrappers for testing CPSR condition/status flag bits of
   the implicit local `cpu'.  */
38 #define TST(_flag)   (aarch64_test_CPSR_bit (cpu, _flag))
39 #define IS_SET(_X)   (TST (( _X )) ? 1 : 0)
40 #define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)
42 /* Space saver macro.  */
/* Extract instruction bits [HIGH:LOW] of the current instruction word.  */
43 #define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))
/* Halt macros.  HALT_UNALLOC traces the faulting PC and stops the
   simulation with SIGILL on an unallocated (invalid) encoding; the
   second body below stops with SIGABRT on an unimplemented encoding
   (NOTE(review): its '#define' line -- presumably HALT_NYI -- and
   several continuation lines are not visible in this excerpt, so no
   comments are inserted inside the backslash-continued bodies).
   NYI_assert halts when instruction bits [HI:LO] do not decode to
   EXPECTED.  */
45 #define HALT_UNALLOC \
48       TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
50 	     "Unallocated instruction detected at sim line %d," \
51 	     " exe addr %" PRIx64, \
52 	     __LINE__, aarch64_get_PC (cpu)); \
53       sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
54 		       sim_stopped, SIM_SIGILL); \
61       TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
63 	     "Unimplemented instruction detected at sim line %d," \
64 	     " exe addr %" PRIx64, \
65 	     __LINE__, aarch64_get_PC (cpu)); \
66       if (! TRACE_ANY_P (cpu)) \
67 	sim_io_eprintf (CPU_STATE (cpu), "SIM Error: Unimplemented instruction: %#08x\n", \
68 			aarch64_get_instr (cpu)); \
69       sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
70 		       sim_stopped, SIM_SIGABRT); \
74 #define NYI_assert(HI, LO, EXPECTED) \
77       if (INSTR ((HI), (LO)) != (EXPECTED)) \
82 /* Helper functions used by expandLogicalImmediate.  */
84 /* for i = 1, ... N result<i-1> = 1 other bits are zero  */
85 static inline uint64_t
/* NOTE(review): the line naming this helper (presumably pickbits64,
   judging by the call below) is missing from this excerpt.  The body
   produces a mask of the N low-order bits; N == 64 is special-cased
   because `1UL << 64' would be undefined behavior.  */
88   return (N == 64 ? (uint64_t)-1UL : ((1UL << N) - 1));
91 /* result<0> to val<N>  */
92 static inline uint64_t
93 pickbit (uint64_t val, int N)
/* Extract bit N of VAL into bit 0 of the result.  */
95   return pickbits64 (val, N, N);
/* Expand the (N, immr = R, imms = S) fields of a logical-immediate
   instruction into the full 64-bit immediate: S+1 consecutive ones,
   rotated right by R, then replicated up to 64 bits (the AArch64
   DecodeBitMasks operation).  S == simd_size - 1 is a reserved
   encoding and is rejected.  NOTE(review): several lines (return type,
   declarations, braces and the switch heads) are not visible in this
   excerpt.  */
99 expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
105   /* The immediate value is S+1 bits to 1, left rotated by SIMDsize - R
106      (in other words, right rotated by R), then replicated. */
110       mask = 0xffffffffffffffffull;
116 	case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32; break;
117 	case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
118 	case 0x30 ... 0x37: /* 110xxx */ simd_size = 8; S &= 0x7; break;
119 	case 0x38 ... 0x3b: /* 1110xx */ simd_size = 4; S &= 0x3; break;
120 	case 0x3c ... 0x3d: /* 11110x */ simd_size = 2; S &= 0x1; break;
123       mask = (1ull << simd_size) - 1;
124       /* Top bits are IGNORED.  */
128   /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected.  */
129   if (S == simd_size - 1)
132   /* S+1 consecutive bits to 1.  */
133   /* NOTE: S can't be 63 due to detection above.  */
134   imm = (1ull << (S + 1)) - 1;
136   /* Rotate to the left by simd_size - R.  */
138     imm = ((imm << (simd_size - R)) & mask) | (imm >> R);
140   /* Replicate the value according to SIMD size.  */
/* Each case deliberately falls through to the next, doubling the
   replicated pattern at every step.  */
143     case 2: imm = (imm << 2) | imm;
      /* Fall through.  */
144     case 4: imm = (imm << 4) | imm;
      /* Fall through.  */
145     case 8: imm = (imm << 8) | imm;
      /* Fall through.  */
146     case 16: imm = (imm << 16) | imm;
      /* Fall through.  */
147     case 32: imm = (imm << 32) | imm;
      /* Fall through.  */
155 /* Instr[22,10] encodes N immr and imms. we want a lookup table
156    for each possible combination i.e. 13 bits worth of int entries.  */
157 #define LI_TABLE_SIZE (1 << 13)
158 static uint64_t LITable[LI_TABLE_SIZE];
/* Precompute the expanded logical immediate for every possible 13-bit
   (N:immr:imms) combination so decode can use a table lookup.
   Presumably called once at simulator start-up -- confirm caller.  */
161 aarch64_init_LIT_table (void)
165   for (index = 0; index < LI_TABLE_SIZE; index++)
167       uint32_t N = uimm (index, 12, 12);
168       uint32_t immr = uimm (index, 11, 6);
169       uint32_t imms = uimm (index, 5, 0);
171       LITable [index] = expand_logical_immediate (imms, immr, N);
/* Decode and handle the simulator's NOTIFY pseudo instruction.  The
   callbacks into the Java runtime are all stubbed out (commented)
   below, so currently this only traces the notification type.  */
176 dexNotify (sim_cpu *cpu)
178   /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
179      2 ==> exit Java, 3 ==> start next bytecode.  */
180   uint32_t type = INSTR (14, 0);
182   TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);
187       /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
188 	 aarch64_get_reg_u64 (cpu, R22, 0)); */
191       /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
192 	 aarch64_get_reg_u64 (cpu, R22, 0)); */
195       /* aarch64_notifyMethodExit (); */
198       /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
199 	 aarch64_get_reg_u64 (cpu, R22, 0)); */
204 /* secondary decode within top level groups  */
/* Decode the simulator's own pseudo instructions.  HALT stops the
   simulation with SIGTRAP; CALLOUT/CALLOUTR are not supported and halt
   with SIGABRT; NOTIFY is forwarded to dexNotify (presumably -- the
   dispatch tail is not visible in this excerpt).  */
207 dexPseudo (sim_cpu *cpu)
209   /* assert instr[28,27] = 00
211      We provide 2 pseudo instructions:
213      HALT stops execution of the simulator causing an immediate
214      return to the x86 code which entered it.
216      CALLOUT initiates recursive entry into x86 code. A register
217      argument holds the address of the x86 routine. Immediate
218      values in the instruction identify the number of general
219      purpose and floating point register arguments to be passed
220      and the type of any value to be returned.  */
222   uint32_t PSEUDO_HALT = 0xE0000000U;
223   uint32_t PSEUDO_CALLOUT = 0x00018000U;
224   uint32_t PSEUDO_CALLOUTR = 0x00018001U;
225   uint32_t PSEUDO_NOTIFY = 0x00014000U;
228   if (aarch64_get_instr (cpu) == PSEUDO_HALT)
230       TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
231       sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
232 		       sim_stopped, SIM_SIGTRAP);
235   dispatch = INSTR (31, 15);
237   /* We do not handle callouts at the moment.  */
238   if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
240       TRACE_EVENTS (cpu, " Callout");
241       sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
242 		       sim_stopped, SIM_SIGABRT);
245   else if (dispatch == PSEUDO_NOTIFY)
252 /* Load-store single register (unscaled offset)
253    These instructions employ a base register plus an unscaled signed
256    N.B. the base register (source) can be Xn or SP. all other
257    registers may not be SP.  */
/* Each LDUR* helper below computes base (rn, SP allowed) + signed
   9-bit offset, loads the value and writes it to rt/rd (SP not
   allowed).  Zero- vs sign-extension to the destination width follows
   the instruction mnemonic.  */
259 /* 32 bit load 32 bit unscaled signed 9 bit.  */
261 ldur32 (sim_cpu *cpu, int32_t offset)
263   unsigned rn = INSTR (9, 5);
264   unsigned rt = INSTR (4, 0);
266   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
267   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
268 		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
272 /* 64 bit load 64 bit unscaled signed 9 bit.  */
274 ldur64 (sim_cpu *cpu, int32_t offset)
276   unsigned rn = INSTR (9, 5);
277   unsigned rt = INSTR (4, 0);
279   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
280   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
281 		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
285 /* 32 bit load zero-extended byte unscaled signed 9 bit.  */
287 ldurb32 (sim_cpu *cpu, int32_t offset)
289   unsigned rn = INSTR (9, 5);
290   unsigned rt = INSTR (4, 0);
292   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
293   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
294 		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
298 /* 32 bit load sign-extended byte unscaled signed 9 bit.  */
300 ldursb32 (sim_cpu *cpu, int32_t offset)
302   unsigned rn = INSTR (9, 5);
303   unsigned rt = INSTR (4, 0);
305   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* Sign-extend to 32 bits only: the (uint32_t) cast keeps the upper
   half of the 64-bit destination zero, as LDURSB (32-bit) requires.  */
306   aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
307 		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
311 /* 64 bit load sign-extended byte unscaled signed 9 bit.  */
313 ldursb64 (sim_cpu *cpu, int32_t offset)
315   unsigned rn = INSTR (9, 5);
316   unsigned rt = INSTR (4, 0);
318   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
319   aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
320 		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
324 /* 32 bit load zero-extended short unscaled signed 9 bit  */
326 ldurh32 (sim_cpu *cpu, int32_t offset)
328   unsigned rn = INSTR (9, 5);
329   unsigned rd = INSTR (4, 0);
331   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
332   aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
333 		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
337 /* 32 bit load sign-extended short unscaled signed 9 bit  */
339 ldursh32 (sim_cpu *cpu, int32_t offset)
341   unsigned rn = INSTR (9, 5);
342   unsigned rd = INSTR (4, 0);
344   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
345   aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
346 		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
350 /* 64 bit load sign-extended short unscaled signed 9 bit  */
352 ldursh64 (sim_cpu *cpu, int32_t offset)
354   unsigned rn = INSTR (9, 5);
355   unsigned rt = INSTR (4, 0);
357   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
358   aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
359 		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
363 /* 64 bit load sign-extended word unscaled signed 9 bit */
365 ldursw (sim_cpu *cpu, int32_t offset)
367 unsigned rn = INSTR (9, 5);
368 unsigned rd = INSTR (4, 0);
370 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
371 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s32
372 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
376 /* N.B. with stores the value in source is written to the address
377    identified by source2 modified by offset.  */
/* Each STUR* helper below writes the low bits of rd (SP not allowed)
   to base (rn, SP allowed) + signed 9-bit offset.  */
379 /* 32 bit store 32 bit unscaled signed 9 bit.  */
381 stur32 (sim_cpu *cpu, int32_t offset)
383   unsigned rn = INSTR (9, 5);
384   unsigned rd = INSTR (4, 0);
386   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
387   aarch64_set_mem_u32 (cpu,
388 		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
389 		       aarch64_get_reg_u32 (cpu, rd, NO_SP));
392 /* 64 bit store 64 bit unscaled signed 9 bit  */
394 stur64 (sim_cpu *cpu, int32_t offset)
396   unsigned rn = INSTR (9, 5);
397   unsigned rd = INSTR (4, 0);
399   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
400   aarch64_set_mem_u64 (cpu,
401 		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
402 		       aarch64_get_reg_u64 (cpu, rd, NO_SP));
405 /* 32 bit store byte unscaled signed 9 bit  */
407 sturb (sim_cpu *cpu, int32_t offset)
409   unsigned rn = INSTR (9, 5);
410   unsigned rd = INSTR (4, 0);
412   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
413   aarch64_set_mem_u8 (cpu,
414 		      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
415 		      aarch64_get_reg_u8 (cpu, rd, NO_SP));
418 /* 32 bit store short unscaled signed 9 bit  */
420 sturh (sim_cpu *cpu, int32_t offset)
422   unsigned rn = INSTR (9, 5);
423   unsigned rd = INSTR (4, 0);
425   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
426   aarch64_set_mem_u16 (cpu,
427 		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
428 		       aarch64_get_reg_u16 (cpu, rd, NO_SP));
431 /* Load single register pc-relative label
432    Offset is a signed 19 bit immediate count in words
/* PC-relative loads: target address is PC + offset * 4 (offset is in
   words).  NOTE(review): the memory-accessor name line between each
   set_reg call and its address argument is missing from this excerpt
   (e.g. presumably aarch64_get_mem_u32 for the 32-bit variant).  */
435 /* 32 bit pc-relative load */
437 ldr32_pcrel (sim_cpu *cpu, int32_t offset)
439   unsigned rd = INSTR (4, 0);
441   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
442   aarch64_set_reg_u64 (cpu, rd, NO_SP,
444 		       (cpu, aarch64_get_PC (cpu) + offset * 4));
447 /* 64 bit pc-relative load */
449 ldr_pcrel (sim_cpu *cpu, int32_t offset)
451   unsigned rd = INSTR (4, 0);
453   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
454   aarch64_set_reg_u64 (cpu, rd, NO_SP,
456 		       (cpu, aarch64_get_PC (cpu) + offset * 4));
459 /* sign extended 32 bit pc-relative load */
461 ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
463   unsigned rd = INSTR (4, 0);
465   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
466   aarch64_set_reg_u64 (cpu, rd, NO_SP,
468 		       (cpu, aarch64_get_PC (cpu) + offset * 4));
471 /* float pc-relative load */
473 fldrs_pcrel (sim_cpu *cpu, int32_t offset)
475   unsigned int rd = INSTR (4, 0);
477   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
478   aarch64_set_vec_u32 (cpu, rd, 0,
480 		       (cpu, aarch64_get_PC (cpu) + offset * 4));
483 /* double pc-relative load */
485 fldrd_pcrel (sim_cpu *cpu, int32_t offset)
487   unsigned int st = INSTR (4, 0);
489   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
490   aarch64_set_vec_u64 (cpu, st, 0,
492 		       (cpu, aarch64_get_PC (cpu) + offset * 4));
495 /* long double pc-relative load.  */
497 fldrq_pcrel (sim_cpu *cpu, int32_t offset)
499   unsigned int st = INSTR (4, 0);
500   uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
503   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
504   aarch64_get_mem_long_double (cpu, addr, & a);
505   aarch64_set_FP_long_double (cpu, st, a);
508 /* This can be used to scale an offset by applying
509    the requisite shift. the second argument is either
512 #define SCALE(_offset, _elementSize) \
513     ((_offset) << ScaleShift ## _elementSize)
515 /* This can be used to optionally scale a register derived offset
516    by applying the requisite shift as indicated by the Scaling
517    argument. The second argument is either Byte, Short, Word
518    or Long. The third argument is either Scaled or Unscaled.
519    N.B. when _Scaling is Scaled the shift gets ANDed with
520    all 1s while when it is Unscaled it gets ANDed with 0.  */
522 #define OPT_SCALE(_offset, _elementType, _Scaling) \
523   ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))
525 /* This can be used to zero or sign extend a 32 bit register derived
526    value to a 64 bit value. the first argument must be the value as
527    a uint32_t and the second must be either UXTW or SXTW. The result
528    is returned as an int64_t.  */
530 static inline int64_t
531 extend (uint32_t value, Extension extension)
539   /* A branchless variant of this ought to be possible.  */
/* UXTW/NoExtension: zero-extend; otherwise sign-extend.  The
   sign-extending branch is not visible in this excerpt.  */
540   if (extension == UXTW || extension == NoExtension)
547 /* Scalar Floating Point
549    FP load/store single register (4 addressing modes)
551    N.B. the base register (source) can be the stack pointer.
552    The secondary source register (source2) can only be an Xn register.  */
554 /* Load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
/* Loads a 32-bit value into vector register st element 0.  The pre-
   vs post-index address adjustment lines are not visible in this
   excerpt; on writeback the (adjusted) address is written back to rn.  */
556 fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
558   unsigned rn = INSTR (9, 5);
559   unsigned st = INSTR (4, 0);
560   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
565   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
566   aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));
570   if (wb != NoWriteBack)
571     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
574 /* Load 8 bit with unsigned 12 bit offset. */
576 fldrb_abs (sim_cpu *cpu, uint32_t offset)
578 unsigned rd = INSTR (4, 0);
579 unsigned rn = INSTR (9, 5);
580 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
582 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
583 aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
/* FP loads with scaled unsigned 12-bit immediate offsets: address is
   base (rn, SP allowed) + offset << log2(element size).  */
586 /* Load 16 bit scaled unsigned 12 bit.  */
588 fldrh_abs (sim_cpu *cpu, uint32_t offset)
590   unsigned rd = INSTR (4, 0);
591   unsigned rn = INSTR (9, 5);
592   uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);
594   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
595   aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
598 /* Load 32 bit scaled unsigned 12 bit.  */
600 fldrs_abs (sim_cpu *cpu, uint32_t offset)
602   unsigned rd = INSTR (4, 0);
603   unsigned rn = INSTR (9, 5);
604   uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);
606   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
607   aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
610 /* Load 64 bit scaled unsigned 12 bit.  */
612 fldrd_abs (sim_cpu *cpu, uint32_t offset)
614   unsigned rd = INSTR (4, 0);
615   unsigned rn = INSTR (9, 5);
616   uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);
618   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
619   aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
622 /* Load 128 bit scaled unsigned 12 bit.  */
624 fldrq_abs (sim_cpu *cpu, uint32_t offset)
626   unsigned rd = INSTR (4, 0);
627   unsigned rn = INSTR (9, 5);
628   uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
630   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* 128-bit load is performed as two 64-bit halves.  */
631   aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
632   aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
635 /* Load 32 bit scaled or unscaled zero- or sign-extended
636    32-bit register offset.  */
638 fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
640   unsigned rm = INSTR (20, 16);
641   unsigned rn = INSTR (9, 5);
642   unsigned st = INSTR (4, 0);
643   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
644   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
645   uint64_t displacement = OPT_SCALE (extended, 32, scaling);
647   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
648   aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
649 		       (cpu, address + displacement));
652 /* Load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
654 fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
656   unsigned rn = INSTR (9, 5);
657   unsigned st = INSTR (4, 0);
658   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
663   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
664   aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));
669   if (wb != NoWriteBack)
670     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
673 /* Load 64 bit scaled or unscaled zero- or sign-extended 32-bit register offset.  */
675 fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
677   unsigned rm = INSTR (20, 16);
678   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
679   uint64_t displacement = OPT_SCALE (extended, 64, scaling);
/* NOTE(review): displacement is uint64_t but fldrd_wb takes an
   int32_t offset, so large displacements are silently truncated --
   confirm intended.  The same applies to fldrq_scale_ext below.  */
681   fldrd_wb (cpu, displacement, NoWriteBack);
684 /* Load 128 bit unscaled signed 9 bit with pre- or post-writeback.  */
686 fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
689   unsigned rn = INSTR (9, 5);
690   unsigned st = INSTR (4, 0);
691   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
696   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
697   aarch64_get_mem_long_double (cpu, address, & a);
698   aarch64_set_FP_long_double (cpu, st, a);
703   if (wb != NoWriteBack)
704     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
707 /* Load 128 bit scaled or unscaled zero- or sign-extended 32-bit register offset */
709 fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
711   unsigned rm = INSTR (20, 16);
712   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
713   uint64_t displacement = OPT_SCALE (extended, 128, scaling);
715   fldrq_wb (cpu, displacement, NoWriteBack);
720    load-store single register
721    There are four addressing modes available here which all employ a
722    64 bit source (base) register.
724    N.B. the base register (source) can be the stack pointer.
725    The secondary source register (source2)can only be an Xn register.
727    Scaled, 12-bit, unsigned immediate offset, without pre- and
729    Unscaled, 9-bit, signed immediate offset with pre- or post-index
731    scaled or unscaled 64-bit register offset.
732    scaled or unscaled 32-bit extended register offset.
734    All offsets are assumed to be raw from the decode i.e. the
735    simulator is expected to adjust scaled offsets based on the
736    accessed data size with register or extended register offset
737    versions the same applies except that in the latter case the
738    operation may also require a sign extend.
740    A separate method is provided for each possible addressing mode.  */
742 /* 32 bit load 32 bit scaled unsigned 12 bit  */
744 ldr32_abs (sim_cpu *cpu, uint32_t offset)
746   unsigned rn = INSTR (9, 5);
747   unsigned rt = INSTR (4, 0);
749   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
750   /* The target register may not be SP but the source may be.  */
751   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
752 		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
753 			+ SCALE (offset, 32)));
756 /* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
/* rn == rt with writeback is an unallocated form (the halting branch
   is among the lines not visible in this excerpt).  */
758 ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
760   unsigned rn = INSTR (9, 5);
761   unsigned rt = INSTR (4, 0);
764   if (rn == rt && wb != NoWriteBack)
767   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
772   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
773   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
778   if (wb != NoWriteBack)
779     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
782 /* 32 bit load 32 bit scaled or unscaled
783    zero- or sign-extended 32-bit register offset  */
785 ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
787   unsigned rm = INSTR (20, 16);
788   unsigned rn = INSTR (9, 5);
789   unsigned rt = INSTR (4, 0);
790   /* rn may reference SP, rm and rt must reference ZR  */
792   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
793   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
794   uint64_t displacement = OPT_SCALE (extended, 32, scaling);
796   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
797   aarch64_set_reg_u64 (cpu, rt, NO_SP,
798 		       aarch64_get_mem_u32 (cpu, address + displacement));
801 /* 64 bit load 64 bit scaled unsigned 12 bit  */
803 ldr_abs (sim_cpu *cpu, uint32_t offset)
805   unsigned rn = INSTR (9, 5);
806   unsigned rt = INSTR (4, 0);
808   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
809   /* The target register may not be SP but the source may be.  */
810   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
811 		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
812 			+ SCALE (offset, 64)));
815 /* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
817 ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
819   unsigned rn = INSTR (9, 5);
820   unsigned rt = INSTR (4, 0);
/* rn == rt with writeback is rejected (halting branch not visible in
   this excerpt).  */
823   if (rn == rt && wb != NoWriteBack)
826   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
831   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
832   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
837   if (wb != NoWriteBack)
838     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
841 /* 64 bit load 64 bit scaled or unscaled zero-
842    or sign-extended 32-bit register offset.  */
844 ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
846   unsigned rm = INSTR (20, 16);
847   unsigned rn = INSTR (9, 5);
848   unsigned rt = INSTR (4, 0);
849   /* rn may reference SP, rm and rt must reference ZR  */
851   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
852   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
853   uint64_t displacement = OPT_SCALE (extended, 64, scaling);
855   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
856   aarch64_set_reg_u64 (cpu, rt, NO_SP,
857 		       aarch64_get_mem_u64 (cpu, address + displacement));
860 /* 32 bit load zero-extended byte scaled unsigned 12 bit.  */
862 ldrb32_abs (sim_cpu *cpu, uint32_t offset)
864   unsigned rn = INSTR (9, 5);
865   unsigned rt = INSTR (4, 0);
867   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
868   /* The target register may not be SP but the source may be
869      there is no scaling required for a byte load.  */
870   aarch64_set_reg_u64 (cpu, rt, NO_SP,
872 		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
875 /* 32 bit load zero-extended byte unscaled signed 9 bit with pre- or post-writeback.  */
877 ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
879   unsigned rn = INSTR (9, 5);
880   unsigned rt = INSTR (4, 0);
/* rn == rt with writeback is rejected (halting branch not visible in
   this excerpt).  */
883   if (rn == rt && wb != NoWriteBack)
886   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
891   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
892   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
897   if (wb != NoWriteBack)
898     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
901 /* 32 bit load zero-extended byte scaled or unscaled zero-
902    or sign-extended 32-bit register offset.  */
904 ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
906   unsigned rm = INSTR (20, 16);
907   unsigned rn = INSTR (9, 5);
908   unsigned rt = INSTR (4, 0);
909   /* rn may reference SP, rm and rt must reference ZR  */
911   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
912   int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
915   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
916   /* There is no scaling required for a byte load.  */
917   aarch64_set_reg_u64 (cpu, rt, NO_SP,
918 		       aarch64_get_mem_u8 (cpu, address + displacement));
921 /* 64 bit load sign-extended byte unscaled signed 9 bit
922    with pre- or post-writeback.  */
924 ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
926   unsigned rn = INSTR (9, 5);
927   unsigned rt = INSTR (4, 0);
931   if (rn == rt && wb != NoWriteBack)
934   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
939   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
940   val = aarch64_get_mem_s8 (cpu, address);
941   aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
946   if (wb != NoWriteBack)
947     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
950 /* 64 bit load sign-extended byte scaled unsigned 12 bit.  */
/* Byte loads need no scaling, so this simply reuses the writeback
   variant with writeback disabled.  */
952   ldrsb_wb (cpu, offset, NoWriteBack);
957 /* 64 bit load sign-extended byte scaled or unscaled zero-
958    or sign-extended 32-bit register offset.  */
960 ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
962   unsigned rm = INSTR (20, 16);
963   unsigned rn = INSTR (9, 5);
964   unsigned rt = INSTR (4, 0);
965   /* rn may reference SP, rm and rt must reference ZR  */
967   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
968   int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
970   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
971   /* There is no scaling required for a byte load.  */
972   aarch64_set_reg_s64 (cpu, rt, NO_SP,
973 		       aarch64_get_mem_s8 (cpu, address + displacement));
976 /* 32 bit load zero-extended short scaled unsigned 12 bit.  */
978 ldrh32_abs (sim_cpu *cpu, uint32_t offset)
980   unsigned rn = INSTR (9, 5);
981   unsigned rt = INSTR (4, 0);
984   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
985   /* The target register may not be SP but the source may be.  */
986   val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
987 			     + SCALE (offset, 16));
988   aarch64_set_reg_u32 (cpu, rt, NO_SP, val);
991 /* 32 bit load zero-extended short unscaled signed 9 bit
992    with pre- or post-writeback.  */
994 ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
996   unsigned rn = INSTR (9, 5);
997   unsigned rt = INSTR (4, 0);
/* rn == rt with writeback is rejected (halting branch not visible in
   this excerpt).  */
1000   if (rn == rt && wb != NoWriteBack)
1003   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1008   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1009   aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
1014   if (wb != NoWriteBack)
1015     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1018 /* 32 bit load zero-extended short scaled or unscaled zero-
1019    or sign-extended 32-bit register offset.  */
1021 ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1023   unsigned rm = INSTR (20, 16);
1024   unsigned rn = INSTR (9, 5);
1025   unsigned rt = INSTR (4, 0);
1026   /* rn may reference SP, rm and rt must reference ZR  */
1028   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1029   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1030   uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1032   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1033   aarch64_set_reg_u32 (cpu, rt, NO_SP,
1034 		       aarch64_get_mem_u16 (cpu, address + displacement));
1037 /* 32 bit load sign-extended short scaled unsigned 12 bit.  */
1039 ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
1041   unsigned rn = INSTR (9, 5);
1042   unsigned rt = INSTR (4, 0);
1045   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1046   /* The target register may not be SP but the source may be.  */
1047   val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1048 			     + SCALE (offset, 16));
1049   aarch64_set_reg_s32 (cpu, rt, NO_SP, val);
1052 /* 32 bit load sign-extended short unscaled signed 9 bit
1053    with pre- or post-writeback.  */
1055 ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1057   unsigned rn = INSTR (9, 5);
1058   unsigned rt = INSTR (4, 0);
1061   if (rn == rt && wb != NoWriteBack)
1064   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1069   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1070   aarch64_set_reg_s32 (cpu, rt, NO_SP,
1071 		       (int32_t) aarch64_get_mem_s16 (cpu, address));
1076   if (wb != NoWriteBack)
1077     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1080 /* 32 bit load sign-extended short scaled or unscaled zero-
1081    or sign-extended 32-bit register offset.  */
1083 ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1085   unsigned rm = INSTR (20, 16);
1086   unsigned rn = INSTR (9, 5);
1087   unsigned rt = INSTR (4, 0);
1088   /* rn may reference SP, rm and rt must reference ZR  */
1090   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1091   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1092   uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1094   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1095   aarch64_set_reg_s32 (cpu, rt, NO_SP,
1096 		       (int32_t) aarch64_get_mem_s16
1097 		       (cpu, address + displacement));
1100 /* 64 bit load sign-extended short scaled unsigned 12 bit.  */
1102 ldrsh_abs (sim_cpu *cpu, uint32_t offset)
1104   unsigned rn = INSTR (9, 5);
1105   unsigned rt = INSTR (4, 0);
1108   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1109   /* The target register may not be SP but the source may be.  */
1110   val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1111 			     + SCALE (offset, 16));
1112   aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1115 /* 64 bit load sign-extended short unscaled signed 9 bit
1116    with pre- or post-writeback.  */
1118 ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1120   unsigned rn = INSTR (9, 5);
1121   unsigned rt = INSTR (4, 0);
/* rn == rt with writeback is rejected (halting branch not visible in
   this excerpt).  */
1125   if (rn == rt && wb != NoWriteBack)
1128   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1129   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1134   val = aarch64_get_mem_s16 (cpu, address);
1135   aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1140   if (wb != NoWriteBack)
1141     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1144 /* 64 bit load sign-extended short scaled or unscaled zero-
1145    or sign-extended 32-bit register offset.  */
1147 ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1149   unsigned rm = INSTR (20, 16);
1150   unsigned rn = INSTR (9, 5);
1151   unsigned rt = INSTR (4, 0);
1153   /* rn may reference SP, rm and rt must reference ZR  */
1155   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1156   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1157   uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1160   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1161   val = aarch64_get_mem_s16 (cpu, address + displacement);
1162   aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1165 /* 64 bit load sign-extended 32 bit scaled unsigned 12 bit. */
1167 ldrsw_abs (sim_cpu *cpu, uint32_t offset)
1169 unsigned rn = INSTR (9, 5);
1170 unsigned rt = INSTR (4, 0);
1173 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1174 val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1175 + SCALE (offset, 32));
1176 /* The target register may not be SP but the source may be. */
1177 return aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1180 /* 64 bit load sign-extended 32 bit unscaled signed 9 bit
1181    with pre- or post-writeback.  */
1183 ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1185   unsigned rn = INSTR (9, 5);
1186   unsigned rt = INSTR (4, 0);
/* rn == rt with writeback is rejected (halting branch not visible in
   this excerpt).  */
1189   if (rn == rt && wb != NoWriteBack)
1192   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1197   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1198   aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));
1203   if (wb != NoWriteBack)
1204     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1207 /* 64 bit load sign-extended 32 bit scaled or unscaled zero-
1208    or sign-extended 32-bit register offset.  */
1210 ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1212   unsigned rm = INSTR (20, 16);
1213   unsigned rn = INSTR (9, 5);
1214   unsigned rt = INSTR (4, 0);
1215   /* rn may reference SP, rm and rt must reference ZR  */
1217   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1218   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1219   uint64_t displacement = OPT_SCALE (extended, 32, scaling);
1221   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1222   aarch64_set_reg_s64 (cpu, rt, NO_SP,
1223 		       aarch64_get_mem_s32 (cpu, address + displacement));
1226 /* N.B. with stores the value in source is written to the
1227 address identified by source2 modified by source3/offset. */
1229 /* 32 bit store scaled unsigned 12 bit. */
1231 str32_abs (sim_cpu *cpu, uint32_t offset)
1233 unsigned rn = INSTR (9, 5);
1234 unsigned rt = INSTR (4, 0);
1236 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1237 /* The target register may not be SP but the source may be. */
1238 aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
1239 + SCALE (offset, 32)),
1240 aarch64_get_reg_u32 (cpu, rt, NO_SP));
1243 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
1245 str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1247 unsigned rn = INSTR (9, 5);
1248 unsigned rt = INSTR (4, 0);
1251 if (rn == rt && wb != NoWriteBack)
1254 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1258 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1259 aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));
1264 if (wb != NoWriteBack)
1265 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1268 /* 32 bit store scaled or unscaled zero- or
1269 sign-extended 32-bit register offset. */
1271 str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1273 unsigned rm = INSTR (20, 16);
1274 unsigned rn = INSTR (9, 5);
1275 unsigned rt = INSTR (4, 0);
1277 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1278 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1279 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
1281 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1282 aarch64_set_mem_u32 (cpu, address + displacement,
1283 aarch64_get_reg_u64 (cpu, rt, NO_SP));
1286 /* 64 bit store scaled unsigned 12 bit. */
1288 str_abs (sim_cpu *cpu, uint32_t offset)
1290 unsigned rn = INSTR (9, 5);
1291 unsigned rt = INSTR (4, 0);
1293 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1294 aarch64_set_mem_u64 (cpu,
1295 aarch64_get_reg_u64 (cpu, rn, SP_OK)
1296 + SCALE (offset, 64),
1297 aarch64_get_reg_u64 (cpu, rt, NO_SP));
1300 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
1302 str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1304 unsigned rn = INSTR (9, 5);
1305 unsigned rt = INSTR (4, 0);
1308 if (rn == rt && wb != NoWriteBack)
1311 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1316 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1317 aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));
1322 if (wb != NoWriteBack)
1323 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1326 /* 64 bit store scaled or unscaled zero-
1327 or sign-extended 32-bit register offset. */
1329 str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1331 unsigned rm = INSTR (20, 16);
1332 unsigned rn = INSTR (9, 5);
1333 unsigned rt = INSTR (4, 0);
1334 /* rn may reference SP, rm and rt must reference ZR */
1336 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1337 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1339 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
1341 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1342 aarch64_set_mem_u64 (cpu, address + displacement,
1343 aarch64_get_reg_u64 (cpu, rt, NO_SP));
1346 /* 32 bit store byte scaled unsigned 12 bit. */
1348 strb_abs (sim_cpu *cpu, uint32_t offset)
1350 unsigned rn = INSTR (9, 5);
1351 unsigned rt = INSTR (4, 0);
1353 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1354 /* The target register may not be SP but the source may be.
1355 There is no scaling required for a byte load. */
1356 aarch64_set_mem_u8 (cpu,
1357 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
1358 aarch64_get_reg_u8 (cpu, rt, NO_SP));
1361 /* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback. */
1363 strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1365 unsigned rn = INSTR (9, 5);
1366 unsigned rt = INSTR (4, 0);
1369 if (rn == rt && wb != NoWriteBack)
1372 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1377 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1378 aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));
1383 if (wb != NoWriteBack)
1384 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1387 /* 32 bit store byte scaled or unscaled zero-
1388 or sign-extended 32-bit register offset. */
1390 strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1392 unsigned rm = INSTR (20, 16);
1393 unsigned rn = INSTR (9, 5);
1394 unsigned rt = INSTR (4, 0);
1395 /* rn may reference SP, rm and rt must reference ZR */
1397 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1398 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1401 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1402 /* There is no scaling required for a byte load. */
1403 aarch64_set_mem_u8 (cpu, address + displacement,
1404 aarch64_get_reg_u8 (cpu, rt, NO_SP));
1407 /* 32 bit store short scaled unsigned 12 bit. */
1409 strh_abs (sim_cpu *cpu, uint32_t offset)
1411 unsigned rn = INSTR (9, 5);
1412 unsigned rt = INSTR (4, 0);
1414 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1415 /* The target register may not be SP but the source may be. */
1416 aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1417 + SCALE (offset, 16),
1418 aarch64_get_reg_u16 (cpu, rt, NO_SP));
1421 /* 32 bit store short unscaled signed 9 bit with pre- or post-writeback. */
1423 strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1425 unsigned rn = INSTR (9, 5);
1426 unsigned rt = INSTR (4, 0);
1429 if (rn == rt && wb != NoWriteBack)
1432 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1437 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1438 aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));
1443 if (wb != NoWriteBack)
1444 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1447 /* 32 bit store short scaled or unscaled zero-
1448 or sign-extended 32-bit register offset. */
1450 strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1452 unsigned rm = INSTR (20, 16);
1453 unsigned rn = INSTR (9, 5);
1454 unsigned rt = INSTR (4, 0);
1455 /* rn may reference SP, rm and rt must reference ZR */
1457 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1458 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1459 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1461 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1462 aarch64_set_mem_u16 (cpu, address + displacement,
1463 aarch64_get_reg_u16 (cpu, rt, NO_SP));
1466 /* Prefetch unsigned 12 bit. */
1468 prfm_abs (sim_cpu *cpu, uint32_t offset)
1470 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1471 00010 ==> PLDL2KEEP, 00001 ==> PLDL2STRM,
1472 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1473 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1474 10010 ==> PSTL2KEEP, 10001 ==> PSTL2STRM,
1475 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1477 PrfOp prfop = prfop (instr, 4, 0);
1478 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
1479 + SCALE (offset, 64). */
1481 /* TODO : implement prefetch of address. */
1484 /* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset. */
1486 prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1488 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1489 00010 ==> PLDL2KEEP, 00001 ==> PLDL2STRM,
1490 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1491 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1492 10010 ==> PSTL2KEEP, 10001 ==> PSTL2STRM,
1493 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1495 rn may reference SP, rm may only reference ZR
1496 PrfOp prfop = prfop (instr, 4, 0);
1497 uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1498 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1500 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
1501 uint64_t address = base + displacement. */
1503 /* TODO : implement prefetch of address */
1506 /* 64 bit pc-relative prefetch. */
1508 prfm_pcrel (sim_cpu *cpu, int32_t offset)
1510 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1511 00010 ==> PLDL2KEEP, 00001 ==> PLDL2STRM,
1512 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1513 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1514 10010 ==> PSTL2KEEP, 10001 ==> PSTL2STRM,
1515 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1517 PrfOp prfop = prfop (instr, 4, 0);
1518 uint64_t address = aarch64_get_PC (cpu) + offset. */
1520 /* TODO : implement this */
1523 /* Load-store exclusive. */
1528 unsigned rn = INSTR (9, 5);
1529 unsigned rt = INSTR (4, 0);
1530 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1531 int size = INSTR (31, 30);
1532 /* int ordered = INSTR (15, 15); */
1533 /* int exclusive = ! INSTR (23, 23); */
1535 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1539 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
1542 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
1545 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
1548 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
1556 unsigned rn = INSTR (9, 5);
1557 unsigned rt = INSTR (4, 0);
1558 unsigned rs = INSTR (20, 16);
1559 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1560 int size = INSTR (31, 30);
1561 uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);
1565 case 0: aarch64_set_mem_u8 (cpu, address, data); break;
1566 case 1: aarch64_set_mem_u16 (cpu, address, data); break;
1567 case 2: aarch64_set_mem_u32 (cpu, address, data); break;
1568 case 3: aarch64_set_mem_u64 (cpu, address, data); break;
1571 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1572 aarch64_set_reg_u64 (cpu, rs, NO_SP, 0); /* Always exclusive... */
1576 dexLoadLiteral (sim_cpu *cpu)
1578 /* instr[29,27] == 011
1580 instr[31,30:26] = opc: 000 ==> LDRW, 001 ==> FLDRS
1581 010 ==> LDRX, 011 ==> FLDRD
1582 100 ==> LDRSW, 101 ==> FLDRQ
1583 110 ==> PRFM, 111 ==> UNALLOC
1584 instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
1585 instr[23, 5] == simm19 */
1587 /* unsigned rt = INSTR (4, 0); */
1588 uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26);
1589 int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);
1593 case 0: ldr32_pcrel (cpu, imm); break;
1594 case 1: fldrs_pcrel (cpu, imm); break;
1595 case 2: ldr_pcrel (cpu, imm); break;
1596 case 3: fldrd_pcrel (cpu, imm); break;
1597 case 4: ldrsw_pcrel (cpu, imm); break;
1598 case 5: fldrq_pcrel (cpu, imm); break;
1599 case 6: prfm_pcrel (cpu, imm); break;
1606 /* Immediate arithmetic
1607 The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
1608 value left shifted by 12 bits (done at decode).
1610 N.B. the register args (dest, source) can normally be Xn or SP.
1611 the exception occurs for flag setting instructions which may
1612 only use Xn for the output (dest). */
1614 /* 32 bit add immediate. */
1616 add32 (sim_cpu *cpu, uint32_t aimm)
1618 unsigned rn = INSTR (9, 5);
1619 unsigned rd = INSTR (4, 0);
1621 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1622 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1623 aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
1626 /* 64 bit add immediate. */
1628 add64 (sim_cpu *cpu, uint32_t aimm)
1630 unsigned rn = INSTR (9, 5);
1631 unsigned rd = INSTR (4, 0);
1633 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1634 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1635 aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
1639 set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
1641 int32_t result = value1 + value2;
1642 int64_t sresult = (int64_t) value1 + (int64_t) value2;
1643 uint64_t uresult = (uint64_t)(uint32_t) value1
1644 + (uint64_t)(uint32_t) value2;
1650 if (result & (1 << 31))
1653 if (uresult != result)
1656 if (sresult != result)
1659 aarch64_set_CPSR (cpu, flags);
1662 #define NEG(a) (((a) & signbit) == signbit)
1663 #define POS(a) (((a) & signbit) == 0)
1666 set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1668 uint64_t result = value1 + value2;
1670 uint64_t signbit = 1ULL << 63;
1678 if ( (NEG (value1) && NEG (value2))
1679 || (NEG (value1) && POS (result))
1680 || (NEG (value2) && POS (result)))
1683 if ( (NEG (value1) && NEG (value2) && POS (result))
1684 || (POS (value1) && POS (value2) && NEG (result)))
1687 aarch64_set_CPSR (cpu, flags);
1691 set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
1693 uint32_t result = value1 - value2;
1695 uint32_t signbit = 1U << 31;
1703 if ( (NEG (value1) && POS (value2))
1704 || (NEG (value1) && POS (result))
1705 || (POS (value2) && POS (result)))
1708 if ( (NEG (value1) && POS (value2) && POS (result))
1709 || (POS (value1) && NEG (value2) && NEG (result)))
1712 aarch64_set_CPSR (cpu, flags);
1716 set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1718 uint64_t result = value1 - value2;
1720 uint64_t signbit = 1ULL << 63;
1728 if ( (NEG (value1) && POS (value2))
1729 || (NEG (value1) && POS (result))
1730 || (POS (value2) && POS (result)))
1733 if ( (NEG (value1) && POS (value2) && POS (result))
1734 || (POS (value1) && NEG (value2) && NEG (result)))
1737 aarch64_set_CPSR (cpu, flags);
1741 set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
1750 if (result & (1 << 31))
1755 aarch64_set_CPSR (cpu, flags);
1759 set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
1768 if (result & (1ULL << 63))
1773 aarch64_set_CPSR (cpu, flags);
1776 /* 32 bit add immediate set flags. */
1778 adds32 (sim_cpu *cpu, uint32_t aimm)
1780 unsigned rn = INSTR (9, 5);
1781 unsigned rd = INSTR (4, 0);
1782 /* TODO : do we need to worry about signs here? */
1783 int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);
1785 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1786 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
1787 set_flags_for_add32 (cpu, value1, aimm);
1790 /* 64 bit add immediate set flags. */
1792 adds64 (sim_cpu *cpu, uint32_t aimm)
1794 unsigned rn = INSTR (9, 5);
1795 unsigned rd = INSTR (4, 0);
1796 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1797 uint64_t value2 = aimm;
1799 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1800 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1801 set_flags_for_add64 (cpu, value1, value2);
1804 /* 32 bit sub immediate. */
1806 sub32 (sim_cpu *cpu, uint32_t aimm)
1808 unsigned rn = INSTR (9, 5);
1809 unsigned rd = INSTR (4, 0);
1811 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1812 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1813 aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
1816 /* 64 bit sub immediate. */
1818 sub64 (sim_cpu *cpu, uint32_t aimm)
1820 unsigned rn = INSTR (9, 5);
1821 unsigned rd = INSTR (4, 0);
1823 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1824 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1825 aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
1828 /* 32 bit sub immediate set flags. */
1830 subs32 (sim_cpu *cpu, uint32_t aimm)
1832 unsigned rn = INSTR (9, 5);
1833 unsigned rd = INSTR (4, 0);
1834 uint32_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1835 uint32_t value2 = aimm;
1837 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1838 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1839 set_flags_for_sub32 (cpu, value1, value2);
1842 /* 64 bit sub immediate set flags. */
1844 subs64 (sim_cpu *cpu, uint32_t aimm)
1846 unsigned rn = INSTR (9, 5);
1847 unsigned rd = INSTR (4, 0);
1848 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1849 uint32_t value2 = aimm;
1851 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1852 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1853 set_flags_for_sub64 (cpu, value1, value2);
1856 /* Data Processing Register. */
1858 /* First two helpers to perform the shift operations. */
1860 static inline uint32_t
1861 shifted32 (uint32_t value, Shift shift, uint32_t count)
1867 return (value << count);
1869 return (value >> count);
1872 int32_t svalue = value;
1873 return (svalue >> count);
1877 uint32_t top = value >> count;
1878 uint32_t bottom = value << (32 - count);
1879 return (bottom | top);
1884 static inline uint64_t
1885 shifted64 (uint64_t value, Shift shift, uint32_t count)
1891 return (value << count);
1893 return (value >> count);
1896 int64_t svalue = value;
1897 return (svalue >> count);
1901 uint64_t top = value >> count;
1902 uint64_t bottom = value << (64 - count);
1903 return (bottom | top);
1908 /* Arithmetic shifted register.
1909 These allow an optional LSL, ASR or LSR to the second source
1910 register with a count up to the register bit count.
1912 N.B register args may not be SP. */
1914 /* 32 bit ADD shifted register. */
1916 add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1918 unsigned rm = INSTR (20, 16);
1919 unsigned rn = INSTR (9, 5);
1920 unsigned rd = INSTR (4, 0);
1922 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1923 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1924 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1925 + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1929 /* 64 bit ADD shifted register. */
1931 add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1933 unsigned rm = INSTR (20, 16);
1934 unsigned rn = INSTR (9, 5);
1935 unsigned rd = INSTR (4, 0);
1937 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1938 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1939 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1940 + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1944 /* 32 bit ADD shifted register setting flags. */
1946 adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1948 unsigned rm = INSTR (20, 16);
1949 unsigned rn = INSTR (9, 5);
1950 unsigned rd = INSTR (4, 0);
1952 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1953 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1956 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1957 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1958 set_flags_for_add32 (cpu, value1, value2);
1961 /* 64 bit ADD shifted register setting flags. */
1963 adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1965 unsigned rm = INSTR (20, 16);
1966 unsigned rn = INSTR (9, 5);
1967 unsigned rd = INSTR (4, 0);
1969 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1970 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1973 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1974 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1975 set_flags_for_add64 (cpu, value1, value2);
1978 /* 32 bit SUB shifted register. */
1980 sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1982 unsigned rm = INSTR (20, 16);
1983 unsigned rn = INSTR (9, 5);
1984 unsigned rd = INSTR (4, 0);
1986 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1987 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1988 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1989 - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1993 /* 64 bit SUB shifted register. */
1995 sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1997 unsigned rm = INSTR (20, 16);
1998 unsigned rn = INSTR (9, 5);
1999 unsigned rd = INSTR (4, 0);
2001 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2002 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2003 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2004 - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2008 /* 32 bit SUB shifted register setting flags. */
2010 subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2012 unsigned rm = INSTR (20, 16);
2013 unsigned rn = INSTR (9, 5);
2014 unsigned rd = INSTR (4, 0);
2016 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2017 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
2020 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2021 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2022 set_flags_for_sub32 (cpu, value1, value2);
2025 /* 64 bit SUB shifted register setting flags. */
2027 subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2029 unsigned rm = INSTR (20, 16);
2030 unsigned rn = INSTR (9, 5);
2031 unsigned rd = INSTR (4, 0);
2033 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2034 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2037 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2038 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2039 set_flags_for_sub64 (cpu, value1, value2);
2042 /* First a couple more helpers to fetch the
2043 relevant source register element either
2044 sign or zero extended as required by the
2048 extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
2052 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2053 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2054 case UXTW: /* Fall through. */
2055 case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2056 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2057 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2058 case SXTW: /* Fall through. */
2059 case SXTX: /* Fall through. */
2060 default: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2065 extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
2069 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2070 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2071 case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2072 case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
2073 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2074 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2075 case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2077 default: return aarch64_get_reg_s64 (cpu, lo, NO_SP);
2081 /* Arithmetic extending register
2082 These allow an optional sign extension of some portion of the
2083 second source register followed by an optional left shift of
2084 between 1 and 4 bits (i.e. a shift of 0-4 bits???)
2086 N.B output (dest) and first input arg (source) may normally be Xn
2087 or SP. However, for flag setting operations dest can only be
2088 Xn. Second input registers are always Xn. */
2090 /* 32 bit ADD extending register. */
2092 add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2094 unsigned rm = INSTR (20, 16);
2095 unsigned rn = INSTR (9, 5);
2096 unsigned rd = INSTR (4, 0);
2098 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2099 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2100 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2101 + (extreg32 (cpu, rm, extension) << shift));
2104 /* 64 bit ADD extending register.
2105 N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2107 add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2109 unsigned rm = INSTR (20, 16);
2110 unsigned rn = INSTR (9, 5);
2111 unsigned rd = INSTR (4, 0);
2113 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2114 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2115 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2116 + (extreg64 (cpu, rm, extension) << shift));
2119 /* 32 bit ADD extending register setting flags. */
2121 adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2123 unsigned rm = INSTR (20, 16);
2124 unsigned rn = INSTR (9, 5);
2125 unsigned rd = INSTR (4, 0);
2127 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2128 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2130 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2131 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2132 set_flags_for_add32 (cpu, value1, value2);
2135 /* 64 bit ADD extending register setting flags */
2136 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2138 adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2140 unsigned rm = INSTR (20, 16);
2141 unsigned rn = INSTR (9, 5);
2142 unsigned rd = INSTR (4, 0);
2144 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2145 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2147 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2148 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2149 set_flags_for_add64 (cpu, value1, value2);
2152 /* 32 bit SUB extending register. */
2154 sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2156 unsigned rm = INSTR (20, 16);
2157 unsigned rn = INSTR (9, 5);
2158 unsigned rd = INSTR (4, 0);
2160 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2161 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2162 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2163 - (extreg32 (cpu, rm, extension) << shift));
2166 /* 64 bit SUB extending register. */
2167 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2169 sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2171 unsigned rm = INSTR (20, 16);
2172 unsigned rn = INSTR (9, 5);
2173 unsigned rd = INSTR (4, 0);
2175 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2176 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2177 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2178 - (extreg64 (cpu, rm, extension) << shift));
2181 /* 32 bit SUB extending register setting flags. */
2183 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2185 unsigned rm = INSTR (20, 16);
2186 unsigned rn = INSTR (9, 5);
2187 unsigned rd = INSTR (4, 0);
2189 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2190 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2192 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2193 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2194 set_flags_for_sub32 (cpu, value1, value2);
2197 /* 64 bit SUB extending register setting flags */
2198 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2200 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2202 unsigned rm = INSTR (20, 16);
2203 unsigned rn = INSTR (9, 5);
2204 unsigned rd = INSTR (4, 0);
2206 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2207 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2209 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2210 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2211 set_flags_for_sub64 (cpu, value1, value2);
2215 dexAddSubtractImmediate (sim_cpu *cpu)
2217 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2218 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2219 instr[29] = set : 0 ==> no flags, 1 ==> set flags
2220 instr[28,24] = 10001
2221 instr[23,22] = shift : 00 == LSL#0, 01 = LSL#12 1x = UNALLOC
2222 instr[21,10] = uimm12
2226 /* N.B. the shift is applied at decode before calling the add/sub routine. */
2227 uint32_t shift = INSTR (23, 22);
2228 uint32_t imm = INSTR (21, 10);
2229 uint32_t dispatch = INSTR (31, 29);
2231 NYI_assert (28, 24, 0x11);
2241 case 0: add32 (cpu, imm); break;
2242 case 1: adds32 (cpu, imm); break;
2243 case 2: sub32 (cpu, imm); break;
2244 case 3: subs32 (cpu, imm); break;
2245 case 4: add64 (cpu, imm); break;
2246 case 5: adds64 (cpu, imm); break;
2247 case 6: sub64 (cpu, imm); break;
2248 case 7: subs64 (cpu, imm); break;
2253 dexAddSubtractShiftedRegister (sim_cpu *cpu)
2255 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2256 instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
2257 instr[28,24] = 01011
2258 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
2261 instr[15,10] = count : must be 0xxxxx for 32 bit
2265 uint32_t size = INSTR (31, 31);
2266 uint32_t count = INSTR (15, 10);
2267 Shift shiftType = INSTR (23, 22);
2269 NYI_assert (28, 24, 0x0B);
2270 NYI_assert (21, 21, 0);
2272 /* Shift encoded as ROR is unallocated. */
2273 if (shiftType == ROR)
2276 /* 32 bit operations must have count[5] = 0
2277 or else we have an UNALLOC. */
2278 if (size == 0 && uimm (count, 5, 5))
2281 /* Dispatch on size:op i.e instr [31,29]. */
2282 switch (INSTR (31, 29))
2284 case 0: add32_shift (cpu, shiftType, count); break;
2285 case 1: adds32_shift (cpu, shiftType, count); break;
2286 case 2: sub32_shift (cpu, shiftType, count); break;
2287 case 3: subs32_shift (cpu, shiftType, count); break;
2288 case 4: add64_shift (cpu, shiftType, count); break;
2289 case 5: adds64_shift (cpu, shiftType, count); break;
2290 case 6: sub64_shift (cpu, shiftType, count); break;
2291 case 7: subs64_shift (cpu, shiftType, count); break;
2296 dexAddSubtractExtendedRegister (sim_cpu *cpu)
2298 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2299 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2300 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2301 instr[28,24] = 01011
2302 instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
2305 instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
2306 000 ==> LSL|UXTW, 001 ==> UXTZ,
2307 000 ==> SXTB, 001 ==> SXTH,
2308 000 ==> SXTW, 001 ==> SXTX,
2309 instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
2313 Extension extensionType = INSTR (15, 13);
2314 uint32_t shift = INSTR (12, 10);
2316 NYI_assert (28, 24, 0x0B);
2317 NYI_assert (21, 21, 1);
2319 /* Shift may not exceed 4. */
2323 /* Dispatch on size:op:set?. */
2324 switch (INSTR (31, 29))
2326 case 0: add32_ext (cpu, extensionType, shift); break;
2327 case 1: adds32_ext (cpu, extensionType, shift); break;
2328 case 2: sub32_ext (cpu, extensionType, shift); break;
2329 case 3: subs32_ext (cpu, extensionType, shift); break;
2330 case 4: add64_ext (cpu, extensionType, shift); break;
2331 case 5: adds64_ext (cpu, extensionType, shift); break;
2332 case 6: sub64_ext (cpu, extensionType, shift); break;
2333 case 7: subs64_ext (cpu, extensionType, shift); break;
2337 /* Conditional data processing
2338 Condition register is implicit 3rd source. */
2340 /* 32 bit add with carry. */
2341 /* N.B register args may not be SP. */
2344 adc32 (sim_cpu *cpu)
2346 unsigned rm = INSTR (20, 16);
2347 unsigned rn = INSTR (9, 5);
2348 unsigned rd = INSTR (4, 0);
2350 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2351 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2352 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2353 + aarch64_get_reg_u32 (cpu, rm, NO_SP)
2357 /* 64 bit add with carry */
2359 adc64 (sim_cpu *cpu)
2361 unsigned rm = INSTR (20, 16);
2362 unsigned rn = INSTR (9, 5);
2363 unsigned rd = INSTR (4, 0);
2365 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2366 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2367 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2368 + aarch64_get_reg_u64 (cpu, rm, NO_SP)
2372 /* 32 bit add with carry setting flags. */
2374 adcs32 (sim_cpu *cpu)
2376 unsigned rm = INSTR (20, 16);
2377 unsigned rn = INSTR (9, 5);
2378 unsigned rd = INSTR (4, 0);
2380 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2381 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2382 uint32_t carry = IS_SET (C);
2384 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2385 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2386 set_flags_for_add32 (cpu, value1, value2 + carry);
2389 /* 64 bit add with carry setting flags. */
2391 adcs64 (sim_cpu *cpu)
2393 unsigned rm = INSTR (20, 16);
2394 unsigned rn = INSTR (9, 5);
2395 unsigned rd = INSTR (4, 0);
2397 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2398 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2399 uint64_t carry = IS_SET (C);
2401 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2402 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2403 set_flags_for_add64 (cpu, value1, value2 + carry);
2406 /* 32 bit sub with carry. */
2408 sbc32 (sim_cpu *cpu)
2410 unsigned rm = INSTR (20, 16);
2411 unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. */
2412 unsigned rd = INSTR (4, 0);
2414 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2415 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2416 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2417 - aarch64_get_reg_u32 (cpu, rm, NO_SP)
/* 64 bit sub with carry */
/* SBC Xd, Xn, Xm: architecturally Rd = Rn - Rm - 1 + C; the borrow
   term is on a line elided from this excerpt.  */
sbc64 (sim_cpu *cpu)
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_reg_u64 (cpu, rn, NO_SP)
		       - aarch64_get_reg_u64 (cpu, rm, NO_SP)
2436 /* 32 bit sub with carry setting flags */
2438 sbcs32 (sim_cpu *cpu)
2440 unsigned rm = INSTR (20, 16);
2441 unsigned rn = INSTR (9, 5);
2442 unsigned rd = INSTR (4, 0);
2444 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2445 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2446 uint32_t carry = IS_SET (C);
2447 uint32_t result = value1 - value2 + 1 - carry;
2449 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2450 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2451 set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
2454 /* 64 bit sub with carry setting flags */
2456 sbcs64 (sim_cpu *cpu)
2458 unsigned rm = INSTR (20, 16);
2459 unsigned rn = INSTR (9, 5);
2460 unsigned rd = INSTR (4, 0);
2462 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2463 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2464 uint64_t carry = IS_SET (C);
2465 uint64_t result = value1 - value2 + 1 - carry;
2467 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2468 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2469 set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
/* Decode an add/subtract-with-carry instruction and dispatch on the
   size:op:setflags bits.  */
dexAddSubtractWithCarry (sim_cpu *cpu)
  /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30]    = op : 0 ==> ADC, 1 ==> SBC
     instr[29]    = set? : 0 ==> no flags, 1 ==> set flags
     instr[28,21] = 1 1010 000
     instr[15,10] = op2 : 00000 ==> ok, ow ==> UNALLOC  */

  uint32_t op2 = INSTR (15, 10);

  NYI_assert (28, 21, 0xD0);

  /* NOTE(review): the "op2 != 0 ==> HALT_UNALLOC" check implied by the
     comment above is on lines elided from this excerpt.  */

  /* Dispatch on size:op:set?. */
  switch (INSTR (31, 29))
    case 0: adc32 (cpu); break;		/* 32-bit ADC.  */
    case 1: adcs32 (cpu); break;	/* 32-bit ADCS.  */
    case 2: sbc32 (cpu); break;		/* 32-bit SBC.  */
    case 3: sbcs32 (cpu); break;	/* 32-bit SBCS.  */
    case 4: adc64 (cpu); break;		/* 64-bit ADC.  */
    case 5: adcs64 (cpu); break;	/* 64-bit ADCS.  */
    case 6: sbc64 (cpu); break;		/* 64-bit SBC.  */
    case 7: sbcs64 (cpu); break;	/* 64-bit SBCS.  */
/* Evaluate condition code CC against the current NZCV flags; the result
   variable, switch header and return are on lines elided from this
   excerpt.  */
testConditionCode (sim_cpu *cpu, CondCode cc)
  /* This should be reduceable to branchless logic
     by some careful testing of bits in CC followed
     by the requisite masking and combining of bits
     from the flag register.

     For now we do it with a switch. */
    case EQ: res = IS_SET (Z); break;
    case NE: res = IS_CLEAR (Z); break;
    case CS: res = IS_SET (C); break;
    case CC: res = IS_CLEAR (C); break;
    case MI: res = IS_SET (N); break;
    case PL: res = IS_CLEAR (N); break;
    case VS: res = IS_SET (V); break;
    case VC: res = IS_CLEAR (V); break;
    case HI: res = IS_SET (C) && IS_CLEAR (Z); break;	/* Unsigned higher.  */
    case LS: res = IS_CLEAR (C) || IS_SET (Z); break;	/* Unsigned lower or same.  */
    case GE: res = IS_SET (N) == IS_SET (V); break;	/* Signed >=.  */
    case LT: res = IS_SET (N) != IS_SET (V); break;	/* Signed <.  */
    case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break;
    case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break;
/* Conditional compare: when the condition in instr[15,12] holds, NZCV
   is set from a compare of Rn against the (possibly negated) operand;
   otherwise NZCV is loaded directly from instr[3,0].  */
CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */
  /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30]    = compare with positive (1) or negative value (0)
     instr[29,21] = 1 1101 0010
     instr[20,16] = Rm or const
     instr[11]    = compare reg (0) or const (1)
     instr[3,0]   = value for CPSR bits if the comparison does not take place. */

  NYI_assert (29, 21, 0x1d2);
  NYI_assert (10, 10, 0);
  NYI_assert (4, 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (! testConditionCode (cpu, INSTR (15, 12)))
      /* Condition failed: install the immediate NZCV value and return
	 (return statement elided in this excerpt).  */
      aarch64_set_CPSR (cpu, INSTR (3, 0));

  /* instr[30] selects comparing against +operand or -operand.  */
  negate = INSTR (30, 30) ? 1 : -1;
  rm = INSTR (20, 16);

      /* 64-bit immediate form: rm is a 5-bit unsigned constant.  */
      set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
			   negate * (uint64_t) rm);
      /* 64-bit register form.  */
      set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
			   negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
      /* 32-bit immediate form (its second operand line is elided).  */
      set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
      /* 32-bit register form.  */
      set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
			   negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
/* Copy an entire vector register (the register-move alias of ORR).  */
do_vec_MOV_whole_vector (sim_cpu *cpu)
  /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)
     instr[30]    = half(0)/full(1)
     instr[29,21] = 001110101
     instr[15,10] = 000111  */

  unsigned vs = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);

  NYI_assert (29, 21, 0x075);
  NYI_assert (15, 10, 0x07);

  /* Only the Vn == Vm alias is handled here; the consequent of this
     test (presumably a halt) is elided in this excerpt.  */
  if (INSTR (20, 16) != vs)

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* NOTE(review): an "if (full)" guard for the upper-half copy appears
     to be elided before the first store -- confirm in the full source.  */
  aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));
  aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
/* Move one vector element (a 64-bit lane or a 32-bit lane) into a
   general register.  */
do_vec_MOV_into_scalar (sim_cpu *cpu)
  /* instr[30]    = word(0)/long(1)
     instr[29,21] = 00 1110 000
     instr[20,18] = element size and index
     instr[17,10] = 00 0011 11
     instr[9,5]   = V source
     instr[4,0]   = R dest */

  unsigned vs = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  NYI_assert (29, 21, 0x070);
  NYI_assert (17, 10, 0x0F);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (INSTR (20, 18))
      /* (Case labels elided in this excerpt.)  */
      /* 64-bit element 0.  */
      aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 0));
      /* 64-bit element 1.  */
      aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 1));
      /* 32-bit element; the index comes from instr[20,19].  */
      aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u32
			   (cpu, vs, INSTR (20, 19)));
/* INS Vd.Ts[index], Rn: insert a general register into one vector
   element.  The element size is encoded by the lowest set bit of
   instr[20,16]; the bits above it give the index.  */
do_vec_INS (sim_cpu *cpu)
  /* instr[31,21] = 01001110000
     instr[20,16] = element size and index
     instr[15,10] = 000111
     instr[9,5]   = W source
     instr[4,0]   = V dest */

  unsigned rs = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);

  NYI_assert (31, 21, 0x270);
  NYI_assert (15, 10, 0x07);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* NOTE(review): the leading "if (INSTR (16, 16))" byte-size test is
     on a line elided from this excerpt.  */
      index = INSTR (20, 17);	/* Byte element.  */
      aarch64_set_vec_u8 (cpu, vd, index,
			  aarch64_get_reg_u8 (cpu, rs, NO_SP));
  else if (INSTR (17, 17))
      index = INSTR (20, 18);	/* Half-word element.  */
      aarch64_set_vec_u16 (cpu, vd, index,
			   aarch64_get_reg_u16 (cpu, rs, NO_SP));
  else if (INSTR (18, 18))
      index = INSTR (20, 19);	/* Word element.  */
      aarch64_set_vec_u32 (cpu, vd, index,
			   aarch64_get_reg_u32 (cpu, rs, NO_SP));
  else if (INSTR (19, 19))
      index = INSTR (20, 20);	/* Double-word element.  */
      aarch64_set_vec_u64 (cpu, vd, index,
			   aarch64_get_reg_u64 (cpu, rs, NO_SP));
/* DUP Vd.T, Vs.Ts[index]: replicate one vector element into every
   element of Vd.  */
do_vec_DUP_vector_into_vector (sim_cpu *cpu)
  /* instr[30]    = half(0)/full(1)
     instr[29,21] = 00 1110 000
     instr[20,16] = element size and index
     instr[15,10] = 0000 01
     instr[9,5]   = V source
     instr[4,0]   = V dest. */

  unsigned full = INSTR (30, 30);
  unsigned vs = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);

  NYI_assert (29, 21, 0x070);
  NYI_assert (15, 10, 0x01);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Byte case; its "if (INSTR (16, 16))" guard is elided here.  */
      index = INSTR (20, 17);

      for (i = 0; i < (full ? 16 : 8); i++)
	aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index));
  else if (INSTR (17, 17))
      index = INSTR (20, 18);	/* Half-word case.  */

      for (i = 0; i < (full ? 8 : 4); i++)
	aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index));
  else if (INSTR (18, 18))
      index = INSTR (20, 19);	/* Word case.  */

      for (i = 0; i < (full ? 4 : 2); i++)
	aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index));

      /* 64-bit case; only valid on the full (128-bit) register.  */
      if (INSTR (19, 19) == 0)

      index = INSTR (20, 20);

      for (i = 0; i < 2; i++)
	aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index));
/* TBL: byte table lookup across LEN consecutive vector registers
   starting at Vn, indexed by the bytes of Vm; out-of-range selectors
   produce zero (that arm is elided in this excerpt).  */
do_vec_TBL (sim_cpu *cpu)
  /* instr[30]    = half(0)/full(1)
     instr[29,21] = 00 1110 000
     instr[14,13] = vec length
     instr[9,5]   = V start
     instr[4,0]   = V dest */

  int full = INSTR (30, 30);
  int len = INSTR (14, 13) + 1;	/* Number of table registers: 1..4.  */
  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);

  NYI_assert (29, 21, 0x070);
  NYI_assert (12, 10, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  for (i = 0; i < (full ? 16 : 8); i++)
      unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i);

      /* The "if (selector < 16)" guard for this arm is elided.
	 NOTE(review): architecturally the table register numbers wrap
	 modulo 32; "vn + 1" etc. here do not wrap past V31 -- confirm.  */
	val = aarch64_get_vec_u8 (cpu, vn, selector);
      else if (selector < 32)
	val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16);
      else if (selector < 48)
	val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32);
      else if (selector < 64)
	val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48);

      aarch64_set_vec_u8 (cpu, vd, i, val);
2809 do_vec_TRN (sim_cpu *cpu)
2812 instr[30] = half(0)/full(1)
2813 instr[29,24] = 00 1110
2818 instr[14] = TRN1 (0) / TRN2 (1)
2820 instr[9,5] = V source
2821 instr[4,0] = V dest. */
2823 int full = INSTR (30, 30);
2824 int second = INSTR (14, 14);
2825 unsigned vm = INSTR (20, 16);
2826 unsigned vn = INSTR (9, 5);
2827 unsigned vd = INSTR (4, 0);
2830 NYI_assert (29, 24, 0x0E);
2831 NYI_assert (13, 10, 0xA);
2833 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2834 switch (INSTR (23, 22))
2837 for (i = 0; i < (full ? 8 : 4); i++)
2841 aarch64_get_vec_u8 (cpu, second ? vm : vn, i * 2));
2843 (cpu, vd, 1 * 2 + 1,
2844 aarch64_get_vec_u8 (cpu, second ? vn : vm, i * 2 + 1));
2849 for (i = 0; i < (full ? 4 : 2); i++)
2853 aarch64_get_vec_u16 (cpu, second ? vm : vn, i * 2));
2855 (cpu, vd, 1 * 2 + 1,
2856 aarch64_get_vec_u16 (cpu, second ? vn : vm, i * 2 + 1));
2862 (cpu, vd, 0, aarch64_get_vec_u32 (cpu, second ? vm : vn, 0));
2864 (cpu, vd, 1, aarch64_get_vec_u32 (cpu, second ? vn : vm, 1));
2866 (cpu, vd, 2, aarch64_get_vec_u32 (cpu, second ? vm : vn, 2));
2868 (cpu, vd, 3, aarch64_get_vec_u32 (cpu, second ? vn : vm, 3));
2875 aarch64_set_vec_u64 (cpu, vd, 0,
2876 aarch64_get_vec_u64 (cpu, second ? vm : vn, 0));
2877 aarch64_set_vec_u64 (cpu, vd, 1,
2878 aarch64_get_vec_u64 (cpu, second ? vn : vm, 1));
/* DUP Vd.T, Rn: replicate a general register into every element of a
   vector register.  */
do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
  /* instr[30]    = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
     [must be 1 for 64-bit xfer]
     instr[29,20] = 00 1110 0000
     instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
     0100=> 32-bits. 1000=>64-bits
     instr[15,10] = 0000 11
     instr[9,5]   = W source
     instr[4,0]   = V dest. */

  unsigned Vd = INSTR (4, 0);
  unsigned Rs = INSTR (9, 5);
  int both = INSTR (30, 30);	/* Write upper 64 bits as well?  */

  NYI_assert (29, 20, 0x0E0);
  NYI_assert (15, 10, 0x03);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (INSTR (19, 16))
      /* (Case labels elided in this excerpt.)  */
      /* 8-bit elements.  */
      for (i = 0; i < (both ? 16 : 8); i++)
	aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));

      /* 16-bit elements.  */
      for (i = 0; i < (both ? 8 : 4); i++)
	aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));

      /* 32-bit elements.  */
      for (i = 0; i < (both ? 4 : 2); i++)
	aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));

      /* 64-bit elements: both halves are written.  */
      aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
      aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2935 do_vec_UZP (sim_cpu *cpu)
2938 instr[30] = half(0)/full(1)
2939 instr[29,24] = 00 1110
2940 instr[23,22] = size: byte(00), half(01), word (10), long (11)
2944 instr[14] = lower (0) / upper (1)
2949 int full = INSTR (30, 30);
2950 int upper = INSTR (14, 14);
2952 unsigned vm = INSTR (20, 16);
2953 unsigned vn = INSTR (9, 5);
2954 unsigned vd = INSTR (4, 0);
2956 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
2957 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
2958 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
2959 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
2964 uint64_t input1 = upper ? val_n1 : val_m1;
2965 uint64_t input2 = upper ? val_n2 : val_m2;
2968 NYI_assert (29, 24, 0x0E);
2969 NYI_assert (21, 21, 0);
2970 NYI_assert (15, 15, 0);
2971 NYI_assert (13, 10, 6);
2973 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2974 switch (INSTR (23, 23))
2977 for (i = 0; i < 8; i++)
2979 val1 |= (input1 >> (i * 8)) & (0xFFULL << (i * 8));
2980 val2 |= (input2 >> (i * 8)) & (0xFFULL << (i * 8));
2985 for (i = 0; i < 4; i++)
2987 val1 |= (input1 >> (i * 16)) & (0xFFFFULL << (i * 16));
2988 val2 |= (input2 >> (i * 16)) & (0xFFFFULL << (i * 16));
2993 val1 = ((input1 & 0xFFFFFFFF) | ((input1 >> 32) & 0xFFFFFFFF00000000ULL));
2994 val2 = ((input2 & 0xFFFFFFFF) | ((input2 >> 32) & 0xFFFFFFFF00000000ULL));
3002 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3004 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3008 do_vec_ZIP (sim_cpu *cpu)
3011 instr[30] = half(0)/full(1)
3012 instr[29,24] = 00 1110
3013 instr[23,22] = size: byte(00), hald(01), word (10), long (11)
3017 instr[14] = lower (0) / upper (1)
3022 int full = INSTR (30, 30);
3023 int upper = INSTR (14, 14);
3025 unsigned vm = INSTR (20, 16);
3026 unsigned vn = INSTR (9, 5);
3027 unsigned vd = INSTR (4, 0);
3029 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3030 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3031 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3032 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3037 uint64_t input1 = upper ? val_n1 : val_m1;
3038 uint64_t input2 = upper ? val_n2 : val_m2;
3040 NYI_assert (29, 24, 0x0E);
3041 NYI_assert (21, 21, 0);
3042 NYI_assert (15, 15, 0);
3043 NYI_assert (13, 10, 0xE);
3045 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3046 switch (INSTR (23, 23))
3050 ((input1 << 0) & (0xFF << 0))
3051 | ((input2 << 8) & (0xFF << 8))
3052 | ((input1 << 8) & (0xFF << 16))
3053 | ((input2 << 16) & (0xFF << 24))
3054 | ((input1 << 16) & (0xFFULL << 32))
3055 | ((input2 << 24) & (0xFFULL << 40))
3056 | ((input1 << 24) & (0xFFULL << 48))
3057 | ((input2 << 32) & (0xFFULL << 56));
3060 ((input1 >> 32) & (0xFF << 0))
3061 | ((input2 >> 24) & (0xFF << 8))
3062 | ((input1 >> 24) & (0xFF << 16))
3063 | ((input2 >> 16) & (0xFF << 24))
3064 | ((input1 >> 16) & (0xFFULL << 32))
3065 | ((input2 >> 8) & (0xFFULL << 40))
3066 | ((input1 >> 8) & (0xFFULL << 48))
3067 | ((input2 >> 0) & (0xFFULL << 56));
3072 ((input1 << 0) & (0xFFFF << 0))
3073 | ((input2 << 16) & (0xFFFF << 16))
3074 | ((input1 << 16) & (0xFFFFULL << 32))
3075 | ((input2 << 32) & (0xFFFFULL << 48));
3078 ((input1 >> 32) & (0xFFFF << 0))
3079 | ((input2 >> 16) & (0xFFFF << 16))
3080 | ((input1 >> 16) & (0xFFFFULL << 32))
3081 | ((input2 >> 0) & (0xFFFFULL << 48));
3085 val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
3086 val2 = (input2 & 0xFFFFFFFFULL) | (input1 << 32);
3095 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3097 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3100 /* Floating point immediates are encoded in 8 bits.
3101 fpimm[7] = sign bit.
3102 fpimm[6:4] = signed exponent.
3103 fpimm[3:0] = fraction (assuming leading 1).
3104 i.e. F = s * 1.f * 2^(e - b). */
/* Expand an 8-bit floating point immediate into the float it encodes:
   bit 7 is the sign, bits 6:4 a 3-bit "signed" exponent and bits 3:0
   the fraction of an implied leading 1, i.e. value = s * (16+f)/16 * 2^e
   with e in +1..+4 for exponent fields 0..3 and -3..0 for 4..7.  */
static float
fp_immediate_for_encoding_32 (uint32_t imm8)
{
  uint32_t sign = (imm8 >> 7) & 0x1;
  uint32_t exp  = (imm8 >> 4) & 0x7;
  uint32_t frac = imm8 & 0xF;
  float result = (16.0 + frac) / 16.0;
  uint32_t i;

  /* The exponent field is signed: small values scale up, large ones
     scale down.  */
  if (exp < 4)
    for (i = 0; i <= exp; i++)
      result *= 2.0;
  else
    for (i = 0; i < 7 - exp; i++)
      result /= 2.0;

  return sign ? - result : result;
}
/* Double-precision variant of fp_immediate_for_encoding_32: expand the
   8-bit immediate (sign / 3-bit signed exponent / 4-bit fraction) into
   the double it encodes.  */
static double
fp_immediate_for_encoding_64 (uint32_t imm8)
{
  uint32_t sign = (imm8 >> 7) & 0x1;
  uint32_t exp  = (imm8 >> 4) & 0x7;
  uint32_t frac = imm8 & 0xF;
  double result = (16.0 + frac) / 16.0;
  uint32_t i;

  /* The exponent field is signed: small values scale up, large ones
     scale down.  */
  if (exp < 4)
    for (i = 0; i <= exp; i++)
      result *= 2.0;
  else
    for (i = 0; i < 7 - exp; i++)
      result /= 2.0;

  return sign ? - result : result;
}
/* MOVI/FMOV (vector, immediate): build an 8-bit immediate from
   instr[18,16]:instr[9,5], optionally shift it per the size/shift
   selector in instr[15,12], and replicate it across Vd.  */
do_vec_MOV_immediate (sim_cpu *cpu)
  /* instr[30]    = full/half selector
     instr[29,19] = 00111100000
     instr[18,16] = high 3 bits of uimm8
     instr[15,12] = size & shift:
     0010 => 32-bit + LSL#8
     0100 => 32-bit + LSL#16
     0110 => 32-bit + LSL#24
     1010 => 16-bit + LSL#8
     1101 => 32-bit + MSL#16
     1100 => 32-bit + MSL#8
     instr[9,5]   = low 5-bits of uimm8  */

  int full = INSTR (30, 30);
  unsigned vd = INSTR (4, 0);
  unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);	/* uimm8.  */

  NYI_assert (29, 19, 0x1E0);
  NYI_assert (11, 10, 1);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (INSTR (15, 12))
    case 0x0: /* 32-bit, no shift. */
    case 0x2: /* 32-bit, shift by 8. */
    case 0x4: /* 32-bit, shift by 16. */
    case 0x6: /* 32-bit, shift by 24. */
      /* instr[14,13] encodes the LSL amount in units of 8 bits.  */
      val <<= (8 * INSTR (14, 13));

      for (i = 0; i < (full ? 4 : 2); i++)
	aarch64_set_vec_u32 (cpu, vd, i, val);

    case 0xa: /* 16-bit, shift by 8. */
      /* (The "val <<= 8" for this case is elided; falls into 0x8.)  */
    case 0x8: /* 16-bit, no shift. */
      for (i = 0; i < (full ? 8 : 4); i++)
	aarch64_set_vec_u16 (cpu, vd, i, val);

    case 0xd: /* 32-bit, mask shift by 16. */
      /* (MSL widening of val is on elided lines.)  */
    case 0xc: /* 32-bit, mask shift by 8. */

      for (i = 0; i < (full ? 4 : 2); i++)
	aarch64_set_vec_u32 (cpu, vd, i, val);

    case 0xe: /* 8-bit, no shift. */
      for (i = 0; i < (full ? 16 : 8); i++)
	aarch64_set_vec_u8 (cpu, vd, i, val);

    case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */
	float u = fp_immediate_for_encoding_32 (val);

	for (i = 0; i < (full ? 4 : 2); i++)
	  aarch64_set_vec_float (cpu, vd, i, u);
/* MVNI / MOVI Dn,#mask64 / FMOV Vd.2D (vector, inverted/64-bit
   immediate forms); the immediate inversion steps sit on lines elided
   from this excerpt.  */
do_vec_MVNI (sim_cpu *cpu)
  /* instr[30]    = full/half selector
     instr[29,19] = 10111100000
     instr[18,16] = high 3 bits of uimm8
     instr[15,12] = selector
     instr[9,5]   = low 5-bits of uimm8  */

  int full = INSTR (30, 30);
  unsigned vd = INSTR (4, 0);
  unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);	/* uimm8.  */

  NYI_assert (29, 19, 0x5E0);
  NYI_assert (11, 10, 1);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (INSTR (15, 12))
    case 0x0: /* 32-bit, no shift. */
    case 0x2: /* 32-bit, shift by 8. */
    case 0x4: /* 32-bit, shift by 16. */
    case 0x6: /* 32-bit, shift by 24. */
      val <<= (8 * INSTR (14, 13));
      /* (The bitwise inversion of val is on an elided line.)  */

      for (i = 0; i < (full ? 4 : 2); i++)
	aarch64_set_vec_u32 (cpu, vd, i, val);

    case 0xa: /* 16-bit, 8 bit shift. */
    case 0x8: /* 16-bit, no shift. */

      for (i = 0; i < (full ? 8 : 4); i++)
	aarch64_set_vec_u16 (cpu, vd, i, val);

    case 0xd: /* 32-bit, mask shift by 16. */
    case 0xc: /* 32-bit, mask shift by 8. */

      for (i = 0; i < (full ? 4 : 2); i++)
	aarch64_set_vec_u32 (cpu, vd, i, val);

    case 0xE: /* MOVI Dn, #mask64 */
      /* Each immediate bit expands to a full byte of the mask.  */
      for (i = 0; i < 8; i++)
	  /* NOTE(review): 0xFFUL is only 32 bits wide on ILP32 hosts,
	     where shifting it by up to 56 is undefined -- this should
	     probably be 0xFFULL.  */
	  mask |= (0xFFUL << (i * 8));

      aarch64_set_vec_u64 (cpu, vd, 0, mask);
      aarch64_set_vec_u64 (cpu, vd, 1, mask);

    case 0xf: /* FMOV Vd.2D, #fpimm. */
	double u = fp_immediate_for_encoding_64 (val);

	aarch64_set_vec_double (cpu, vd, 0, u);
	aarch64_set_vec_double (cpu, vd, 1, u);
/* Absolute value.  NOTE(review): evaluates A twice (keep side-effect
   free arguments) and overflows for the most negative signed value.  */
#define ABS(A) ((A) < 0 ? - (A) : (A))

/* ABS Vd.T, Vn.T: element-wise absolute value, with the element size
   selected by instr[23,22].  */
do_vec_ABS (sim_cpu *cpu)
  /* instr[30]    = half(0)/full(1)
     instr[29,24] = 00 1110
     instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
     instr[21,10] = 10 0000 1011 10  */

  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  unsigned full = INSTR (30, 30);

  NYI_assert (29, 24, 0x0E);
  NYI_assert (21, 10, 0x82E);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (INSTR (23, 22))
      /* 8-bit elements.  */
      for (i = 0; i < (full ? 16 : 8); i++)
	aarch64_set_vec_s8 (cpu, vd, i,
			    ABS (aarch64_get_vec_s8 (cpu, vn, i)));

      /* 16-bit elements.  */
      for (i = 0; i < (full ? 8 : 4); i++)
	aarch64_set_vec_s16 (cpu, vd, i,
			     ABS (aarch64_get_vec_s16 (cpu, vn, i)));

      /* 32-bit elements.  */
      for (i = 0; i < (full ? 4 : 2); i++)
	aarch64_set_vec_s32 (cpu, vd, i,
			     ABS (aarch64_get_vec_s32 (cpu, vn, i)));

      /* 64-bit elements (128-bit form only; guard elided here).  */
      for (i = 0; i < 2; i++)
	aarch64_set_vec_s64 (cpu, vd, i,
			     ABS (aarch64_get_vec_s64 (cpu, vn, i)));
/* ADDV: sum every element of the source vector into the scalar
   destination (the accumulator declaration is on an elided line).  */
do_vec_ADDV (sim_cpu *cpu)
  /* instr[30]    = full/half selector
     instr[29,24] = 00 1110
     instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
     instr[21,10] = 11 0001 1011 10  */

  unsigned vm = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  int full = INSTR (30, 30);

  NYI_assert (29, 24, 0x0E);
  NYI_assert (21, 10, 0xC6E);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (INSTR (23, 22))
      /* 8-bit elements.  */
      for (i = 0; i < (full ? 16 : 8); i++)
	val += aarch64_get_vec_u8 (cpu, vm, i);
      aarch64_set_reg_u64 (cpu, rd, NO_SP, val);

      /* 16-bit elements.  */
      for (i = 0; i < (full ? 8 : 4); i++)
	val += aarch64_get_vec_u16 (cpu, vm, i);
      aarch64_set_reg_u64 (cpu, rd, NO_SP, val);

      /* 32-bit elements.  */
      for (i = 0; i < (full ? 4 : 2); i++)
	val += aarch64_get_vec_u32 (cpu, vm, i);
      aarch64_set_reg_u64 (cpu, rd, NO_SP, val);

      /* 64-bit pair (128-bit form; its guard is elided here).  */
      val = aarch64_get_vec_u64 (cpu, vm, 0);
      val += aarch64_get_vec_u64 (cpu, vm, 1);
      aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
/* Move a 32-bit or 64-bit element between a vector register and a
   general register; instr[13] selects the direction.  */
do_vec_ins_2 (sim_cpu *cpu)
  /* instr[31,21] = 01001110000
     instr[20,18] = size & element selector
     instr[13]    = direction: to vec(0), from vec (1)  */

  unsigned vm = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);

  NYI_assert (31, 21, 0x270);
  NYI_assert (17, 14, 0);
  NYI_assert (12, 10, 7);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (INSTR (13, 13) == 1)
      /* Vector element to general register.  */
      if (INSTR (18, 18) == 1)
	  /* 32-bit element; index in instr[20,19].  */
	  elem = INSTR (20, 19);
	  aarch64_set_reg_u64 (cpu, vd, NO_SP,
			       aarch64_get_vec_u32 (cpu, vm, elem));
	  /* 64-bit element (the rejection of other encodings is on
	     elided lines).  */
	  if (INSTR (19, 19) != 1)

	  elem = INSTR (20, 20);
	  aarch64_set_reg_u64 (cpu, vd, NO_SP,
			       aarch64_get_vec_u64 (cpu, vm, elem));

      /* General register to vector element.  */
      if (INSTR (18, 18) == 1)
	  elem = INSTR (20, 19);	/* 32-bit element.  */
	  aarch64_set_vec_u32 (cpu, vd, elem,
			       aarch64_get_reg_u32 (cpu, vm, NO_SP));

	  if (INSTR (19, 19) != 1)

	  elem = INSTR (20, 20);	/* 64-bit element.  */
	  aarch64_set_vec_u64 (cpu, vd, elem,
			       aarch64_get_reg_u64 (cpu, vm, NO_SP));
/* Element-wise multiply helper: read N elements of Vn and Vm (offset
   by BIAS) with the READ_TYPE accessor into temporaries, then store
   the N products with the WRITE_TYPE accessor.  Reading everything
   first keeps Vd == Vn/Vm aliasing safe.  Relies on cpu, vn, vm, vd,
   bias and i being in scope at the expansion site.  */
#define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE) \
  DST_TYPE a[N], b[N]; \
  for (i = 0; i < (N); i++) \
      a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias); \
      b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias); \
  for (i = 0; i < (N); i++) \
    aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]); \
/* SMULL/UMULL{2}: widening multiply of the lower (or upper, when
   instr[30] is set) half of each source vector.  */
do_vec_mull (sim_cpu *cpu)
  /* instr[30]    = lower(0)/upper(1) selector
     instr[29]    = signed(0)/unsigned(1)
     instr[28,24] = 0 1110
     instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
     instr[15,10] = 11 0000  */

  int unsign = INSTR (29, 29);
  int bias = INSTR (30, 30);	/* 0 => low half, 1 => high half.  */
  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR ( 9, 5);
  unsigned vd = INSTR ( 4, 0);

  NYI_assert (28, 24, 0x0E);
  NYI_assert (15, 10, 0x30);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* NB: Read source values before writing results, in case
     the source and destination vectors are the same. */
  switch (INSTR (23, 22))
      /* 8 -> 16 bit widening multiply; bias scales to elements 8..15
	 for the "2" (upper half) forms.  */
	DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16);
	DO_VEC_WIDENING_MUL (8, int16_t, s8, s16);

      /* 16 -> 32 bit.  */
	DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32);
	DO_VEC_WIDENING_MUL (4, int32_t, s16, s32);

      /* 32 -> 64 bit.  */
	DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64);
	DO_VEC_WIDENING_MUL (2, int64_t, s32, s64);
/* FADD/FSUB (vector): instr[23] selects subtract, instr[22] selects
   double vs float elements (both selector "if" lines are elided from
   this excerpt).  */
do_vec_fadd (sim_cpu *cpu)
  /* instr[30]    = half(0)/full(1)
     instr[29,24] = 001110
     instr[23]    = FADD(0)/FSUB(1)
     instr[22]    = float (0)/double(1)
     instr[15,10] = 110101  */

  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  int full = INSTR (30, 30);

  NYI_assert (29, 24, 0x0E);
  NYI_assert (21, 21, 1);
  NYI_assert (15, 10, 0x35);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
      /* FSUB, double elements (128-bit form).  */
      for (i = 0; i < 2; i++)
	aarch64_set_vec_double (cpu, vd, i,
				aarch64_get_vec_double (cpu, vn, i)
				- aarch64_get_vec_double (cpu, vm, i));

      /* FSUB, float elements.  */
      for (i = 0; i < (full ? 4 : 2); i++)
	aarch64_set_vec_float (cpu, vd, i,
			       aarch64_get_vec_float (cpu, vn, i)
			       - aarch64_get_vec_float (cpu, vm, i));

      /* FADD, double elements (128-bit form).  */
      for (i = 0; i < 2; i++)
	aarch64_set_vec_double (cpu, vd, i,
				aarch64_get_vec_double (cpu, vm, i)
				+ aarch64_get_vec_double (cpu, vn, i));

      /* FADD, float elements.  */
      for (i = 0; i < (full ? 4 : 2); i++)
	aarch64_set_vec_float (cpu, vd, i,
			       aarch64_get_vec_float (cpu, vm, i)
			       + aarch64_get_vec_float (cpu, vn, i));
/* ADD (vector): element-wise integer addition, element size selected
   by instr[23,22].  */
do_vec_add (sim_cpu *cpu)
  /* instr[30]    = full/half selector
     instr[29,24] = 001110
     instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
     instr[15,10] = 100001  */

  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  int full = INSTR (30, 30);

  NYI_assert (29, 24, 0x0E);
  NYI_assert (21, 21, 1);
  NYI_assert (15, 10, 0x21);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (INSTR (23, 22))
      /* 8-bit elements.  */
      for (i = 0; i < (full ? 16 : 8); i++)
	aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
			    + aarch64_get_vec_u8 (cpu, vm, i));

      /* 16-bit elements.  */
      for (i = 0; i < (full ? 8 : 4); i++)
	aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
			     + aarch64_get_vec_u16 (cpu, vm, i));

      /* 32-bit elements.  */
      for (i = 0; i < (full ? 4 : 2); i++)
	aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
			     + aarch64_get_vec_u32 (cpu, vm, i));

      /* 64-bit elements (128-bit form; its guard is elided here).  */
      aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0)
			   + aarch64_get_vec_u64 (cpu, vm, 0));
      aarch64_set_vec_u64 (cpu, vd, 1,
			   aarch64_get_vec_u64 (cpu, vn, 1)
			   + aarch64_get_vec_u64 (cpu, vm, 1));
/* MUL (vector): element-wise multiply at the element's own width; the
   widening macro is used with equal read/write widths, so no actual
   widening occurs.  */
do_vec_mul (sim_cpu *cpu)
  /* instr[30]    = full/half selector
     instr[29,24] = 00 1110
     instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
     instr[15,10] = 10 0111  */

  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  int full = INSTR (30, 30);

  NYI_assert (29, 24, 0x0E);
  NYI_assert (21, 21, 1);
  NYI_assert (15, 10, 0x27);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (INSTR (23, 22))
      /* 8-bit elements.  */
      DO_VEC_WIDENING_MUL (full ? 16 : 8, uint8_t, u8, u8);

      /* 16-bit elements.  */
      DO_VEC_WIDENING_MUL (full ? 8 : 4, uint16_t, u16, u16);

      /* 32-bit elements.  */
      DO_VEC_WIDENING_MUL (full ? 4 : 2, uint32_t, u32, u32);
3745 do_vec_MLA (sim_cpu *cpu)
3748 instr[30] = full/half selector
3749 instr[29,24] = 00 1110
3750 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3753 instr[15,10] = 1001 01
3757 unsigned vm = INSTR (20, 16);
3758 unsigned vn = INSTR (9, 5);
3759 unsigned vd = INSTR (4, 0);
3761 int full = INSTR (30, 30);
3763 NYI_assert (29, 24, 0x0E);
3764 NYI_assert (21, 21, 1);
3765 NYI_assert (15, 10, 0x25);
3767 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3768 switch (INSTR (23, 22))
3772 uint16_t a[16], b[16];
3774 for (i = 0; i < (full ? 16 : 8); i++)
3776 a[i] = aarch64_get_vec_u8 (cpu, vn, i);
3777 b[i] = aarch64_get_vec_u8 (cpu, vm, i);
3780 for (i = 0; i < (full ? 16 : 8); i++)
3782 uint16_t v = aarch64_get_vec_u8 (cpu, vd, i);
3784 aarch64_set_vec_u16 (cpu, vd, i, v + (a[i] * b[i]));
3791 uint32_t a[8], b[8];
3793 for (i = 0; i < (full ? 8 : 4); i++)
3795 a[i] = aarch64_get_vec_u16 (cpu, vn, i);
3796 b[i] = aarch64_get_vec_u16 (cpu, vm, i);
3799 for (i = 0; i < (full ? 8 : 4); i++)
3801 uint32_t v = aarch64_get_vec_u16 (cpu, vd, i);
3803 aarch64_set_vec_u32 (cpu, vd, i, v + (a[i] * b[i]));
3810 uint64_t a[4], b[4];
3812 for (i = 0; i < (full ? 4 : 2); i++)
3814 a[i] = aarch64_get_vec_u32 (cpu, vn, i);
3815 b[i] = aarch64_get_vec_u32 (cpu, vm, i);
3818 for (i = 0; i < (full ? 4 : 2); i++)
3820 uint64_t v = aarch64_get_vec_u32 (cpu, vd, i);
3822 aarch64_set_vec_u64 (cpu, vd, i, v + (a[i] * b[i]));
/* maxNM on floats: the larger of A and B, except that a NaN operand
   loses to a number; NaN is returned only when both are NaN.  */
static float
fmaxnm (float a, float b)
{
  if (isnan (a))
    return isnan (b) ? a : b;
  if (isnan (b))
    return a;
  return a > b ? a : b;
}
/* minNM on floats: the smaller of A and B, except that a NaN operand
   loses to a number; NaN is returned only when both are NaN.  */
static float
fminnm (float a, float b)
{
  if (isnan (a))
    return isnan (b) ? a : b;
  if (isnan (b))
    return a;
  return a < b ? a : b;
}
/* maxNM on doubles: the larger of A and B, except that a NaN operand
   loses to a number; NaN is returned only when both are NaN.  */
static double
dmaxnm (double a, double b)
{
  if (isnan (a))
    return isnan (b) ? a : b;
  if (isnan (b))
    return a;
  return a > b ? a : b;
}
/* minNM on doubles: the smaller of A and B, except that a NaN operand
   loses to a number; NaN is returned only when both are NaN.  */
static double
dminnm (double a, double b)
{
  if (isnan (a))
    return isnan (b) ? a : b;
  if (isnan (b))
    return a;
  return a < b ? a : b;
}
3889 do_vec_FminmaxNMP (sim_cpu *cpu)
3892 instr [30] = half (0)/full (1)
3893 instr [29,24] = 10 1110
3894 instr [23] = max(0)/min(1)
3895 instr [22] = float (0)/double (1)
3898 instr [15,10] = 1100 01
3900 instr [4.0] = Vd. */
3902 unsigned vm = INSTR (20, 16);
3903 unsigned vn = INSTR (9, 5);
3904 unsigned vd = INSTR (4, 0);
3905 int full = INSTR (30, 30);
3907 NYI_assert (29, 24, 0x2E);
3908 NYI_assert (21, 21, 1);
3909 NYI_assert (15, 10, 0x31);
3911 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3914 double (* fn)(double, double) = INSTR (23, 23)
3919 aarch64_set_vec_double (cpu, vd, 0,
3920 fn (aarch64_get_vec_double (cpu, vn, 0),
3921 aarch64_get_vec_double (cpu, vn, 1)));
3922 aarch64_set_vec_double (cpu, vd, 0,
3923 fn (aarch64_get_vec_double (cpu, vm, 0),
3924 aarch64_get_vec_double (cpu, vm, 1)));
3928 float (* fn)(float, float) = INSTR (23, 23)
3931 aarch64_set_vec_float (cpu, vd, 0,
3932 fn (aarch64_get_vec_float (cpu, vn, 0),
3933 aarch64_get_vec_float (cpu, vn, 1)));
3935 aarch64_set_vec_float (cpu, vd, 1,
3936 fn (aarch64_get_vec_float (cpu, vn, 2),
3937 aarch64_get_vec_float (cpu, vn, 3)));
3939 aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
3940 fn (aarch64_get_vec_float (cpu, vm, 0),
3941 aarch64_get_vec_float (cpu, vm, 1)));
3943 aarch64_set_vec_float (cpu, vd, 3,
3944 fn (aarch64_get_vec_float (cpu, vm, 2),
3945 aarch64_get_vec_float (cpu, vm, 3)));
/* AND (vector): bitwise AND of Vn and Vm, performed 32 bits at a time
   (element size is irrelevant for a pure bitwise operation).  */
do_vec_AND (sim_cpu *cpu)
  /* instr[30]    = half (0)/full (1)
     instr[29,21] = 001110001
     instr[15,10] = 000111  */

  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  int full = INSTR (30, 30);

  NYI_assert (29, 21, 0x071);
  NYI_assert (15, 10, 0x07);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  for (i = 0; i < (full ? 4 : 2); i++)
    aarch64_set_vec_u32 (cpu, vd, i,
			 aarch64_get_vec_u32 (cpu, vn, i)
			 & aarch64_get_vec_u32 (cpu, vm, i));
/* BSL (bitwise select): for each bit, take Vn where the corresponding
   Vd bit is set and Vm where it is clear -- Vd acts as the selector
   and receives the result.  */
do_vec_BSL (sim_cpu *cpu)
  /* instr[30]    = half (0)/full (1)
     instr[29,21] = 101110011
     instr[15,10] = 000111  */

  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  int full = INSTR (30, 30);

  NYI_assert (29, 21, 0x173);
  NYI_assert (15, 10, 0x07);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  for (i = 0; i < (full ? 16 : 8); i++)
    aarch64_set_vec_u8 (cpu, vd, i,
			(    aarch64_get_vec_u8 (cpu, vd, i)
			   & aarch64_get_vec_u8 (cpu, vn, i))
			| ((~ aarch64_get_vec_u8 (cpu, vd, i))
			   & aarch64_get_vec_u8 (cpu, vm, i)));
/* EOR (vector): bitwise exclusive-OR of Vn and Vm, 32 bits at a
   time.  */
do_vec_EOR (sim_cpu *cpu)
  /* instr[30]    = half (0)/full (1)
     instr[29,21] = 10 1110 001
     instr[15,10] = 000111  */

  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  int full = INSTR (30, 30);

  NYI_assert (29, 21, 0x171);
  NYI_assert (15, 10, 0x07);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  for (i = 0; i < (full ? 4 : 2); i++)
    aarch64_set_vec_u32 (cpu, vd, i,
			 aarch64_get_vec_u32 (cpu, vn, i)
			 ^ aarch64_get_vec_u32 (cpu, vm, i));
4033 do_vec_bit (sim_cpu *cpu)
4036 instr[30] = half (0)/full (1)
4037 instr[29,23] = 10 1110 1
4038 instr[22] = BIT (0) / BIF (1)
4041 instr[15,10] = 0001 11
4045 unsigned vm = INSTR (20, 16);
4046 unsigned vn = INSTR (9, 5);
4047 unsigned vd = INSTR (4, 0);
4048 unsigned full = INSTR (30, 30);
4049 unsigned test_false = INSTR (22, 22);
4052 NYI_assert (29, 23, 0x5D);
4053 NYI_assert (21, 21, 1);
4054 NYI_assert (15, 10, 0x07);
4056 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4059 for (i = 0; i < (full ? 16 : 8); i++)
4060 if (aarch64_get_vec_u32 (cpu, vn, i) == 0)
4061 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vm, i));
4065 for (i = 0; i < (full ? 16 : 8); i++)
4066 if (aarch64_get_vec_u32 (cpu, vn, i) != 0)
4067 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vm, i));
/* ORN (vector): bitwise OR of Vn with the complement of Vm, one byte
   at a time.  */
do_vec_ORN (sim_cpu *cpu)
  /* instr[30]    = half (0)/full (1)
     instr[29,21] = 00 1110 111
     instr[15,10] = 00 0111  */

  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  int full = INSTR (30, 30);

  NYI_assert (29, 21, 0x077);
  NYI_assert (15, 10, 0x07);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  for (i = 0; i < (full ? 16 : 8); i++)
    aarch64_set_vec_u8 (cpu, vd, i,
			aarch64_get_vec_u8 (cpu, vn, i)
			| ~ aarch64_get_vec_u8 (cpu, vm, i));
/* ORR (vector): bitwise OR of Vn and Vm, one byte at a time.  */
do_vec_ORR (sim_cpu *cpu)
  /* instr[30]    = half (0)/full (1)
     instr[29,21] = 00 1110 101
     instr[15,10] = 0001 11  */

  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  int full = INSTR (30, 30);

  NYI_assert (29, 21, 0x075);
  NYI_assert (15, 10, 0x07);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  for (i = 0; i < (full ? 16 : 8); i++)
    aarch64_set_vec_u8 (cpu, vd, i,
			aarch64_get_vec_u8 (cpu, vn, i)
			| aarch64_get_vec_u8 (cpu, vm, i));
/* Vector bitwise bit-clear (BIC, register form):
   vd.b[i] = vn.b[i] & ~vm.b[i] over 8 or 16 byte lanes.  */
4126 do_vec_BIC (sim_cpu *cpu)
4129 instr[30] = half (0)/full (1)
4130 instr[29,21] = 00 1110 011
4132 instr[15,10] = 00 0111
4136 unsigned vm = INSTR (20, 16);
4137 unsigned vn = INSTR (9, 5);
4138 unsigned vd = INSTR (4, 0);
4140 int full = INSTR (30, 30);
4142 NYI_assert (29, 21, 0x073);
4143 NYI_assert (15, 10, 0x07);
4145 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4146 for (i = 0; i < (full ? 16 : 8); i++)
4147 aarch64_set_vec_u8 (cpu, vd, i,
4148 aarch64_get_vec_u8 (cpu, vn, i)
4149 & ~ aarch64_get_vec_u8 (cpu, vm, i));
4153 do_vec_XTN (sim_cpu *cpu)
4156 instr[30] = first part (0)/ second part (1)
4157 instr[29,24] = 00 1110
4158 instr[23,22] = size: byte(00), half(01), word (10)
4159 instr[21,10] = 1000 0100 1010
4163 unsigned vs = INSTR (9, 5);
4164 unsigned vd = INSTR (4, 0);
4165 unsigned bias = INSTR (30, 30);
4168 NYI_assert (29, 24, 0x0E);
4169 NYI_assert (21, 10, 0x84A);
4171 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4172 switch (INSTR (23, 22))
4176 for (i = 0; i < 8; i++)
4177 aarch64_set_vec_u8 (cpu, vd, i + 8,
4178 aarch64_get_vec_u16 (cpu, vs, i) >> 8);
4180 for (i = 0; i < 8; i++)
4181 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, i));
4186 for (i = 0; i < 4; i++)
4187 aarch64_set_vec_u16 (cpu, vd, i + 4,
4188 aarch64_get_vec_u32 (cpu, vs, i) >> 16);
4190 for (i = 0; i < 4; i++)
4191 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, i));
4196 for (i = 0; i < 2; i++)
4197 aarch64_set_vec_u32 (cpu, vd, i + 4,
4198 aarch64_get_vec_u64 (cpu, vs, i) >> 32);
4200 for (i = 0; i < 2; i++)
4201 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, i));
/* Across-lanes integer max/min reduction: SMAXV, SMINV, UMAXV, UMINV.
   Reduces all lanes of Vs with max or min and writes the scalar
   result to general register rd.  Signedness comes from instr[29],
   max vs min from instr[16]; together they select the four cases.  */
4207 do_vec_maxv (sim_cpu *cpu)
4210 instr[30] = half(0)/full(1)
4211 instr[29] = signed (0)/unsigned(1)
4212 instr[28,24] = 0 1110
4213 instr[23,22] = size: byte(00), half(01), word (10)
4215 instr[20,17] = 1 000
4216 instr[16] = max(0)/min(1)
4217 instr[15,10] = 1010 10
4218 instr[9,5] = V source
4219 instr[4,0] = R dest. */
4221 unsigned vs = INSTR (9, 5);
4222 unsigned rd = INSTR (4, 0);
4223 unsigned full = INSTR (30, 30);
4226 NYI_assert (28, 24, 0x0E);
4227 NYI_assert (21, 21, 1);
4228 NYI_assert (20, 17, 8);
4229 NYI_assert (15, 10, 0x2A);
4231 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* decode = (signedness << 1) | min-flag: 0=SMAXV 1=SMINV 2=UMAXV 3=UMINV.  */
4232 switch ((INSTR (29, 29) << 1) | INSTR (16, 16))
4234 case 0: /* SMAXV. */
4237 switch (INSTR (23, 22))
/* Seed the accumulator with lane 0, then fold in the remaining lanes;
   lane count depends on element size and half/full register width.  */
4240 smax = aarch64_get_vec_s8 (cpu, vs, 0);
4241 for (i = 1; i < (full ? 16 : 8); i++)
4242 smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i));
4245 smax = aarch64_get_vec_s16 (cpu, vs, 0);
4246 for (i = 1; i < (full ? 8 : 4); i++)
4247 smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i));
4250 smax = aarch64_get_vec_s32 (cpu, vs, 0);
4251 for (i = 1; i < (full ? 4 : 2); i++)
4252 smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i));
4257 aarch64_set_reg_s64 (cpu, rd, NO_SP, smax);
4261 case 1: /* SMINV. */
4264 switch (INSTR (23, 22))
4267 smin = aarch64_get_vec_s8 (cpu, vs, 0);
4268 for (i = 1; i < (full ? 16 : 8); i++)
4269 smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i));
4272 smin = aarch64_get_vec_s16 (cpu, vs, 0);
4273 for (i = 1; i < (full ? 8 : 4); i++)
4274 smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i));
4277 smin = aarch64_get_vec_s32 (cpu, vs, 0);
4278 for (i = 1; i < (full ? 4 : 2); i++)
4279 smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i));
4285 aarch64_set_reg_s64 (cpu, rd, NO_SP, smin);
4289 case 2: /* UMAXV. */
4292 switch (INSTR (23, 22))
4295 umax = aarch64_get_vec_u8 (cpu, vs, 0);
4296 for (i = 1; i < (full ? 16 : 8); i++)
4297 umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i));
4300 umax = aarch64_get_vec_u16 (cpu, vs, 0);
4301 for (i = 1; i < (full ? 8 : 4); i++)
4302 umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i));
4305 umax = aarch64_get_vec_u32 (cpu, vs, 0);
4306 for (i = 1; i < (full ? 4 : 2); i++)
4307 umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i));
4313 aarch64_set_reg_u64 (cpu, rd, NO_SP, umax);
4317 case 3: /* UMINV. */
4320 switch (INSTR (23, 22))
4323 umin = aarch64_get_vec_u8 (cpu, vs, 0);
4324 for (i = 1; i < (full ? 16 : 8); i++)
4325 umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i));
4328 umin = aarch64_get_vec_u16 (cpu, vs, 0);
4329 for (i = 1; i < (full ? 8 : 4); i++)
4330 umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i));
4333 umin = aarch64_get_vec_u32 (cpu, vs, 0);
4334 for (i = 1; i < (full ? 4 : 2); i++)
4335 umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i));
4341 aarch64_set_reg_u64 (cpu, rd, NO_SP, umin);
/* Floating-point across-lanes reduction: FMAXV, FMINV, FMAXNMV,
   FMINNMV over the four single-precision lanes of Vs; the scalar
   result is written to FP register rd.  The NM variants use the
   fmaxnm/fminnm helpers (NaN-aware "number" semantics).  */
4348 do_vec_fminmaxV (sim_cpu *cpu)
4350 /* instr[31,24] = 0110 1110
4351 instr[23] = max(0)/min(1)
4352 instr[22,14] = 011 0000 11
4353 instr[13,12] = nm(00)/normal(11)
4355 instr[9,5] = V source
4356 instr[4,0] = R dest. */
4358 unsigned vs = INSTR (9, 5);
4359 unsigned rd = INSTR (4, 0);
/* Seed the reduction with lane 0.  */
4361 float res = aarch64_get_vec_float (cpu, vs, 0);
4363 NYI_assert (31, 24, 0x6E);
4364 NYI_assert (22, 14, 0x0C3);
4365 NYI_assert (11, 10, 2);
4367 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4370 switch (INSTR (13, 12))
4372 case 0: /* FMINNMV. */
4373 for (i = 1; i < 4; i++)
4374 res = fminnm (res, aarch64_get_vec_float (cpu, vs, i));
4377 case 3: /* FMINV. */
4378 for (i = 1; i < 4; i++)
4379 res = min (res, aarch64_get_vec_float (cpu, vs, i));
4388 switch (INSTR (13, 12))
4390 case 0: /* FMAXNMV. */
4391 for (i = 1; i < 4; i++)
4392 res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
4395 case 3: /* FMAXV. */
4396 for (i = 1; i < 4; i++)
4397 res = max (res, aarch64_get_vec_float (cpu, vs, i));
4405 aarch64_set_FP_float (cpu, rd, res);
/* Vector element-wise floating-point max/min:
   FMAX, FMIN, FMAXNM, FMINNM; vd[i] = func (vn[i], vm[i]).
   instr[23] picks min vs max, instr[22] float vs double,
   instr[13,12] the NaN-aware "NM" variant vs the normal one.  */
4409 do_vec_Fminmax (sim_cpu *cpu)
4412 instr[30] = half(0)/full(1)
4413 instr[29,24] = 00 1110
4414 instr[23] = max(0)/min(1)
4415 instr[22] = float(0)/double(1)
4419 instr[13,12] = nm(00)/normal(11)
4424 unsigned vm = INSTR (20, 16);
4425 unsigned vn = INSTR (9, 5);
4426 unsigned vd = INSTR (4, 0);
4427 unsigned full = INSTR (30, 30);
4428 unsigned min = INSTR (23, 23);
4431 NYI_assert (29, 24, 0x0E);
4432 NYI_assert (21, 21, 1);
4433 NYI_assert (15, 14, 3);
4434 NYI_assert (11, 10, 1);
4436 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* Double-precision path: pick the combining function, then apply it
   lane-wise over the two 64-bit lanes.  */
4439 double (* func)(double, double);
4444 if (INSTR (13, 12) == 0)
4445 func = min ? dminnm : dmaxnm;
4446 else if (INSTR (13, 12) == 3)
4447 func = min ? fmin : fmax;
4451 for (i = 0; i < 2; i++)
4452 aarch64_set_vec_double (cpu, vd, i,
4453 func (aarch64_get_vec_double (cpu, vn, i),
4454 aarch64_get_vec_double (cpu, vm, i)));
/* Single-precision path: 2 lanes for half, 4 for full.  */
4458 float (* func)(float, float);
4460 if (INSTR (13, 12) == 0)
4461 func = min ? fminnm : fmaxnm;
4462 else if (INSTR (13, 12) == 3)
4463 func = min ? fminf : fmaxf;
4467 for (i = 0; i < (full ? 4 : 2); i++)
4468 aarch64_set_vec_float (cpu, vd, i,
4469 func (aarch64_get_vec_float (cpu, vn, i),
4470 aarch64_get_vec_float (cpu, vm, i)));
4475 do_vec_SCVTF (sim_cpu *cpu)
4479 instr[29,23] = 00 1110 0
4480 instr[22] = float(0)/double(1)
4481 instr[21,10] = 10 0001 1101 10
4485 unsigned vn = INSTR (9, 5);
4486 unsigned vd = INSTR (4, 0);
4487 unsigned full = INSTR (30, 30);
4488 unsigned size = INSTR (22, 22);
4491 NYI_assert (29, 23, 0x1C);
4492 NYI_assert (21, 10, 0x876);
4494 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4500 for (i = 0; i < 2; i++)
4502 double val = (double) aarch64_get_vec_u64 (cpu, vn, i);
4503 aarch64_set_vec_double (cpu, vd, i, val);
4508 for (i = 0; i < (full ? 4 : 2); i++)
4510 float val = (float) aarch64_get_vec_u32 (cpu, vn, i);
4511 aarch64_set_vec_float (cpu, vd, i, val);
/* Element-wise vector compare against a second vector register:
   for each lane, vd[i] = (vn[i] CMP vm[i]) ? all-ones : 0.
   SOURCE is the accessor signedness prefix (s or u); the expansion
   switches on the element size with lane counts scaled by 'full'.  */
4516 #define VEC_CMP(SOURCE, CMP) \
4522 for (i = 0; i < (full ? 16 : 8); i++) \
4523 aarch64_set_vec_u8 (cpu, vd, i, \
4524 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4526 aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \
4530 for (i = 0; i < (full ? 8 : 4); i++) \
4531 aarch64_set_vec_u16 (cpu, vd, i, \
4532 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4534 aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \
4538 for (i = 0; i < (full ? 4 : 2); i++) \
4539 aarch64_set_vec_u32 (cpu, vd, i, \
4540 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4542 aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \
4548 for (i = 0; i < 2; i++) \
4549 aarch64_set_vec_u64 (cpu, vd, i, \
4550 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4552 aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \
/* Element-wise vector compare against zero:
   for each lane, vd[i] = (vn[i] CMP 0) ? all-ones : 0.
   Same structure as VEC_CMP but the right-hand operand is the
   literal 0 instead of a vm lane.  */
4559 #define VEC_CMP0(SOURCE, CMP) \
4565 for (i = 0; i < (full ? 16 : 8); i++) \
4566 aarch64_set_vec_u8 (cpu, vd, i, \
4567 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4571 for (i = 0; i < (full ? 8 : 4); i++) \
4572 aarch64_set_vec_u16 (cpu, vd, i, \
4573 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4577 for (i = 0; i < (full ? 4 : 2); i++) \
4578 aarch64_set_vec_u32 (cpu, vd, i, \
4585 for (i = 0; i < 2; i++) \
4586 aarch64_set_vec_u64 (cpu, vd, i, \
4587 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4588 CMP 0 ? -1ULL : 0); \
/* Floating-point vector compare against 0.0:
   vd[i] = (vn[i] CMP 0.0) ? all-ones : 0, with instr[22] selecting
   double (two 64-bit lanes) vs single (2 or 4 32-bit lanes).  */
4594 #define VEC_FCMP0(CMP) \
4599 if (INSTR (22, 22)) \
4603 for (i = 0; i < 2; i++) \
4604 aarch64_set_vec_u64 (cpu, vd, i, \
4605 aarch64_get_vec_double (cpu, vn, i) \
4606 CMP 0.0 ? -1 : 0); \
4610 for (i = 0; i < (full ? 4 : 2); i++) \
4611 aarch64_set_vec_u32 (cpu, vd, i, \
4612 aarch64_get_vec_float (cpu, vn, i) \
4613 CMP 0.0 ? -1 : 0); \
/* Floating-point vector compare against a second vector register:
   vd[i] = (vn[i] CMP vm[i]) ? all-ones : 0, double vs single
   selected by instr[22] as in VEC_FCMP0.  */
4619 #define VEC_FCMP(CMP) \
4622 if (INSTR (22, 22)) \
4626 for (i = 0; i < 2; i++) \
4627 aarch64_set_vec_u64 (cpu, vd, i, \
4628 aarch64_get_vec_double (cpu, vn, i) \
4630 aarch64_get_vec_double (cpu, vm, i) \
4635 for (i = 0; i < (full ? 4 : 2); i++) \
4636 aarch64_set_vec_u32 (cpu, vd, i, \
4637 aarch64_get_vec_float (cpu, vn, i) \
4639 aarch64_get_vec_float (cpu, vm, i) \
/* Decode and dispatch the vector compare group.  Distinguishes
   integer vs floating-point compares, register vs compare-with-zero
   forms, then expands the matching VEC_CMP/VEC_CMP0/VEC_FCMP/
   VEC_FCMP0 macro.  A few non-compare encodings that alias into this
   space (maxv, fminmaxV, SCVTF) are re-dispatched first.  */
4647 do_vec_compare (sim_cpu *cpu)
4650 instr[30] = half(0)/full(1)
4651 instr[29] = part-of-comparison-type
4652 instr[28,24] = 0 1110
4653 instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
4654 type of float compares: single (-0) / double (-1)
4656 instr[20,16] = Vm or 00000 (compare vs 0)
4657 instr[15,10] = part-of-comparison-type
4661 int full = INSTR (30, 30);
4662 int size = INSTR (23, 22);
4663 unsigned vm = INSTR (20, 16);
4664 unsigned vn = INSTR (9, 5);
4665 unsigned vd = INSTR (4, 0);
4668 NYI_assert (28, 24, 0x0E);
4669 NYI_assert (21, 21, 1);
4671 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4674 || ((INSTR (11, 11) == 0
4675 && INSTR (10, 10) == 0)))
4677 /* A compare vs 0. */
/* Aliased encodings that are not compares: redirect to the right
   handler before decoding as a comparison.  */
4680 if (INSTR (15, 10) == 0x2A)
4682 else if (INSTR (15, 10) == 0x32
4683 || INSTR (15, 10) == 0x3E)
4684 do_vec_fminmaxV (cpu);
4685 else if (INSTR (29, 23) == 0x1C
4686 && INSTR (21, 10) == 0x876)
4696 /* A floating point compare. */
/* decode packs instr[29], instr[23] and low opcode bits into one
   switch key for the FP compare variants.  */
4697 unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4)
4700 NYI_assert (15, 15, 1);
4704 case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
4705 case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
4706 case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
4707 case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
4708 case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
4709 case /* 0b111001: GT */ 0x39: VEC_FCMP (>);
4710 case /* 0b101001: GE */ 0x29: VEC_FCMP (>=);
4711 case /* 0b001001: EQ */ 0x09: VEC_FCMP (==);
/* Integer compares: decode = (instr[29] << 6) | opcode bits.  */
4719 unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10);
4723 case 0x0D: /* 0001101 GT */ VEC_CMP (s, > );
4724 case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= );
4725 case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > );
4726 case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == );
4727 case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < );
4728 case 0x4D: /* 1001101 HI */ VEC_CMP (u, > );
4729 case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= );
4730 case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= );
4731 case 0x63: /* 1100011 EQ */ VEC_CMP (u, == );
4732 case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= );
/* Vector signed shift left by register (SSHL): each signed lane of
   vn is shifted by the signed byte-sized count taken from the
   corresponding position of vm (stride i, i*2, i*4, i*8 per size).
   NOTE(review): per the A64 spec a negative count means shift right;
   the visible branches appear to implement that — confirm against
   the full source.  */
4743 do_vec_SSHL (sim_cpu *cpu)
4745 instr[30] = first part (0)/ second part (1)
4746 instr[29,24] = 00 1110
4747 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4750 instr[15,10] = 0100 01
4754 unsigned full = INSTR (30, 30);
4755 unsigned vm = INSTR (20, 16);
4756 unsigned vn = INSTR (9, 5);
4757 unsigned vd = INSTR (4, 0);
4761 NYI_assert (29, 24, 0x0E);
4762 NYI_assert (21, 21, 1);
4763 NYI_assert (15, 10, 0x11);
4765 /* FIXME: What is a signed shift left in this context ?. */
4767 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4768 switch (INSTR (23, 22))
4771 for (i = 0; i < (full ? 16 : 8); i++)
4773 shift = aarch64_get_vec_s8 (cpu, vm, i);
4775 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4778 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4784 for (i = 0; i < (full ? 8 : 4); i++)
4786 shift = aarch64_get_vec_s8 (cpu, vm, i * 2)
4788 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4791 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4797 for (i = 0; i < (full ? 4 : 2); i++)
4799 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4801 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4804 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4812 for (i = 0; i < 2; i++)
4814 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4816 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4819 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
/* Vector unsigned shift left by register (USHL): unsigned lanes of
   vn shifted by the signed byte-sized count from vm, mirroring
   do_vec_SSHL but with unsigned lane accessors.  */
4827 do_vec_USHL (sim_cpu *cpu)
4830 instr[30] = first part (0)/ second part (1)
4831 instr[29,24] = 10 1110
4832 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4835 instr[15,10] = 0100 01
4839 unsigned full = INSTR (30, 30);
4840 unsigned vm = INSTR (20, 16);
4841 unsigned vn = INSTR (9, 5);
4842 unsigned vd = INSTR (4, 0);
4846 NYI_assert (29, 24, 0x2E);
4847 NYI_assert (15, 10, 0x11);
4849 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4850 switch (INSTR (23, 22))
4853 for (i = 0; i < (full ? 16 : 8); i++)
4855 shift = aarch64_get_vec_s8 (cpu, vm, i);
4857 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4860 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4866 for (i = 0; i < (full ? 8 : 4); i++)
4868 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4870 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4873 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4879 for (i = 0; i < (full ? 4 : 2); i++)
4881 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4883 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4886 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4894 for (i = 0; i < 2; i++)
4896 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4898 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4901 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
/* Vector floating-point multiply-accumulate (FMLA, vector form):
   vd[i] += vn[i] * vm[i] for double (2 lanes, full only) or float
   (2 or 4 lanes).
   NOTE(review): implemented as separate multiply then add; hardware
   FMLA is fused (single rounding), so results can differ in the last
   ulp — confirm whether this is acceptable for the simulator.  */
4909 do_vec_FMLA (sim_cpu *cpu)
4912 instr[30] = full/half selector
4913 instr[29,23] = 0011100
4914 instr[22] = size: 0=>float, 1=>double
4917 instr[15,10] = 1100 11
4921 unsigned vm = INSTR (20, 16);
4922 unsigned vn = INSTR (9, 5);
4923 unsigned vd = INSTR (4, 0);
4925 int full = INSTR (30, 30);
4927 NYI_assert (29, 23, 0x1C);
4928 NYI_assert (21, 21, 1);
4929 NYI_assert (15, 10, 0x33);
4931 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4936 for (i = 0; i < 2; i++)
4937 aarch64_set_vec_double (cpu, vd, i,
4938 aarch64_get_vec_double (cpu, vn, i) *
4939 aarch64_get_vec_double (cpu, vm, i) +
4940 aarch64_get_vec_double (cpu, vd, i));
4944 for (i = 0; i < (full ? 4 : 2); i++)
4945 aarch64_set_vec_float (cpu, vd, i,
4946 aarch64_get_vec_float (cpu, vn, i) *
4947 aarch64_get_vec_float (cpu, vm, i) +
4948 aarch64_get_vec_float (cpu, vd, i));
/* Vector element-wise maximum: SMAX (signed) / UMAX (unsigned),
   selected by instr[29]; vd[i] = max (vn[i], vm[i]) per lane, with
   lane size from instr[23,22] (8/16/32-bit; no 64-bit form).  */
4953 do_vec_max (sim_cpu *cpu)
4956 instr[30] = full/half selector
4957 instr[29] = SMAX (0) / UMAX (1)
4958 instr[28,24] = 0 1110
4959 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
4962 instr[15,10] = 0110 01
4966 unsigned vm = INSTR (20, 16);
4967 unsigned vn = INSTR (9, 5);
4968 unsigned vd = INSTR (4, 0);
4970 int full = INSTR (30, 30);
4972 NYI_assert (28, 24, 0x0E);
4973 NYI_assert (21, 21, 1);
4974 NYI_assert (15, 10, 0x19);
4976 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* Unsigned (UMAX) path.  */
4979 switch (INSTR (23, 22))
4982 for (i = 0; i < (full ? 16 : 8); i++)
4983 aarch64_set_vec_u8 (cpu, vd, i,
4984 aarch64_get_vec_u8 (cpu, vn, i)
4985 > aarch64_get_vec_u8 (cpu, vm, i)
4986 ? aarch64_get_vec_u8 (cpu, vn, i)
4987 : aarch64_get_vec_u8 (cpu, vm, i));
4991 for (i = 0; i < (full ? 8 : 4); i++)
4992 aarch64_set_vec_u16 (cpu, vd, i,
4993 aarch64_get_vec_u16 (cpu, vn, i)
4994 > aarch64_get_vec_u16 (cpu, vm, i)
4995 ? aarch64_get_vec_u16 (cpu, vn, i)
4996 : aarch64_get_vec_u16 (cpu, vm, i));
5000 for (i = 0; i < (full ? 4 : 2); i++)
5001 aarch64_set_vec_u32 (cpu, vd, i,
5002 aarch64_get_vec_u32 (cpu, vn, i)
5003 > aarch64_get_vec_u32 (cpu, vm, i)
5004 ? aarch64_get_vec_u32 (cpu, vn, i)
5005 : aarch64_get_vec_u32 (cpu, vm, i));
/* Signed (SMAX) path.  */
5014 switch (INSTR (23, 22))
5017 for (i = 0; i < (full ? 16 : 8); i++)
5018 aarch64_set_vec_s8 (cpu, vd, i,
5019 aarch64_get_vec_s8 (cpu, vn, i)
5020 > aarch64_get_vec_s8 (cpu, vm, i)
5021 ? aarch64_get_vec_s8 (cpu, vn, i)
5022 : aarch64_get_vec_s8 (cpu, vm, i));
5026 for (i = 0; i < (full ? 8 : 4); i++)
5027 aarch64_set_vec_s16 (cpu, vd, i,
5028 aarch64_get_vec_s16 (cpu, vn, i)
5029 > aarch64_get_vec_s16 (cpu, vm, i)
5030 ? aarch64_get_vec_s16 (cpu, vn, i)
5031 : aarch64_get_vec_s16 (cpu, vm, i));
5035 for (i = 0; i < (full ? 4 : 2); i++)
5036 aarch64_set_vec_s32 (cpu, vd, i,
5037 aarch64_get_vec_s32 (cpu, vn, i)
5038 > aarch64_get_vec_s32 (cpu, vm, i)
5039 ? aarch64_get_vec_s32 (cpu, vn, i)
5040 : aarch64_get_vec_s32 (cpu, vm, i));
/* Vector element-wise minimum: SMIN (signed) / UMIN (unsigned),
   selected by instr[29]; vd[i] = min (vn[i], vm[i]) per lane.
   Mirror image of do_vec_max with '<' instead of '>'.  */
5050 do_vec_min (sim_cpu *cpu)
5053 instr[30] = full/half selector
5054 instr[29] = SMIN (0) / UMIN (1)
5055 instr[28,24] = 0 1110
5056 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5059 instr[15,10] = 0110 11
5063 unsigned vm = INSTR (20, 16);
5064 unsigned vn = INSTR (9, 5);
5065 unsigned vd = INSTR (4, 0);
5067 int full = INSTR (30, 30);
5069 NYI_assert (28, 24, 0x0E);
5070 NYI_assert (21, 21, 1);
5071 NYI_assert (15, 10, 0x1B);
5073 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* Unsigned (UMIN) path.  */
5076 switch (INSTR (23, 22))
5079 for (i = 0; i < (full ? 16 : 8); i++)
5080 aarch64_set_vec_u8 (cpu, vd, i,
5081 aarch64_get_vec_u8 (cpu, vn, i)
5082 < aarch64_get_vec_u8 (cpu, vm, i)
5083 ? aarch64_get_vec_u8 (cpu, vn, i)
5084 : aarch64_get_vec_u8 (cpu, vm, i));
5088 for (i = 0; i < (full ? 8 : 4); i++)
5089 aarch64_set_vec_u16 (cpu, vd, i,
5090 aarch64_get_vec_u16 (cpu, vn, i)
5091 < aarch64_get_vec_u16 (cpu, vm, i)
5092 ? aarch64_get_vec_u16 (cpu, vn, i)
5093 : aarch64_get_vec_u16 (cpu, vm, i));
5097 for (i = 0; i < (full ? 4 : 2); i++)
5098 aarch64_set_vec_u32 (cpu, vd, i,
5099 aarch64_get_vec_u32 (cpu, vn, i)
5100 < aarch64_get_vec_u32 (cpu, vm, i)
5101 ? aarch64_get_vec_u32 (cpu, vn, i)
5102 : aarch64_get_vec_u32 (cpu, vm, i));
/* Signed (SMIN) path.  */
5111 switch (INSTR (23, 22))
5114 for (i = 0; i < (full ? 16 : 8); i++)
5115 aarch64_set_vec_s8 (cpu, vd, i,
5116 aarch64_get_vec_s8 (cpu, vn, i)
5117 < aarch64_get_vec_s8 (cpu, vm, i)
5118 ? aarch64_get_vec_s8 (cpu, vn, i)
5119 : aarch64_get_vec_s8 (cpu, vm, i));
5123 for (i = 0; i < (full ? 8 : 4); i++)
5124 aarch64_set_vec_s16 (cpu, vd, i,
5125 aarch64_get_vec_s16 (cpu, vn, i)
5126 < aarch64_get_vec_s16 (cpu, vm, i)
5127 ? aarch64_get_vec_s16 (cpu, vn, i)
5128 : aarch64_get_vec_s16 (cpu, vm, i));
5132 for (i = 0; i < (full ? 4 : 2); i++)
5133 aarch64_set_vec_s32 (cpu, vd, i,
5134 aarch64_get_vec_s32 (cpu, vn, i)
5135 < aarch64_get_vec_s32 (cpu, vm, i)
5136 ? aarch64_get_vec_s32 (cpu, vn, i)
5137 : aarch64_get_vec_s32 (cpu, vm, i));
/* Widening vector subtract: SSUBL/SSUBL2 (signed) and USUBL/USUBL2
   (unsigned).  Each narrow lane of vn minus the matching lane of vm
   produces a double-width result lane in vd.  The "2" variants
   (instr[30] set) take their operands from the upper half of the
   source registers via 'bias'.  */
5147 do_vec_sub_long (sim_cpu *cpu)
5150 instr[30] = lower (0) / upper (1)
5151 instr[29] = signed (0) / unsigned (1)
5152 instr[28,24] = 0 1110
5153 instr[23,22] = size: bytes (00), half (01), word (10)
5156 instr[15,10] = 0010 00
5158 instr[4,0] = V dest. */
5160 unsigned size = INSTR (23, 22);
5161 unsigned vm = INSTR (20, 16);
5162 unsigned vn = INSTR (9, 5);
5163 unsigned vd = INSTR (4, 0);
5167 NYI_assert (28, 24, 0x0E);
5168 NYI_assert (21, 21, 1);
5169 NYI_assert (15, 10, 0x08);
5174 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* instr[30,29] selects signedness and lower/upper ("2") variant;
   the "2" cases fall through after setting the source-lane bias.  */
5175 switch (INSTR (30, 29))
5177 case 2: /* SSUBL2. */
5179 case 0: /* SSUBL. */
5184 for (i = 0; i < 8; i++)
5185 aarch64_set_vec_s16 (cpu, vd, i,
5186 aarch64_get_vec_s8 (cpu, vn, i + bias)
5187 - aarch64_get_vec_s8 (cpu, vm, i + bias));
5192 for (i = 0; i < 4; i++)
5193 aarch64_set_vec_s32 (cpu, vd, i,
5194 aarch64_get_vec_s16 (cpu, vn, i + bias)
5195 - aarch64_get_vec_s16 (cpu, vm, i + bias));
5199 for (i = 0; i < 2; i++)
5200 aarch64_set_vec_s64 (cpu, vd, i,
5201 aarch64_get_vec_s32 (cpu, vn, i + bias)
5202 - aarch64_get_vec_s32 (cpu, vm, i + bias));
5210 case 3: /* USUBL2. */
5212 case 1: /* USUBL. */
5217 for (i = 0; i < 8; i++)
5218 aarch64_set_vec_u16 (cpu, vd, i,
5219 aarch64_get_vec_u8 (cpu, vn, i + bias)
5220 - aarch64_get_vec_u8 (cpu, vm, i + bias));
5225 for (i = 0; i < 4; i++)
5226 aarch64_set_vec_u32 (cpu, vd, i,
5227 aarch64_get_vec_u16 (cpu, vn, i + bias)
5228 - aarch64_get_vec_u16 (cpu, vm, i + bias));
5232 for (i = 0; i < 2; i++)
5233 aarch64_set_vec_u64 (cpu, vd, i,
5234 aarch64_get_vec_u32 (cpu, vn, i + bias)
5235 - aarch64_get_vec_u32 (cpu, vm, i + bias));
/* Vector pairwise add (ADDP): adjacent lane pairs of vn are summed
   into the low half of vd and pairs of vm into the high half.  The
   sources are copied up front because vd may alias vn or vm.  */
5246 do_vec_ADDP (sim_cpu *cpu)
5249 instr[30] = half(0)/full(1)
5250 instr[29,24] = 00 1110
5251 instr[23,22] = size: bytes (00), half (01), word (10), long (11)
5254 instr[15,10] = 1011 11
5256 instr[4,0] = V dest. */
5260 unsigned full = INSTR (30, 30);
5261 unsigned size = INSTR (23, 22);
5262 unsigned vm = INSTR (20, 16);
5263 unsigned vn = INSTR (9, 5);
5264 unsigned vd = INSTR (4, 0);
5267 NYI_assert (29, 24, 0x0E);
5268 NYI_assert (21, 21, 1);
5269 NYI_assert (15, 10, 0x2F);
5271 /* Make copies of the source registers in case vd == vn/vm. */
5272 copy_vn = cpu->fr[vn];
5273 copy_vm = cpu->fr[vm];
5275 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* 'range' is the number of result lanes contributed by each source;
   vn results land in lanes [0,range), vm results in [range,2*range).  */
5279 range = full ? 8 : 4;
5280 for (i = 0; i < range; i++)
5282 aarch64_set_vec_u8 (cpu, vd, i,
5283 copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
5284 aarch64_set_vec_u8 (cpu, vd, i + range,
5285 copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);
5290 range = full ? 4 : 2;
5291 for (i = 0; i < range; i++)
5293 aarch64_set_vec_u16 (cpu, vd, i,
5294 copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
5295 aarch64_set_vec_u16 (cpu, vd, i + range,
5296 copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);
5301 range = full ? 2 : 1;
5302 for (i = 0; i < range; i++)
5304 aarch64_set_vec_u32 (cpu, vd, i,
5305 copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
5306 aarch64_set_vec_u32 (cpu, vd, i + range,
5307 copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);
/* 64-bit lanes: one pair per source register.  */
5314 aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
5315 aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
/* UMOV: move (zero-extend) one vector element to a general register.
   The element size is found from the lowest set bit of instr[20,16]
   (bit16 => byte, bit17 => half, bit18 => word, else doubleword) and
   the remaining high bits of that field give the lane index.  */
5321 do_vec_UMOV (sim_cpu *cpu)
5324 instr[30] = 32-bit(0)/64-bit(1)
5325 instr[29,21] = 00 1110 000
5326 insrt[20,16] = size & index
5327 instr[15,10] = 0011 11
5328 instr[9,5] = V source
5329 instr[4,0] = R dest. */
5331 unsigned vs = INSTR (9, 5);
5332 unsigned rd = INSTR (4, 0);
5335 NYI_assert (29, 21, 0x070);
5336 NYI_assert (15, 10, 0x0F);
5338 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5341 /* Byte transfer. */
5342 index = INSTR (20, 17);
5343 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5344 aarch64_get_vec_u8 (cpu, vs, index));
5346 else if (INSTR (17, 17))
5348 index = INSTR (20, 18);
5349 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5350 aarch64_get_vec_u16 (cpu, vs, index));
5352 else if (INSTR (18, 18))
5354 index = INSTR (20, 19);
5355 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5356 aarch64_get_vec_u32 (cpu, vs, index));
/* Doubleword transfer requires the 64-bit (X register) form.  */
5360 if (INSTR (30, 30) != 1)
5363 index = INSTR (20, 20);
5364 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5365 aarch64_get_vec_u64 (cpu, vs, index));
/* Vector floating-point absolute value (FABS):
   vd[i] = fabs (vn[i]) for double (2 lanes) or float (2/4 lanes).  */
5370 do_vec_FABS (sim_cpu *cpu)
5373 instr[30] = half(0)/full(1)
5374 instr[29,23] = 00 1110 1
5375 instr[22] = float(0)/double(1)
5376 instr[21,16] = 10 0000
5377 instr[15,10] = 1111 10
5381 unsigned vn = INSTR (9, 5);
5382 unsigned vd = INSTR (4, 0);
5383 unsigned full = INSTR (30, 30);
5386 NYI_assert (29, 23, 0x1D);
5387 NYI_assert (21, 10, 0x83E);
5389 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5395 for (i = 0; i < 2; i++)
5396 aarch64_set_vec_double (cpu, vd, i,
5397 fabs (aarch64_get_vec_double (cpu, vn, i)));
5401 for (i = 0; i < (full ? 4 : 2); i++)
5402 aarch64_set_vec_float (cpu, vd, i,
5403 fabsf (aarch64_get_vec_float (cpu, vn, i)));
/* Vector float-to-signed-integer convert, round toward zero
   (FCVTZS): the C cast to int64_t/int32_t provides the truncation.  */
5408 do_vec_FCVTZS (sim_cpu *cpu)
5411 instr[30] = half (0) / all (1)
5412 instr[29,23] = 00 1110 1
5413 instr[22] = single (0) / double (1)
5414 instr[21,10] = 10 0001 1011 10
5418 unsigned rn = INSTR (9, 5);
5419 unsigned rd = INSTR (4, 0);
5420 unsigned full = INSTR (30, 30);
5423 NYI_assert (31, 31, 0);
5424 NYI_assert (29, 23, 0x1D);
5425 NYI_assert (21, 10, 0x86E);
5427 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5433 for (i = 0; i < 2; i++)
5434 aarch64_set_vec_s64 (cpu, rd, i,
5435 (int64_t) aarch64_get_vec_double (cpu, rn, i));
5438 for (i = 0; i < (full ? 4 : 2); i++)
5439 aarch64_set_vec_s32 (cpu, rd, i,
5440 (int32_t) aarch64_get_vec_float (cpu, rn, i));
/* REV64: reverse the order of the sub-elements within each 64-bit
   chunk.  The XOR index trick (i ^ 7 / i ^ 3 / i ^ 1) mirrors lane
   positions inside each doubleword; a temporary FRegister 'val'
   collects the result before it is written back.  */
5444 do_vec_REV64 (sim_cpu *cpu)
5447 instr[30] = full/half
5448 instr[29,24] = 00 1110
5450 instr[21,10] = 10 0000 0000 10
5454 unsigned rn = INSTR (9, 5);
5455 unsigned rd = INSTR (4, 0);
5456 unsigned size = INSTR (23, 22);
5457 unsigned full = INSTR (30, 30);
5461 NYI_assert (29, 24, 0x0E);
5462 NYI_assert (21, 10, 0x802);
5464 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5468 for (i = 0; i < (full ? 16 : 8); i++)
5469 val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i);
5473 for (i = 0; i < (full ? 8 : 4); i++)
5474 val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i);
5478 for (i = 0; i < (full ? 4 : 2); i++)
5479 val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i);
/* Write back the low doubleword always, the high one only for the
   full-register form.  */
5486 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5488 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
/* REV16: reverse the byte order within each 16-bit chunk
   (i ^ 1 swaps byte pairs); only the byte element size is handled
   in the visible path.  */
5492 do_vec_REV16 (sim_cpu *cpu)
5495 instr[30] = full/half
5496 instr[29,24] = 00 1110
5498 instr[21,10] = 10 0000 0001 10
5502 unsigned rn = INSTR (9, 5);
5503 unsigned rd = INSTR (4, 0);
5504 unsigned size = INSTR (23, 22);
5505 unsigned full = INSTR (30, 30);
5509 NYI_assert (29, 24, 0x0E);
5510 NYI_assert (21, 10, 0x806);
5512 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5516 for (i = 0; i < (full ? 16 : 8); i++)
5517 val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i);
/* Low doubleword always written; high doubleword only when full.  */
5524 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5526 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
/* First-level decoder for the 0x0E vector operation group: examines
   instr[21], instr[23,22] and the sub-opcode in instr[15,10] and
   dispatches to the per-instruction do_vec_* handlers.  */
5530 do_vec_op1 (sim_cpu *cpu)
5533 instr[30] = half/full
5534 instr[29,24] = 00 1110
5537 instr[15,10] = sub-opcode
5540 NYI_assert (29, 24, 0x0E);
5542 if (INSTR (21, 21) == 0)
5544 if (INSTR (23, 22) == 0)
/* INS (element), second form, only valid in the full-width encoding.  */
5546 if (INSTR (30, 30) == 1
5547 && INSTR (17, 14) == 0
5548 && INSTR (12, 10) == 7)
5549 return do_vec_ins_2 (cpu);
5551 switch (INSTR (15, 10))
5553 case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
5554 case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
5555 case 0x07: do_vec_INS (cpu); return;
5556 case 0x0A: do_vec_TRN (cpu); return;
5559 if (INSTR (17, 16) == 0)
5561 do_vec_MOV_into_scalar (cpu);
5570 do_vec_TBL (cpu); return;
5574 do_vec_UZP (cpu); return;
5578 do_vec_ZIP (cpu); return;
/* Permute/move sub-group keyed on instr[13,10].  */
5585 switch (INSTR (13, 10))
5587 case 0x6: do_vec_UZP (cpu); return;
5588 case 0xE: do_vec_ZIP (cpu); return;
5589 case 0xA: do_vec_TRN (cpu); return;
5590 case 0xF: do_vec_UMOV (cpu); return;
5595 switch (INSTR (15, 10))
5597 case 0x02: do_vec_REV64 (cpu); return;
5598 case 0x06: do_vec_REV16 (cpu); return;
/* Bitwise logical group selected by instr[23,21].  */
5601 switch (INSTR (23, 21))
5603 case 1: do_vec_AND (cpu); return;
5604 case 3: do_vec_BIC (cpu); return;
5605 case 5: do_vec_ORR (cpu); return;
5606 case 7: do_vec_ORN (cpu); return;
5610 case 0x08: do_vec_sub_long (cpu); return;
5611 case 0x0a: do_vec_XTN (cpu); return;
5612 case 0x11: do_vec_SSHL (cpu); return;
5613 case 0x19: do_vec_max (cpu); return;
5614 case 0x1B: do_vec_min (cpu); return;
5615 case 0x21: do_vec_add (cpu); return;
5616 case 0x25: do_vec_MLA (cpu); return;
5617 case 0x27: do_vec_mul (cpu); return;
5618 case 0x2F: do_vec_ADDP (cpu); return;
5619 case 0x30: do_vec_mull (cpu); return;
5620 case 0x33: do_vec_FMLA (cpu); return;
5621 case 0x35: do_vec_fadd (cpu); return;
5624 switch (INSTR (20, 16))
5626 case 0x00: do_vec_ABS (cpu); return;
5627 case 0x01: do_vec_FCVTZS (cpu); return;
5628 case 0x11: do_vec_ADDV (cpu); return;
5634 do_vec_Fminmax (cpu); return;
5646 do_vec_compare (cpu); return;
5649 do_vec_FABS (cpu); return;
/* Vector sign/zero-extend long with optional shift: SXTL/UXTL and
   SSHLL/USHLL (and the "2" variants working on the upper source
   half).  The element size is found from the highest set bit of
   instr[21,16]; the bits below it form the shift amount.  Source
   lanes are read into temporaries before the (wider) destination
   lanes are written, since vd may alias vs.  */
5657 do_vec_xtl (sim_cpu *cpu)
5660 instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
5661 instr[28,22] = 0 1111 00
5662 instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
5663 instr[15,10] = 1010 01
5664 instr[9,5] = V source
5665 instr[4,0] = V dest. */
5667 unsigned vs = INSTR (9, 5);
5668 unsigned vd = INSTR (4, 0);
5669 unsigned i, shift, bias = 0;
5671 NYI_assert (28, 22, 0x3C);
5672 NYI_assert (15, 10, 0x29);
5674 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5675 switch (INSTR (30, 29))
/* The "2" cases set bias to index the upper source half and fall
   through to the common code.  */
5677 case 2: /* SXTL2, SSHLL2. */
5679 case 0: /* SXTL, SSHLL. */
5684 shift = INSTR (20, 16);
5685 /* Get the source values before setting the destination values
5686 in case the source and destination are the same. */
5687 val1 = aarch64_get_vec_s32 (cpu, vs, bias) << shift;
5688 val2 = aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift;
5689 aarch64_set_vec_s64 (cpu, vd, 0, val1);
5690 aarch64_set_vec_s64 (cpu, vd, 1, val2);
5692 else if (INSTR (20, 20))
/* NOTE(review): v1..v4 are declared here but the loops below index a
   v[] array — verify the declarations against the full source.  */
5695 int32_t v1,v2,v3,v4;
5697 shift = INSTR (19, 16);
5699 for (i = 0; i < 4; i++)
5700 v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift;
5701 for (i = 0; i < 4; i++)
5702 aarch64_set_vec_s32 (cpu, vd, i, v[i]);
5707 NYI_assert (19, 19, 1);
5709 shift = INSTR (18, 16);
5711 for (i = 0; i < 8; i++)
5712 v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
5713 for (i = 0; i < 8; i++)
5714 aarch64_set_vec_s16 (cpu, vd, i, v[i]);
5718 case 3: /* UXTL2, USHLL2. */
5720 case 1: /* UXTL, USHLL. */
5724 shift = INSTR (20, 16);
5725 v1 = aarch64_get_vec_u32 (cpu, vs, bias) << shift;
5726 v2 = aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift;
5727 aarch64_set_vec_u64 (cpu, vd, 0, v1);
5728 aarch64_set_vec_u64 (cpu, vd, 1, v2);
5730 else if (INSTR (20, 20))
5733 shift = INSTR (19, 16);
5735 for (i = 0; i < 4; i++)
5736 v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift;
5737 for (i = 0; i < 4; i++)
5738 aarch64_set_vec_u32 (cpu, vd, i, v[i]);
5743 NYI_assert (19, 19, 1);
5745 shift = INSTR (18, 16);
5747 for (i = 0; i < 8; i++)
5748 v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
5749 for (i = 0; i < 8; i++)
5750 aarch64_set_vec_u16 (cpu, vd, i, v[i]);
/* Vector shift left by immediate (SHL).  The element size is encoded
   by the highest set bit of instr[22,16] (the "immh" field): bit 22
   => 64-bit lanes, bit 21 => 32-bit, bit 20 => 16-bit, bit 19 =>
   8-bit; the bits below the marker are the shift amount.  */
5757 do_vec_SHL (sim_cpu *cpu)
5760 instr [30] = half(0)/full(1)
5761 instr [29,23] = 001 1110
5762 instr [22,16] = size and shift amount
5763 instr [15,10] = 01 0101
5765 instr [4, 0] = Vd. */
5768 int full = INSTR (30, 30);
5769 unsigned vs = INSTR (9, 5);
5770 unsigned vd = INSTR (4, 0);
5773 NYI_assert (29, 23, 0x1E);
5774 NYI_assert (15, 10, 0x15);
5776 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* 64-bit lanes (bit 22 set): shift is instr[21,16].  */
5779 shift = INSTR (21, 16);
5784 for (i = 0; i < 2; i++)
5786 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5787 aarch64_set_vec_u64 (cpu, vd, i, val << shift);
/* 32-bit lanes (bit 21 set): shift is instr[20,16].  */
5795 shift = INSTR (20, 16);
5797 for (i = 0; i < (full ? 4 : 2); i++)
5799 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5800 aarch64_set_vec_u32 (cpu, vd, i, val << shift);
/* 16-bit lanes (bit 20 set): shift is instr[19,16].  */
5808 shift = INSTR (19, 16);
5810 for (i = 0; i < (full ? 8 : 4); i++)
5812 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5813 aarch64_set_vec_u16 (cpu, vd, i, val << shift);
/* 8-bit lanes require bit 19 set; otherwise unallocated.  */
5819 if (INSTR (19, 19) == 0)
5822 shift = INSTR (18, 16);
5824 for (i = 0; i < (full ? 16 : 8); i++)
5826 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5827 aarch64_set_vec_u8 (cpu, vd, i, val << shift);
/* Vector shift right by immediate: SSHR (arithmetic, signed lanes)
   and USHR (logical, unsigned lanes).  The immediate encodes the
   shift as (2 * element-size - raw-value), hence the "128 - shift"
   style adjustments; element size comes from the high bits of
   instr[22,16] as in do_vec_SHL.  */
5832 do_vec_SSHR_USHR (sim_cpu *cpu)
5835 instr [30] = half(0)/full(1)
5836 instr [29] = signed(0)/unsigned(1)
5837 instr [28,23] = 0 1111 0
5838 instr [22,16] = size and shift amount
5839 instr [15,10] = 0000 01
5841 instr [4, 0] = Vd. */
5843 int full = INSTR (30, 30);
5844 int sign = ! INSTR (29, 29);
5845 unsigned shift = INSTR (22, 16);
5846 unsigned vs = INSTR (9, 5);
5847 unsigned vd = INSTR (4, 0);
5850 NYI_assert (28, 23, 0x1E);
5851 NYI_assert (15, 10, 0x01);
5853 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* 64-bit lanes: actual shift amount is 128 - encoded value.  */
5856 shift = 128 - shift;
5862 for (i = 0; i < 2; i++)
5864 int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
5865 aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
5868 for (i = 0; i < 2; i++)
5870 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5871 aarch64_set_vec_u64 (cpu, vd, i, val >> shift);
/* 32-bit lanes: signed path uses arithmetic shift, unsigned logical.  */
5882 for (i = 0; i < (full ? 4 : 2); i++)
5884 int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
5885 aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
5888 for (i = 0; i < (full ? 4 : 2); i++)
5890 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5891 aarch64_set_vec_u32 (cpu, vd, i, val >> shift);
/* 16-bit lanes.  */
5902 for (i = 0; i < (full ? 8 : 4); i++)
5904 int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
5905 aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
5908 for (i = 0; i < (full ? 8 : 4); i++)
5910 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5911 aarch64_set_vec_u16 (cpu, vd, i, val >> shift);
/* 8-bit lanes require instr[19] set; otherwise unallocated.  */
5917 if (INSTR (19, 19) == 0)
5923 for (i = 0; i < (full ? 16 : 8); i++)
5925 int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
5926 aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
5929 for (i = 0; i < (full ? 16 : 8); i++)
5931 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5932 aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
/* Vector MUL (by element): multiply every lane of Vn by one selected
   element of Vm (index assembled from H:L and, for 16-bit, bit 20),
   writing the products to Vd.  Only 16- and 32-bit lane sizes are
   handled here (dispatched on "size", bits [23,22]).
   NOTE(review): extraction dropped structural lines; code byte-identical.  */
5937 do_vec_MUL_by_element (sim_cpu *cpu)
5940 instr[30] = half/full
5941 instr[29,24] = 00 1111
5952 unsigned full = INSTR (30, 30);
5953 unsigned L = INSTR (21, 21);
5954 unsigned H = INSTR (11, 11);
5955 unsigned vn = INSTR (9, 5);
5956 unsigned vd = INSTR (4, 0);
5957 unsigned size = INSTR (23, 22);
5962 NYI_assert (29, 24, 0x0F);
5963 NYI_assert (15, 12, 0x8);
5964 NYI_assert (10, 10, 0);
5966 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5971 /* 16 bit products. */
/* 16-bit: Vm is a 4-bit field, index is H:L:M (bit 20).  */
5976 index = (H << 2) | (L << 1) | INSTR (20, 20);
5977 vm = INSTR (19, 16);
5978 element2 = aarch64_get_vec_u16 (cpu, vm, index);
5980 for (e = 0; e < (full ? 8 : 4); e ++)
5982 element1 = aarch64_get_vec_u16 (cpu, vn, e);
5983 product = element1 * element2;
5984 aarch64_set_vec_u16 (cpu, vd, e, product);
5991 /* 32 bit products. */
/* 32-bit: Vm is the full 5-bit field, index is H:L.  */
5996 index = (H << 1) | L;
5997 vm = INSTR (20, 16);
5998 element2 = aarch64_get_vec_u32 (cpu, vm, index);
6000 for (e = 0; e < (full ? 4 : 2); e ++)
6002 element1 = aarch64_get_vec_u32 (cpu, vn, e);
6003 product = element1 * element2;
6004 aarch64_set_vec_u32 (cpu, vd, e, product);
/* Vector FMLA (by element): Vd[e] += Vn[e] * Vm[index] for each lane,
   double precision when size (bit 22) is set, single otherwise.
   NOTE(review): extraction dropped structural lines; code byte-identical.  */
6015 do_FMLA_by_element (sim_cpu *cpu)
6018 instr[30] = half/full
6019 instr[29,23] = 00 1111 1
6029 unsigned full = INSTR (30, 30);
6030 unsigned size = INSTR (22, 22);
6031 unsigned L = INSTR (21, 21);
6032 unsigned vm = INSTR (20, 16);
6033 unsigned H = INSTR (11, 11);
6034 unsigned vn = INSTR (9, 5);
6035 unsigned vd = INSTR (4, 0);
6038 NYI_assert (29, 23, 0x1F);
6039 NYI_assert (15, 12, 0x1);
6040 NYI_assert (10, 10, 0);
6042 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* Double precision: element index is just H.  */
6045 double element1, element2;
6050 element2 = aarch64_get_vec_double (cpu, vm, H);
6052 for (e = 0; e < 2; e++)
6054 element1 = aarch64_get_vec_double (cpu, vn, e);
6055 element1 *= element2;
6056 element1 += aarch64_get_vec_double (cpu, vd, e);
6057 aarch64_set_vec_double (cpu, vd, e, element1);
/* Single precision: element index is H:L.  */
6063 float element2 = aarch64_get_vec_float (cpu, vm, (H << 1) | L);
6065 for (e = 0; e < (full ? 4 : 2); e++)
6067 element1 = aarch64_get_vec_float (cpu, vn, e);
6068 element1 *= element2;
6069 element1 += aarch64_get_vec_float (cpu, vd, e);
6070 aarch64_set_vec_float (cpu, vd, e, element1);
/* Second-level decoder for vector ops with top bits 00 1111: dispatches
   on bit 23 and the sub-opcode in bits [15,10] to the by-element and
   shift/extend handlers above.
   NOTE(review): extraction dropped structural lines (switch braces,
   some case labels, HALT_NYI defaults); code kept byte-identical.  */
6076 do_vec_op2 (sim_cpu *cpu)
6079 instr[30] = half/full
6080 instr[29,24] = 00 1111
6082 instr[22,16] = element size & index
6083 instr[15,10] = sub-opcode
6087 NYI_assert (29, 24, 0x0F);
6089 if (INSTR (23, 23) != 0)
6091 switch (INSTR (15, 10))
6095 do_FMLA_by_element (cpu);
6100 do_vec_MUL_by_element (cpu);
6109 switch (INSTR (15, 10))
6111 case 0x01: do_vec_SSHR_USHR (cpu); return;
6112 case 0x15: do_vec_SHL (cpu); return;
6114 case 0x22: do_vec_MUL_by_element (cpu); return;
6115 case 0x29: do_vec_xtl (cpu); return;
/* Vector NEG: negate each signed lane of Vs into Vd; lane width chosen
   by "size" bits [23,22].  64-bit lanes require the full (128-bit) form.
   NOTE(review): extraction dropped structural lines; code byte-identical.  */
6122 do_vec_neg (sim_cpu *cpu)
6125 instr[30] = full(1)/half(0)
6126 instr[29,24] = 10 1110
6127 instr[23,22] = size: byte(00), half (01), word (10), long (11)
6128 instr[21,10] = 1000 0010 1110
6132 int full = INSTR (30, 30);
6133 unsigned vs = INSTR (9, 5);
6134 unsigned vd = INSTR (4, 0);
6137 NYI_assert (29, 24, 0x2E);
6138 NYI_assert (21, 10, 0x82E);
6140 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6141 switch (INSTR (23, 22))
6144 for (i = 0; i < (full ? 16 : 8); i++)
6145 aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));
6149 for (i = 0; i < (full ? 8 : 4); i++)
6150 aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));
6154 for (i = 0; i < (full ? 4 : 2); i++)
6155 aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));
6161 for (i = 0; i < 2; i++)
6162 aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
/* Vector FSQRT: per-lane square root of Vs into Vd; bit 22 selects
   double (two 64-bit lanes) vs single (2 or 4 32-bit lanes).
   NOTE(review): extraction dropped structural lines; code byte-identical.  */
6168 do_vec_sqrt (sim_cpu *cpu)
6171 instr[30] = full(1)/half(0)
6172 instr[29,23] = 101 1101
6173 instr[22] = single(0)/double(1)
6174 instr[21,10] = 1000 0111 1110
6178 int full = INSTR (30, 30);
6179 unsigned vs = INSTR (9, 5);
6180 unsigned vd = INSTR (4, 0);
6183 NYI_assert (29, 23, 0x5B);
6184 NYI_assert (21, 10, 0x87E);
6186 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6187 if (INSTR (22, 22) == 0)
6188 for (i = 0; i < (full ? 4 : 2); i++)
6189 aarch64_set_vec_float (cpu, vd, i,
6190 sqrtf (aarch64_get_vec_float (cpu, vs, i)));
6192 for (i = 0; i < 2; i++)
6193 aarch64_set_vec_double (cpu, vd, i,
6194 sqrt (aarch64_get_vec_double (cpu, vs, i)));
/* Vector MLS (by element, indexed): Vd[i] -= Vs[i] * Vm[elem].
   NOTE(review): as visible, the 16-bit case reads a u16 element but
   accumulates through 32-bit lanes, and the 32-bit case through 64-bit
   lanes — looks like widened accumulation; confirm against the dropped
   surrounding lines before relying on this.
   Extraction dropped structural lines; code kept byte-identical.  */
6198 do_vec_mls_indexed (sim_cpu *cpu)
6201 instr[30] = half(0)/full(1)
6202 instr[29,24] = 10 1111
6203 instr[23,22] = 16-bit(01)/32-bit(10)
6204 instr[21,20+11] = index (if 16-bit)
6205 instr[21+11] = index (if 32-bit)
6208 instr[11] = part of index
6213 int full = INSTR (30, 30);
6214 unsigned vs = INSTR (9, 5);
6215 unsigned vd = INSTR (4, 0);
6216 unsigned vm = INSTR (20, 16);
6219 NYI_assert (15, 12, 4);
6220 NYI_assert (10, 10, 0);
6222 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6223 switch (INSTR (23, 22))
/* 16-bit element: index is bits [21,20]:11.  */
6233 elem = (INSTR (21, 20) << 1) | INSTR (11, 11);
6234 val = aarch64_get_vec_u16 (cpu, vm, elem);
6236 for (i = 0; i < (full ? 8 : 4); i++)
6237 aarch64_set_vec_u32 (cpu, vd, i,
6238 aarch64_get_vec_u32 (cpu, vd, i) -
6239 (aarch64_get_vec_u32 (cpu, vs, i) * val));
/* 32-bit element: index is bit 21:11.  */
6245 unsigned elem = (INSTR (21, 21) << 1) | INSTR (11, 11);
6246 uint64_t val = aarch64_get_vec_u32 (cpu, vm, elem);
6248 for (i = 0; i < (full ? 4 : 2); i++)
6249 aarch64_set_vec_u64 (cpu, vd, i,
6250 aarch64_get_vec_u64 (cpu, vd, i) -
6251 (aarch64_get_vec_u64 (cpu, vs, i) * val));
/* Vector SUB: Vd[i] = Vn[i] - Vm[i] per lane; lane width from "size"
   bits [23,22]; 64-bit lanes always process exactly two elements.
   NOTE(review): extraction dropped structural lines; code byte-identical.  */
6263 do_vec_SUB (sim_cpu *cpu)
6266 instr [30] = half(0)/full(1)
6267 instr [29,24] = 10 1110
6268 instr [23,22] = size: byte(00, half(01), word (10), long (11)
6271 instr [15,10] = 10 0001
6273 instr [4, 0] = Vd. */
6275 unsigned full = INSTR (30, 30);
6276 unsigned vm = INSTR (20, 16);
6277 unsigned vn = INSTR (9, 5);
6278 unsigned vd = INSTR (4, 0);
6281 NYI_assert (29, 24, 0x2E);
6282 NYI_assert (21, 21, 1);
6283 NYI_assert (15, 10, 0x21);
6285 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6286 switch (INSTR (23, 22))
6289 for (i = 0; i < (full ? 16 : 8); i++)
6290 aarch64_set_vec_s8 (cpu, vd, i,
6291 aarch64_get_vec_s8 (cpu, vn, i)
6292 - aarch64_get_vec_s8 (cpu, vm, i));
6296 for (i = 0; i < (full ? 8 : 4); i++)
6297 aarch64_set_vec_s16 (cpu, vd, i,
6298 aarch64_get_vec_s16 (cpu, vn, i)
6299 - aarch64_get_vec_s16 (cpu, vm, i));
6303 for (i = 0; i < (full ? 4 : 2); i++)
6304 aarch64_set_vec_s32 (cpu, vd, i,
6305 aarch64_get_vec_s32 (cpu, vn, i)
6306 - aarch64_get_vec_s32 (cpu, vm, i));
6313 for (i = 0; i < 2; i++)
6314 aarch64_set_vec_s64 (cpu, vd, i,
6315 aarch64_get_vec_s64 (cpu, vn, i)
6316 - aarch64_get_vec_s64 (cpu, vm, i));
/* Vector MLS (vector form): Vd[i] -= Vn[i] * Vm[i] per lane; byte,
   half and word lane widths only (size 11 is not handled here).
   NOTE(review): extraction dropped structural lines; code byte-identical.  */
6322 do_vec_MLS (sim_cpu *cpu)
6325 instr [30] = half(0)/full(1)
6326 instr [29,24] = 10 1110
6327 instr [23,22] = size: byte(00, half(01), word (10)
6330 instr [15,10] = 10 0101
6332 instr [4, 0] = Vd. */
6334 unsigned full = INSTR (30, 30);
6335 unsigned vm = INSTR (20, 16);
6336 unsigned vn = INSTR (9, 5);
6337 unsigned vd = INSTR (4, 0);
6340 NYI_assert (29, 24, 0x2E);
6341 NYI_assert (21, 21, 1);
6342 NYI_assert (15, 10, 0x25);
6344 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6345 switch (INSTR (23, 22))
6348 for (i = 0; i < (full ? 16 : 8); i++)
6349 aarch64_set_vec_u8 (cpu, vd, i,
6350 aarch64_get_vec_u8 (cpu, vd, i)
6351 - (aarch64_get_vec_u8 (cpu, vn, i)
6352 * aarch64_get_vec_u8 (cpu, vm, i)));
6356 for (i = 0; i < (full ? 8 : 4); i++)
6357 aarch64_set_vec_u16 (cpu, vd, i,
6358 aarch64_get_vec_u16 (cpu, vd, i)
6359 - (aarch64_get_vec_u16 (cpu, vn, i)
6360 * aarch64_get_vec_u16 (cpu, vm, i)));
6364 for (i = 0; i < (full ? 4 : 2); i++)
6365 aarch64_set_vec_u32 (cpu, vd, i,
6366 aarch64_get_vec_u32 (cpu, vd, i)
6367 - (aarch64_get_vec_u32 (cpu, vn, i)
6368 * aarch64_get_vec_u32 (cpu, vm, i)));
/* Vector FDIV: Vd[i] = Vn[i] / Vm[i]; bit 22 selects double (2 lanes)
   vs float (2 or 4 lanes).
   NOTE(review): extraction dropped structural lines; code byte-identical.  */
6377 do_vec_FDIV (sim_cpu *cpu)
6380 instr [30] = half(0)/full(1)
6381 instr [29,23] = 10 1110 0
6382 instr [22] = float()/double(1)
6385 instr [15,10] = 1111 11
6387 instr [4, 0] = Vd. */
6389 unsigned full = INSTR (30, 30);
6390 unsigned vm = INSTR (20, 16);
6391 unsigned vn = INSTR (9, 5);
6392 unsigned vd = INSTR (4, 0);
6395 NYI_assert (29, 23, 0x5C);
6396 NYI_assert (21, 21, 1);
6397 NYI_assert (15, 10, 0x3F);
6399 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6405 for (i = 0; i < 2; i++)
6406 aarch64_set_vec_double (cpu, vd, i,
6407 aarch64_get_vec_double (cpu, vn, i)
6408 / aarch64_get_vec_double (cpu, vm, i));
6411 for (i = 0; i < (full ? 4 : 2); i++)
6412 aarch64_set_vec_float (cpu, vd, i,
6413 aarch64_get_vec_float (cpu, vn, i)
6414 / aarch64_get_vec_float (cpu, vm, i));
/* Vector FMUL: Vd[i] = Vn[i] * Vm[i]; bit 22 selects double vs float.
   NOTE(review): extraction dropped structural lines; code byte-identical.  */
6418 do_vec_FMUL (sim_cpu *cpu)
6421 instr [30] = half(0)/full(1)
6422 instr [29,23] = 10 1110 0
6423 instr [22] = float(0)/double(1)
6426 instr [15,10] = 1101 11
6428 instr [4, 0] = Vd. */
6430 unsigned full = INSTR (30, 30);
6431 unsigned vm = INSTR (20, 16);
6432 unsigned vn = INSTR (9, 5);
6433 unsigned vd = INSTR (4, 0);
6436 NYI_assert (29, 23, 0x5C);
6437 NYI_assert (21, 21, 1);
6438 NYI_assert (15, 10, 0x37);
6440 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6446 for (i = 0; i < 2; i++)
6447 aarch64_set_vec_double (cpu, vd, i,
6448 aarch64_get_vec_double (cpu, vn, i)
6449 * aarch64_get_vec_double (cpu, vm, i));
6452 for (i = 0; i < (full ? 4 : 2); i++)
6453 aarch64_set_vec_float (cpu, vd, i,
6454 aarch64_get_vec_float (cpu, vn, i)
6455 * aarch64_get_vec_float (cpu, vm, i));
/* Vector FADDP (pairwise add): adjacent pairs from the concatenation
   Vn:Vm are summed into Vd.  All inputs are read into temporaries
   first so the result is correct when vd aliases vn or vm.
   NOTE(review): extraction dropped structural lines; code byte-identical.  */
6459 do_vec_FADDP (sim_cpu *cpu)
6462 instr [30] = half(0)/full(1)
6463 instr [29,23] = 10 1110 0
6464 instr [22] = float(0)/double(1)
6467 instr [15,10] = 1101 01
6469 instr [4, 0] = Vd. */
6471 unsigned full = INSTR (30, 30);
6472 unsigned vm = INSTR (20, 16);
6473 unsigned vn = INSTR (9, 5);
6474 unsigned vd = INSTR (4, 0);
6476 NYI_assert (29, 23, 0x5C);
6477 NYI_assert (21, 21, 1);
6478 NYI_assert (15, 10, 0x35);
6480 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6483 /* Extract values before adding them incase vd == vn/vm. */
6484 double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
6485 double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
6486 double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
6487 double tmp4 = aarch64_get_vec_double (cpu, vm, 1);
6492 aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
6493 aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
6497 /* Extract values before adding them incase vd == vn/vm. */
6498 float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
6499 float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
6500 float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
6501 float tmp6 = aarch64_get_vec_float (cpu, vm, 1);
/* Full (4-lane) variant reads the upper pairs too.  */
6505 float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
6506 float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
6507 float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
6508 float tmp8 = aarch64_get_vec_float (cpu, vm, 3);
6510 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6511 aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
6512 aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
6513 aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
/* Half (2-lane) variant: only the low pairs.  */
6517 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6518 aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
/* Vector FSQRT (U=1 encoding): per-lane sqrt of Vn into Vd; bit 22
   selects double vs single precision.
   NOTE(review): extraction dropped structural lines; code byte-identical.  */
6524 do_vec_FSQRT (sim_cpu *cpu)
6527 instr[30] = half(0)/full(1)
6528 instr[29,23] = 10 1110 1
6529 instr[22] = single(0)/double(1)
6530 instr[21,10] = 10 0001 1111 10
6532 instr[4,0] = Vdest. */
6534 unsigned vn = INSTR (9, 5);
6535 unsigned vd = INSTR (4, 0);
6536 unsigned full = INSTR (30, 30);
6539 NYI_assert (29, 23, 0x5D);
6540 NYI_assert (21, 10, 0x87E);
6542 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6548 for (i = 0; i < 2; i++)
6549 aarch64_set_vec_double (cpu, vd, i,
6550 sqrt (aarch64_get_vec_double (cpu, vn, i)));
6554 for (i = 0; i < (full ? 4 : 2); i++)
6555 aarch64_set_vec_float (cpu, vd, i,
6556 sqrtf (aarch64_get_vec_float (cpu, vn, i)));
/* Vector FNEG: per-lane negation of Vn into Vd; bit 22 selects double
   vs single precision.
   NOTE(review): extraction dropped structural lines; code byte-identical.  */
6561 do_vec_FNEG (sim_cpu *cpu)
6564 instr[30] = half (0)/full (1)
6565 instr[29,23] = 10 1110 1
6566 instr[22] = single (0)/double (1)
6567 instr[21,10] = 10 0000 1111 10
6569 instr[4,0] = Vdest. */
6571 unsigned vn = INSTR (9, 5);
6572 unsigned vd = INSTR (4, 0);
6573 unsigned full = INSTR (30, 30);
6576 NYI_assert (29, 23, 0x5D);
6577 NYI_assert (21, 10, 0x83E);
6579 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6585 for (i = 0; i < 2; i++)
6586 aarch64_set_vec_double (cpu, vd, i,
6587 - aarch64_get_vec_double (cpu, vn, i));
6591 for (i = 0; i < (full ? 4 : 2); i++)
6592 aarch64_set_vec_float (cpu, vd, i,
6593 - aarch64_get_vec_float (cpu, vn, i));
/* Vector NOT (MVN): bitwise complement of Vn into Vd, processed as
   8 or 16 byte lanes depending on bit 30.
   NOTE(review): extraction dropped structural lines; code byte-identical.  */
6598 do_vec_NOT (sim_cpu *cpu)
6601 instr[30] = half (0)/full (1)
6602 instr[29,10] = 10 1110 0010 0000 0101 10
6606 unsigned vn = INSTR (9, 5);
6607 unsigned vd = INSTR (4, 0);
6609 int full = INSTR (30, 30);
6611 NYI_assert (29, 10, 0xB8816);
6613 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6614 for (i = 0; i < (full ? 16 : 8); i++)
6615 aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
/* Helper: count leading zeros of VAL within a field of SIZE bits.
   NOTE(review): most of this function's body was lost in extraction —
   only the mask setup survives; reconstruct from upstream before editing.  */
6619 clz (uint64_t val, unsigned size)
6624 mask <<= (size - 1);
/* Vector CLZ: per-lane count-leading-zeros of Vn into Vd using the clz
   helper above; lane width from bits [23,22], 64-bit requires full form.
   NOTE(review): extraction dropped structural lines; code byte-identical.  */
6639 do_vec_CLZ (sim_cpu *cpu)
6642 instr[30] = half (0)/full (1)
6643 instr[29,24] = 10 1110
6645 instr[21,10] = 10 0000 0100 10
6649 unsigned vn = INSTR (9, 5);
6650 unsigned vd = INSTR (4, 0);
6652 int full = INSTR (30,30);
6654 NYI_assert (29, 24, 0x2E);
6655 NYI_assert (21, 10, 0x812);
6657 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6658 switch (INSTR (23, 22))
6661 for (i = 0; i < (full ? 16 : 8); i++)
6662 aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8));
6665 for (i = 0; i < (full ? 8 : 4); i++)
6666 aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16));
6669 for (i = 0; i < (full ? 4 : 2); i++)
6670 aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32));
6675 aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
6676 aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
/* INS (element): copy one element of Vs into one element of Vd.  The
   element size is found from the lowest set bit of imm5 (bits [20,16]);
   the destination index occupies the bits above it and the source index
   comes from bits [14,11].
   NOTE(review): extraction dropped structural lines (the initial
   if (INSTR (16,16)) test for byte size is missing); code byte-identical.  */
6682 do_vec_MOV_element (sim_cpu *cpu)
6684 /* instr[31,21] = 0110 1110 000
6685 instr[20,16] = size & dest index
6687 instr[14,11] = source index
6692 unsigned vs = INSTR (9, 5);
6693 unsigned vd = INSTR (4, 0);
6697 NYI_assert (31, 21, 0x370);
6698 NYI_assert (15, 15, 0);
6699 NYI_assert (10, 10, 1);
6701 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* Byte move.  */
6705 src_index = INSTR (14, 11);
6706 dst_index = INSTR (20, 17);
6707 aarch64_set_vec_u8 (cpu, vd, dst_index,
6708 aarch64_get_vec_u8 (cpu, vs, src_index));
6710 else if (INSTR (17, 17))
/* Half-word move.  */
6713 NYI_assert (11, 11, 0);
6714 src_index = INSTR (14, 12);
6715 dst_index = INSTR (20, 18);
6716 aarch64_set_vec_u16 (cpu, vd, dst_index,
6717 aarch64_get_vec_u16 (cpu, vs, src_index));
6719 else if (INSTR (18, 18))
/* Word move.  */
6722 NYI_assert (12, 11, 0);
6723 src_index = INSTR (14, 13);
6724 dst_index = INSTR (20, 19);
6725 aarch64_set_vec_u32 (cpu, vd, dst_index,
6726 aarch64_get_vec_u32 (cpu, vs, src_index));
/* Double-word move: bit 19 must be set.  */
6730 NYI_assert (19, 19, 1);
6731 NYI_assert (13, 11, 0);
6732 src_index = INSTR (14, 14);
6733 dst_index = INSTR (20, 20);
6734 aarch64_set_vec_u64 (cpu, vd, dst_index,
6735 aarch64_get_vec_u64 (cpu, vs, src_index));
/* Vector REV32: reverse the order of 8- or 16-bit elements within each
   32-bit word of Rn (via the i^3 / i^1 index flips into a temporary),
   then write the result to Rd.
   NOTE(review): extraction dropped structural lines, including the
   declaration of the union temporary "val"; code byte-identical.  */
6740 do_vec_REV32 (sim_cpu *cpu)
6743 instr[30] = full/half
6744 instr[29,24] = 10 1110
6746 instr[21,10] = 10 0000 0000 10
6750 unsigned rn = INSTR (9, 5);
6751 unsigned rd = INSTR (4, 0);
6752 unsigned size = INSTR (23, 22);
6753 unsigned full = INSTR (30, 30);
6757 NYI_assert (29, 24, 0x2E);
6758 NYI_assert (21, 10, 0x802);
6760 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6764 for (i = 0; i < (full ? 16 : 8); i++)
6765 val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i);
6769 for (i = 0; i < (full ? 8 : 4); i++)
6770 val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i);
/* Write back; the upper half only for the full (128-bit) form.  */
6777 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
6779 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
/* Vector EXT: extract a byte-aligned window from the concatenation of
   Vn:Vm, starting at src_index, into Vd.  The half-width form rejects
   indices >= 8 (HALT on the dropped line after the check).
   NOTE(review): extraction dropped structural lines, including the
   declarations of i, j and the union temporary; code byte-identical.  */
6783 do_vec_EXT (sim_cpu *cpu)
6786 instr[30] = full/half
6787 instr[29,21] = 10 1110 000
6790 instr[14,11] = source index
6795 unsigned vm = INSTR (20, 16);
6796 unsigned vn = INSTR (9, 5);
6797 unsigned vd = INSTR (4, 0);
6798 unsigned src_index = INSTR (14, 11);
6799 unsigned full = INSTR (30, 30);
6804 NYI_assert (31, 21, 0x370);
6805 NYI_assert (15, 15, 0);
6806 NYI_assert (10, 10, 0);
6808 if (!full && (src_index & 0x8))
6813 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6814 for (i = src_index; i < (full ? 16 : 8); i++)
6815 val.b[j ++] = aarch64_get_vec_u8 (cpu, vn, i);
6816 for (i = 0; i < src_index; i++)
6817 val.b[j ++] = aarch64_get_vec_u8 (cpu, vm, i);
6819 aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]);
6821 aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]);
/* Top-level decoder for the Advanced SIMD group (instr[28,25] = 0111).
   Recognizes whole-vector moves, vector immediates, DUP, then dispatches
   via do_vec_op1/do_vec_op2 or the sub-opcode tables below.
   NOTE(review): extraction dropped many structural lines here (switch
   braces, several case labels, HALT_NYI fallthroughs); the visible
   dispatch arms are kept byte-identical.  */
6825 dexAdvSIMD0 (sim_cpu *cpu)
6827 /* instr [28,25] = 0 111. */
6828 if ( INSTR (15, 10) == 0x07
6832 if (INSTR (31, 21) == 0x075
6833 || INSTR (31, 21) == 0x275)
6835 do_vec_MOV_whole_vector (cpu);
/* Vector move immediate forms.  */
6840 if (INSTR (29, 19) == 0x1E0)
6842 do_vec_MOV_immediate (cpu);
6846 if (INSTR (29, 19) == 0x5E0)
6852 if (INSTR (29, 19) == 0x1C0
6853 || INSTR (29, 19) == 0x1C1)
6855 if (INSTR (15, 10) == 0x03)
6857 do_vec_DUP_scalar_into_vector (cpu);
6862 switch (INSTR (29, 24))
6864 case 0x0E: do_vec_op1 (cpu); return;
6865 case 0x0F: do_vec_op2 (cpu); return;
/* 0x2E group: three-register and two-register misc forms.  */
6868 if (INSTR (21, 21) == 1)
6870 switch (INSTR (15, 10))
6877 switch (INSTR (23, 22))
6879 case 0: do_vec_EOR (cpu); return;
6880 case 1: do_vec_BSL (cpu); return;
6882 case 3: do_vec_bit (cpu); return;
6886 case 0x08: do_vec_sub_long (cpu); return;
6887 case 0x11: do_vec_USHL (cpu); return;
6888 case 0x12: do_vec_CLZ (cpu); return;
6889 case 0x16: do_vec_NOT (cpu); return;
6890 case 0x19: do_vec_max (cpu); return;
6891 case 0x1B: do_vec_min (cpu); return;
6892 case 0x21: do_vec_SUB (cpu); return;
6893 case 0x25: do_vec_MLS (cpu); return;
6894 case 0x31: do_vec_FminmaxNMP (cpu); return;
6895 case 0x35: do_vec_FADDP (cpu); return;
6896 case 0x37: do_vec_FMUL (cpu); return;
6897 case 0x3F: do_vec_FDIV (cpu); return;
6900 switch (INSTR (20, 16))
6902 case 0x00: do_vec_FNEG (cpu); return;
6903 case 0x01: do_vec_FSQRT (cpu); return;
6917 do_vec_compare (cpu); return;
/* INS (element) form.  */
6924 if (INSTR (31, 21) == 0x370)
6927 do_vec_MOV_element (cpu);
6933 switch (INSTR (21, 10))
6935 case 0x82E: do_vec_neg (cpu); return;
6936 case 0x87E: do_vec_sqrt (cpu); return;
6938 if (INSTR (15, 10) == 0x30)
6948 switch (INSTR (15, 10))
6950 case 0x01: do_vec_SSHR_USHR (cpu); return;
6952 case 0x12: do_vec_mls_indexed (cpu); return;
6953 case 0x29: do_vec_xtl (cpu); return;
6967 /* Float multiply add. */
/* FMADD (single): Sd = Sa + Sn * Sm.  */
6969 fmadds (sim_cpu *cpu)
6971 unsigned sa = INSTR (14, 10);
6972 unsigned sm = INSTR (20, 16);
6973 unsigned sn = INSTR ( 9, 5);
6974 unsigned sd = INSTR ( 4, 0);
6976 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6977 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
6978 + aarch64_get_FP_float (cpu, sn)
6979 * aarch64_get_FP_float (cpu, sm));
6982 /* Double multiply add. */
/* FMADD (double): Dd = Da + Dn * Dm.  */
6984 fmaddd (sim_cpu *cpu)
6986 unsigned sa = INSTR (14, 10);
6987 unsigned sm = INSTR (20, 16);
6988 unsigned sn = INSTR ( 9, 5);
6989 unsigned sd = INSTR ( 4, 0);
6991 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6992 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
6993 + aarch64_get_FP_double (cpu, sn)
6994 * aarch64_get_FP_double (cpu, sm));
6997 /* Float multiply subtract. */
/* FMSUB (single): Sd = Sa - Sn * Sm.  */
6999 fmsubs (sim_cpu *cpu)
7001 unsigned sa = INSTR (14, 10);
7002 unsigned sm = INSTR (20, 16);
7003 unsigned sn = INSTR ( 9, 5);
7004 unsigned sd = INSTR ( 4, 0);
7006 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7007 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7008 - aarch64_get_FP_float (cpu, sn)
7009 * aarch64_get_FP_float (cpu, sm));
7012 /* Double multiply subtract. */
/* FMSUB (double): Dd = Da - Dn * Dm.  */
7014 fmsubd (sim_cpu *cpu)
7016 unsigned sa = INSTR (14, 10);
7017 unsigned sm = INSTR (20, 16);
7018 unsigned sn = INSTR ( 9, 5);
7019 unsigned sd = INSTR ( 4, 0);
7021 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7022 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7023 - aarch64_get_FP_double (cpu, sn)
7024 * aarch64_get_FP_double (cpu, sm));
7027 /* Float negative multiply add. */
/* FNMADD (single): Sd = -Sa + (-Sn) * Sm.  */
7029 fnmadds (sim_cpu *cpu)
7031 unsigned sa = INSTR (14, 10);
7032 unsigned sm = INSTR (20, 16);
7033 unsigned sn = INSTR ( 9, 5);
7034 unsigned sd = INSTR ( 4, 0);
7036 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7037 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7038 + (- aarch64_get_FP_float (cpu, sn))
7039 * aarch64_get_FP_float (cpu, sm));
7042 /* Double negative multiply add. */
/* FNMADD (double): Dd = -Da + (-Dn) * Dm.  */
7044 fnmaddd (sim_cpu *cpu)
7046 unsigned sa = INSTR (14, 10);
7047 unsigned sm = INSTR (20, 16);
7048 unsigned sn = INSTR ( 9, 5);
7049 unsigned sd = INSTR ( 4, 0);
7051 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7052 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7053 + (- aarch64_get_FP_double (cpu, sn))
7054 * aarch64_get_FP_double (cpu, sm));
7057 /* Float negative multiply subtract. */
/* FNMSUB (single): Sd = -Sa + Sn * Sm.  */
7059 fnmsubs (sim_cpu *cpu)
7061 unsigned sa = INSTR (14, 10);
7062 unsigned sm = INSTR (20, 16);
7063 unsigned sn = INSTR ( 9, 5);
7064 unsigned sd = INSTR ( 4, 0);
7066 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7067 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7068 + aarch64_get_FP_float (cpu, sn)
7069 * aarch64_get_FP_float (cpu, sm));
7072 /* Double negative multiply subtract. */
/* FNMSUB (double): Dd = -Da + Dn * Dm.  */
7074 fnmsubd (sim_cpu *cpu)
7076 unsigned sa = INSTR (14, 10);
7077 unsigned sm = INSTR (20, 16);
7078 unsigned sn = INSTR ( 9, 5);
7079 unsigned sd = INSTR ( 4, 0);
7081 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7082 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7083 + aarch64_get_FP_double (cpu, sn)
7084 * aarch64_get_FP_double (cpu, sm));
/* Decoder for FP data-processing (3 source): dispatch on the combined
   type:o1:o2 field to the fmadd/fmsub/fnmadd/fnmsub helpers above.
   M or S set, or type > 1, is unallocated (HALT on dropped lines).
   NOTE(review): extraction dropped structural lines; code byte-identical.  */
7088 dexSimpleFPDataProc3Source (sim_cpu *cpu)
7090 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7092 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7095 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
7096 instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated
7097 instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */
7099 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7100 /* dispatch on combined type:o1:o2. */
7101 uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15);
7108 case 0: fmadds (cpu); return;
7109 case 1: fmsubs (cpu); return;
7110 case 2: fnmadds (cpu); return;
7111 case 3: fnmsubs (cpu); return;
7112 case 4: fmaddd (cpu); return;
7113 case 5: fmsubd (cpu); return;
7114 case 6: fnmaddd (cpu); return;
7115 case 7: fnmsubd (cpu); return;
7117 /* type > 1 is currently unallocated. */
/* FP fixed-point convert group — body lost in extraction (upstream this
   is a HALT_NYI stub); only the signature line survives.  */
7123 dexSimpleFPFixedConvert (sim_cpu *cpu)
/* FCCMP: if the condition in bits [15,12] fails, load NZCV from the
   immediate nzcv field; otherwise compare Rn and Rm (single or double
   per the dropped size test) and set NZCV like FCMP.  NaNs are not yet
   handled (see FIXMEs below).
   NOTE(review): extraction dropped structural lines; code byte-identical.  */
7129 dexSimpleFPCondCompare (sim_cpu *cpu)
7131 /* instr [31,23] = 0001 1110 0
7135 instr [15,12] = condition
7139 instr [3,0] = nzcv */
7141 unsigned rm = INSTR (20, 16);
7142 unsigned rn = INSTR (9, 5);
7144 NYI_assert (31, 23, 0x3C);
7145 NYI_assert (11, 10, 0x1);
7146 NYI_assert (4, 4, 0);
7148 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* Condition fails: take flags directly from the immediate.  */
7149 if (! testConditionCode (cpu, INSTR (15, 12)))
7151 aarch64_set_CPSR (cpu, INSTR (3, 0));
7157 /* Double precision. */
7158 double val1 = aarch64_get_vec_double (cpu, rn, 0);
7159 double val2 = aarch64_get_vec_double (cpu, rm, 0);
7161 /* FIXME: Check for NaNs. */
7163 aarch64_set_CPSR (cpu, (Z | C));
7164 else if (val1 < val2)
7165 aarch64_set_CPSR (cpu, N);
7166 else /* val1 > val2 */
7167 aarch64_set_CPSR (cpu, C);
7171 /* Single precision. */
7172 float val1 = aarch64_get_vec_float (cpu, rn, 0);
7173 float val2 = aarch64_get_vec_float (cpu, rm, 0);
7175 /* FIXME: Check for NaNs. */
7177 aarch64_set_CPSR (cpu, (Z | C));
7178 else if (val1 < val2)
7179 aarch64_set_CPSR (cpu, N);
7180 else /* val1 > val2 */
7181 aarch64_set_CPSR (cpu, C);
/* FADD (single): Sd = Sn + Sm.  */
7189 fadds (sim_cpu *cpu)
7191 unsigned sm = INSTR (20, 16);
7192 unsigned sn = INSTR ( 9, 5);
7193 unsigned sd = INSTR ( 4, 0);
7195 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7196 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7197 + aarch64_get_FP_float (cpu, sm));
/* FADD (double): Dd = Dn + Dm.  */
7202 faddd (sim_cpu *cpu)
7204 unsigned sm = INSTR (20, 16);
7205 unsigned sn = INSTR ( 9, 5);
7206 unsigned sd = INSTR ( 4, 0);
7208 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7209 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7210 + aarch64_get_FP_double (cpu, sm));
/* FDIV (single): Sd = Sn / Sm.  */
7215 fdivs (sim_cpu *cpu)
7217 unsigned sm = INSTR (20, 16);
7218 unsigned sn = INSTR ( 9, 5);
7219 unsigned sd = INSTR ( 4, 0);
7221 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7222 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7223 / aarch64_get_FP_float (cpu, sm));
7226 /* Double divide. */
/* FDIV (double): Dd = Dn / Dm.  */
7228 fdivd (sim_cpu *cpu)
7230 unsigned sm = INSTR (20, 16);
7231 unsigned sn = INSTR ( 9, 5);
7232 unsigned sd = INSTR ( 4, 0);
7234 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7235 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7236 / aarch64_get_FP_double (cpu, sm));
7239 /* Float multiply. */
/* FMUL (single): Sd = Sn * Sm.  */
7241 fmuls (sim_cpu *cpu)
7243 unsigned sm = INSTR (20, 16);
7244 unsigned sn = INSTR ( 9, 5);
7245 unsigned sd = INSTR ( 4, 0);
7247 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7248 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7249 * aarch64_get_FP_float (cpu, sm));
7252 /* Double multiply. */
/* FMUL (double): Dd = Dn * Dm.  */
7254 fmuld (sim_cpu *cpu)
7256 unsigned sm = INSTR (20, 16);
7257 unsigned sn = INSTR ( 9, 5);
7258 unsigned sd = INSTR ( 4, 0);
7260 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7261 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7262 * aarch64_get_FP_double (cpu, sm));
7265 /* Float negate and multiply. */
/* FNMUL (single): Sd = -(Sn * Sm).  */
7267 fnmuls (sim_cpu *cpu)
7269 unsigned sm = INSTR (20, 16);
7270 unsigned sn = INSTR ( 9, 5);
7271 unsigned sd = INSTR ( 4, 0);
7273 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7274 aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
7275 * aarch64_get_FP_float (cpu, sm)));
7278 /* Double negate and multiply. */
/* FNMUL (double): Dd = -(Dn * Dm).  */
7280 fnmuld (sim_cpu *cpu)
7282 unsigned sm = INSTR (20, 16);
7283 unsigned sn = INSTR ( 9, 5);
7284 unsigned sd = INSTR ( 4, 0);
7286 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7287 aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
7288 * aarch64_get_FP_double (cpu, sm)));
7291 /* Float subtract. */
/* FSUB (single): Sd = Sn - Sm.  */
7293 fsubs (sim_cpu *cpu)
7295 unsigned sm = INSTR (20, 16);
7296 unsigned sn = INSTR ( 9, 5);
7297 unsigned sd = INSTR ( 4, 0);
7299 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7300 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7301 - aarch64_get_FP_float (cpu, sm));
7304 /* Double subtract. */
/* FSUB (double): Dd = Dn - Dm.  */
7306 fsubd (sim_cpu *cpu)
7308 unsigned sm = INSTR (20, 16);
7309 unsigned sn = INSTR ( 9, 5);
7310 unsigned sd = INSTR ( 4, 0);
7312 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7313 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7314 - aarch64_get_FP_double (cpu, sm));
/* FMINNM (scalar): IEEE minNum of Sn/Dn and Sm/Dm via the dminnm /
   fminnm helpers; bit 22 selects double vs single.
   NOTE(review): extraction dropped structural lines; code byte-identical.  */
7318 do_FMINNM (sim_cpu *cpu)
7320 /* instr[31,23] = 0 0011 1100
7321 instr[22] = float(0)/double(1)
7324 instr[15,10] = 01 1110
7328 unsigned sm = INSTR (20, 16);
7329 unsigned sn = INSTR ( 9, 5);
7330 unsigned sd = INSTR ( 4, 0);
7332 NYI_assert (31, 23, 0x03C);
7333 NYI_assert (15, 10, 0x1E);
7335 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7337 aarch64_set_FP_double (cpu, sd,
7338 dminnm (aarch64_get_FP_double (cpu, sn),
7339 aarch64_get_FP_double (cpu, sm)));
7341 aarch64_set_FP_float (cpu, sd,
7342 fminnm (aarch64_get_FP_float (cpu, sn),
7343 aarch64_get_FP_float (cpu, sm)));
/* FMAXNM (scalar): IEEE maxNum of Sn/Dn and Sm/Dm via the dmaxnm /
   fmaxnm helpers; bit 22 selects double vs single.
   NOTE(review): extraction dropped structural lines; code byte-identical.  */
7347 do_FMAXNM (sim_cpu *cpu)
7349 /* instr[31,23] = 0 0011 1100
7350 instr[22] = float(0)/double(1)
7353 instr[15,10] = 01 1010
7357 unsigned sm = INSTR (20, 16);
7358 unsigned sn = INSTR ( 9, 5);
7359 unsigned sd = INSTR ( 4, 0);
7361 NYI_assert (31, 23, 0x03C);
7362 NYI_assert (15, 10, 0x1A);
7364 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7366 aarch64_set_FP_double (cpu, sd,
7367 dmaxnm (aarch64_get_FP_double (cpu, sn),
7368 aarch64_get_FP_double (cpu, sm)));
7370 aarch64_set_FP_float (cpu, sd,
7371 fmaxnm (aarch64_get_FP_float (cpu, sn),
7372 aarch64_get_FP_float (cpu, sm)));
/* Decoder for FP data-processing (2 source): dispatch on opcode
   [15,12] to the scalar arithmetic helpers.  FMAX/FMIN (opcodes 4/5)
   are not implemented; M/S set or type > 1 is unallocated (the HALT
   lines were dropped in extraction).
   NOTE(review): extraction dropped structural lines; code byte-identical.  */
7376 dexSimpleFPDataProc2Source (sim_cpu *cpu)
7378 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7380 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7383 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
7386 instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
7387 0010 ==> FADD, 0011 ==> FSUB,
7388 0100 ==> FMAX, 0101 ==> FMIN
7389 0110 ==> FMAXNM, 0111 ==> FMINNM
7390 1000 ==> FNMUL, ow ==> UNALLOC
7395 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7396 uint32_t type = INSTR (23, 22);
7397 /* Dispatch on opcode. */
7398 uint32_t dispatch = INSTR (15, 12);
7409 case 0: fmuld (cpu); return;
7410 case 1: fdivd (cpu); return;
7411 case 2: faddd (cpu); return;
7412 case 3: fsubd (cpu); return;
7413 case 6: do_FMAXNM (cpu); return;
7414 case 7: do_FMINNM (cpu); return;
7415 case 8: fnmuld (cpu); return;
7417 /* Have not yet implemented fmax and fmin. */
7425 else /* type == 0 => floats. */
7428 case 0: fmuls (cpu); return;
7429 case 1: fdivs (cpu); return;
7430 case 2: fadds (cpu); return;
7431 case 3: fsubs (cpu); return;
7432 case 6: do_FMAXNM (cpu); return;
7433 case 7: do_FMINNM (cpu); return;
7434 case 8: fnmuls (cpu); return;
/* FCSEL: Sd/Dd = (condition holds) ? Sn/Dn : Sm/Dm; bit 22 selects
   double vs single precision.
   NOTE(review): extraction dropped structural lines; code byte-identical.  */
7446 dexSimpleFPCondSelect (sim_cpu *cpu)
7449 instr[31,23] = 0 0011 1100
7450 instr[22] = 0=>single 1=>double
7457 unsigned sm = INSTR (20, 16);
7458 unsigned sn = INSTR ( 9, 5);
7459 unsigned sd = INSTR ( 4, 0);
7460 uint32_t set = testConditionCode (cpu, INSTR (15, 12));
7462 NYI_assert (31, 23, 0x03C);
7463 NYI_assert (11, 10, 0x3);
7465 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7467 aarch64_set_FP_double (cpu, sd, (set ? aarch64_get_FP_double (cpu, sn)
7468 : aarch64_get_FP_double (cpu, sm)));
7470 aarch64_set_FP_float (cpu, sd, (set ? aarch64_get_FP_float (cpu, sn)
7471 : aarch64_get_FP_float (cpu, sm)));
7474 /* Store 32 bit unscaled signed 9 bit. */
/* STUR (32-bit FP): store low word of Vt at [Xn|SP + offset].  */
7476 fsturs (sim_cpu *cpu, int32_t offset)
7478 unsigned int rn = INSTR (9, 5);
7479 unsigned int st = INSTR (4, 0);
7481 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7482 aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, 1) + offset,
7483 aarch64_get_vec_u32 (cpu, st, 0));
7486 /* Store 64 bit unscaled signed 9 bit. */
/* STUR (64-bit FP): store low doubleword of Vt at [Xn|SP + offset].  */
7488 fsturd (sim_cpu *cpu, int32_t offset)
7490 unsigned int rn = INSTR (9, 5);
7491 unsigned int st = INSTR (4, 0);
7493 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7494 aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, 1) + offset,
7495 aarch64_get_vec_u64 (cpu, st, 0));
7498 /* Store 128 bit unscaled signed 9 bit. */
/* STUR (128-bit FP): store the whole Qt register at [Xn|SP + offset]
   via a long-double temporary (declaration dropped in extraction).  */
7500 fsturq (sim_cpu *cpu, int32_t offset)
7502 unsigned int rn = INSTR (9, 5);
7503 unsigned int st = INSTR (4, 0);
7506 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7507 aarch64_get_FP_long_double (cpu, st, & a);
7508 aarch64_set_mem_long_double (cpu,
7509 aarch64_get_reg_u64 (cpu, rn, 1)
7513 /* TODO FP move register. */
7515 /* 32 bit fp to fp move register. */
7517 ffmovs (sim_cpu *cpu)
7519 unsigned int rn = INSTR (9, 5);
7520 unsigned int st = INSTR (4, 0);
7522 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* FMOV Sd, Sn: copy the single-precision value unchanged.  */
7523 aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
7526 /* 64 bit fp to fp move register. */
7528 ffmovd (sim_cpu *cpu)
7530 unsigned int rn = INSTR (9, 5);
7531 unsigned int st = INSTR (4, 0);
7533 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* FMOV Dd, Dn: copy the double-precision value unchanged.  */
7534 aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
7537 /* 32 bit GReg to Vec move register. */
7539 fgmovs (sim_cpu *cpu)
7541 unsigned int rn = INSTR (9, 5);
7542 unsigned int st = INSTR (4, 0);
7544 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* FMOV Sd, Wn: move the 32-bit general register bit-pattern into lane 0.  */
7545 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
7548 /* 64 bit g to fp move register. */
7550 fgmovd (sim_cpu *cpu)
7552 unsigned int rn = INSTR (9, 5);
7553 unsigned int st = INSTR (4, 0);
7555 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* FMOV Dd, Xn: move the 64-bit general register bit-pattern into lane 0.  */
7556 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7559 /* 32 bit fp to g move register. */
7561 gfmovs (sim_cpu *cpu)
7563 unsigned int rn = INSTR (9, 5);
7564 unsigned int st = INSTR (4, 0);
7566 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* FMOV Wd, Sn: move vector lane 0 bit-pattern to the general register
   (zero-extended to 64 bits by aarch64_set_reg_u64).  */
7567 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
7570 /* 64 bit fp to g move register. */
7572 gfmovd (sim_cpu *cpu)
7574 unsigned int rn = INSTR (9, 5);
7575 unsigned int st = INSTR (4, 0);
7577 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* FMOV Xd, Dn: move vector lane 0 bit-pattern to the general register.  */
7578 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
7581 /* FP move immediate
7583 These install an immediate 8 bit value in the target register
7584 where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
7588 fmovs (sim_cpu *cpu)
7590 unsigned int sd = INSTR (4, 0);
7591 uint32_t imm = INSTR (20, 13);
/* Expand the 8-bit imm8 encoding into a full single-precision value.  */
7592 float f = fp_immediate_for_encoding_32 (imm);
7594 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7595 aarch64_set_FP_float (cpu, sd, f);
/* FMOV Dd, #imm: install an expanded 8-bit FP immediate as a double.  */
7599 fmovd (sim_cpu *cpu)
7601 unsigned int sd = INSTR (4, 0);
7602 uint32_t imm = INSTR (20, 13);
7603 double d = fp_immediate_for_encoding_64 (imm);
7605 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7606 aarch64_set_FP_double (cpu, sd, d);
/* Decode an FP move-immediate instruction and dispatch to fmovs/fmovd.  */
7610 dexSimpleFPImmediate (sim_cpu *cpu)
7612 /* instr[31,23] == 00111100
7613 instr[22] == type : single(0)/double(1)
7615 instr[20,13] == imm8
7617 instr[9,5] == imm5 : 00000 ==> PK, ow ==> UNALLOC
7619 uint32_t imm5 = INSTR (9, 5);
7621 NYI_assert (31, 23, 0x3C);
7632 /* TODO specific decode and execute for group Load Store. */
7634 /* TODO FP load/store single register (unscaled offset). */
7636 /* TODO load 8 bit unscaled signed 9 bit. */
7637 /* TODO load 16 bit unscaled signed 9 bit. */
7639 /* Load 32 bit unscaled signed 9 bit. */
7641 fldurs (sim_cpu *cpu, int32_t offset)
7643 unsigned int rn = INSTR (9, 5);
7644 unsigned int st = INSTR (4, 0);
7646 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* Load 32 bits from [Rn|SP + offset] into lane 0 of vector register ST.  */
7647 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
7648 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7651 /* Load 64 bit unscaled signed 9 bit. */
7653 fldurd (sim_cpu *cpu, int32_t offset)
7655 unsigned int rn = INSTR (9, 5);
7656 unsigned int st = INSTR (4, 0);
7658 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* Load 64 bits from [Rn|SP + offset] into lane 0 of vector register ST.  */
7659 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
7660 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7663 /* Load 128 bit unscaled signed 9 bit. */
7665 fldurq (sim_cpu *cpu, int32_t offset)
7667 unsigned int rn = INSTR (9, 5);
7668 unsigned int st = INSTR (4, 0);
7670 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
7672 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* Read 128 bits from memory into the local buffer, then install them
   in the full vector register ST.  */
7673 aarch64_get_mem_long_double (cpu, addr, & a);
7674 aarch64_set_FP_long_double (cpu, st, a);
7677 /* TODO store 8 bit unscaled signed 9 bit. */
7678 /* TODO store 16 bit unscaled signed 9 bit. */
7683 /* Float absolute value. */
7685 fabss (sim_cpu *cpu)
7687 unsigned sn = INSTR (9, 5);
7688 unsigned sd = INSTR (4, 0);
7689 float value = aarch64_get_FP_float (cpu, sn);
7691 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* FABS Sd, Sn using the single-precision libm fabsf.  */
7692 aarch64_set_FP_float (cpu, sd, fabsf (value));
7695 /* Double absolute value. */
/* NOTE(review): the name "fabcpu" appears to be a mangled "fabsd"
   (FABS Dd, Dn) -- the behaviour is double-precision absolute value.  */
7697 fabcpu (sim_cpu *cpu)
7699 unsigned sn = INSTR (9, 5);
7700 unsigned sd = INSTR (4, 0);
7701 double value = aarch64_get_FP_double (cpu, sn);
7703 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7704 aarch64_set_FP_double (cpu, sd, fabs (value));
7707 /* Float negative value. */
7709 fnegs (sim_cpu *cpu)
7711 unsigned sn = INSTR (9, 5);
7712 unsigned sd = INSTR (4, 0);
7714 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* FNEG Sd, Sn: arithmetic negation of the single-precision value.  */
7715 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
7718 /* Double negative value. */
7720 fnegd (sim_cpu *cpu)
7722 unsigned sn = INSTR (9, 5);
7723 unsigned sd = INSTR (4, 0);
7725 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* FNEG Dd, Dn: arithmetic negation of the double-precision value.  */
7726 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
7729 /* Float square root. */
7731 fsqrts (sim_cpu *cpu)
7733 unsigned sn = INSTR (9, 5);
7734 unsigned sd = INSTR (4, 0);
7736 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* FSQRT Sd, Sn via the host's single-precision sqrtf.  */
7737 aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn)));
7740 /* Double square root. */
7742 fsqrtd (sim_cpu *cpu)
7744 unsigned sn = INSTR (9, 5);
7745 unsigned sd = INSTR (4, 0);
7747 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* FSQRT Dd, Dn via the host's double-precision sqrt.  */
7748 aarch64_set_FP_double (cpu, sd,
7749 sqrt (aarch64_get_FP_double (cpu, sn)));
7752 /* Convert double to float. */
7754 fcvtds (sim_cpu *cpu)
7756 unsigned sn = INSTR (9, 5);
7757 unsigned sd = INSTR (4, 0);
7759 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* FCVT Sd, Dn: narrowing conversion via a host C cast.  */
7760 aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
7763 /* Convert float to double. */
/* NOTE(review): "fcvtcpu" appears to be a mangled "fcvtsd"-style name;
   the behaviour is a widening float-to-double conversion.  */
7765 fcvtcpu (sim_cpu *cpu)
7767 unsigned sn = INSTR (9, 5);
7768 unsigned sd = INSTR (4, 0);
7770 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7771 aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
/* FRINT* family: round the FP value in Rs to an integral FP value in Rd.
   The rounding mode comes from instr[17,15]; modes 6 (exact, FRINTX) and
   7 (FRINTI) defer to the current mode held in FPSR/FPCR bits 23,22.
   A double path and a float path repeat the same mode dispatch.  */
7775 do_FRINT (sim_cpu *cpu)
7777 /* instr[31,23] = 0001 1110 0
7778 instr[22] = single(0)/double(1)
7780 instr[17,15] = rounding mode
7781 instr[14,10] = 10000
7783 instr[4,0] = dest */
7786 unsigned rs = INSTR (9, 5);
7787 unsigned rd = INSTR (4, 0);
7788 unsigned int rmode = INSTR (17, 15);
7790 NYI_assert (31, 23, 0x03C);
7791 NYI_assert (21, 18, 0x9);
7792 NYI_assert (14, 10, 0x10);
7794 if (rmode == 6 || rmode == 7)
7795 /* FIXME: Add support for rmode == 6 exactness check. */
7796 rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
7798 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7801 double val = aarch64_get_FP_double (cpu, rs);
7805 case 0: /* mode N: nearest or even. */
7807 double rval = round (val);
/* Halfway case: round () rounds away from zero, but mode N requires
   ties-to-even, so detect .5 and adjust toward the even neighbour.  */
7809 if (val - rval == 0.5)
7811 if (((rval / 2.0) * 2.0) != rval)
7815 aarch64_set_FP_double (cpu, rd, round (val));
7819 case 1: /* mode P: towards +inf. */
7821 aarch64_set_FP_double (cpu, rd, trunc (val));
7823 aarch64_set_FP_double (cpu, rd, round (val));
7826 case 2: /* mode M: towards -inf. */
7828 aarch64_set_FP_double (cpu, rd, round (val));
7830 aarch64_set_FP_double (cpu, rd, trunc (val));
7833 case 3: /* mode Z: towards 0. */
7834 aarch64_set_FP_double (cpu, rd, trunc (val));
7837 case 4: /* mode A: away from 0. */
7838 aarch64_set_FP_double (cpu, rd, round (val));
7841 case 6: /* mode X: use FPCR with exactness check. */
7842 case 7: /* mode I: use FPCR mode. */
/* Single-precision path: identical mode dispatch using the *f libm calls.  */
7850 val = aarch64_get_FP_float (cpu, rs);
7854 case 0: /* mode N: nearest or even. */
7856 float rval = roundf (val);
7858 if (val - rval == 0.5)
7860 if (((rval / 2.0) * 2.0) != rval)
7864 aarch64_set_FP_float (cpu, rd, rval);
7868 case 1: /* mode P: towards +inf. */
7870 aarch64_set_FP_float (cpu, rd, truncf (val));
7872 aarch64_set_FP_float (cpu, rd, roundf (val));
7875 case 2: /* mode M: towards -inf. */
7877 aarch64_set_FP_float (cpu, rd, truncf (val));
7879 aarch64_set_FP_float (cpu, rd, roundf (val));
7882 case 3: /* mode Z: towards 0. */
7883 aarch64_set_FP_float (cpu, rd, truncf (val));
7886 case 4: /* mode A: away from 0. */
7887 aarch64_set_FP_float (cpu, rd, roundf (val));
7890 case 6: /* mode X: use FPCR with exactness check. */
7891 case 7: /* mode I: use FPCR mode. */
7899 /* Convert half to float. */
7901 do_FCVT_half_to_single (sim_cpu *cpu)
7903 unsigned rn = INSTR (9, 5);
7904 unsigned rd = INSTR (4, 0);
7906 NYI_assert (31, 10, 0x7B890);
7908 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* FCVT Sd, Hn: widening conversion via a host cast.  */
7909 aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn));
7912 /* Convert half to double. */
7914 do_FCVT_half_to_double (sim_cpu *cpu)
7916 unsigned rn = INSTR (9, 5);
7917 unsigned rd = INSTR (4, 0);
7919 NYI_assert (31, 10, 0x7B8B0);
7921 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* FCVT Dd, Hn: widening conversion via a host cast.  */
7922 aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn));
/* Convert single to half (FCVT Hd, Sn).  */
7926 do_FCVT_single_to_half (sim_cpu *cpu)
7928 unsigned rn = INSTR (9, 5);
7929 unsigned rd = INSTR (4, 0);
7931 NYI_assert (31, 10, 0x788F0);
7933 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7934 aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn));
7937 /* Convert double to half. */
7939 do_FCVT_double_to_half (sim_cpu *cpu)
7941 unsigned rn = INSTR (9, 5);
7942 unsigned rd = INSTR (4, 0);
7944 NYI_assert (31, 10, 0x798F0);
7946 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* Narrow via float first, then let aarch64_set_FP_half truncate to half.  */
7947 aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn));
/* Decode/dispatch for the FP data-processing 1-source group
   (FMOV/FABS/FNEG/FSQRT/FCVT/FRINT*), selecting on type and opcode.  */
7951 dexSimpleFPDataProc1Source (sim_cpu *cpu)
7953 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7955 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7958 instr[23,22] ==> type : 00 ==> source is single,
7959 01 ==> source is double
7961 11 ==> UNALLOC or source is half
7963 instr[20,15] ==> opcode : with type 00 or 01
7964 000000 ==> FMOV, 000001 ==> FABS,
7965 000010 ==> FNEG, 000011 ==> FSQRT,
7966 000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double)
7967 000110 ==> UNALLOC, 000111 ==> FCVT (to half)
7968 001000 ==> FRINTN, 001001 ==> FRINTP,
7969 001010 ==> FRINTM, 001011 ==> FRINTZ,
7970 001100 ==> FRINTA, 001101 ==> UNALLOC
7971 001110 ==> FRINTX, 001111 ==> FRINTI
7973 000100 ==> FCVT (half-to-single)
7974 000101 ==> FCVT (half-to-double)
7975 instr[14,10] = 10000. */
7977 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7978 uint32_t type = INSTR (23, 22);
7979 uint32_t opcode = INSTR (20, 15);
/* Half-precision source (type 3): only the two widening FCVTs exist.  */
7987 do_FCVT_half_to_single (cpu);
7988 else if (opcode == 5)
7989 do_FCVT_half_to_double (cpu);
8041 case 8: /* FRINTN etc. */
/* FCVT to half: pick narrowing routine by source width.  */
8053 do_FCVT_double_to_half (cpu);
8055 do_FCVT_single_to_half (cpu);
8066 /* 32 bit signed int to float. */
8068 scvtf32 (sim_cpu *cpu)
8070 unsigned rn = INSTR (9, 5);
8071 unsigned sd = INSTR (4, 0);
8073 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* SCVTF Sd, Wn: signed 32-bit integer to single precision.  */
8074 aarch64_set_FP_float
8075 (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8078 /* signed int to float. */
8080 scvtf (sim_cpu *cpu)
8082 unsigned rn = INSTR (9, 5);
8083 unsigned sd = INSTR (4, 0);
8085 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* SCVTF Sd, Xn: signed 64-bit integer to single precision.  */
8086 aarch64_set_FP_float
8087 (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8090 /* 32 bit signed int to double. */
8092 scvtd32 (sim_cpu *cpu)
8094 unsigned rn = INSTR (9, 5);
8095 unsigned sd = INSTR (4, 0);
8097 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* SCVTF Dd, Wn: signed 32-bit integer to double precision.  */
8098 aarch64_set_FP_double
8099 (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8102 /* signed int to double. */
8104 scvtd (sim_cpu *cpu)
8106 unsigned rn = INSTR (9, 5);
8107 unsigned sd = INSTR (4, 0);
8109 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* SCVTF Dd, Xn: signed 64-bit integer to double precision.  */
8110 aarch64_set_FP_double
8111 (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
/* Saturation limits used by RAISE_EXCEPTIONS below: the integer extrema
   pre-converted to the FP type being compared against.  The *_MIN unsigned
   variants rely on UINT_MIN/ULONG_MIN definitions elsewhere in this file
   (not standard <limits.h> names) -- presumably zero; confirm there.  */
8114 static const float FLOAT_INT_MAX = (float) INT_MAX;
8115 static const float FLOAT_INT_MIN = (float) INT_MIN;
8116 static const double DOUBLE_INT_MAX = (double) INT_MAX;
8117 static const double DOUBLE_INT_MIN = (double) INT_MIN;
8118 static const float FLOAT_LONG_MAX = (float) LONG_MAX;
8119 static const float FLOAT_LONG_MIN = (float) LONG_MIN;
8120 static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
8121 static const double DOUBLE_LONG_MIN = (double) LONG_MIN;
8125 static const float FLOAT_UINT_MAX = (float) UINT_MAX;
8126 static const float FLOAT_UINT_MIN = (float) UINT_MIN;
8127 static const double DOUBLE_UINT_MAX = (double) UINT_MAX;
8128 static const double DOUBLE_UINT_MIN = (double) UINT_MIN;
8129 static const float FLOAT_ULONG_MAX = (float) ULONG_MAX;
8130 static const float FLOAT_ULONG_MIN = (float) ULONG_MIN;
8131 static const double DOUBLE_ULONG_MAX = (double) ULONG_MAX;
8132 static const double DOUBLE_ULONG_MIN = (double) ULONG_MIN;
8134 /* Check for FP exception conditions:
8137 Out of Range raises IO and IX and saturates value
8138 Denormal raises ID and IX and sets to zero. */
/* F     : the source FP value (float or double).
   VALUE : lvalue receiving the saturated integer result.
   FTYPE : FLOAT or DOUBLE; ITYPE : INT/UINT/LONG/ULONG -- token-pasted
   against the FTYPE_ITYPE_MAX/MIN limit constants defined above.  */
8139 #define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE) \
8142 switch (fpclassify (F)) \
8146 aarch64_set_FPSR (cpu, IO); \
8148 VALUE = ITYPE##_MAX; \
8150 VALUE = ITYPE##_MIN; \
8154 if (F >= FTYPE##_##ITYPE##_MAX) \
8156 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8157 VALUE = ITYPE##_MAX; \
8159 else if (F <= FTYPE##_##ITYPE##_MIN) \
8161 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8162 VALUE = ITYPE##_MIN; \
8166 case FP_SUBNORMAL: \
8167 aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID); \
8179 /* 32 bit convert float to signed int truncate towards zero. */
8181 fcvtszs32 (sim_cpu *cpu)
8183 unsigned sn = INSTR (9, 5);
8184 unsigned rd = INSTR (4, 0);
8185 /* TODO : check that this rounds toward zero. */
8186 float f = aarch64_get_FP_float (cpu, sn);
8187 int32_t value = (int32_t) f;
/* Saturate and raise FPSR flags for NaN/out-of-range/denormal inputs.  */
8189 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
8191 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8192 /* Avoid sign extension to 64 bit. */
8193 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8196 /* 64 bit convert float to signed int truncate towards zero. */
8198 fcvtszs (sim_cpu *cpu)
8200 unsigned sn = INSTR (9, 5);
8201 unsigned rd = INSTR (4, 0);
8202 float f = aarch64_get_FP_float (cpu, sn);
8203 int64_t value = (int64_t) f;
/* Saturate and raise FPSR flags for NaN/out-of-range/denormal inputs.  */
8205 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
8207 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8208 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8211 /* 32 bit convert double to signed int truncate towards zero. */
8213 fcvtszd32 (sim_cpu *cpu)
8215 unsigned sn = INSTR (9, 5);
8216 unsigned rd = INSTR (4, 0);
8217 /* TODO : check that this rounds toward zero. */
8218 double d = aarch64_get_FP_double (cpu, sn);
8219 int32_t value = (int32_t) d;
/* Saturate and raise FPSR flags for NaN/out-of-range/denormal inputs.  */
8221 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
8223 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8224 /* Avoid sign extension to 64 bit. */
8225 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8228 /* 64 bit convert double to signed int truncate towards zero. */
8230 fcvtszd (sim_cpu *cpu)
8232 unsigned sn = INSTR (9, 5);
8233 unsigned rd = INSTR (4, 0);
8234 /* TODO : check that this rounds toward zero. */
8235 double d = aarch64_get_FP_double (cpu, sn);
8238 value = (int64_t) d;
/* Saturate and raise FPSR flags for NaN/out-of-range/denormal inputs.  */
8240 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
8242 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8243 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
/* FCVTZU: FP to unsigned integer, round toward zero.  Dispatches on
   destination width (instr[31]) and source type (instr[22]).  */
8247 do_fcvtzu (sim_cpu *cpu)
8249 /* instr[31] = size: 32-bit (0), 64-bit (1)
8250 instr[30,23] = 00111100
8251 instr[22] = type: single (0)/ double (1)
8252 instr[21] = enable (0)/disable(1) precision
8253 instr[20,16] = 11001
8254 instr[15,10] = precision
8258 unsigned rs = INSTR (9, 5);
8259 unsigned rd = INSTR (4, 0);
8261 NYI_assert (30, 23, 0x3C);
8262 NYI_assert (20, 16, 0x19);
8264 if (INSTR (21, 21) != 1)
8265 /* Convert to fixed point. */
8268 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8271 /* Convert to unsigned 64-bit integer. */
8274 double d = aarch64_get_FP_double (cpu, rs);
8275 uint64_t value = (uint64_t) d;
8277 /* Do not raise an exception if we have reached ULONG_MAX. */
/* NOTE(review): 1UL may be 32 bits wide on some hosts, making
   1UL << 63 undefined -- presumably intended as 1ULL << 63; verify.  */
8278 if (value != (1UL << 63))
8279 RAISE_EXCEPTIONS (d, value, DOUBLE, ULONG);
8281 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8285 float f = aarch64_get_FP_float (cpu, rs);
8286 uint64_t value = (uint64_t) f;
8288 /* Do not raise an exception if we have reached ULONG_MAX. */
8289 if (value != (1UL << 63))
8290 RAISE_EXCEPTIONS (f, value, FLOAT, ULONG);
8292 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8299 /* Convert to unsigned 32-bit integer. */
8302 double d = aarch64_get_FP_double (cpu, rs);
8304 value = (uint32_t) d;
8305 /* Do not raise an exception if we have reached UINT_MAX. */
8306 if (value != (1UL << 31))
8307 RAISE_EXCEPTIONS (d, value, DOUBLE, UINT);
8311 float f = aarch64_get_FP_float (cpu, rs);
8313 value = (uint32_t) f;
8314 /* Do not raise an exception if we have reached UINT_MAX. */
8315 if (value != (1UL << 31))
8316 RAISE_EXCEPTIONS (f, value, FLOAT, UINT);
8319 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
/* UCVTF: unsigned integer to FP.  Source width from instr[31],
   destination type (single/double) from instr[22].  */
8324 do_UCVTF (sim_cpu *cpu)
8326 /* instr[31] = size: 32-bit (0), 64-bit (1)
8327 instr[30,23] = 001 1110 0
8328 instr[22] = type: single (0)/ double (1)
8329 instr[21] = enable (0)/disable(1) precision
8330 instr[20,16] = 0 0011
8331 instr[15,10] = precision
8335 unsigned rs = INSTR (9, 5);
8336 unsigned rd = INSTR (4, 0);
8338 NYI_assert (30, 23, 0x3C);
8339 NYI_assert (20, 16, 0x03);
8341 if (INSTR (21, 21) != 1)
8344 /* FIXME: Add exception raising. */
8345 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* 64-bit source register.  */
8348 uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);
8351 aarch64_set_FP_double (cpu, rd, (double) value);
8353 aarch64_set_FP_float (cpu, rd, (float) value);
/* 32-bit source register.  */
8357 uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP);
8360 aarch64_set_FP_double (cpu, rd, (double) value);
8362 aarch64_set_FP_float (cpu, rd, (float) value);
/* FMOV between the upper 64 bits of a vector register (lane 1) and a
   general register; instr[16] selects the direction.  */
8367 float_vector_move (sim_cpu *cpu)
8369 /* instr[31,17] == 100 1111 0101 0111
8370 instr[16] ==> direction 0=> to GR, 1=> from GR
8372 instr[9,5] ==> source
8373 instr[4,0] ==> dest. */
8375 unsigned rn = INSTR (9, 5);
8376 unsigned rd = INSTR (4, 0);
8378 NYI_assert (31, 17, 0x4F57);
8380 if (INSTR (15, 10) != 0)
8383 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8385 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
8387 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
/* Decode/dispatch for FP <-> integer conversions and FMOV GR<->Vec,
   keyed on the combined rmode:opcode field instr[20,16].  */
8391 dexSimpleFPIntegerConvert (sim_cpu *cpu)
8393 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
8395 instr[29] = S : 0 ==> OK, 1 ==> UNALLOC
8398 instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8400 instr[20,19] = rmode
8401 instr[18,16] = opcode
8402 instr[15,10] = 10 0000 */
8404 uint32_t rmode_opcode;
/* FMOV to/from the upper vector lane has its own fixed encoding.  */
8410 if (INSTR (31, 17) == 0x4F57)
8412 float_vector_move (cpu);
8416 size = INSTR (31, 31);
8421 type = INSTR (23, 22);
8425 rmode_opcode = INSTR (20, 16);
8426 size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */
8428 switch (rmode_opcode)
8430 case 2: /* SCVTF. */
8433 case 0: scvtf32 (cpu); return;
8434 case 1: scvtd32 (cpu); return;
8435 case 2: scvtf (cpu); return;
8436 case 3: scvtd (cpu); return;
8439 case 6: /* FMOV GR, Vec. */
8442 case 0: gfmovs (cpu); return;
8443 case 3: gfmovd (cpu); return;
8444 default: HALT_UNALLOC;
8447 case 7: /* FMOV vec, GR. */
8450 case 0: fgmovs (cpu); return;
8451 case 3: fgmovd (cpu); return;
8452 default: HALT_UNALLOC;
8455 case 24: /* FCVTZS. */
8458 case 0: fcvtszs32 (cpu); return;
8459 case 1: fcvtszd32 (cpu); return;
8460 case 2: fcvtszs (cpu); return;
8461 case 3: fcvtszd (cpu); return;
8464 case 25: do_fcvtzu (cpu); return;
8465 case 3: do_UCVTF (cpu); return;
8467 case 0: /* FCVTNS. */
8468 case 1: /* FCVTNU. */
8469 case 4: /* FCVTAS. */
8470 case 5: /* FCVTAU. */
8471 case 8: /* FCVTPS. */
8472 case 9: /* FCVTPU. */
8473 case 16: /* FCVTMS. */
8474 case 17: /* FCVTMU. */
/* Compute NZCV flags for a single-precision compare and install them
   in the CPSR.  NaN operands and same-signed infinities are handled
   before the ordinary subtract-and-test path.  */
8481 set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
8485 /* FIXME: Add exception raising. */
8486 if (isnan (fvalue1) || isnan (fvalue2))
8488 else if (isinf (fvalue1) && isinf (fvalue2))
8490 /* Subtracting two infinities may give a NaN. We only need to compare
8491 the signs, which we can get from isinf. */
8492 int result = isinf (fvalue1) - isinf (fvalue2);
8496 else if (result < 0)
8498 else /* (result > 0). */
/* Finite (or single-infinity) case: sign of the difference decides.  */
8503 float result = fvalue1 - fvalue2;
8507 else if (result < 0)
8509 else /* (result > 0). */
8513 aarch64_set_CPSR (cpu, flags);
/* FCMP Sn, Sm -- Invalid Operation exception only on signaling NaNs.  */
8517 fcmps (sim_cpu *cpu)
8519 unsigned sm = INSTR (20, 16);
8520 unsigned sn = INSTR ( 9, 5);
8522 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8523 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8525 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8526 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8529 /* Float compare to zero -- Invalid Operation exception
8530 only on signaling NaNs. */
8532 fcmpzs (sim_cpu *cpu)
8534 unsigned sn = INSTR ( 9, 5);
8535 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8537 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* FCMP Sn, #0.0 -- reuse the two-operand flag setter with a zero.  */
8538 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8541 /* Float compare -- Invalid Operation exception on all NaNs. */
8543 fcmpes (sim_cpu *cpu)
8545 unsigned sm = INSTR (20, 16);
8546 unsigned sn = INSTR ( 9, 5);
8548 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8549 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8551 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* NOTE(review): identical to fcmps here; the FCMPE signaling semantics
   are not modelled (exception raising is a FIXME in the flag setter).  */
8552 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8555 /* Float compare to zero -- Invalid Operation exception on all NaNs. */
8557 fcmpzes (sim_cpu *cpu)
8559 unsigned sn = INSTR ( 9, 5);
8560 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8562 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* FCMPE Sn, #0.0 -- same flag computation as the non-signaling form.  */
8563 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
/* Compute NZCV flags for a double-precision compare and install them in
   the CPSR; mirrors set_flags_for_float_compare above.  */
8567 set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
8571 /* FIXME: Add exception raising. */
8572 if (isnan (dval1) || isnan (dval2))
8574 else if (isinf (dval1) && isinf (dval2))
8576 /* Subtracting two infinities may give a NaN. We only need to compare
8577 the signs, which we can get from isinf. */
8578 int result = isinf (dval1) - isinf (dval2);
8582 else if (result < 0)
8584 else /* (result > 0). */
/* Finite (or single-infinity) case: sign of the difference decides.  */
8589 double result = dval1 - dval2;
8593 else if (result < 0)
8595 else /* (result > 0). */
8599 aarch64_set_CPSR (cpu, flags);
8602 /* Double compare -- Invalid Operation exception only on signaling NaNs. */
8604 fcmpd (sim_cpu *cpu)
8606 unsigned sm = INSTR (20, 16);
8607 unsigned sn = INSTR ( 9, 5);
8609 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8610 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8612 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8613 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8616 /* Double compare to zero -- Invalid Operation exception
8617 only on signaling NaNs. */
8619 fcmpzd (sim_cpu *cpu)
8621 unsigned sn = INSTR ( 9, 5);
8622 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8624 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* FCMP Dn, #0.0 -- reuse the two-operand flag setter with a zero.  */
8625 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8628 /* Double compare -- Invalid Operation exception on all NaNs. */
8630 fcmped (sim_cpu *cpu)
8632 unsigned sm = INSTR (20, 16);
8633 unsigned sn = INSTR ( 9, 5);
8635 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8636 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8638 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* NOTE(review): signaling semantics are not modelled -- same as fcmpd.  */
8639 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8642 /* Double compare to zero -- Invalid Operation exception on all NaNs. */
8644 fcmpzed (sim_cpu *cpu)
8646 unsigned sn = INSTR ( 9, 5);
8647 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8649 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* FCMPE Dn, #0.0 -- same flag computation as the non-signaling form.  */
8650 set_flags_for_double_compare (cpu, dvalue1, 0.0);
/* Decode/dispatch for the FP compare group: routes to the eight
   fcmp*/fcmpz* handlers on (type << 2) | opcode2[4,3].  */
8654 dexSimpleFPCompare (sim_cpu *cpu)
8656 /* assert instr[28,25] == 1111
8657 instr[30:24:21:13,10] = 0011000
8658 instr[31] = M : 0 ==> OK, 1 ==> UNALLOC
8659 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8660 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
8661 instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC
8662 instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
8663 01000 ==> FCMPZ, 11000 ==> FCMPEZ,
8666 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8667 uint32_t type = INSTR (23, 22);
8668 uint32_t op = INSTR (15, 14);
8669 uint32_t op2_2_0 = INSTR (2, 0);
8683 /* dispatch on type and top 2 bits of opcode. */
8684 dispatch = (type << 2) | INSTR (4, 3);
8688 case 0: fcmps (cpu); return;
8689 case 1: fcmpzs (cpu); return;
8690 case 2: fcmpes (cpu); return;
8691 case 3: fcmpzes (cpu); return;
8692 case 4: fcmpd (cpu); return;
8693 case 5: fcmpzd (cpu); return;
8694 case 6: fcmped (cpu); return;
8695 case 7: fcmpzed (cpu); return;
/* Scalar FADDP: add the two elements (lanes 0 and 1) of the source
   vector pair and write the sum to the scalar destination.  */
8700 do_scalar_FADDP (sim_cpu *cpu)
8702 /* instr [31,23] = 0111 1110 0
8703 instr [22] = single(0)/double(1)
8704 instr [21,10] = 11 0000 1101 10
8706 instr [4,0] = Fd. */
8708 unsigned Fn = INSTR (9, 5);
8709 unsigned Fd = INSTR (4, 0);
8711 NYI_assert (31, 23, 0x0FC);
8712 NYI_assert (21, 10, 0xC36);
8714 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8717 double val1 = aarch64_get_vec_double (cpu, Fn, 0);
8718 double val2 = aarch64_get_vec_double (cpu, Fn, 1);
8720 aarch64_set_FP_double (cpu, Fd, val1 + val2);
8724 float val1 = aarch64_get_vec_float (cpu, Fn, 0);
8725 float val2 = aarch64_get_vec_float (cpu, Fn, 1);
8727 aarch64_set_FP_float (cpu, Fd, val1 + val2);
8731 /* Floating point absolute difference. */
8734 do_scalar_FABD (sim_cpu *cpu)
8736 /* instr [31,23] = 0111 1110 1
8737 instr [22] = float(0)/double(1)
8740 instr [15,10] = 1101 01
8742 instr [4, 0] = Rd. */
8744 unsigned rm = INSTR (20, 16);
8745 unsigned rn = INSTR (9, 5);
8746 unsigned rd = INSTR (4, 0);
8748 NYI_assert (31, 23, 0x0FD);
8749 NYI_assert (21, 21, 1);
8750 NYI_assert (15, 10, 0x35);
8752 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* FABD Rd = |Rn - Rm| at the width selected by instr[22].  */
8754 aarch64_set_FP_double (cpu, rd,
8755 fabs (aarch64_get_FP_double (cpu, rn)
8756 - aarch64_get_FP_double (cpu, rm)));
8758 aarch64_set_FP_float (cpu, rd,
8759 fabsf (aarch64_get_FP_float (cpu, rn)
8760 - aarch64_get_FP_float (cpu, rm)));
/* Scalar CMGT: set Rd lane 0 to all-ones if Rn > Rm (64-bit unsigned
   lane compare as written here), else all-zeros.  */
8764 do_scalar_CMGT (sim_cpu *cpu)
8766 /* instr [31,21] = 0101 1110 111
8768 instr [15,10] = 00 1101
8770 instr [4, 0] = Rd. */
8772 unsigned rm = INSTR (20, 16);
8773 unsigned rn = INSTR (9, 5);
8774 unsigned rd = INSTR (4, 0);
8776 NYI_assert (31, 21, 0x2F7);
8777 NYI_assert (15, 10, 0x0D);
8779 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* NOTE(review): CMGT is architecturally a signed compare, but this
   compares u64 lane values -- verify against the Arm ARM.  */
8780 aarch64_set_vec_u64 (cpu, rd, 0,
8781 aarch64_get_vec_u64 (cpu, rn, 0) >
8782 aarch64_get_vec_u64 (cpu, rm, 0) ? -1L : 0L);
/* Scalar USHR: unsigned right shift of lane 0 by (128 - immh:immb).  */
8786 do_scalar_USHR (sim_cpu *cpu)
8788 /* instr [31,23] = 0111 1111 0
8789 instr [22,16] = shift amount
8790 instr [15,10] = 0000 01
8792 instr [4, 0] = Rd. */
8794 unsigned amount = 128 - INSTR (22, 16);
8795 unsigned rn = INSTR (9, 5);
8796 unsigned rd = INSTR (4, 0);
8798 NYI_assert (31, 23, 0x0FE);
8799 NYI_assert (15, 10, 0x01);
8801 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8802 aarch64_set_vec_u64 (cpu, rd, 0,
8803 aarch64_get_vec_u64 (cpu, rn, 0) >> amount);
/* Scalar SSHL: signed shift of Rn lane 0 by the signed byte in Rm
   lane 0 -- positive shifts left, negative shifts right.  */
8807 do_scalar_SSHL (sim_cpu *cpu)
8809 /* instr [31,21] = 0101 1110 111
8811 instr [15,10] = 0100 01
8813 instr [4, 0] = Rd. */
8815 unsigned rm = INSTR (20, 16);
8816 unsigned rn = INSTR (9, 5);
8817 unsigned rd = INSTR (4, 0);
8818 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8820 NYI_assert (31, 21, 0x2F7);
8821 NYI_assert (15, 10, 0x11);
8823 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8825 aarch64_set_vec_s64 (cpu, rd, 0,
8826 aarch64_get_vec_s64 (cpu, rn, 0) << shift);
8828 aarch64_set_vec_s64 (cpu, rd, 0,
8829 aarch64_get_vec_s64 (cpu, rn, 0) >> - shift);
/* Scalar immediate shifts (SSHR / SHL) on the 64-bit lane 0; the
   effective amount is decoded from the immh:immb field instr[22,16].  */
8833 do_scalar_shift (sim_cpu *cpu)
8835 /* instr [31,23] = 0101 1111 0
8836 instr [22,16] = shift amount
8837 instr [15,10] = 0101 01 [SHL]
8838 instr [15,10] = 0000 01 [SSHR]
8840 instr [4, 0] = Rd. */
8842 unsigned rn = INSTR (9, 5);
8843 unsigned rd = INSTR (4, 0);
8846 NYI_assert (31, 23, 0x0BE);
8848 if (INSTR (22, 22) == 0)
8851 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8852 switch (INSTR (15, 10))
8854 case 0x01: /* SSHR */
8855 amount = 128 - INSTR (22, 16);
8856 aarch64_set_vec_s64 (cpu, rd, 0,
8857 aarch64_get_vec_s64 (cpu, rn, 0) >> amount);
8859 case 0x15: /* SHL */
8860 amount = INSTR (22, 16) - 64;
8861 aarch64_set_vec_u64 (cpu, rd, 0,
8862 aarch64_get_vec_u64 (cpu, rn, 0) << amount);
8869 /* FCMEQ FCMGT FCMGE. */
/* Scalar FP compare producing an all-ones/all-zeros mask in Rd.
   EUac packs instr[23] (E), instr[29] (U) and instr[11] (ac) to select
   among EQ / GE / GT and their absolute-value (FAC*) variants.  */
8871 do_scalar_FCM (sim_cpu *cpu)
8873 /* instr [31,30] = 01
8875 instr [28,24] = 1 1110
8880 instr [15,12] = 1110
8884 instr [4, 0] = Rd. */
8886 unsigned rm = INSTR (20, 16);
8887 unsigned rn = INSTR (9, 5);
8888 unsigned rd = INSTR (4, 0);
8889 unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11);
8894 NYI_assert (31, 30, 1);
8895 NYI_assert (28, 24, 0x1E);
8896 NYI_assert (21, 21, 1);
8897 NYI_assert (15, 12, 0xE);
8898 NYI_assert (10, 10, 1);
8900 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* Double-precision operands.  */
8903 double val1 = aarch64_get_FP_double (cpu, rn);
8904 double val2 = aarch64_get_FP_double (cpu, rm);
8909 result = val1 == val2;
8917 result = val1 >= val2;
8925 result = val1 > val2;
8932 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
/* Single-precision operands; FAC variants compare absolute values.  */
8936 val1 = aarch64_get_FP_float (cpu, rn);
8937 val2 = aarch64_get_FP_float (cpu, rm);
8942 result = val1 == val2;
8946 val1 = fabsf (val1);
8947 val2 = fabsf (val2);
8950 result = val1 >= val2;
8954 val1 = fabsf (val1);
8955 val2 = fabsf (val2);
8958 result = val1 > val2;
8965 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
8968 /* An alias of DUP. */
/* Scalar DUP (element): copy one lane of Rn into lane 0 of Rd.  The
   element size and index are decoded from the imm5 field: the lowest
   set bit of imm5 selects byte/half/word/dword, the bits above it give
   the lane index.  */
8970 do_scalar_MOV (sim_cpu *cpu)
8972 /* instr [31,21] = 0101 1110 000
8973 instr [20,16] = imm5
8974 instr [15,10] = 0000 01
8976 instr [4, 0] = Rd. */
8978 unsigned rn = INSTR (9, 5);
8979 unsigned rd = INSTR (4, 0);
8982 NYI_assert (31, 21, 0x2F0);
8983 NYI_assert (15, 10, 0x01);
8985 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* Byte element.  */
8989 index = INSTR (20, 17);
8991 (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index));
8993 else if (INSTR (17, 17))
/* Half-word element.  */
8996 index = INSTR (20, 18);
8998 (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index));
9000 else if (INSTR (18, 18))
/* Word element.  */
9003 index = INSTR (20, 19);
9005 (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index));
9007 else if (INSTR (19, 19))
/* Double-word element.  */
9010 index = INSTR (20, 20);
9012 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index));
/* Scalar NEG: two's-complement negate the 64-bit lane 0 of Rn.  */
9019 do_scalar_NEG (sim_cpu *cpu)
9021 /* instr [31,10] = 0111 1110 1110 0000 1011 10
9023 instr [4, 0] = Rd. */
9025 unsigned rn = INSTR (9, 5);
9026 unsigned rd = INSTR (4, 0);
9028 NYI_assert (31, 10, 0x1FB82E);
9030 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9031 aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0));
/* Scalar USHL: unsigned shift of Rn lane 0 by the signed byte in Rm
   lane 0 -- positive shifts left, negative shifts right.  */
9035 do_scalar_USHL (sim_cpu *cpu)
9037 /* instr [31,21] = 0111 1110 111
9039 instr [15,10] = 0100 01
9041 instr [4, 0] = Rd. */
9043 unsigned rm = INSTR (20, 16);
9044 unsigned rn = INSTR (9, 5);
9045 unsigned rd = INSTR (4, 0);
9046 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
9048 NYI_assert (31, 21, 0x3F7);
9049 NYI_assert (15, 10, 0x11);
9051 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9053 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift);
9055 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift);
/* Scalar FADD (double): Fd = Fm + Fn.  */
9059 do_double_add (sim_cpu *cpu)
9061 /* instr [31,21] = 0101 1110 111
9063 instr [15,10] = 1000 01
9065 instr [4,0] = Fd. */
9072 NYI_assert (31, 21, 0x2F7);
9073 NYI_assert (15, 10, 0x21);
9077 Fn = INSTR (20, 16);
9079 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9080 val1 = aarch64_get_FP_double (cpu, Fm);
9081 val2 = aarch64_get_FP_double (cpu, Fn);
9083 aarch64_set_FP_double (cpu, Fd, val1 + val2);
/* Scalar UCVTF: convert the unsigned integer in lane 0 of Rn to an FP
   value of the width selected by instr[22].  */
9087 do_scalar_UCVTF (sim_cpu *cpu)
9089 /* instr [31,23] = 0111 1110 0
9090 instr [22] = single(0)/double(1)
9091 instr [21,10] = 10 0001 1101 10
9093 instr [4,0] = rd. */
9095 unsigned rn = INSTR (9, 5);
9096 unsigned rd = INSTR (4, 0);
9098 NYI_assert (31, 23, 0x0FC);
9099 NYI_assert (21, 10, 0x876);
9101 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9104 uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0);
9106 aarch64_set_vec_double (cpu, rd, 0, (double) val);
9110 uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0);
9112 aarch64_set_vec_float (cpu, rd, 0, (float) val);
/* Dispatch for scalar Advanced SIMD instructions: decode on
   instr[31,23], then on an inner opcode field, and forward to the
   appropriate do_scalar_* emulation routine.  */
9117 do_scalar_vec (sim_cpu *cpu)
9119 /* instr [30] = 1. */
9120 /* instr [28,25] = 1111. */
9121 switch (INSTR (31, 23))
/* First group: decode on instr[15,10].  */
9124 switch (INSTR (15, 10))
9126 case 0x01: do_scalar_MOV (cpu); return;
9127 case 0x39: do_scalar_FCM (cpu); return;
9128 case 0x3B: do_scalar_FCM (cpu); return;
9132 case 0xBE: do_scalar_shift (cpu); return;
/* Second group: decode on instr[15,10], with a nested decode on
   instr[21,16] for the pairwise/convert forms.  */
9135 switch (INSTR (15, 10))
9138 switch (INSTR (21, 16))
9140 case 0x30: do_scalar_FADDP (cpu); return;
9141 case 0x21: do_scalar_UCVTF (cpu); return;
9144 case 0x39: do_scalar_FCM (cpu); return;
9145 case 0x3B: do_scalar_FCM (cpu); return;
/* Third group: compare/shift/negate/abs-difference forms.  */
9150 switch (INSTR (15, 10))
9152 case 0x0D: do_scalar_CMGT (cpu); return;
9153 case 0x11: do_scalar_USHL (cpu); return;
9154 case 0x2E: do_scalar_NEG (cpu); return;
9155 case 0x35: do_scalar_FABD (cpu); return;
9156 case 0x39: do_scalar_FCM (cpu); return;
9157 case 0x3B: do_scalar_FCM (cpu); return;
9162 case 0xFE: do_scalar_USHR (cpu); return;
/* Final group: double add and signed shift.  */
9165 switch (INSTR (15, 10))
9167 case 0x21: do_double_add (cpu); return;
9168 case 0x11: do_scalar_SSHL (cpu); return;
/* Top-level decoder for the Advanced SIMD / scalar FP instruction
   space: routes scalar-vector forms to do_scalar_vec and the basic
   scalar FP groups to the dexSimpleFP* decoders via a cascade of
   bit tests.  */
9179 dexAdvSIMD1 (sim_cpu *cpu)
9181 /* instr [28,25] = 1 111. */
9183 /* We are currently only interested in the basic
9184 scalar fp routines which all have bit 30 = 0. */
9186 do_scalar_vec (cpu);
9188 /* instr[24] is set for FP data processing 3-source and clear for
9189 all other basic scalar fp instruction groups. */
9190 else if (INSTR (24, 24))
9191 dexSimpleFPDataProc3Source (cpu);
9193 /* instr[21] is clear for floating <-> fixed conversions and set for
9194 all other basic scalar fp instruction groups. */
9195 else if (!INSTR (21, 21))
9196 dexSimpleFPFixedConvert (cpu);
9198 /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
9199 11 ==> cond select, 00 ==> other. */
9201 switch (INSTR (11, 10))
9203 case 1: dexSimpleFPCondCompare (cpu); return;
9204 case 2: dexSimpleFPDataProc2Source (cpu); return;
9205 case 3: dexSimpleFPCondSelect (cpu); return;
9208 /* Now an ordered cascade of tests.
9209 FP immediate has instr [12] == 1.
9210 FP compare has instr [13] == 1.
9211 FP Data Proc 1 Source has instr [14] == 1.
9212 FP floating <--> integer conversions has instr [15] == 0. */
9214 dexSimpleFPImmediate (cpu);
9216 else if (INSTR (13, 13))
9217 dexSimpleFPCompare (cpu);
9219 else if (INSTR (14, 14))
9220 dexSimpleFPDataProc1Source (cpu);
9222 else if (!INSTR (15, 15))
9223 dexSimpleFPIntegerConvert (cpu);
9226 /* If we get here then instr[15] == 1 which means UNALLOC. */
9231 /* PC relative addressing. */
9234 pcadr (sim_cpu *cpu)
9236 /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP
9237 instr[30,29] = immlo
9238 instr[23,5] = immhi. */
9240 unsigned rd = INSTR (4, 0);
9241 uint32_t isPage = INSTR (31, 31);
9242 union { int64_t u64; uint64_t s64; } imm;
9245 imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
9247 offset = (offset << 2) | INSTR (30, 29);
9249 address = aarch64_get_PC (cpu);
9257 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9258 aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
9261 /* Specific decode and execute for group Data Processing Immediate. */
/* PC-relative addressing group decoder (ADR/ADRP forms).  */
9264 dexPCRelAddressing (sim_cpu *cpu)
9266 /* assert instr[28,24] = 10000. */
9270 /* Immediate logical.
9271 The bimm32/64 argument is constructed by replicating a 2, 4, 8,
9272 16, 32 or 64 bit sequence pulled out at decode and possibly
9275 N.B. the output register (dest) can normally be Xn or SP
9276 the exception occurs for flag setting instructions which may
9277 only use Xn for the output (dest). The input register can
9280 /* 32 bit and immediate. */
9282 and32 (sim_cpu *cpu, uint32_t bimm)
9284 unsigned rn = INSTR (9, 5);
9285 unsigned rd = INSTR (4, 0);
9287 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9288 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9289 aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
9292 /* 64 bit and immediate. */
9294 and64 (sim_cpu *cpu, uint64_t bimm)
9296 unsigned rn = INSTR (9, 5);
9297 unsigned rd = INSTR (4, 0);
9299 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9300 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9301 aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
9304 /* 32 bit and immediate set flags. */
9306 ands32 (sim_cpu *cpu, uint32_t bimm)
9308 unsigned rn = INSTR (9, 5);
9309 unsigned rd = INSTR (4, 0);
9311 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9312 uint32_t value2 = bimm;
9314 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9315 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9316 set_flags_for_binop32 (cpu, value1 & value2);
9319 /* 64 bit and immediate set flags. */
9321 ands64 (sim_cpu *cpu, uint64_t bimm)
9323 unsigned rn = INSTR (9, 5);
9324 unsigned rd = INSTR (4, 0);
9326 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9327 uint64_t value2 = bimm;
9329 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9330 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9331 set_flags_for_binop64 (cpu, value1 & value2);
9334 /* 32 bit exclusive or immediate. */
9336 eor32 (sim_cpu *cpu, uint32_t bimm)
9338 unsigned rn = INSTR (9, 5);
9339 unsigned rd = INSTR (4, 0);
9341 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9342 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9343 aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
9346 /* 64 bit exclusive or immediate. */
9348 eor64 (sim_cpu *cpu, uint64_t bimm)
9350 unsigned rn = INSTR (9, 5);
9351 unsigned rd = INSTR (4, 0);
9353 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9354 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9355 aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
9358 /* 32 bit or immediate. */
9360 orr32 (sim_cpu *cpu, uint32_t bimm)
9362 unsigned rn = INSTR (9, 5);
9363 unsigned rd = INSTR (4, 0);
9365 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9366 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9367 aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm);
9370 /* 64 bit or immediate. */
9372 orr64 (sim_cpu *cpu, uint64_t bimm)
9374 unsigned rn = INSTR (9, 5);
9375 unsigned rd = INSTR (4, 0);
9377 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9378 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9379 aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm);
9382 /* Logical shifted register.
9383 These allow an optional LSL, ASR, LSR or ROR to the second source
9384 register with a count up to the register bit count.
9385 N.B register args may not be SP. */
9387 /* 32 bit AND shifted register. */
9389 and32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9391 unsigned rm = INSTR (20, 16);
9392 unsigned rn = INSTR (9, 5);
9393 unsigned rd = INSTR (4, 0);
9395 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9397 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9398 & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9401 /* 64 bit AND shifted register. */
9403 and64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9405 unsigned rm = INSTR (20, 16);
9406 unsigned rn = INSTR (9, 5);
9407 unsigned rd = INSTR (4, 0);
9409 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9411 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9412 & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9415 /* 32 bit AND shifted register setting flags. */
9417 ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9419 unsigned rm = INSTR (20, 16);
9420 unsigned rn = INSTR (9, 5);
9421 unsigned rd = INSTR (4, 0);
9423 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9424 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9427 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9428 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9429 set_flags_for_binop32 (cpu, value1 & value2);
9432 /* 64 bit AND shifted register setting flags. */
9434 ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9436 unsigned rm = INSTR (20, 16);
9437 unsigned rn = INSTR (9, 5);
9438 unsigned rd = INSTR (4, 0);
9440 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9441 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9444 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9445 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9446 set_flags_for_binop64 (cpu, value1 & value2);
9449 /* 32 bit BIC shifted register. */
9451 bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9453 unsigned rm = INSTR (20, 16);
9454 unsigned rn = INSTR (9, 5);
9455 unsigned rd = INSTR (4, 0);
9457 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9459 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9460 & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9463 /* 64 bit BIC shifted register. */
9465 bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9467 unsigned rm = INSTR (20, 16);
9468 unsigned rn = INSTR (9, 5);
9469 unsigned rd = INSTR (4, 0);
9471 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9473 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9474 & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9477 /* 32 bit BIC shifted register setting flags. */
9479 bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9481 unsigned rm = INSTR (20, 16);
9482 unsigned rn = INSTR (9, 5);
9483 unsigned rd = INSTR (4, 0);
9485 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9486 uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9489 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9490 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9491 set_flags_for_binop32 (cpu, value1 & value2);
9494 /* 64 bit BIC shifted register setting flags. */
9496 bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9498 unsigned rm = INSTR (20, 16);
9499 unsigned rn = INSTR (9, 5);
9500 unsigned rd = INSTR (4, 0);
9502 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9503 uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9506 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9507 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9508 set_flags_for_binop64 (cpu, value1 & value2);
9511 /* 32 bit EON shifted register. */
9513 eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9515 unsigned rm = INSTR (20, 16);
9516 unsigned rn = INSTR (9, 5);
9517 unsigned rd = INSTR (4, 0);
9519 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9521 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9522 ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9525 /* 64 bit EON shifted register. */
9527 eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9529 unsigned rm = INSTR (20, 16);
9530 unsigned rn = INSTR (9, 5);
9531 unsigned rd = INSTR (4, 0);
9533 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9535 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9536 ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9539 /* 32 bit EOR shifted register. */
9541 eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9543 unsigned rm = INSTR (20, 16);
9544 unsigned rn = INSTR (9, 5);
9545 unsigned rd = INSTR (4, 0);
9547 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9549 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9550 ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9553 /* 64 bit EOR shifted register. */
9555 eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9557 unsigned rm = INSTR (20, 16);
9558 unsigned rn = INSTR (9, 5);
9559 unsigned rd = INSTR (4, 0);
9561 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9563 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9564 ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9567 /* 32 bit ORR shifted register. */
9569 orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9571 unsigned rm = INSTR (20, 16);
9572 unsigned rn = INSTR (9, 5);
9573 unsigned rd = INSTR (4, 0);
9575 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9577 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9578 | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9581 /* 64 bit ORR shifted register. */
9583 orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9585 unsigned rm = INSTR (20, 16);
9586 unsigned rn = INSTR (9, 5);
9587 unsigned rd = INSTR (4, 0);
9589 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9591 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9592 | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9595 /* 32 bit ORN shifted register. */
9597 orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9599 unsigned rm = INSTR (20, 16);
9600 unsigned rn = INSTR (9, 5);
9601 unsigned rd = INSTR (4, 0);
9603 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9605 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9606 | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9609 /* 64 bit ORN shifted register. */
9611 orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9613 unsigned rm = INSTR (20, 16);
9614 unsigned rn = INSTR (9, 5);
9615 unsigned rd = INSTR (4, 0);
9617 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9619 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9620 | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
/* Decode and execute the Logical (immediate) group: looks up the
   replicated bit-mask immediate in LITable, then dispatches on
   size/op to the and/orr/eor/ands helpers.  */
9624 dexLogicalImmediate (sim_cpu *cpu)
9626 /* assert instr[28,23] = 1001000
9627 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9628 instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
9629 instr[22] = N : used to construct immediate mask
9635 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9636 uint32_t size = INSTR (31, 31);
9637 uint32_t N = INSTR (22, 22);
9638 /* uint32_t immr = INSTR (21, 16);. */
9639 /* uint32_t imms = INSTR (15, 10);. */
/* N:immr:imms together index the precomputed immediate table.  */
9640 uint32_t index = INSTR (22, 10);
9641 uint64_t bimm64 = LITable [index];
9642 uint32_t dispatch = INSTR (30, 29);
/* 32 bit path: truncate the 64 bit table entry.  */
9652 uint32_t bimm = (uint32_t) bimm64;
9656 case 0: and32 (cpu, bimm); return;
9657 case 1: orr32 (cpu, bimm); return;
9658 case 2: eor32 (cpu, bimm); return;
9659 case 3: ands32 (cpu, bimm); return;
/* 64 bit path.  */
9666 case 0: and64 (cpu, bimm64); return;
9667 case 1: orr64 (cpu, bimm64); return;
9668 case 2: eor64 (cpu, bimm64); return;
9669 case 3: ands64 (cpu, bimm64); return;
9676 The uimm argument is a 16 bit value to be inserted into the
9677 target register the pos argument locates the 16 bit word in the
9678 dest register i.e. it is in {0, 1} for 32 bit and {0, 1, 2,
9680 N.B register arg may not be SP so it should be
9681 accessed using the setGZRegisterXXX accessors. */
9683 /* 32 bit move 16 bit immediate zero remaining shorts. */
9685 movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9687 unsigned rd = INSTR (4, 0);
9689 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9690 aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
9693 /* 64 bit move 16 bit immediate zero remaining shorts. */
9695 movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9697 unsigned rd = INSTR (4, 0);
9699 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9700 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
9703 /* 32 bit move 16 bit immediate negated. */
9705 movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9707 unsigned rd = INSTR (4, 0);
9709 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9710 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
9713 /* 64 bit move 16 bit immediate negated. */
9715 movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9717 unsigned rd = INSTR (4, 0);
9719 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9721 (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
9722 ^ 0xffffffffffffffffULL));
9725 /* 32 bit move 16 bit immediate keep remaining shorts. */
9727 movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9729 unsigned rd = INSTR (4, 0);
9730 uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9731 uint32_t value = val << (pos * 16);
9732 uint32_t mask = ~(0xffffU << (pos * 16));
9734 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9735 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9738 /* 64 bit move 16 it immediate keep remaining shorts. */
9740 movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9742 unsigned rd = INSTR (4, 0);
9743 uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
9744 uint64_t value = (uint64_t) val << (pos * 16);
9745 uint64_t mask = ~(0xffffULL << (pos * 16));
9747 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9748 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
/* Decode and execute the Move wide (immediate) group, dispatching to
   the movn/movz/movk 32/64 helpers with the raw 16 bit immediate and
   the halfword position.  */
9752 dexMoveWideImmediate (sim_cpu *cpu)
9754 /* assert instr[28:23] = 100101
9755 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9756 instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
9757 instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
9758 instr[20,5] = uimm16
9761 /* N.B. the (multiple of 16) shift is applied by the called routine,
9762 we just pass the multiplier. */
9765 uint32_t size = INSTR (31, 31);
9766 uint32_t op = INSTR (30, 29);
9767 uint32_t shift = INSTR (22, 21);
9769 /* 32 bit can only shift 0 or 1 lot of 16.
9770 anything else is an unallocated instruction. */
9771 if (size == 0 && (shift > 1))
9777 imm = INSTR (20, 5);
/* 32 bit forms.  */
9782 movn32 (cpu, imm, shift);
9784 movz32 (cpu, imm, shift);
9786 movk32 (cpu, imm, shift);
/* 64 bit forms.  */
9791 movn64 (cpu, imm, shift);
9793 movz64 (cpu, imm, shift);
9795 movk64 (cpu, imm, shift);
9799 /* Bitfield operations.
9800 These take a pair of bit positions r and s which are in {0..31}
9801 or {0..63} depending on the instruction word size.
9802 N.B register args may not be SP. */
9804 /* OK, we start with ubfm which just needs to pick
9805 some bits out of source zero the rest and write
9806 the result to dest. Just need two logical shifts. */
9808 /* 32 bit bitfield move, left and right of affected zeroed
9809 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9811 ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9814 unsigned rn = INSTR (9, 5);
9815 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9817 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
9820 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9821 We want only bits s:xxx:r at the bottom of the word
9822 so we LSL bit s up to bit 31 i.e. by 31 - s
9823 and then we LSR to bring bit 31 down to bit s - r
9824 i.e. by 31 + r - s. */
9826 value >>= 31 + r - s;
9830 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
9831 We want only bits s:xxx:0 starting at it 31-(r-1)
9832 so we LSL bit s up to bit 31 i.e. by 31 - s
9833 and then we LSL to bring bit 31 down to 31-(r-1)+s
9834 i.e. by r - (s + 1). */
9836 value >>= r - (s + 1);
9839 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9841 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9844 /* 64 bit bitfield move, left and right of affected zeroed
9845 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9847 ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9850 unsigned rn = INSTR (9, 5);
9851 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9855 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
9856 We want only bits s:xxx:r at the bottom of the word.
9857 So we LSL bit s up to bit 63 i.e. by 63 - s
9858 and then we LSR to bring bit 63 down to bit s - r
9859 i.e. by 63 + r - s. */
9861 value >>= 63 + r - s;
9865 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
9866 We want only bits s:xxx:0 starting at it 63-(r-1).
9867 So we LSL bit s up to bit 63 i.e. by 63 - s
9868 and then we LSL to bring bit 63 down to 63-(r-1)+s
9869 i.e. by r - (s + 1). */
9871 value >>= r - (s + 1);
9874 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9876 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9879 /* The signed versions need to insert sign bits
9880 on the left of the inserted bit field. so we do
9881 much the same as the unsigned version except we
9882 use an arithmetic shift right -- this just means
9883 we need to operate on signed values. */
9885 /* 32 bit bitfield move, left of affected sign-extended, right zeroed. */
9886 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9888 sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9891 unsigned rn = INSTR (9, 5);
9892 /* as per ubfm32 but use an ASR instead of an LSR. */
9893 int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);
9898 value >>= 31 + r - s;
9903 value >>= r - (s + 1);
9906 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9908 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
9911 /* 64 bit bitfield move, left of affected sign-extended, right zeroed. */
9912 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9914 sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9917 unsigned rn = INSTR (9, 5);
9918 /* acpu per ubfm but use an ASR instead of an LSR. */
9919 int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);
9924 value >>= 63 + r - s;
9929 value >>= r - (s + 1);
9932 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9934 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
9937 /* Finally, these versions leave non-affected bits
9938 as is. so we need to generate the bits as per
9939 ubfm and also generate a mask to pick the
9940 bits from the original and computed values. */
9942 /* 32 bit bitfield move, non-affected bits left as is.
9943 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9945 bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9947 unsigned rn = INSTR (9, 5);
9948 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9953 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
9956 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9957 We want only bits s:xxx:r at the bottom of the word
9958 so we LSL bit s up to bit 31 i.e. by 31 - s
9959 and then we LSR to bring bit 31 down to bit s - r
9960 i.e. by 31 + r - s. */
9962 value >>= 31 + r - s;
9963 /* the mask must include the same bits. */
9965 mask >>= 31 + r - s;
9969 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
9970 We want only bits s:xxx:0 starting at it 31-(r-1)
9971 so we LSL bit s up to bit 31 i.e. by 31 - s
9972 and then we LSL to bring bit 31 down to 31-(r-1)+s
9973 i.e. by r - (s + 1). */
9975 value >>= r - (s + 1);
9976 /* The mask must include the same bits. */
9978 mask >>= r - (s + 1);
9982 value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9987 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9989 (cpu, rd, NO_SP, (aarch64_get_reg_u32 (cpu, rd, NO_SP) & ~mask) | value);
9992 /* 64 bit bitfield move, non-affected bits left as is.
9993 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9995 bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9998 unsigned rn = INSTR (9, 5);
9999 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
10000 uint64_t mask = 0xffffffffffffffffULL;
10004 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
10005 We want only bits s:xxx:r at the bottom of the word
10006 so we LSL bit s up to bit 63 i.e. by 63 - s
10007 and then we LSR to bring bit 63 down to bit s - r
10008 i.e. by 63 + r - s. */
10010 value >>= 63 + r - s;
10011 /* The mask must include the same bits. */
10013 mask >>= 63 + r - s;
10017 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
10018 We want only bits s:xxx:0 starting at it 63-(r-1)
10019 so we LSL bit s up to bit 63 i.e. by 63 - s
10020 and then we LSL to bring bit 63 down to 63-(r-1)+s
10021 i.e. by r - (s + 1). */
10023 value >>= r - (s + 1);
10024 /* The mask must include the same bits. */
10026 mask >>= r - (s + 1);
10029 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10031 aarch64_set_reg_u64
10032 (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
/* Decode and execute the Bitfield group: validates size/N/immr/imms
   encodings, then dispatches on size:op to sbfm/bfm/ubfm (32/64).  */
10036 dexBitfieldImmediate (sim_cpu *cpu)
10038 /* assert instr[28:23] = 100110
10039 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10040 instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
10041 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
10042 instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
10043 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10047 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10050 uint32_t size = INSTR (31, 31);
10051 uint32_t N = INSTR (22, 22);
10052 /* 32 bit operations must have immr[5] = 0 and imms[5] = 0. */
10053 /* or else we have an UNALLOC. */
10054 uint32_t immr = INSTR (21, 16);
10059 if (!size && uimm (immr, 5, 5))
10062 imms = INSTR (15, 10);
10063 if (!size && uimm (imms, 5, 5))
10066 /* Switch on combined size and op. */
10067 dispatch = INSTR (31, 29);
10070 case 0: sbfm32 (cpu, immr, imms); return;
10071 case 1: bfm32 (cpu, immr, imms); return;
10072 case 2: ubfm32 (cpu, immr, imms); return;
10073 case 4: sbfm (cpu, immr, imms); return;
10074 case 5: bfm (cpu, immr, imms); return;
10075 case 6: ubfm (cpu, immr, imms); return;
10076 default: HALT_UNALLOC;
10081 do_EXTR_32 (sim_cpu *cpu)
10083 /* instr[31:21] = 00010011100
10085 instr[15,10] = imms : 0xxxxx for 32 bit
10088 unsigned rm = INSTR (20, 16);
10089 unsigned imms = INSTR (15, 10) & 31;
10090 unsigned rn = INSTR ( 9, 5);
10091 unsigned rd = INSTR ( 4, 0);
10095 val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
10097 val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10098 val2 <<= (32 - imms);
10100 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10101 aarch64_set_reg_u64 (cpu, rd, NO_SP, val1 | val2);
10105 do_EXTR_64 (sim_cpu *cpu)
10107 /* instr[31:21] = 10010011100
10109 instr[15,10] = imms
10112 unsigned rm = INSTR (20, 16);
10113 unsigned imms = INSTR (15, 10) & 63;
10114 unsigned rn = INSTR ( 9, 5);
10115 unsigned rd = INSTR ( 4, 0);
10118 val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
10120 val |= (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms));
10122 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
/* Decode and execute the Extract group (EXTR): validates the N and
   imms encodings, then dispatches on size to do_EXTR_32/do_EXTR_64.  */
10126 dexExtractImmediate (sim_cpu *cpu)
10128 /* assert instr[28:23] = 100111
10129 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10130 instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
10131 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC
10132 instr[21] = op0 : must be 0 or UNALLOC
10134 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10138 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10139 /* 64 bit operations must have N = 1 or else we have an UNALLOC. */
10141 uint32_t size = INSTR (31, 31);
10142 uint32_t N = INSTR (22, 22);
10143 /* 32 bit operations must have imms[5] = 0
10144 or else we have an UNALLOC. */
10145 uint32_t imms = INSTR (15, 10);
10150 if (!size && uimm (imms, 5, 5))
10153 /* Switch on combined size and op. */
10154 dispatch = INSTR (31, 29);
10159 else if (dispatch == 4)
10162 else if (dispatch == 1)
/* Secondary dispatch for the Data Processing Immediate group:
   switches on instr[25,23] (via dispatchDPImm) to the specific
   immediate-form decoders.  */
10169 dexDPImm (sim_cpu *cpu)
10171 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
10172 assert group == GROUP_DPIMM_1000 || group == GROUP_DPIMM_1001
10173 bits [25,23] of a DPImm are the secondary dispatch vector. */
10174 uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));
10178 case DPIMM_PCADR_000:
10179 case DPIMM_PCADR_001:
10180 dexPCRelAddressing (cpu);
10183 case DPIMM_ADDSUB_010:
10184 case DPIMM_ADDSUB_011:
10185 dexAddSubtractImmediate (cpu);
10188 case DPIMM_LOG_100:
10189 dexLogicalImmediate (cpu);
10192 case DPIMM_MOV_101:
10193 dexMoveWideImmediate (cpu);
10196 case DPIMM_BITF_110:
10197 dexBitfieldImmediate (cpu);
10200 case DPIMM_EXTR_111:
10201 dexExtractImmediate (cpu);
10205 /* Should never reach here. */
/* Decode and execute the Load/store (unscaled immediate) group:
   dispatches on size:opc, split by V into general-register (stur/ldur
   family) and FP-register (fstur/fldur family) operations.  */
10211 dexLoadUnscaledImmediate (sim_cpu *cpu)
10213 /* instr[29,24] == 111_00
10216 instr[31,30] = size
10219 instr[20,12] = simm9
10220 instr[9,5] = rn may be SP. */
10221 /* unsigned rt = INSTR (4, 0); */
10222 uint32_t V = INSTR (26, 26);
10223 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10224 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10228 /* GReg operations. */
10231 case 0: sturb (cpu, imm); return;
10232 case 1: ldurb32 (cpu, imm); return;
10233 case 2: ldursb64 (cpu, imm); return;
10234 case 3: ldursb32 (cpu, imm); return;
10235 case 4: sturh (cpu, imm); return;
10236 case 5: ldurh32 (cpu, imm); return;
10237 case 6: ldursh64 (cpu, imm); return;
10238 case 7: ldursh32 (cpu, imm); return;
10239 case 8: stur32 (cpu, imm); return;
10240 case 9: ldur32 (cpu, imm); return;
10241 case 10: ldursw (cpu, imm); return;
10242 case 12: stur64 (cpu, imm); return;
10243 case 13: ldur64 (cpu, imm); return;
10256 /* FReg operations. */
10259 case 2: fsturq (cpu, imm); return;
10260 case 3: fldurq (cpu, imm); return;
10261 case 8: fsturs (cpu, imm); return;
10262 case 9: fldurs (cpu, imm); return;
10263 case 12: fsturd (cpu, imm); return;
10264 case 13: fldurd (cpu, imm); return;
10266 case 0: /* STUR 8 bit FP. */
10267 case 1: /* LDUR 8 bit FP. */
10268 case 4: /* STUR 16 bit FP. */
10269 case 5: /* LDUR 16 bit FP. */
10283 /* N.B. A preliminary note regarding all the ldrs<x>32
10286 The signed value loaded by these instructions is cast to unsigned
10287 before being assigned to aarch64_get_reg_u64 (cpu, N) i.e. to the
10288 64 bit element of the GReg union. this performs a 32 bit sign extension
10289 (as required) but avoids 64 bit sign extension, thus ensuring that the
10290 top half of the register word is zero. this is what the spec demands
10291 when a 32 bit load occurs. */
10293 /* 32 bit load sign-extended byte scaled unsigned 12 bit. */
10295 ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
10297 unsigned int rn = INSTR (9, 5);
10298 unsigned int rt = INSTR (4, 0);
10300 /* The target register may not be SP but the source may be
10301 there is no scaling required for a byte load. */
10302 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
10303 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10304 (int64_t) aarch64_get_mem_s8 (cpu, address));
10307 /* 32 bit load sign-extended byte scaled or unscaled zero-
10308 or sign-extended 32-bit register offset. */
10310 ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10312 unsigned int rm = INSTR (20, 16);
10313 unsigned int rn = INSTR (9, 5);
10314 unsigned int rt = INSTR (4, 0);
10316 /* rn may reference SP, rm and rt must reference ZR. */
10318 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10319 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10322 /* There is no scaling required for a byte load. */
10323 aarch64_set_reg_u64
10324 (cpu, rt, NO_SP, (int64_t) aarch64_get_mem_s8 (cpu, address
10328 /* 32 bit load sign-extended byte unscaled signed 9 bit with
10329 pre- or post-writeback. */
10331 ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10334 unsigned int rn = INSTR (9, 5);
10335 unsigned int rt = INSTR (4, 0);
10337 if (rn == rt && wb != NoWriteBack)
10340 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10345 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10346 (int64_t) aarch64_get_mem_s8 (cpu, address));
10351 if (wb != NoWriteBack)
10352 aarch64_set_reg_u64 (cpu, rn, NO_SP, address);
10355 /* 8 bit store scaled. */
10357 fstrb_abs (sim_cpu *cpu, uint32_t offset)
10359 unsigned st = INSTR (4, 0);
10360 unsigned rn = INSTR (9, 5);
10362 aarch64_set_mem_u8 (cpu,
10363 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
10364 aarch64_get_vec_u8 (cpu, st, 0));
10367 /* 8 bit store scaled or unscaled zero- or
10368 sign-extended 8-bit register offset. */
10370 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10372 unsigned rm = INSTR (20, 16);
10373 unsigned rn = INSTR (9, 5);
10374 unsigned st = INSTR (4, 0);
10376 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10377 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10379 uint64_t displacement = scaling == Scaled ? extended : 0;
10382 (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0));
10385 /* 16 bit store scaled. */
10387 fstrh_abs (sim_cpu *cpu, uint32_t offset)
10389 unsigned st = INSTR (4, 0);
10390 unsigned rn = INSTR (9, 5);
10392 aarch64_set_mem_u16
10394 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16),
10395 aarch64_get_vec_u16 (cpu, st, 0));
10398 /* 16 bit store scaled or unscaled zero-
10399 or sign-extended 16-bit register offset: store element 0 (u16)
of St at Rn + extend(Rm), optionally scaled by 2.  */
10401 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10403 unsigned rm = INSTR (20, 16);
10404 unsigned rn = INSTR (9, 5);
10405 unsigned st = INSTR (4, 0);
10407 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10408 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10410 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
10412 aarch64_set_mem_u16
10413 (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0));
10416 /* 32 bit store scaled unsigned 12 bit: store the single-precision
value in St (as raw u32 bits) at [Rn + offset*4].  */
10418 fstrs_abs (sim_cpu *cpu, uint32_t offset)
10420 unsigned st = INSTR (4, 0);
10421 unsigned rn = INSTR (9, 5);
10423 aarch64_set_mem_u32
10425 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32),
10426 aarch64_get_vec_u32 (cpu, st, 0));
10429 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback.
On writeback the updated address is written back to Rn (SP allowed).  */
10431 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10433 unsigned rn = INSTR (9, 5);
10434 unsigned st = INSTR (4, 0);
10436 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10441 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0));
10446 if (wb != NoWriteBack)
10447 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10450 /* 32 bit store scaled or unscaled zero-
10451 or sign-extended 32-bit register offset: store element 0 (u32)
of St at Rn + extend(Rm), optionally scaled by 4.  */
10453 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10455 unsigned rm = INSTR (20, 16);
10456 unsigned rn = INSTR (9, 5);
10457 unsigned st = INSTR (4, 0);
10459 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10460 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10462 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
10464 aarch64_set_mem_u32
10465 (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0));
10468 /* 64 bit store scaled unsigned 12 bit: store the double-precision
value in St (as raw u64 bits) at [Rn + offset*8].  */
10470 fstrd_abs (sim_cpu *cpu, uint32_t offset)
10472 unsigned st = INSTR (4, 0);
10473 unsigned rn = INSTR (9, 5);
10475 aarch64_set_mem_u64
10477 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64),
10478 aarch64_get_vec_u64 (cpu, st, 0));
10481 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback.
On writeback the updated address is written back to Rn (SP allowed).  */
10483 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10485 unsigned rn = INSTR (9, 5);
10486 unsigned st = INSTR (4, 0);
10488 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10493 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0));
10498 if (wb != NoWriteBack)
10499 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10502 /* 64 bit store scaled or unscaled zero-
10503 or sign-extended 32-bit register offset: store element 0 (u64)
of St at Rn + extend(Rm), optionally scaled by 8.  */
10505 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10507 unsigned rm = INSTR (20, 16);
10508 unsigned rn = INSTR (9, 5);
10509 unsigned st = INSTR (4, 0);
10511 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10512 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10514 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
10516 aarch64_set_mem_u64
10517 (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0));
10520 /* 128 bit store scaled unsigned 12 bit: copy the full 128-bit
register St to memory at [Rn + offset*16] via an FRegister temp.  */
10522 fstrq_abs (sim_cpu *cpu, uint32_t offset)
10525 unsigned st = INSTR (4, 0);
10526 unsigned rn = INSTR (9, 5);
10529 aarch64_get_FP_long_double (cpu, st, & a);
10531 addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
10532 aarch64_set_mem_long_double (cpu, addr, a);
10535 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback.
On writeback the updated address is written back to Rn (SP allowed).  */
10537 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10540 unsigned rn = INSTR (9, 5);
10541 unsigned st = INSTR (4, 0);
10542 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10547 aarch64_get_FP_long_double (cpu, st, & a);
10548 aarch64_set_mem_long_double (cpu, address, a);
10553 if (wb != NoWriteBack)
10554 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10557 /* 128 bit store scaled or unscaled zero-
10558 or sign-extended 32-bit register offset: store the 128-bit
register St at Rn + extend(Rm), optionally scaled by 16.  */
10560 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10562 unsigned rm = INSTR (20, 16);
10563 unsigned rn = INSTR (9, 5);
10564 unsigned st = INSTR (4, 0);
10566 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10567 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10569 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
10573 aarch64_get_FP_long_double (cpu, st, & a);
10574 aarch64_set_mem_long_double (cpu, address + displacement, a);
/* Decode and dispatch load/store with immediate pre-/post-index
writeback.  Dispatch key is (size << 2) | opc; V selects the
FP/SIMD register file.  */
10578 dexLoadImmediatePrePost (sim_cpu *cpu)
10580 /* instr[31,30] = size
10586 instr[20,12] = simm9
10587 instr[11] = wb : 0 ==> Post, 1 ==> Pre
10589 instr[9,5] = Rn may be SP.
10592 uint32_t V = INSTR (26, 26);
10593 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10594 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10595 WriteBack wb = INSTR (11, 11);
10599 /* GReg operations. */
10602 case 0: strb_wb (cpu, imm, wb); return;
10603 case 1: ldrb32_wb (cpu, imm, wb); return;
10604 case 2: ldrsb_wb (cpu, imm, wb); return;
10605 case 3: ldrsb32_wb (cpu, imm, wb); return;
10606 case 4: strh_wb (cpu, imm, wb); return;
10607 case 5: ldrh32_wb (cpu, imm, wb); return;
10608 case 6: ldrsh64_wb (cpu, imm, wb); return;
10609 case 7: ldrsh32_wb (cpu, imm, wb); return;
10610 case 8: str32_wb (cpu, imm, wb); return;
10611 case 9: ldr32_wb (cpu, imm, wb); return;
10612 case 10: ldrsw_wb (cpu, imm, wb); return;
10613 case 12: str_wb (cpu, imm, wb); return;
10614 case 13: ldr_wb (cpu, imm, wb); return;
10624 /* FReg operations. */
10627 case 2: fstrq_wb (cpu, imm, wb); return;
10628 case 3: fldrq_wb (cpu, imm, wb); return;
10629 case 8: fstrs_wb (cpu, imm, wb); return;
10630 case 9: fldrs_wb (cpu, imm, wb); return;
10631 case 12: fstrd_wb (cpu, imm, wb); return;
10632 case 13: fldrd_wb (cpu, imm, wb); return;
/* 8/16-bit FP writeback forms are not implemented here.  */
10634 case 0: /* STUR 8 bit FP. */
10635 case 1: /* LDUR 8 bit FP. */
10636 case 4: /* STUR 16 bit FP. */
10637 case 5: /* LDUR 16 bit FP. */
/* Decode and dispatch load/store with register offset (optionally
scaled and zero-/sign-extended).  Dispatch key is (size << 2) | opc;
V selects the FP/SIMD register file.  */
10652 dexLoadRegisterOffset (sim_cpu *cpu)
10654 /* instr[31,30] = size
10661 instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10662 110 ==> SXTW, 111 ==> SXTX,
10667 instr[4,0] = rt. */
10669 uint32_t V = INSTR (26, 26);
10670 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10671 Scaling scale = INSTR (12, 12);
10672 Extension extensionType = INSTR (15, 13);
10674 /* Check for illegal extension types. */
10675 if (uimm (extensionType, 1, 1) == 0)
/* UXTX/SXTX act as plain 64-bit offsets, i.e. no extension.  */
10678 if (extensionType == UXTX || extensionType == SXTX)
10679 extensionType = NoExtension;
10683 /* GReg operations. */
10686 case 0: strb_scale_ext (cpu, scale, extensionType); return;
10687 case 1: ldrb32_scale_ext (cpu, scale, extensionType); return;
10688 case 2: ldrsb_scale_ext (cpu, scale, extensionType); return;
10689 case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return;
10690 case 4: strh_scale_ext (cpu, scale, extensionType); return;
10691 case 5: ldrh32_scale_ext (cpu, scale, extensionType); return;
10692 case 6: ldrsh_scale_ext (cpu, scale, extensionType); return;
10693 case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return;
10694 case 8: str32_scale_ext (cpu, scale, extensionType); return;
10695 case 9: ldr32_scale_ext (cpu, scale, extensionType); return;
10696 case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10697 case 12: str_scale_ext (cpu, scale, extensionType); return;
10698 case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10699 case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10708 /* FReg operations. */
10711 case 1: /* LDUR 8 bit FP. */
10713 case 3: fldrq_scale_ext (cpu, scale, extensionType); return;
10714 case 5: /* LDUR 16 bit FP. */
10716 case 9: fldrs_scale_ext (cpu, scale, extensionType); return;
10717 case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
10719 case 0: fstrb_scale_ext (cpu, scale, extensionType); return;
10720 case 2: fstrq_scale_ext (cpu, scale, extensionType); return;
10721 case 4: fstrh_scale_ext (cpu, scale, extensionType); return;
10722 case 8: fstrs_scale_ext (cpu, scale, extensionType); return;
10723 case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
/* Decode and dispatch load/store with unsigned scaled 12-bit
immediate offset.  Dispatch key is (size << 2) | opc; V selects
the FP/SIMD register file.  */
10737 dexLoadUnsignedImmediate (sim_cpu *cpu)
10739 /* instr[29,24] == 111_01
10740 instr[31,30] = size
10743 instr[21,10] = uimm12 : unsigned immediate offset
10744 instr[9,5] = rn may be SP.
10745 instr[4,0] = rt. */
10747 uint32_t V = INSTR (26,26);
10748 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10749 uint32_t imm = INSTR (21, 10);
10753 /* GReg operations. */
10756 case 0: strb_abs (cpu, imm); return;
10757 case 1: ldrb32_abs (cpu, imm); return;
10758 case 2: ldrsb_abs (cpu, imm); return;
10759 case 3: ldrsb32_abs (cpu, imm); return;
10760 case 4: strh_abs (cpu, imm); return;
10761 case 5: ldrh32_abs (cpu, imm); return;
10762 case 6: ldrsh_abs (cpu, imm); return;
10763 case 7: ldrsh32_abs (cpu, imm); return;
10764 case 8: str32_abs (cpu, imm); return;
10765 case 9: ldr32_abs (cpu, imm); return;
10766 case 10: ldrsw_abs (cpu, imm); return;
10767 case 12: str_abs (cpu, imm); return;
10768 case 13: ldr_abs (cpu, imm); return;
10769 case 14: prfm_abs (cpu, imm); return;
10778 /* FReg operations. */
10781 case 0: fstrb_abs (cpu, imm); return;
10782 case 4: fstrh_abs (cpu, imm); return;
10783 case 8: fstrs_abs (cpu, imm); return;
10784 case 12: fstrd_abs (cpu, imm); return;
10785 case 2: fstrq_abs (cpu, imm); return;
10787 case 1: fldrb_abs (cpu, imm); return;
10788 case 5: fldrh_abs (cpu, imm); return;
10789 case 9: fldrs_abs (cpu, imm); return;
10790 case 13: fldrd_abs (cpu, imm); return;
10791 case 3: fldrq_abs (cpu, imm); return;
/* Decode and dispatch load/store-exclusive.  Only the non-pair
LDXR (L=1) and STXR (L=0) forms are handled here.  */
10805 dexLoadExclusive (sim_cpu *cpu)
10807 /* assert instr[29:24] = 001000;
10808 instr[31,30] = size
10809 instr[23] = 0 if exclusive
10810 instr[22] = L : 1 if load, 0 if store
10811 instr[21] = 1 if pair
10813 instr[15] = o0 : 1 if ordered
10816 instr[4.0] = Rt. */
/* Key is (L << 1) | pair; pair forms (1, 3) are not handled here.  */
10818 switch (INSTR (22, 21))
10820 case 2: ldxr (cpu); return;
10821 case 0: stxr (cpu); return;
/* Secondary decode for the "load/store other" group: route to the
unsigned-immediate, unscaled-immediate, pre/post-indexed or
register-offset decoders.  */
10827 dexLoadOther (sim_cpu *cpu)
10831 /* instr[29,25] = 111_0
10832 instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
10833 instr[21:11,10] is the secondary dispatch. */
10834 if (INSTR (24, 24))
10836 dexLoadUnsignedImmediate (cpu);
/* Key is (bit 21 << 2) | bits [11,10].  */
10840 dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10));
10843 case 0: dexLoadUnscaledImmediate (cpu); return;
10844 case 1: dexLoadImmediatePrePost (cpu); return;
10845 case 3: dexLoadImmediatePrePost (cpu); return;
10846 case 6: dexLoadRegisterOffset (cpu); return;
/* STP (32-bit): store Rm then Rn at [Rd(base) (+offset)], 4 bytes
apart.  NB: in these pair helpers Rd is the BASE register (bits
[9,5], SP allowed) and Rm/Rn are the data registers.  */
10858 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10860 unsigned rn = INSTR (14, 10);
10861 unsigned rd = INSTR (9, 5);
10862 unsigned rm = INSTR (4, 0);
10863 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
/* Writeback clashing with a data register is treated as unallocated.  */
10865 if ((rn == rd || rm == rd) && wb != NoWriteBack)
10866 HALT_UNALLOC; /* ??? */
10873 aarch64_set_mem_u32 (cpu, address,
10874 aarch64_get_reg_u32 (cpu, rm, NO_SP));
10875 aarch64_set_mem_u32 (cpu, address + 4,
10876 aarch64_get_reg_u32 (cpu, rn, NO_SP));
10881 if (wb != NoWriteBack)
10882 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
/* STP (64-bit): store Rm then Rn at [Rd(base) (+offset)], 8 bytes
apart; Rd (bits [9,5]) is the base, SP allowed.  */
10886 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10888 unsigned rn = INSTR (14, 10);
10889 unsigned rd = INSTR (9, 5);
10890 unsigned rm = INSTR (4, 0);
10891 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
/* Writeback clashing with a data register is treated as unallocated.  */
10893 if ((rn == rd || rm == rd) && wb != NoWriteBack)
10894 HALT_UNALLOC; /* ??? */
10901 aarch64_set_mem_u64 (cpu, address,
10902 aarch64_get_reg_u64 (cpu, rm, NO_SP));
10903 aarch64_set_mem_u64 (cpu, address + 8,
10904 aarch64_get_reg_u64 (cpu, rn, NO_SP));
10909 if (wb != NoWriteBack)
10910 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
/* LDP (32-bit): load Rm then Rn from [Rd(base) (+offset)], 4 bytes
apart, zero-extending each into the 64-bit register.  */
10914 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10916 unsigned rn = INSTR (14, 10);
10917 unsigned rd = INSTR (9, 5);
10918 unsigned rm = INSTR (4, 0);
10919 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10921 /* Treat this as unalloc to make sure we don't do it. */
/* NOTE(review): destinations are written with SP_OK while the
matching stores read with NO_SP — confirm this asymmetry.  */
10930 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address));
10931 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4));
10936 if (wb != NoWriteBack)
10937 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
/* LDPSW: load two 32-bit values from [Rd(base) (+offset)] and
sign-extend each into the 64-bit destinations Rm and Rn.  */
10941 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10943 unsigned rn = INSTR (14, 10);
10944 unsigned rd = INSTR (9, 5);
10945 unsigned rm = INSTR (4, 0);
10946 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10948 /* Treat this as unalloc to make sure we don't do it. */
10957 aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address));
10958 aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4));
10963 if (wb != NoWriteBack)
10964 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
/* LDP (64-bit): load Rm then Rn from [Rd(base) (+offset)], 8 bytes
apart.  */
10968 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10970 unsigned rn = INSTR (14, 10);
10971 unsigned rd = INSTR (9, 5);
10972 unsigned rm = INSTR (4, 0);
10973 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10975 /* Treat this as unalloc to make sure we don't do it. */
10984 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address));
10985 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8));
10990 if (wb != NoWriteBack)
10991 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
/* Decode and dispatch general-register load/store pair.  Dispatch
key is (size << 3) | (mode << 1) | L; the scaled signed offset
comes from bits [21,15].  */
10995 dex_load_store_pair_gr (sim_cpu *cpu)
10997 /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
10998 instr[29,25] = instruction encoding: 101_0
10999 instr[26] = V : 1 if fp 0 if gp
11000 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11001 instr[22] = load/store (1=> load)
11002 instr[21,15] = signed, scaled, offset
11005 instr[ 4, 0] = Rm. */
11007 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11008 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11012 case 2: store_pair_u32 (cpu, offset, Post); return;
11013 case 3: load_pair_u32 (cpu, offset, Post); return;
11014 case 4: store_pair_u32 (cpu, offset, NoWriteBack); return;
11015 case 5: load_pair_u32 (cpu, offset, NoWriteBack); return;
11016 case 6: store_pair_u32 (cpu, offset, Pre); return;
11017 case 7: load_pair_u32 (cpu, offset, Pre); return;
/* size == 01: LDPSW only — there is no signed store pair.  */
11019 case 11: load_pair_s32 (cpu, offset, Post); return;
11020 case 13: load_pair_s32 (cpu, offset, NoWriteBack); return;
11021 case 15: load_pair_s32 (cpu, offset, Pre); return;
11023 case 18: store_pair_u64 (cpu, offset, Post); return;
11024 case 19: load_pair_u64 (cpu, offset, Post); return;
11025 case 20: store_pair_u64 (cpu, offset, NoWriteBack); return;
11026 case 21: load_pair_u64 (cpu, offset, NoWriteBack); return;
11027 case 22: store_pair_u64 (cpu, offset, Pre); return;
11028 case 23: load_pair_u64 (cpu, offset, Pre); return;
/* STP (single-precision FP): store element 0 (u32 bits) of Vm then
Vn at [Rd(base) (+offset)], 4 bytes apart.  */
11036 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11038 unsigned rn = INSTR (14, 10);
11039 unsigned rd = INSTR (9, 5);
11040 unsigned rm = INSTR (4, 0);
11041 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11048 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0));
11049 aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0));
11054 if (wb != NoWriteBack)
11055 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
/* STP (double-precision FP): store element 0 (u64 bits) of Vm then
Vn at [Rd(base) (+offset)], 8 bytes apart.  */
11059 store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11061 unsigned rn = INSTR (14, 10);
11062 unsigned rd = INSTR (9, 5);
11063 unsigned rm = INSTR (4, 0);
11064 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11071 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0));
11072 aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0));
11077 if (wb != NoWriteBack)
11078 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
/* STP (128-bit Q registers): store the full 128 bits of Vm then Vn
at [Rd(base) (+offset)], 16 bytes apart, via an FRegister temp.  */
11082 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11085 unsigned rn = INSTR (14, 10);
11086 unsigned rd = INSTR (9, 5);
11087 unsigned rm = INSTR (4, 0);
11088 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11095 aarch64_get_FP_long_double (cpu, rm, & a);
11096 aarch64_set_mem_long_double (cpu, address, a);
11097 aarch64_get_FP_long_double (cpu, rn, & a);
11098 aarch64_set_mem_long_double (cpu, address + 16, a);
11103 if (wb != NoWriteBack)
11104 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
/* LDP (single-precision FP): load element 0 of Vm then Vn from
[Rd(base) (+offset)], 4 bytes apart.  */
11108 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11110 unsigned rn = INSTR (14, 10);
11111 unsigned rd = INSTR (9, 5);
11112 unsigned rm = INSTR (4, 0);
11113 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11123 aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address));
11124 aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4));
11129 if (wb != NoWriteBack)
11130 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
/* LDP (double-precision FP): load element 0 of Vm then Vn from
[Rd(base) (+offset)], 8 bytes apart.  */
11134 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11136 unsigned rn = INSTR (14, 10);
11137 unsigned rd = INSTR (9, 5);
11138 unsigned rm = INSTR (4, 0);
11139 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11149 aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address));
11150 aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8));
11155 if (wb != NoWriteBack)
11156 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
/* LDP (128-bit Q registers): load the full 128 bits of Vm then Vn
from [Rd(base) (+offset)], 16 bytes apart, via an FRegister temp.  */
11160 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11163 unsigned rn = INSTR (14, 10);
11164 unsigned rd = INSTR (9, 5);
11165 unsigned rm = INSTR (4, 0);
11166 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11176 aarch64_get_mem_long_double (cpu, address, & a);
11177 aarch64_set_FP_long_double (cpu, rm, a);
11178 aarch64_get_mem_long_double (cpu, address + 16, & a);
11179 aarch64_set_FP_long_double (cpu, rn, a);
11184 if (wb != NoWriteBack)
11185 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
/* Decode and dispatch FP/SIMD load/store pair.  Dispatch key is
(size << 3) | (mode << 1) | L, mirroring dex_load_store_pair_gr.  */
11189 dex_load_store_pair_fp (sim_cpu *cpu)
11191 /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
11192 instr[29,25] = instruction encoding
11193 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11194 instr[22] = load/store (1=> load)
11195 instr[21,15] = signed, scaled, offset
11198 instr[ 4, 0] = Rm */
11200 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11201 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11205 case 2: store_pair_float (cpu, offset, Post); return;
11206 case 3: load_pair_float (cpu, offset, Post); return;
11207 case 4: store_pair_float (cpu, offset, NoWriteBack); return;
11208 case 5: load_pair_float (cpu, offset, NoWriteBack); return;
11209 case 6: store_pair_float (cpu, offset, Pre); return;
11210 case 7: load_pair_float (cpu, offset, Pre); return;
11212 case 10: store_pair_double (cpu, offset, Post); return;
11213 case 11: load_pair_double (cpu, offset, Post); return;
11214 case 12: store_pair_double (cpu, offset, NoWriteBack); return;
11215 case 13: load_pair_double (cpu, offset, NoWriteBack); return;
11216 case 14: store_pair_double (cpu, offset, Pre); return;
11217 case 15: load_pair_double (cpu, offset, Pre); return;
11219 case 18: store_pair_long_double (cpu, offset, Post); return;
11220 case 19: load_pair_long_double (cpu, offset, Post); return;
11221 case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
11222 case 21: load_pair_long_double (cpu, offset, NoWriteBack); return;
11223 case 22: store_pair_long_double (cpu, offset, Pre); return;
11224 case 23: load_pair_long_double (cpu, offset, Pre); return;
/* Return the vector register number O places after V, with wraparound.
NOTE(review): AArch64 has 32 V registers, so a 0x1F mask would be the
obvious wrap; 0x3F only behaves if the simulator's vector register
file has >= 64 slots — confirm against the register-file definition.  */
11231 static inline unsigned
11232 vec_reg (unsigned v, unsigned o)
11234 return (v + o) & 0x3F;
11237 /* Load multiple N-element structures to N consecutive registers.
ALL selects full 128-bit (Q) vs. 64-bit (D) operation; SIZE is the
element size; elements are de-interleaved across vd..vd+N-1 via
vec_reg so the register index wraps.  */
11239 vec_load (sim_cpu *cpu, uint64_t address, unsigned N)
11241 int all = INSTR (30, 30);
11242 unsigned size = INSTR (11, 10);
11243 unsigned vd = INSTR (4, 0);
11248 case 0: /* 8-bit operations. */
11250 for (i = 0; i < (16 * N); i++)
11251 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15,
11252 aarch64_get_mem_u8 (cpu, address + i));
11254 for (i = 0; i < (8 * N); i++)
11255 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7,
11256 aarch64_get_mem_u8 (cpu, address + i));
11259 case 1: /* 16-bit operations. */
11261 for (i = 0; i < (8 * N); i++)
11262 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7,
11263 aarch64_get_mem_u16 (cpu, address + i * 2));
11265 for (i = 0; i < (4 * N); i++)
11266 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3,
11267 aarch64_get_mem_u16 (cpu, address + i * 2));
11270 case 2: /* 32-bit operations. */
11272 for (i = 0; i < (4 * N); i++)
11273 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3,
11274 aarch64_get_mem_u32 (cpu, address + i * 4));
11276 for (i = 0; i < (2 * N); i++)
11277 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1,
11278 aarch64_get_mem_u32 (cpu, address + i * 4));
11281 case 3: /* 64-bit operations. */
11283 for (i = 0; i < (2 * N); i++)
11284 aarch64_set_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1,
11285 aarch64_get_mem_u64 (cpu, address + i * 8));
11287 for (i = 0; i < N; i++)
11288 aarch64_set_vec_u64 (cpu, vec_reg (vd, i), 0,
11289 aarch64_get_mem_u64 (cpu, address + i * 8));
11294 /* LD4: load multiple 4-element structures to four consecutive
registers starting at Vd.  */
11296 LD4 (sim_cpu *cpu, uint64_t address)
11298 vec_load (cpu, address, 4);
11301 /* LD3: load multiple 3-element structures to three consecutive
registers starting at Vd.  */
11303 LD3 (sim_cpu *cpu, uint64_t address)
11305 vec_load (cpu, address, 3);
11308 /* LD2: load multiple 2-element structures to two consecutive
registers starting at Vd.  */
11310 LD2 (sim_cpu *cpu, uint64_t address)
11312 vec_load (cpu, address, 2);
11315 /* Load multiple 1-element structures into one register: fill Vd
element-by-element from consecutive memory; ALL selects the
128-bit (Q) vs. 64-bit (D) form.  */
11317 LD1_1 (sim_cpu *cpu, uint64_t address)
11319 int all = INSTR (30, 30);
11320 unsigned size = INSTR (11, 10);
11321 unsigned vd = INSTR (4, 0);
11327 /* LD1 {Vd.16b}, addr, #16 */
11328 /* LD1 {Vd.8b}, addr, #8 */
11329 for (i = 0; i < (all ? 16 : 8); i++)
11330 aarch64_set_vec_u8 (cpu, vd, i,
11331 aarch64_get_mem_u8 (cpu, address + i));
11335 /* LD1 {Vd.8h}, addr, #16 */
11336 /* LD1 {Vd.4h}, addr, #8 */
11337 for (i = 0; i < (all ? 8 : 4); i++)
11338 aarch64_set_vec_u16 (cpu, vd, i,
11339 aarch64_get_mem_u16 (cpu, address + i * 2));
11343 /* LD1 {Vd.4s}, addr, #16 */
11344 /* LD1 {Vd.2s}, addr, #8 */
11345 for (i = 0; i < (all ? 4 : 2); i++)
11346 aarch64_set_vec_u32 (cpu, vd, i,
11347 aarch64_get_mem_u32 (cpu, address + i * 4));
11351 /* LD1 {Vd.2d}, addr, #16 */
11352 /* LD1 {Vd.1d}, addr, #8 */
11353 for (i = 0; i < (all ? 2 : 1); i++)
11354 aarch64_set_vec_u64 (cpu, vd, i,
11355 aarch64_get_mem_u64 (cpu, address + i * 8));
11360 /* Load multiple 1-element structures into two registers.  */
11362 LD1_2 (sim_cpu *cpu, uint64_t address)
11364 /* FIXME: This algorithm is *exactly* the same as the LD2 version.
11365 So why have two different instructions ? There must be something
11366 wrong somewhere. */
/* NOTE(review): LD1 (multi-reg) fills registers sequentially whereas
LD2 de-interleaves; sharing vec_load conflates the two — confirm.  */
11367 vec_load (cpu, address, 2);
11370 /* Load multiple 1-element structures into three registers.  */
11372 LD1_3 (sim_cpu *cpu, uint64_t address)
11374 /* FIXME: This algorithm is *exactly* the same as the LD3 version.
11375 So why have two different instructions ? There must be something
11376 wrong somewhere. */
/* NOTE(review): LD1 (multi-reg) fills registers sequentially whereas
LD3 de-interleaves; sharing vec_load conflates the two — confirm.  */
11377 vec_load (cpu, address, 3);
11380 /* Load multiple 1-element structures into four registers.  */
11382 LD1_4 (sim_cpu *cpu, uint64_t address)
11384 /* FIXME: This algorithm is *exactly* the same as the LD4 version.
11385 So why have two different instructions ? There must be something
11386 wrong somewhere. */
/* NOTE(review): LD1 (multi-reg) fills registers sequentially whereas
LD4 de-interleaves; sharing vec_load conflates the two — confirm.  */
11387 vec_load (cpu, address, 4);
11390 /* Store multiple N-element structures to N consecutive registers.
Mirror of vec_load: interleave elements from vd..vd+N-1 out to
consecutive memory; ALL selects 128-bit (Q) vs. 64-bit (D) form.  */
11392 vec_store (sim_cpu *cpu, uint64_t address, unsigned N)
11394 int all = INSTR (30, 30);
11395 unsigned size = INSTR (11, 10);
11396 unsigned vd = INSTR (4, 0);
11401 case 0: /* 8-bit operations. */
11403 for (i = 0; i < (16 * N); i++)
11406 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15));
11408 for (i = 0; i < (8 * N); i++)
11411 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7));
11414 case 1: /* 16-bit operations. */
11416 for (i = 0; i < (8 * N); i++)
11417 aarch64_set_mem_u16
11418 (cpu, address + i * 2,
11419 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7));
11421 for (i = 0; i < (4 * N); i++)
11422 aarch64_set_mem_u16
11423 (cpu, address + i * 2,
11424 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3));
11427 case 2: /* 32-bit operations. */
11429 for (i = 0; i < (4 * N); i++)
11430 aarch64_set_mem_u32
11431 (cpu, address + i * 4,
11432 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3));
11434 for (i = 0; i < (2 * N); i++)
11435 aarch64_set_mem_u32
11436 (cpu, address + i * 4,
11437 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1));
11440 case 3: /* 64-bit operations. */
11442 for (i = 0; i < (2 * N); i++)
11443 aarch64_set_mem_u64
11444 (cpu, address + i * 8,
11445 aarch64_get_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1));
11447 for (i = 0; i < N; i++)
11448 aarch64_set_mem_u64
11449 (cpu, address + i * 8,
11450 aarch64_get_vec_u64 (cpu, vec_reg (vd, i), 0));
11455 /* Store multiple 4-element structures from four consecutive
registers starting at Vd.  */
11457 ST4 (sim_cpu *cpu, uint64_t address)
11459 vec_store (cpu, address, 4);
11462 /* Store multiple 3-element structures from three consecutive
registers starting at Vd.  */
11464 ST3 (sim_cpu *cpu, uint64_t address)
11466 vec_store (cpu, address, 3);
11469 /* Store multiple 2-element structures from two consecutive
registers starting at Vd.  */
11471 ST2 (sim_cpu *cpu, uint64_t address)
11473 vec_store (cpu, address, 2);
11476 /* Store multiple 1-element structures from one register: write the
elements of Vd to consecutive memory; ALL selects the 128-bit (Q)
vs. 64-bit (D) form.  */
11478 ST1_1 (sim_cpu *cpu, uint64_t address)
11480 int all = INSTR (30, 30);
11481 unsigned size = INSTR (11, 10);
11482 unsigned vd = INSTR (4, 0);
11488 for (i = 0; i < (all ? 16 : 8); i++)
11489 aarch64_set_mem_u8 (cpu, address + i,
11490 aarch64_get_vec_u8 (cpu, vd, i));
11494 for (i = 0; i < (all ? 8 : 4); i++)
11495 aarch64_set_mem_u16 (cpu, address + i * 2,
11496 aarch64_get_vec_u16 (cpu, vd, i));
11500 for (i = 0; i < (all ? 4 : 2); i++)
11501 aarch64_set_mem_u32 (cpu, address + i * 4,
11502 aarch64_get_vec_u32 (cpu, vd, i));
11506 for (i = 0; i < (all ? 2 : 1); i++)
11507 aarch64_set_mem_u64 (cpu, address + i * 8,
11508 aarch64_get_vec_u64 (cpu, vd, i));
11513 /* Store multiple 1-element structures from two registers.  */
11515 ST1_2 (sim_cpu *cpu, uint64_t address)
11517 /* FIXME: This algorithm is *exactly* the same as the ST2 version.
11518 So why have two different instructions ? There must be
11519 something wrong somewhere. */
/* NOTE(review): ST1 (multi-reg) stores registers sequentially whereas
ST2 interleaves; sharing vec_store conflates the two — confirm.  */
11520 vec_store (cpu, address, 2);
11523 /* Store multiple 1-element structures from three registers.  */
11525 ST1_3 (sim_cpu *cpu, uint64_t address)
11527 /* FIXME: This algorithm is *exactly* the same as the ST3 version.
11528 So why have two different instructions ? There must be
11529 something wrong somewhere. */
/* NOTE(review): ST1 (multi-reg) stores registers sequentially whereas
ST3 interleaves; sharing vec_store conflates the two — confirm.  */
11530 vec_store (cpu, address, 3);
11533 /* Store multiple 1-element structures from four registers.  */
11535 ST1_4 (sim_cpu *cpu, uint64_t address)
11537 /* FIXME: This algorithm is *exactly* the same as the ST4 version.
11538 So why have two different instructions ? There must be
11539 something wrong somewhere. */
/* NOTE(review): ST1 (multi-reg) stores registers sequentially whereas
ST4 interleaves; sharing vec_store conflates the two — confirm.  */
11540 vec_store (cpu, address, 4);
/* LDnR (load single structure and replicate): read N consecutive
elements from memory and replicate each across every lane of the
corresponding destination register Vd..Vd+N-1.

BUG FIX: the LD2R/LD3R/LD4R loops previously wrote vector lane 0 on
every iteration (third argument literally 0) instead of lane i, so
only one lane was ever replicated; they also indexed the extra
destination registers as vd + k without wraparound.  Each store now
targets lane i of vec_reg (vd, k), matching the (correct) LD1R case
above and the architectural replicate semantics.  */
11544 do_vec_LDnR (sim_cpu *cpu, uint64_t address)
11547 instr[30] = element selector 0=>half, 1=>all elements
11548 instr[29,24] = 00 1101
11549 instr[23] = 0=>simple, 1=>post
11551 instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
11552 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11553 11111 (immediate post inc)
11555 instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
11557 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11558 10=> word(s), 11=> double(d)
11559 instr[9,5] = address
11562 unsigned full = INSTR (30, 30);
11563 unsigned vd = INSTR (4, 0);
11564 unsigned size = INSTR (11, 10);
11567 NYI_assert (29, 24, 0x0D);
11568 NYI_assert (22, 22, 1);
11569 NYI_assert (15, 14, 3);
11570 NYI_assert (12, 12, 0);
11572 switch ((INSTR (13, 13) << 1) | INSTR (21, 21))
11574 case 0: /* LD1R. */
11579 uint8_t val = aarch64_get_mem_u8 (cpu, address);
11580 for (i = 0; i < (full ? 16 : 8); i++)
11581 aarch64_set_vec_u8 (cpu, vd, i, val);
11587 uint16_t val = aarch64_get_mem_u16 (cpu, address);
11588 for (i = 0; i < (full ? 8 : 4); i++)
11589 aarch64_set_vec_u16 (cpu, vd, i, val);
11595 uint32_t val = aarch64_get_mem_u32 (cpu, address);
11596 for (i = 0; i < (full ? 4 : 2); i++)
11597 aarch64_set_vec_u32 (cpu, vd, i, val);
11603 uint64_t val = aarch64_get_mem_u64 (cpu, address);
11604 for (i = 0; i < (full ? 2 : 1); i++)
11605 aarch64_set_vec_u64 (cpu, vd, i, val);
11614 case 1: /* LD2R. */
11619 uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11620 uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11622 for (i = 0; i < (full ? 16 : 8); i++)
11624 aarch64_set_vec_u8 (cpu, vd, i, val1);
11625 aarch64_set_vec_u8 (cpu, vec_reg (vd, 1), i, val2);
11632 uint16_t val1 = aarch64_get_mem_u16 (cpu, address);
11633 uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11635 for (i = 0; i < (full ? 8 : 4); i++)
11637 aarch64_set_vec_u16 (cpu, vd, i, val1);
11638 aarch64_set_vec_u16 (cpu, vec_reg (vd, 1), i, val2);
11645 uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11646 uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11648 for (i = 0; i < (full ? 4 : 2); i++)
11650 aarch64_set_vec_u32 (cpu, vd, i, val1);
11651 aarch64_set_vec_u32 (cpu, vec_reg (vd, 1), i, val2);
11658 uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11659 uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11661 for (i = 0; i < (full ? 2 : 1); i++)
11663 aarch64_set_vec_u64 (cpu, vd, i, val1);
11664 aarch64_set_vec_u64 (cpu, vec_reg (vd, 1), i, val2);
11674 case 2: /* LD3R. */
11679 uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11680 uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11681 uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2);
11683 for (i = 0; i < (full ? 16 : 8); i++)
11685 aarch64_set_vec_u8 (cpu, vd, i, val1);
11686 aarch64_set_vec_u8 (cpu, vec_reg (vd, 1), i, val2);
11687 aarch64_set_vec_u8 (cpu, vec_reg (vd, 2), i, val3);
11694 uint32_t val1 = aarch64_get_mem_u16 (cpu, address);
11695 uint32_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11696 uint32_t val3 = aarch64_get_mem_u16 (cpu, address + 4);
11698 for (i = 0; i < (full ? 8 : 4); i++)
11700 aarch64_set_vec_u16 (cpu, vd, i, val1);
11701 aarch64_set_vec_u16 (cpu, vec_reg (vd, 1), i, val2);
11702 aarch64_set_vec_u16 (cpu, vec_reg (vd, 2), i, val3);
11709 uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11710 uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11711 uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8);
11713 for (i = 0; i < (full ? 4 : 2); i++)
11715 aarch64_set_vec_u32 (cpu, vd, i, val1);
11716 aarch64_set_vec_u32 (cpu, vec_reg (vd, 1), i, val2);
11717 aarch64_set_vec_u32 (cpu, vec_reg (vd, 2), i, val3);
11724 uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11725 uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11726 uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16);
11728 for (i = 0; i < (full ? 2 : 1); i++)
11730 aarch64_set_vec_u64 (cpu, vd, i, val1);
11731 aarch64_set_vec_u64 (cpu, vec_reg (vd, 1), i, val2);
11732 aarch64_set_vec_u64 (cpu, vec_reg (vd, 2), i, val3);
11742 case 3: /* LD4R. */
11747 uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11748 uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11749 uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2);
11750 uint8_t val4 = aarch64_get_mem_u8 (cpu, address + 3);
11752 for (i = 0; i < (full ? 16 : 8); i++)
11754 aarch64_set_vec_u8 (cpu, vd, i, val1);
11755 aarch64_set_vec_u8 (cpu, vec_reg (vd, 1), i, val2);
11756 aarch64_set_vec_u8 (cpu, vec_reg (vd, 2), i, val3);
11757 aarch64_set_vec_u8 (cpu, vec_reg (vd, 3), i, val4);
11764 uint32_t val1 = aarch64_get_mem_u16 (cpu, address);
11765 uint32_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11766 uint32_t val3 = aarch64_get_mem_u16 (cpu, address + 4);
11767 uint32_t val4 = aarch64_get_mem_u16 (cpu, address + 6);
11769 for (i = 0; i < (full ? 8 : 4); i++)
11771 aarch64_set_vec_u16 (cpu, vd, i, val1);
11772 aarch64_set_vec_u16 (cpu, vec_reg (vd, 1), i, val2);
11773 aarch64_set_vec_u16 (cpu, vec_reg (vd, 2), i, val3);
11774 aarch64_set_vec_u16 (cpu, vec_reg (vd, 3), i, val4);
11781 uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11782 uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11783 uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8);
11784 uint32_t val4 = aarch64_get_mem_u32 (cpu, address + 12);
11786 for (i = 0; i < (full ? 4 : 2); i++)
11788 aarch64_set_vec_u32 (cpu, vd, i, val1);
11789 aarch64_set_vec_u32 (cpu, vec_reg (vd, 1), i, val2);
11790 aarch64_set_vec_u32 (cpu, vec_reg (vd, 2), i, val3);
11791 aarch64_set_vec_u32 (cpu, vec_reg (vd, 3), i, val4);
11798 uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11799 uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11800 uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16);
11801 uint64_t val4 = aarch64_get_mem_u64 (cpu, address + 24);
11803 for (i = 0; i < (full ? 2 : 1); i++)
11805 aarch64_set_vec_u64 (cpu, vd, i, val1);
11806 aarch64_set_vec_u64 (cpu, vec_reg (vd, 1), i, val2);
11807 aarch64_set_vec_u64 (cpu, vec_reg (vd, 2), i, val3);
11808 aarch64_set_vec_u64 (cpu, vec_reg (vd, 3), i, val4);
11824 do_vec_load_store (sim_cpu *cpu)
11826 /* {LD|ST}<N> {Vd..Vd+N}, vaddr
11829 instr[30] = element selector 0=>half, 1=>all elements
11830 instr[29,25] = 00110
11832 instr[23] = 0=>simple, 1=>post
11833 instr[22] = 0=>store, 1=>load
11834 instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR)
11835 instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
11836 11111 (immediate post inc)
11837 instr[15,12] = elements and destinations. eg for load:
11838 0000=>LD4 => load multiple 4-element to
11839 four consecutive registers
11840 0100=>LD3 => load multiple 3-element to
11841 three consecutive registers
11842 1000=>LD2 => load multiple 2-element to
11843 two consecutive registers
11844 0010=>LD1 => load multiple 1-element to
11845 four consecutive registers
11846 0110=>LD1 => load multiple 1-element to
11847 three consecutive registers
11848 1010=>LD1 => load multiple 1-element to
11849 two consecutive registers
11850 0111=>LD1 => load multiple 1-element to
11854 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11855 10=> word(s), 11=> double(d)
11856 instr[9,5] = Vn, can be SP
11865 if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
11868 type = INSTR (15, 12);
11869 if (type != 0xE && type != 0xE && INSTR (21, 21) != 0)
11872 post = INSTR (23, 23);
11873 load = INSTR (22, 22);
11875 address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
11879 unsigned vm = INSTR (20, 16);
11883 unsigned sizeof_operation;
11887 case 0: sizeof_operation = 32; break;
11888 case 4: sizeof_operation = 24; break;
11889 case 8: sizeof_operation = 16; break;
11892 sizeof_operation = INSTR (21, 21) ? 2 : 1;
11893 sizeof_operation <<= INSTR (11, 10);
11897 sizeof_operation = INSTR (21, 21) ? 8 : 4;
11898 sizeof_operation <<= INSTR (11, 10);
11902 /* One register, immediate offset variant. */
11903 sizeof_operation = 8;
11907 /* Two registers, immediate offset variant. */
11908 sizeof_operation = 16;
11912 /* Three registers, immediate offset variant. */
11913 sizeof_operation = 24;
11917 /* Four registers, immediate offset variant. */
11918 sizeof_operation = 32;
11925 if (INSTR (30, 30))
11926 sizeof_operation *= 2;
11928 aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
11931 aarch64_set_reg_u64 (cpu, vn, SP_OK,
11932 address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
11936 NYI_assert (20, 16, 0);
11943 case 0: LD4 (cpu, address); return;
11944 case 4: LD3 (cpu, address); return;
11945 case 8: LD2 (cpu, address); return;
11946 case 2: LD1_4 (cpu, address); return;
11947 case 6: LD1_3 (cpu, address); return;
11948 case 10: LD1_2 (cpu, address); return;
11949 case 7: LD1_1 (cpu, address); return;
11952 case 0xC: do_vec_LDnR (cpu, address); return;
11962 case 0: ST4 (cpu, address); return;
11963 case 4: ST3 (cpu, address); return;
11964 case 8: ST2 (cpu, address); return;
11965 case 2: ST1_4 (cpu, address); return;
11966 case 6: ST1_3 (cpu, address); return;
11967 case 10: ST1_2 (cpu, address); return;
11968 case 7: ST1_1 (cpu, address); return;
/* Decode-and-execute dispatcher for the load/store major group.
   Routes on the secondary dispatch bits [29,28:26] to the exclusive,
   PC-literal, "other", AdvSIMD multi-structure, GP-pair and FP-pair
   handlers.  */
11975 dexLdSt (sim_cpu *cpu)
11977 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
11978 assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
11979 group == GROUP_LDST_1100 || group == GROUP_LDST_1110
11980 bits [29,28:26] of a LS are the secondary dispatch vector. */
11981 uint32_t group2 = dispatchLS (aarch64_get_instr (cpu));
11986 dexLoadExclusive (cpu); return;
11990 dexLoadLiteral (cpu); return;
11994 dexLoadOther (cpu); return;
11996 case LS_ADVSIMD_001:
/* AdvSIMD multi-structure loads/stores ({LD|ST}[1-4][R]).  */
11997 do_vec_load_store (cpu); return;
12000 dex_load_store_pair_gr (cpu); return;
12003 dex_load_store_pair_fp (cpu); return;
12006 /* Should never reach here. */
12011 /* Specific decode and execute for group Data Processing Register. */
/* Decode and execute a logical (shifted register) instruction:
   AND/BIC/ORR/ORN/EOR/EON/ANDS/BICS in 32- and 64-bit forms.  */
12014 dexLogicalShiftedRegister (sim_cpu *cpu)
12016 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12018 instr[28:24] = 01010
12019 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
12022 instr[15,10] = count : must be 0xxxxx for 32 bit
12026 uint32_t size = INSTR (31, 31);
12027 Shift shiftType = INSTR (23, 22);
12028 uint32_t count = INSTR (15, 10);
12030 /* 32 bit operations must have count[5] = 0.
12031 or else we have an UNALLOC. */
12032 if (size == 0 && uimm (count, 5, 5))
12035 /* Dispatch on size:op:N. */
/* 4-bit key: size (instr[31]) : opc (instr[30,29]) : N (instr[21]).
   Cases 0-7 are the W-register forms, 8-15 the X-register forms.  */
12036 switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
12038 case 0: and32_shift (cpu, shiftType, count); return;
12039 case 1: bic32_shift (cpu, shiftType, count); return;
12040 case 2: orr32_shift (cpu, shiftType, count); return;
12041 case 3: orn32_shift (cpu, shiftType, count); return;
12042 case 4: eor32_shift (cpu, shiftType, count); return;
12043 case 5: eon32_shift (cpu, shiftType, count); return;
12044 case 6: ands32_shift (cpu, shiftType, count); return;
12045 case 7: bics32_shift (cpu, shiftType, count); return;
12046 case 8: and64_shift (cpu, shiftType, count); return;
12047 case 9: bic64_shift (cpu, shiftType, count); return;
12048 case 10:orr64_shift (cpu, shiftType, count); return;
12049 case 11:orn64_shift (cpu, shiftType, count); return;
12050 case 12:eor64_shift (cpu, shiftType, count); return;
12051 case 13:eon64_shift (cpu, shiftType, count); return;
12052 case 14:ands64_shift (cpu, shiftType, count); return;
12053 case 15:bics64_shift (cpu, shiftType, count); return;
12057 /* 32 bit conditional select. */
12059 csel32 (sim_cpu *cpu, CondCode cc)
12061 unsigned rm = INSTR (20, 16);
12062 unsigned rn = INSTR (9, 5);
12063 unsigned rd = INSTR (4, 0);
12065 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12066 testConditionCode (cpu, cc)
12067 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12068 : aarch64_get_reg_u32 (cpu, rm, NO_SP));
12071 /* 64 bit conditional select. */
12073 csel64 (sim_cpu *cpu, CondCode cc)
12075 unsigned rm = INSTR (20, 16);
12076 unsigned rn = INSTR (9, 5);
12077 unsigned rd = INSTR (4, 0);
12079 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12080 testConditionCode (cpu, cc)
12081 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12082 : aarch64_get_reg_u64 (cpu, rm, NO_SP));
12085 /* 32 bit conditional increment. */
12087 csinc32 (sim_cpu *cpu, CondCode cc)
12089 unsigned rm = INSTR (20, 16);
12090 unsigned rn = INSTR (9, 5);
12091 unsigned rd = INSTR (4, 0);
12093 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12094 testConditionCode (cpu, cc)
12095 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12096 : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
12099 /* 64 bit conditional increment. */
12101 csinc64 (sim_cpu *cpu, CondCode cc)
12103 unsigned rm = INSTR (20, 16);
12104 unsigned rn = INSTR (9, 5);
12105 unsigned rd = INSTR (4, 0);
12107 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12108 testConditionCode (cpu, cc)
12109 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12110 : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
12113 /* 32 bit conditional invert. */
12115 csinv32 (sim_cpu *cpu, CondCode cc)
12117 unsigned rm = INSTR (20, 16);
12118 unsigned rn = INSTR (9, 5);
12119 unsigned rd = INSTR (4, 0);
12121 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12122 testConditionCode (cpu, cc)
12123 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12124 : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
12127 /* 64 bit conditional invert. */
12129 csinv64 (sim_cpu *cpu, CondCode cc)
12131 unsigned rm = INSTR (20, 16);
12132 unsigned rn = INSTR (9, 5);
12133 unsigned rd = INSTR (4, 0);
12135 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12136 testConditionCode (cpu, cc)
12137 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12138 : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
12141 /* 32 bit conditional negate. */
12143 csneg32 (sim_cpu *cpu, CondCode cc)
12145 unsigned rm = INSTR (20, 16);
12146 unsigned rn = INSTR (9, 5);
12147 unsigned rd = INSTR (4, 0);
12149 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12150 testConditionCode (cpu, cc)
12151 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12152 : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
12155 /* 64 bit conditional negate. */
12157 csneg64 (sim_cpu *cpu, CondCode cc)
12159 unsigned rm = INSTR (20, 16);
12160 unsigned rn = INSTR (9, 5);
12161 unsigned rd = INSTR (4, 0);
12163 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12164 testConditionCode (cpu, cc)
12165 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12166 : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
/* Decode and execute the conditional-select group: CSEL, CSINC,
   CSINV, CSNEG in 32- and 64-bit forms.  The dispatch key combines
   size:op (instr[31,30]) with op2 (instr[11,10]).
   NOTE(review): the UNALLOC guards for S and for op2 values other
   than 00/01 are not visible here -- presumably HALT_UNALLOC checks
   sit between the extractions and the switch; confirm against the
   full file.  */
12170 dexCondSelect (sim_cpu *cpu)
12172 /* instr[28,21] = 11011011
12173 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12174 instr[30:11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
12175 100 ==> CSINV, 101 ==> CSNEG,
12177 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12178 instr[15,12] = cond
12179 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC */
12181 CondCode cc = INSTR (15, 12);
12182 uint32_t S = INSTR (29, 29);
12183 uint32_t op2 = INSTR (11, 10);
/* Key: instr[31] (size) : instr[30] (op) : op2[0].  */
12191 switch ((INSTR (31, 30) << 1) | op2)
12193 case 0: csel32 (cpu, cc); return;
12194 case 1: csinc32 (cpu, cc); return;
12195 case 2: csinv32 (cpu, cc); return;
12196 case 3: csneg32 (cpu, cc); return;
12197 case 4: csel64 (cpu, cc); return;
12198 case 5: csinc64 (cpu, cc); return;
12199 case 6: csinv64 (cpu, cc); return;
12200 case 7: csneg64 (cpu, cc); return;
12204 /* Some helpers for counting leading 1 or 0 bits. */
12206 /* Counts the number of leading bits which are the same
12207 in a 32 bit value in the range 1 to 32. */
/* Binary search: [lo, hi] brackets the number of identical leading
   bits; mask always covers the top `count' bits.  The arithmetic
   right shift of the negative int32_t mask deliberately relies on
   sign extension to grow the run of leading ones (implementation-
   defined in C, but universal on supported hosts).  */
12209 leading32 (uint32_t value)
12211 int32_t mask= 0xffff0000;
12212 uint32_t count= 16; /* Counts number of bits set in mask. */
12213 uint32_t lo = 1; /* Lower bound for number of sign bits. */
12214 uint32_t hi = 32; /* Upper bound for number of sign bits. */
12216 while (lo + 1 < hi)
12218 int32_t test = (value & mask);
/* All bits under the mask identical: the run extends at least to
   `count', so raise the lower bound and widen the mask.  */
12220 if (test == 0 || test == mask)
12223 count = (lo + hi) / 2;
12224 mask >>= (count - lo);
/* Mismatch inside the masked region: lower the upper bound and
   narrow the mask.  */
12229 count = (lo + hi) / 2;
12230 mask <<= hi - count;
/* Final resolution once lo and hi are adjacent.  */
12239 test = (value & mask);
12241 if (test == 0 || test == mask)
12250 /* Counts the number of leading bits which are the same
12251 in a 64 bit value in the range 1 to 64. */
/* 64-bit analogue of leading32; identical search structure.  */
12253 leading64 (uint64_t value)
12255 int64_t mask= 0xffffffff00000000LL;
12256 uint64_t count = 32; /* Counts number of bits set in mask. */
12257 uint64_t lo = 1; /* Lower bound for number of sign bits. */
12258 uint64_t hi = 64; /* Upper bound for number of sign bits. */
12260 while (lo + 1 < hi)
12262 int64_t test = (value & mask);
12264 if (test == 0 || test == mask)
12267 count = (lo + hi) / 2;
12268 mask >>= (count - lo);
12273 count = (lo + hi) / 2;
12274 mask <<= hi - count;
12283 test = (value & mask);
12285 if (test == 0 || test == mask)
12294 /* Bit operations. */
12295 /* N.B register args may not be SP. */
12297 /* 32 bit count leading sign bits. */
/* CLS: leading32 counts the sign bit itself, so subtract one to get
   the count of sign-copies below it.  */
12299 cls32 (sim_cpu *cpu)
12301 unsigned rn = INSTR (9, 5);
12302 unsigned rd = INSTR (4, 0);
12304 /* N.B. the result needs to exclude the leading bit. */
12305 aarch64_set_reg_u64
12306 (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
12309 /* 64 bit count leading sign bits. */
12311 cls64 (sim_cpu *cpu)
12313 unsigned rn = INSTR (9, 5);
12314 unsigned rd = INSTR (4, 0);
12316 /* N.B. the result needs to exclude the leading bit. */
12317 aarch64_set_reg_u64
12318 (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
12321 /* 32 bit count leading zero bits. */
/* CLZ: when the top bit is clear, the leading identical bits are
   exactly the leading zeros, so leading32 gives the answer.  */
12323 clz32 (sim_cpu *cpu)
12325 unsigned rn = INSTR (9, 5);
12326 unsigned rd = INSTR (4, 0);
12327 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12329 /* if the sign (top) bit is set then the count is 0. */
12330 if (pick32 (value, 31, 31))
12331 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12333 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
12336 /* 64 bit count leading zero bits. */
12338 clz64 (sim_cpu *cpu)
12340 unsigned rn = INSTR (9, 5);
12341 unsigned rd = INSTR (4, 0);
12342 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12344 /* if the sign (top) bit is set then the count is 0. */
12345 if (pick64 (value, 63, 63))
12346 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12348 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
12351 /* 32 bit reverse bits. */
/* RBIT: shift the source right one bit at a time while shifting the
   accumulated result left, copying the low bit across each step.
   (The per-iteration shift statements are elided in this view.)  */
12353 rbit32 (sim_cpu *cpu)
12355 unsigned rn = INSTR (9, 5);
12356 unsigned rd = INSTR (4, 0);
12357 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12358 uint32_t result = 0;
12361 for (i = 0; i < 32; i++)
12364 result |= (value & 1);
12367 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12370 /* 64 bit reverse bits. */
12372 rbit64 (sim_cpu *cpu)
12374 unsigned rn = INSTR (9, 5);
12375 unsigned rd = INSTR (4, 0);
12376 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12377 uint64_t result = 0;
12380 for (i = 0; i < 64; i++)
12383 result |= (value & 1UL);
12386 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12389 /* 32 bit reverse bytes. */
/* REV: same scheme, a byte at a time.  */
12391 rev32 (sim_cpu *cpu)
12393 unsigned rn = INSTR (9, 5);
12394 unsigned rd = INSTR (4, 0);
12395 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12396 uint32_t result = 0;
12399 for (i = 0; i < 4; i++)
12402 result |= (value & 0xff);
12405 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12408 /* 64 bit reverse bytes. */
12410 rev64 (sim_cpu *cpu)
12412 unsigned rn = INSTR (9, 5);
12413 unsigned rd = INSTR (4, 0);
12414 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12415 uint64_t result = 0;
12418 for (i = 0; i < 8; i++)
12421 result |= (value & 0xffULL);
12424 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12427 /* 32 bit reverse shorts. */
12428 /* N.B.this reverses the order of the bytes in each half word. */
/* REV16: the 0x00ff00ff mask processes the low byte of every
   half-word in parallel per iteration.  */
12430 revh32 (sim_cpu *cpu)
12432 unsigned rn = INSTR (9, 5);
12433 unsigned rd = INSTR (4, 0);
12434 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12435 uint32_t result = 0;
12438 for (i = 0; i < 2; i++)
12441 result |= (value & 0x00ff00ff);
12444 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12447 /* 64 bit reverse shorts. */
12448 /* N.B.this reverses the order of the bytes in each half word. */
12450 revh64 (sim_cpu *cpu)
12452 unsigned rn = INSTR (9, 5);
12453 unsigned rd = INSTR (4, 0);
12454 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12455 uint64_t result = 0;
12458 for (i = 0; i < 2; i++)
12461 result |= (value & 0x00ff00ff00ff00ffULL);
12464 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
/* Decode and execute a data-processing one-source instruction:
   RBIT, REV16, REV/REV32, CLZ, CLS in 32- and 64-bit forms.
   NOTE(review): the UNALLOC guards on S and opcode2 are not visible
   in this view -- presumably elided HALT_UNALLOC checks; confirm.  */
12468 dexDataProc1Source (sim_cpu *cpu)
12471 instr[28,21] = 111010110
12472 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12473 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12474 instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
12475 instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
12476 000010 ==> REV, 000011 ==> UNALLOC
12477 000100 ==> CLZ, 000101 ==> CLS
12479 instr[9,5] = rn : may not be SP
12480 instr[4,0] = rd : may not be SP. */
12482 uint32_t S = INSTR (29, 29);
12483 uint32_t opcode2 = INSTR (20, 16);
12484 uint32_t opcode = INSTR (15, 10);
/* Key: size bit on top of the 3-bit opcode.  */
12485 uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
12498 case 0: rbit32 (cpu); return;
12499 case 1: revh32 (cpu); return;
12500 case 2: rev32 (cpu); return;
12501 case 4: clz32 (cpu); return;
12502 case 5: cls32 (cpu); return;
12503 case 8: rbit64 (cpu); return;
12504 case 9: revh64 (cpu); return;
/* 64-bit opcode 2 is REV32 (reverse bytes within each word),
   which shares the 32-bit byte-reverse implementation.  */
12505 case 10:rev32 (cpu); return;
12506 case 11:rev64 (cpu); return;
12507 case 12:clz64 (cpu); return;
12508 case 13:cls64 (cpu); return;
12509 default: HALT_UNALLOC;
12514 Shifts by count supplied in register.
12515 N.B register args may not be SP.
12516 These all use the shifted auxiliary function for
12517 simplicity and clarity. Writing the actual shift
12518 inline would avoid a branch and so be faster but
12519 would also necessitate getting signs right. */
12521 /* 32 bit arithmetic shift right. */
12523 asrv32 (sim_cpu *cpu)
12525 unsigned rm = INSTR (20, 16);
12526 unsigned rn = INSTR (9, 5);
12527 unsigned rd = INSTR (4, 0);
12529 aarch64_set_reg_u64
12531 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
12532 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12535 /* 64 bit arithmetic shift right. */
12537 asrv64 (sim_cpu *cpu)
12539 unsigned rm = INSTR (20, 16);
12540 unsigned rn = INSTR (9, 5);
12541 unsigned rd = INSTR (4, 0);
12543 aarch64_set_reg_u64
12545 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
12546 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12549 /* 32 bit logical shift left. */
12551 lslv32 (sim_cpu *cpu)
12553 unsigned rm = INSTR (20, 16);
12554 unsigned rn = INSTR (9, 5);
12555 unsigned rd = INSTR (4, 0);
12557 aarch64_set_reg_u64
12559 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
12560 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12563 /* 64 bit arithmetic shift left. */
12565 lslv64 (sim_cpu *cpu)
12567 unsigned rm = INSTR (20, 16);
12568 unsigned rn = INSTR (9, 5);
12569 unsigned rd = INSTR (4, 0);
12571 aarch64_set_reg_u64
12573 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
12574 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12577 /* 32 bit logical shift right. */
12579 lsrv32 (sim_cpu *cpu)
12581 unsigned rm = INSTR (20, 16);
12582 unsigned rn = INSTR (9, 5);
12583 unsigned rd = INSTR (4, 0);
12585 aarch64_set_reg_u64
12587 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
12588 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12591 /* 64 bit logical shift right. */
12593 lsrv64 (sim_cpu *cpu)
12595 unsigned rm = INSTR (20, 16);
12596 unsigned rn = INSTR (9, 5);
12597 unsigned rd = INSTR (4, 0);
12599 aarch64_set_reg_u64
12601 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
12602 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12605 /* 32 bit rotate right. */
12607 rorv32 (sim_cpu *cpu)
12609 unsigned rm = INSTR (20, 16);
12610 unsigned rn = INSTR (9, 5);
12611 unsigned rd = INSTR (4, 0);
12613 aarch64_set_reg_u64
12615 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
12616 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12619 /* 64 bit rotate right. */
12621 rorv64 (sim_cpu *cpu)
12623 unsigned rm = INSTR (20, 16);
12624 unsigned rn = INSTR (9, 5);
12625 unsigned rd = INSTR (4, 0);
12627 aarch64_set_reg_u64
12629 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
12630 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12636 /* 32 bit signed divide. */
12638 cpuiv32 (sim_cpu *cpu)
12640 unsigned rm = INSTR (20, 16);
12641 unsigned rn = INSTR (9, 5);
12642 unsigned rd = INSTR (4, 0);
12643 /* N.B. the pseudo-code does the divide using 64 bit data. */
12644 /* TODO : check that this rounds towards zero as required. */
12645 int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
12646 int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);
12648 aarch64_set_reg_s64 (cpu, rd, NO_SP,
12649 divisor ? ((int32_t) (dividend / divisor)) : 0);
12652 /* 64 bit signed divide. */
12654 cpuiv64 (sim_cpu *cpu)
12656 unsigned rm = INSTR (20, 16);
12657 unsigned rn = INSTR (9, 5);
12658 unsigned rd = INSTR (4, 0);
12660 /* TODO : check that this rounds towards zero as required. */
12661 int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
12663 aarch64_set_reg_s64
12665 divisor ? (aarch64_get_reg_s64 (cpu, rn, NO_SP) / divisor) : 0);
12668 /* 32 bit unsigned divide. */
12670 udiv32 (sim_cpu *cpu)
12672 unsigned rm = INSTR (20, 16);
12673 unsigned rn = INSTR (9, 5);
12674 unsigned rd = INSTR (4, 0);
12676 /* N.B. the pseudo-code does the divide using 64 bit data. */
12677 uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12678 uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);
12680 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12681 divisor ? (uint32_t) (dividend / divisor) : 0);
12684 /* 64 bit unsigned divide. */
12686 udiv64 (sim_cpu *cpu)
12688 unsigned rm = INSTR (20, 16);
12689 unsigned rn = INSTR (9, 5);
12690 unsigned rd = INSTR (4, 0);
12692 /* TODO : check that this rounds towards zero as required. */
12693 uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12695 aarch64_set_reg_u64
12697 divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
/* Decode and execute a data-processing two-source instruction:
   UDIV, SDIV (here named CPUIV), and the variable shifts
   LSLV/LSRV/ASRV/RORV, in 32- and 64-bit forms.
   NOTE(review): UNALLOC guards for S and the remaining opcode bits
   are not visible in this view; confirm against the full file.  */
12701 dexDataProc2Source (sim_cpu *cpu)
12703 /* assert instr[30] == 0
12704 instr[28,21] == 11010110
12705 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12706 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12707 instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> CPUIV,
12708 001000 ==> LSLV, 001001 ==> LSRV
12709 001010 ==> ASRV, 001011 ==> RORV
12713 uint32_t S = INSTR (29, 29);
12714 uint32_t opcode = INSTR (15, 10);
/* Key: size : opcode[3] : opcode[1,0] -- compresses the sparse
   opcode space into a dense 4-bit switch value.  */
12722 dispatch = ( (INSTR (31, 31) << 3)
12723 | (uimm (opcode, 3, 3) << 2)
12724 | uimm (opcode, 1, 0));
12727 case 2: udiv32 (cpu); return;
12728 case 3: cpuiv32 (cpu); return;
12729 case 4: lslv32 (cpu); return;
12730 case 5: lsrv32 (cpu); return;
12731 case 6: asrv32 (cpu); return;
12732 case 7: rorv32 (cpu); return;
12733 case 10: udiv64 (cpu); return;
12734 case 11: cpuiv64 (cpu); return;
12735 case 12: lslv64 (cpu); return;
12736 case 13: lsrv64 (cpu); return;
12737 case 14: asrv64 (cpu); return;
12738 case 15: rorv64 (cpu); return;
12739 default: HALT_UNALLOC;
12746 /* 32 bit multiply and add. */
/* MADD (W-form): Rd = Ra + Rn * Rm, computed in 32-bit unsigned
   arithmetic (wrapping), then zero-extended into the X register.  */
12748 madd32 (sim_cpu *cpu)
12750 unsigned rm = INSTR (20, 16);
12751 unsigned ra = INSTR (14, 10);
12752 unsigned rn = INSTR (9, 5);
12753 unsigned rd = INSTR (4, 0);
12755 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12756 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12757 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12758 + aarch64_get_reg_u32 (cpu, rn, NO_SP)
12759 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12762 /* 64 bit multiply and add. */
/* MADD (X-form): Rd = Ra + Rn * Rm modulo 2^64.  */
12764 madd64 (sim_cpu *cpu)
12766 unsigned rm = INSTR (20, 16);
12767 unsigned ra = INSTR (14, 10);
12768 unsigned rn = INSTR (9, 5);
12769 unsigned rd = INSTR (4, 0);
12771 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12772 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12773 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12774 + (aarch64_get_reg_u64 (cpu, rn, NO_SP)
12775 * aarch64_get_reg_u64 (cpu, rm, NO_SP)));
12778 /* 32 bit multiply and sub. */
/* MSUB (W-form): Rd = Ra - Rn * Rm; `*' binds tighter than `-'.  */
12780 msub32 (sim_cpu *cpu)
12782 unsigned rm = INSTR (20, 16);
12783 unsigned ra = INSTR (14, 10);
12784 unsigned rn = INSTR (9, 5);
12785 unsigned rd = INSTR (4, 0);
12787 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12788 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12789 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12790 - aarch64_get_reg_u32 (cpu, rn, NO_SP)
12791 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12794 /* 64 bit multiply and sub. */
12796 msub64 (sim_cpu *cpu)
12798 unsigned rm = INSTR (20, 16);
12799 unsigned ra = INSTR (14, 10);
12800 unsigned rn = INSTR (9, 5);
12801 unsigned rd = INSTR (4, 0);
12803 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12804 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12805 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12806 - aarch64_get_reg_u64 (cpu, rn, NO_SP)
12807 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
12810 /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit. */
/* SMADDL: Rd = Ra + sext(Wn) * sext(Wm); the int64_t casts force a
   full 64-bit product of the sign-extended 32-bit operands.  */
12812 smaddl (sim_cpu *cpu)
12814 unsigned rm = INSTR (20, 16);
12815 unsigned ra = INSTR (14, 10);
12816 unsigned rn = INSTR (9, 5);
12817 unsigned rd = INSTR (4, 0);
12819 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12820 obtain a 64 bit product. */
12821 aarch64_set_reg_s64
12823 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12824 + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12825 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12828 /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
/* SMSUBL: Rd = Ra - sext(Wn) * sext(Wm).  */
12830 smsubl (sim_cpu *cpu)
12832 unsigned rm = INSTR (20, 16);
12833 unsigned ra = INSTR (14, 10);
12834 unsigned rn = INSTR (9, 5);
12835 unsigned rd = INSTR (4, 0);
12837 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12838 obtain a 64 bit product. */
12839 aarch64_set_reg_s64
12841 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12842 - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12843 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
/* Integer Multiply/Divide.  */

/* First some macros and a helper function.  */
/* Macros to test or access elements of 64 bit words.  */

/* Mask used to access lo 32 bits of 64 bit unsigned int.  */
#define LOW_WORD_MASK ((1ULL << 32) - 1)
/* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int.  */
#define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
/* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int.  */
#define highWordToU64(_value_u64) ((_value_u64) >> 32)

/* Offset of sign bit in 64 bit signed integer.  */
#define SIGN_SHIFT_U64 63
/* The sign bit itself -- also identifies the minimum negative int value.
   Must use ULL: a plain 1UL is only 32 bits wide on ILP32 hosts, where
   shifting it by 63 would be undefined behaviour.  */
#define SIGN_BIT_U64 (1ULL << SIGN_SHIFT_U64)
/* Return true if a 64 bit signed int presented as an unsigned int is the
   most negative value.  */
#define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
/* Return true (non-zero) if a 64 bit signed int presented as an unsigned
   int has its sign bit set.  */
#define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
/* Return 1L or -1L according to whether a 64 bit signed int presented as
   an unsigned int has its sign bit clear or set.
   (The original expansion referenced "value_u64" -- missing the leading
   underscore of the macro parameter -- and lacked a closing parenthesis,
   so any use of it failed to compile.)  */
#define signOfU64(_value_u64) (1L + (((_value_u64) >> SIGN_SHIFT_U64) * -2L))
/* Clear the sign bit of a 64 bit signed int presented as an unsigned int.  */
#define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
/* Multiply two 64 bit ints and return
   the hi 64 bits of the 128 bit product.

   Schoolbook long multiplication on 32-bit half-words:
     product = lo*lo + (lo*hi + hi*lo) << 32 + hi*hi << 64.  */

static uint64_t
mul64hi (uint64_t value1, uint64_t value2)
{
  uint64_t resultmid1;
  uint64_t result;
  uint64_t value1_lo = value1 & 0xffffffffULL;
  uint64_t value1_hi = value1 >> 32;
  uint64_t value2_lo = value2 & 0xffffffffULL;
  uint64_t value2_hi = value2 >> 32;

  /* Cross-multiply and collect results.  */
  uint64_t xproductlo = value1_lo * value2_lo;
  uint64_t xproductmid1 = value1_lo * value2_hi;
  uint64_t xproductmid2 = value1_hi * value2_lo;
  uint64_t xproducthi = value1_hi * value2_hi;
  uint64_t carry = 0;
  /* Start accumulating 64 bit results.  */
  /* Drop bottom half of lowest cross-product.  */
  uint64_t resultmid = xproductlo >> 32;
  /* Add in middle products.  */
  resultmid = resultmid + xproductmid1;

  /* Check for overflow.  */
  if (resultmid < xproductmid1)
    /* Carry over 1 into top cross-product.  */
    carry++;

  resultmid1 = resultmid + xproductmid2;

  /* Check for overflow.  */
  if (resultmid1 < xproductmid2)
    /* Carry over 1 into top cross-product.  */
    carry++;

  /* Drop lowest 32 bits of middle cross-product.  */
  result = resultmid1 >> 32;

  /* The carry out of the 64-bit middle accumulation has weight 2^96
     in the full 128-bit product, i.e. weight 2^32 in the high half.
     Adding the raw carry (weight 2^64) under-counted it by a factor
     of 2^32, giving a wrong high word whenever the middle sum
     overflowed.  Move it to bit 32 before adding.  */
  carry <<= 32;

  /* Add top cross-product plus and any carry.  */
  result += xproducthi + carry;

  return result;
}
12920 /* Signed multiply high, source, source2 :
12921 64 bit, dest <-- high 64-bit of result. */
/* SMULH: computes the high half of the signed 128-bit product of
   Xn and Xm via the unsigned mul64hi helper.
   NOTE(review): the absolute-value conversion, the sign fix-up of
   the high half, and the check that the Ra field is 31 are elided
   in this view -- confirm against the full file.  */
12923 smulh (sim_cpu *cpu)
12927 unsigned rm = INSTR (20, 16);
12928 unsigned rn = INSTR (9, 5);
12929 unsigned rd = INSTR (4, 0);
/* SMULH has no accumulator; the Ra field must encode register 31.  */
12930 GReg ra = INSTR (14, 10);
12931 int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12932 int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12935 int64_t signum = 1;
12940 /* Convert to unsigned and use the unsigned mul64hi routine
12941 the fix the sign up afterwards. */
12962 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12963 uresult = mul64hi (uvalue1, uvalue2);
12967 aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
12970 /* Unsigned multiply add long -- source, source2 :
12971 32 bit, source3 : 64 bit. */
/* UMADDL: Rd = Ra + zext(Wn) * zext(Wm); the uint64_t casts force a
   full 64-bit product of the zero-extended 32-bit operands.  */
12973 umaddl (sim_cpu *cpu)
12975 unsigned rm = INSTR (20, 16);
12976 unsigned ra = INSTR (14, 10);
12977 unsigned rn = INSTR (9, 5);
12978 unsigned rd = INSTR (4, 0);
12980 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12981 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
12982 obtain a 64 bit product. */
12983 aarch64_set_reg_u64
12985 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12986 + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
12987 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
12990 /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
/* UMSUBL: Rd = Ra - zext(Wn) * zext(Wm).  */
12992 umsubl (sim_cpu *cpu)
12994 unsigned rm = INSTR (20, 16);
12995 unsigned ra = INSTR (14, 10);
12996 unsigned rn = INSTR (9, 5);
12997 unsigned rd = INSTR (4, 0);
12999 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13000 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13001 obtain a 64 bit product. */
13002 aarch64_set_reg_u64
13004 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13005 - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13006 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13009 /* Unsigned multiply high, source, source2 :
13010 64 bit, dest <-- high 64-bit of result. */
/* UMULH: Rd = high 64 bits of zext(Xn) * zext(Xm), via mul64hi.  */
13012 umulh (sim_cpu *cpu)
13014 unsigned rm = INSTR (20, 16);
13015 unsigned rn = INSTR (9, 5);
13016 unsigned rd = INSTR (4, 0);
/* UMULH has no accumulator; Ra is extracted so that it can be
   checked -- presumably a HALT_UNALLOC guard for ra != 31 sits in
   the elided lines; confirm against the full file.  */
13017 GReg ra = INSTR (14, 10);
13022 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13023 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13024 mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
13025 aarch64_get_reg_u64 (cpu, rm, NO_SP)));
/* Decode and execute a data-processing three-source instruction:
   MADD/MSUB (32/64 bit) and the 64-bit-only long/high multiplies.
   NOTE(review): the UNALLOC checks on op54/op31/size and the 32-bit
   madd32/msub32 dispatch path are elided in this view; the visible
   switch covers only the 64-bit forms.  */
13029 dexDataProc3Source (sim_cpu *cpu)
13031 /* assert instr[28,24] == 11011. */
13032 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
13033 instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
13034 instr[23,21] = op31 : 111 ==> UNALLOC, o2 ==> ok
13035 instr[15] = o0 : 0/1 ==> ok
13036 instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB, (32/64 bit)
13037 0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
13038 0100 ==> SMULH, (64 bit only)
13039 1010 ==> UMADDL, 1011 ==> UMSUBL, (64 bit only)
13040 1100 ==> UMULH (64 bit only)
13044 uint32_t size = INSTR (31, 31);
13045 uint32_t op54 = INSTR (30, 29);
13046 uint32_t op31 = INSTR (23, 21);
13047 uint32_t o0 = INSTR (15, 15);
/* Key: op31 on top of o0, per the table above.  */
13064 dispatch = (op31 << 1) | o0;
13068 case 0: madd64 (cpu); return;
13069 case 1: msub64 (cpu); return;
13070 case 2: smaddl (cpu); return;
13071 case 3: smsubl (cpu); return;
13072 case 4: smulh (cpu); return;
13073 case 10: umaddl (cpu); return;
13074 case 11: umsubl (cpu); return;
13075 case 12: umulh (cpu); return;
13076 default: HALT_UNALLOC;
/* Top-level dispatcher for the Data Processing Register group.
   Routes on the secondary dispatch bits to the logical, add/sub,
   conditional and 1/2/3-source sub-decoders.  */
13081 dexDPReg (sim_cpu *cpu)
13083 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13084 assert group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
13085 bits [28:24:21] of a DPReg are the secondary dispatch vector. */
13086 uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
13090 case DPREG_LOG_000:
13091 case DPREG_LOG_001:
13092 dexLogicalShiftedRegister (cpu); return;
13094 case DPREG_ADDSHF_010:
13095 dexAddSubtractShiftedRegister (cpu); return;
13097 case DPREG_ADDEXT_011:
13098 dexAddSubtractExtendedRegister (cpu); return;
13100 case DPREG_ADDCOND_100:
13102 /* This set bundles a variety of different operations. */
/* The four masks are identical (bits [28,21]); only the match
   values differ, selecting the sub-group in bits [23,22].  */
13104 /* 1) add/sub w carry. */
13105 uint32_t mask1 = 0x1FE00000U;
13106 uint32_t val1 = 0x1A000000U;
13107 /* 2) cond compare register/immediate. */
13108 uint32_t mask2 = 0x1FE00000U;
13109 uint32_t val2 = 0x1A400000U;
13110 /* 3) cond select. */
13111 uint32_t mask3 = 0x1FE00000U;
13112 uint32_t val3 = 0x1A800000U;
13113 /* 4) data proc 1/2 source. */
13114 uint32_t mask4 = 0x1FE00000U;
13115 uint32_t val4 = 0x1AC00000U;
13117 if ((aarch64_get_instr (cpu) & mask1) == val1)
13118 dexAddSubtractWithCarry (cpu);
13120 else if ((aarch64_get_instr (cpu) & mask2) == val2)
13123 else if ((aarch64_get_instr (cpu) & mask3) == val3)
13124 dexCondSelect (cpu);
13126 else if ((aarch64_get_instr (cpu) & mask4) == val4)
13128 /* Bit 30 is clear for data proc 2 source
13129 and set for data proc 1 source. */
13130 if (aarch64_get_instr (cpu) & (1U << 30))
13131 dexDataProc1Source (cpu);
13133 dexDataProc2Source (cpu);
13137 /* Should not reach here. */
13143 case DPREG_3SRC_110:
13144 dexDataProc3Source (cpu); return;
13146 case DPREG_UNALLOC_101:
13149 case DPREG_3SRC_111:
13150 dexDataProc3Source (cpu); return;
13153 /* Should never reach here. */
13158 /* Unconditional Branch immediate.
13159 Offset is a PC-relative byte offset in the range +/- 128MiB.
13160 The offset is assumed to be raw from the decode i.e. the
13161 simulator is expected to scale them from word offsets to byte. */
13163 /* Unconditional branch. */
/* buc: branch unconditionally by a PC-relative byte OFFSET
   (already scaled from the instruction's word offset).  */
13165 buc (sim_cpu *cpu, int32_t offset)
13167 aarch64_set_next_PC_by_offset (cpu, offset);
/* Call-depth counter used only to indent branch-trace output.  */
13170 static unsigned stack_depth = 0;
13172 /* Unconditional branch and link -- writes return PC to LR. */
/* bl: save the return address into LR, then branch by OFFSET.
   When branch tracing is enabled, also log the callee address,
   its symbolic name and the first three argument registers.  */
13174 bl (sim_cpu *cpu, int32_t offset)
13176 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13177 aarch64_save_LR (cpu);
13178 aarch64_set_next_PC_by_offset (cpu, offset);
13180 if (TRACE_BRANCH_P (cpu))
13184 " %*scall %" PRIx64 " [%s]"
13185 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13186 stack_depth, " ", aarch64_get_next_PC (cpu),
13187 aarch64_get_func (CPU_STATE (cpu),
13188 aarch64_get_next_PC (cpu)),
13189 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13190 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13191 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13196 /* Unconditional Branch register.
13197 Branch/return address is in source register. */
13199 /* Unconditional branch. */
/* br: jump to the address held in Xn (instr[9,5]).
   NOTE(review): the function header line is elided in this listing.  */
13203 unsigned rn = INSTR (9, 5);
13204 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13205 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13208 /* Unconditional branch and link -- writes return PC to LR. */
/* blr: save return address to LR, then jump to the address in Xn.
   LR is written before Xn is read, as the architecture pseudo-code
   requires (so BLR LR behaves correctly).  */
13212 unsigned rn = INSTR (9, 5);
13214 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13215 /* The pseudo code in the spec says we update LR before fetching.
13216 the value from the rn. */
13217 aarch64_save_LR (cpu);
13218 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13220 if (TRACE_BRANCH_P (cpu))
13224 " %*scall %" PRIx64 " [%s]"
13225 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13226 stack_depth, " ", aarch64_get_next_PC (cpu),
13227 aarch64_get_func (CPU_STATE (cpu),
13228 aarch64_get_next_PC (cpu)),
13229 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13230 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13231 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13236 /* Return -- assembler will default source to LR this is functionally
13237 equivalent to br but, presumably, unlike br it side effects the
13238 branch predictor. */
/* ret: jump to the address in Xn; traces the value of X0 (the usual
   return-value register) when branch tracing is on.  */
13242 unsigned rn = INSTR (9, 5);
13243 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13245 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13246 if (TRACE_BRANCH_P (cpu))
13249 " %*sreturn [result: %" PRIx64 "]",
13250 stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
13255 /* NOP -- we implement this and call it from the decode in case we
13256 want to intercept it later. */
13261 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13264 /* Data synchronization barrier. */
/* dsb/dmb/isb: barriers are no-ops in this single-threaded simulator;
   each body only emits a decode trace.  */
13269 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13272 /* Data memory barrier. */
13277 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13280 /* Instruction synchronization barrier. */
13285 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
/* Decode B / BL (unconditional branch immediate) and dispatch to
   buc or bl with the offset already scaled to bytes.  */
13289 dexBranchImmediate (sim_cpu *cpu)
13291 /* assert instr[30,26] == 00101
13292 instr[31] ==> 0 == B, 1 == BL
13293 instr[25,0] == imm26 branch offset counted in words. */
13295 uint32_t top = INSTR (31, 31);
13296 /* We have a 26 bit signed word offset which we need to pass to the
13297 execute routine as a signed byte offset. */
13298 int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
13306 /* Control Flow. */
13308 /* Conditional branch
13310 Offset is a PC-relative byte offset in the range +/- 1MiB pos is
13311 a bit position in the range 0 .. 63
13313 cc is a CondCode enum value as pulled out of the decode
13315 N.B. any offset register (source) can only be Xn or Wn. */
/* bcc: take the branch only when condition CC holds in the
   current flags.  */
13318 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
13320 /* The test returns TRUE if CC is met. */
13321 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13322 if (testConditionCode (cpu, cc))
13323 aarch64_set_next_PC_by_offset (cpu, offset);
13326 /* 32 bit branch on register non-zero. */
13328 cbnz32 (sim_cpu *cpu, int32_t offset)
13330 unsigned rt = INSTR (4, 0);
13332 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13333 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
13334 aarch64_set_next_PC_by_offset (cpu, offset);
13337 /* 64 bit branch on register non-zero. */
13339 cbnz (sim_cpu *cpu, int32_t offset)
13341 unsigned rt = INSTR (4, 0);
13343 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13344 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
13345 aarch64_set_next_PC_by_offset (cpu, offset);
13348 /* 32 bit branch on register zero. */
13350 cbz32 (sim_cpu *cpu, int32_t offset)
13352 unsigned rt = INSTR (4, 0);
13354 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13355 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
13356 aarch64_set_next_PC_by_offset (cpu, offset);
13359 /* 64 bit branch on register zero. */
13361 cbz (sim_cpu *cpu, int32_t offset)
13363 unsigned rt = INSTR (4, 0);
13365 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13366 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
13367 aarch64_set_next_PC_by_offset (cpu, offset);
13370 /* Branch on register bit test non-zero -- one size fits all. */
/* tbnz: branch when bit POS of Xt is set; POS may be 0..63 so the
   probe mask is built in 64 bits.  */
13372 tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13374 unsigned rt = INSTR (4, 0);
13376 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13377 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
13378 aarch64_set_next_PC_by_offset (cpu, offset);
13381 /* Branch on register bit test zero -- one size fits all. */
13383 tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13385 unsigned rt = INSTR (4, 0);
13387 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13388 if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
13389 aarch64_set_next_PC_by_offset (cpu, offset);
/* Decode CBZ/CBNZ (compare-and-branch immediate): size bit selects
   the 32- vs 64-bit variant, op bit selects zero vs non-zero.
   NOTE(review): listing is line-sampled; the cbz (64-bit) dispatch arm
   falls in an elided line.  */
13393 dexCompareBranchImmediate (sim_cpu *cpu)
13395 /* instr[30,25] = 01 1010
13396 instr[31] = size : 0 ==> 32, 1 ==> 64
13397 instr[24] = op : 0 ==> CBZ, 1 ==> CBNZ
13398 instr[23,5] = simm19 branch offset counted in words
13401 uint32_t size = INSTR (31, 31);
13402 uint32_t op = INSTR (24, 24);
13403 int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13408 cbz32 (cpu, offset);
13410 cbnz32 (cpu, offset);
13417 cbnz (cpu, offset);
/* Decode TBZ/TBNZ (test-bit-and-branch): the tested bit index is
   b5:b40 (instr[31] and instr[23,19]), offset is simm14 words.  */
13422 dexTestBranchImmediate (sim_cpu *cpu)
13424 /* instr[31] = b5 : bit 5 of test bit idx
13425 instr[30,25] = 01 1011
13426 instr[24] = op : 0 ==> TBZ, 1 == TBNZ
13427 instr[23,19] = b40 : bits 4 to 0 of test bit idx
13428 instr[18,5] = simm14 : signed offset counted in words
13429 instr[4,0] = uimm5 */
13431 uint32_t pos = ((INSTR (31, 31) << 5) | INSTR (23, 19));
13432 int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
13434 NYI_assert (30, 25, 0x1b);
13436 if (INSTR (24, 24) == 0)
13437 tbz (cpu, pos, offset);
13439 tbnz (cpu, pos, offset);
/* Decode B.cond: op must be 0 (instr[24] and instr[4] clear);
   condition comes from instr[3,0], offset from simm19 words.  */
13443 dexCondBranchImmediate (sim_cpu *cpu)
13445 /* instr[31,25] = 010 1010
13446 instr[24] = op1; op => 00 ==> B.cond
13447 instr[23,5] = simm19 : signed offset counted in words
13449 instr[3,0] = cond */
13452 uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
13454 NYI_assert (31, 25, 0x2a);
13459 offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13461 bcc (cpu, offset, INSTR (3, 0));
/* Decode BR/BLR/RET (and reject ERET/DRPS): the fixed op2/op3/op4
   fields must hold their mandated values or the encoding is
   unallocated.
   NOTE(review): listing is line-sampled; the switch on OP and its
   branch arms fall in elided lines.  */
13465 dexBranchRegister (sim_cpu *cpu)
13467 /* instr[31,25] = 110 1011
13468 instr[24,21] = op : 0 ==> BR, 1 => BLR, 2 => RET, 3 => ERET, 4 => DRPS
13469 instr[20,16] = op2 : must be 11111
13470 instr[15,10] = op3 : must be 000000
13471 instr[4,0] = op2 : must be 11111. */
13473 uint32_t op = INSTR (24, 21);
13474 uint32_t op2 = INSTR (20, 16);
13475 uint32_t op3 = INSTR (15, 10);
13476 uint32_t op4 = INSTR (4, 0);
13478 NYI_assert (31, 25, 0x6b);
13480 if (op2 != 0x1F || op3 != 0 || op4 != 0)
13494 /* ERET and DRPS accept 0b11111 for rn = instr [4,0]. */
13495 /* anything else is unallocated. */
13496 uint32_t rn = INSTR (4, 0);
13501 if (op == 4 || op == 5)
13508 /* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
13509 but this may not be available. So instead we define the values we need
13511 #define AngelSVC_Reason_Open 0x01
13512 #define AngelSVC_Reason_Close 0x02
13513 #define AngelSVC_Reason_Write 0x05
13514 #define AngelSVC_Reason_Read 0x06
13515 #define AngelSVC_Reason_IsTTY 0x09
13516 #define AngelSVC_Reason_Seek 0x0A
13517 #define AngelSVC_Reason_FLen 0x0C
13518 #define AngelSVC_Reason_Remove 0x0E
13519 #define AngelSVC_Reason_Rename 0x0F
13520 #define AngelSVC_Reason_Clock 0x10
13521 #define AngelSVC_Reason_Time 0x11
13522 #define AngelSVC_Reason_System 0x12
13523 #define AngelSVC_Reason_Errno 0x13
13524 #define AngelSVC_Reason_GetCmdLine 0x15
13525 #define AngelSVC_Reason_HeapInfo 0x16
13526 #define AngelSVC_Reason_ReportException 0x18
13527 #define AngelSVC_Reason_Elapsed 0x30
/* Handle a HLT instruction.  VAL is the HLT immediate: the Angel
   semihosting immediate selects a host service dispatched on the
   reason code in W0; any other immediate stops the simulator with
   SIGTRAP.  The service result is written back to X0 at the end.
   NOTE(review): listing is line-sampled; the test on VAL against the
   Angel immediate and several case bodies fall in elided lines.  */
13531 handle_halt (sim_cpu *cpu, uint32_t val)
13533 uint64_t result = 0;
13535 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13538 TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
13539 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13540 sim_stopped, SIM_SIGTRAP);
13543 /* We have encountered an Angel SVC call. See if we can process it. */
13544 switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
13546 case AngelSVC_Reason_HeapInfo:
13548 /* Get the values. */
13549 uint64_t stack_top = aarch64_get_stack_start (cpu);
13550 uint64_t heap_base = aarch64_get_heap_start (cpu);
13552 /* Get the pointer */
13553 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13554 ptr = aarch64_get_mem_u64 (cpu, ptr);
13556 /* Fill in the memory block. */
13557 /* Start addr of heap. */
13558 aarch64_set_mem_u64 (cpu, ptr + 0, heap_base);
13559 /* End addr of heap. */
13560 aarch64_set_mem_u64 (cpu, ptr + 8, stack_top);
13561 /* Lowest stack addr. */
13562 aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
13563 /* Initial stack addr. */
13564 aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);
13566 TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
13570 case AngelSVC_Reason_Open:
13572 /* Get the pointer */
13573 /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);. */
13574 /* FIXME: For now we just assume that we will only be asked
13575 to open the standard file descriptors. */
/* NOTE(review): the code that assigns the returned descriptor is in
   an elided line; only the trace of (fd - 1) is visible here.  */
13579 TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
13583 case AngelSVC_Reason_Close:
13585 uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13586 TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
13591 case AngelSVC_Reason_Errno:
13593 TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
13596 case AngelSVC_Reason_Clock:
/* Scale clock () to centiseconds, avoiding overflow when
   CLOCKS_PER_SEC is large.  */
13598 #ifdef CLOCKS_PER_SEC
13599 (CLOCKS_PER_SEC >= 100)
13600 ? (clock () / (CLOCKS_PER_SEC / 100))
13601 : ((clock () * 100) / CLOCKS_PER_SEC)
13603 /* Presume unix... clock() returns microseconds. */
13607 TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
13610 case AngelSVC_Reason_GetCmdLine:
13612 /* Get the pointer */
13613 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13614 ptr = aarch64_get_mem_u64 (cpu, ptr);
13616 /* FIXME: No command line for now. */
13617 aarch64_set_mem_u64 (cpu, ptr, 0);
13618 TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
13622 case AngelSVC_Reason_IsTTY:
13624 TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
13627 case AngelSVC_Reason_Write:
13629 /* Get the pointer */
13630 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13631 /* Get the write control block. */
13632 uint64_t fd = aarch64_get_mem_u64 (cpu, ptr);
13633 uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
13634 uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);
13636 TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
13637 PRIx64 " on descriptor %" PRIx64,
/* Sanity-check the length, then emit to stdout or stderr depending
   on the descriptor; any other descriptor aborts the simulation.  */
13642 TRACE_SYSCALL (cpu,
13643 " AngelSVC: Write: Suspiciously long write: %ld",
13645 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13646 sim_stopped, SIM_SIGBUS);
13650 printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
13654 TRACE (cpu, 0, "\n");
13655 sim_io_eprintf (CPU_STATE (cpu), "%.*s",
13656 (int) len, aarch64_get_mem_ptr (cpu, buf));
13657 TRACE (cpu, 0, "\n");
13661 TRACE_SYSCALL (cpu,
13662 " AngelSVC: Write: Unexpected file handle: %d",
13664 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13665 sim_stopped, SIM_SIGABRT);
13670 case AngelSVC_Reason_ReportException:
13672 /* Get the pointer */
13673 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13674 /*ptr = aarch64_get_mem_u64 (cpu, ptr);. */
13675 uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
13676 uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);
13678 TRACE_SYSCALL (cpu,
13679 "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
/* 0x20026 is ADP_Stopped_ApplicationExit: a clean exit whose status
   is in STATE; everything else stops the simulator with SIGINT.  */
13682 if (type == 0x20026)
13683 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13684 sim_exited, state);
13686 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13687 sim_stopped, SIM_SIGINT);
13691 case AngelSVC_Reason_Read:
13692 case AngelSVC_Reason_FLen:
13693 case AngelSVC_Reason_Seek:
13694 case AngelSVC_Reason_Remove:
13695 case AngelSVC_Reason_Time:
13696 case AngelSVC_Reason_System:
13697 case AngelSVC_Reason_Rename:
13698 case AngelSVC_Reason_Elapsed:
/* Unimplemented Angel services fall through to a SIGTRAP stop.  */
13700 TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
13701 aarch64_get_reg_u32 (cpu, 0, NO_SP));
13702 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13703 sim_stopped, SIM_SIGTRAP);
13706 aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
/* Decode the exception-generation group (SVC/HVC/SMC/BRK/HLT/DCPS).
   Only BRK and HLT are implemented: BRK exits the simulation with the
   value in R0, HLT is forwarded to handle_halt (Angel semihosting).
   NOTE(review): listing is line-sampled; the extraction of LL
   (instr[1,0]) and the opc2 validity check fall in elided lines.  */
13710 dexExcpnGen (sim_cpu *cpu)
13712 /* instr[31:24] = 11010100
13713 instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
13714 010 ==> HLT, 101 ==> DBG GEN EXCPN
13715 instr[20,5] = imm16
13716 instr[4,2] = opc2 000 ==> OK, ow ==> UNALLOC
13717 instr[1,0] = LL : discriminates opc */
13719 uint32_t opc = INSTR (23, 21);
13720 uint32_t imm16 = INSTR (20, 5);
13721 uint32_t opc2 = INSTR (4, 2);
13724 NYI_assert (31, 24, 0xd4);
13731 /* We only implement HLT and BRK for now. */
13732 if (opc == 1 && LL == 0)
13734 TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
13735 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13736 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13739 if (opc == 2 && LL == 0)
13740 handle_halt (cpu, imm16);
13742 else if (opc == 0 || opc == 5)
13749 /* Stub for accessing system registers. */
/* system_get: return the value of the system register selected by
   (op0, op1, crn, crm, op2) for MRS.  Only the registers the C
   runtime actually touches are modelled: DCZID_EL0, CTR_EL0,
   TPIDR_EL0, FPCR, FPSR and the CPSR/NZCV view.  */
13752 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13753 unsigned crm, unsigned op2)
13755 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
13756 /* DCZID_EL0 - the Data Cache Zero ID register.
13757 We do not support DC ZVA at the moment, so
13758 we return a value with the disable bit set.
13759 We implement support for the DCZID register since
13760 it is used by the C library's memset function. */
13761 return ((uint64_t) 1) << 4;
13763 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
13764 /* Cache Type Register. */
13765 return 0x80008000UL;
13767 if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
13768 /* TPIDR_EL0 - thread pointer id. */
13769 return aarch64_get_thread_id (cpu)
13771 if (op1 == 3 && crm == 4 && op2 == 0)
13772 return aarch64_get_FPCR (cpu);
13774 if (op1 == 3 && crm == 4 && op2 == 1)
13775 return aarch64_get_FPSR (cpu);
13777 else if (op1 == 3 && crm == 2 && op2 == 0)
13778 return aarch64_get_CPSR (cpu);
/* system_set: counterpart of system_get for MSR; writes VAL to FPCR,
   FPSR or the CPSR/NZCV view.  Unknown selectors are handled in
   elided lines.  */
13784 system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13785 unsigned crm, unsigned op2, uint64_t val)
13787 if (op1 == 3 && crm == 4 && op2 == 0)
13788 aarch64_set_FPCR (cpu, val);
13790 else if (op1 == 3 && crm == 4 && op2 == 1)
13791 aarch64_set_FPSR (cpu, val);
13793 else if (op1 == 3 && crm == 2 && op2 == 0)
13794 aarch64_set_CPSR (cpu, val);
/* do_mrs: MRS Xt, <sysreg> -- read the selected system register via
   system_get and store it in Xt.  Op0 is encoded as instr[19] + 2
   (only op0 values 2 and 3 are expressible here).  */
13801 do_mrs (sim_cpu *cpu)
13803 /* instr[31:20] = 1101 0101 0001 1
13810 unsigned sys_op0 = INSTR (19, 19) + 2;
13811 unsigned sys_op1 = INSTR (18, 16);
13812 unsigned sys_crn = INSTR (15, 12);
13813 unsigned sys_crm = INSTR (11, 8);
13814 unsigned sys_op2 = INSTR (7, 5);
13815 unsigned rt = INSTR (4, 0);
13817 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13818 aarch64_set_reg_u64 (cpu, rt, NO_SP,
13819 system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2));
/* do_MSR_immediate: MSR (immediate) -- PSTATE field writes.  All
   targets (SPSel, DAIFSet, DAIFClr) are currently unimplemented and
   halt with NYI.  */
13823 do_MSR_immediate (sim_cpu *cpu)
13825 /* instr[31:19] = 1101 0101 0000 0
13827 instr[15,12] = 0100
13830 instr[4,0] = 1 1111 */
13832 unsigned op1 = INSTR (18, 16);
13833 /*unsigned crm = INSTR (11, 8);*/
13834 unsigned op2 = INSTR (7, 5);
13836 NYI_assert (31, 19, 0x1AA0);
13837 NYI_assert (15, 12, 0x4);
13838 NYI_assert (4, 0, 0x1F);
13843 HALT_NYI; /* set SPSel. */
13850 HALT_NYI; /* set DAIFset. */
13852 HALT_NYI; /* set DAIFclr. */
/* do_MSR_reg: MSR <sysreg>, Xt -- write Xt to the selected system
   register via system_set.  */
13861 do_MSR_reg (sim_cpu *cpu)
13863 /* instr[31:20] = 1101 0101 0001
13871 unsigned sys_op0 = INSTR (19, 19) + 2;
13872 unsigned sys_op1 = INSTR (18, 16);
13873 unsigned sys_crn = INSTR (15, 12);
13874 unsigned sys_crm = INSTR (11, 8);
13875 unsigned sys_op2 = INSTR (7, 5);
13876 unsigned rt = INSTR (4, 0);
13878 NYI_assert (31, 20, 0xD51);
13880 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13881 system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
13882 aarch64_get_reg_u64 (cpu, rt, NO_SP));
/* do_SYS: SYS instruction (cache/TLB maintenance etc.) -- accepted
   as a no-op for now.  */
13886 do_SYS (sim_cpu *cpu)
13888 /* instr[31,19] = 1101 0101 0000 1
13894 NYI_assert (31, 19, 0x1AA1);
13896 /* FIXME: For now we just silently accept system ops. */
/* Second-level decoder for the System group: HINT (only NOP is
   accepted), the DSB/DMB/ISB barriers, DC maintenance (aliased to
   SYS), and MRS/MSR register and immediate forms.
   NOTE(review): listing is line-sampled; the switch header, several
   case labels and the trailing MRS/MSR dispatch are elided.  */
13900 dexSystem (sim_cpu *cpu)
13902 /* instr[31:22] = 1101 01010 0
13909 instr[4,0] = uimm5 */
13911 /* We are interested in HINT, DSB, DMB and ISB
13913 Hint #0 encodes NOOP (this is the only hint we care about)
13914 L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111,
13915 CRm op2 != 0000 000 OR CRm op2 == 0000 000 || CRm op > 0000 101
13917 DSB, DMB, ISB are data synchronization barrier, data memory barrier and
13918 instruction synchronization barrier, respectively, where
13920 L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
13921 op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
13922 CRm<3:2> ==> domain, CRm<1:0> ==> types,
13923 domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
13924 10 ==> InnerShareable, 11 ==> FullSystem
13925 types : 01 ==> Reads, 10 ==> Writes,
13926 11 ==> All, 00 ==> All (domain == FullSystem). */
13928 unsigned rt = INSTR (4, 0);
13930 NYI_assert (31, 22, 0x354);
13932 switch (INSTR (21, 12))
13937 /* NOP has CRm != 0000 OR. */
13938 /* (CRm == 0000 AND (op2 == 000 OR op2 > 101)). */
13939 uint32_t crm = INSTR (11, 8);
13940 uint32_t op2 = INSTR (7, 5);
13942 if (crm != 0 || (op2 == 0 || op2 > 5))
13944 /* Actually call nop method so we can reimplement it later. */
13953 uint32_t op2 = INSTR (7, 5);
13958 case 4: dsb (cpu); return;
13959 case 5: dmb (cpu); return;
13960 case 6: isb (cpu); return;
13961 default: HALT_UNALLOC;
13972 do_SYS (cpu); /* DC is an alias of SYS. */
13976 if (INSTR (21, 20) == 0x1)
/* NOTE(review): the MRS/MSR-register dispatch for this arm is in an
   elided line.  */
13978 else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
13979 do_MSR_immediate (cpu);
/* Second-level decoder for the Branch/Exception/System group.
   Dispatches on bits [31,29] (see dispatchBrExSys) to the immediate
   branch, compare/test branch, conditional branch, register branch,
   exception generation and system sub-decoders.  */
13987 dexBr (sim_cpu *cpu)
13989 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13990 assert group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
13991 bits [31,29] of a BrExSys are the secondary dispatch vector. */
13992 uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
13997 return dexBranchImmediate (cpu);
13999 case BR_IMMCMP_001:
14000 /* Compare has bit 25 clear while test has it set. */
14001 if (!INSTR (25, 25))
14002 dexCompareBranchImmediate (cpu);
14004 dexTestBranchImmediate (cpu);
14007 case BR_IMMCOND_010:
14008 /* This is a conditional branch if bit 25 is clear otherwise
14010 if (!INSTR (25, 25))
14011 dexCondBranchImmediate (cpu);
14016 case BR_UNALLOC_011:
14020 dexBranchImmediate (cpu);
14023 case BR_IMMCMP_101:
14024 /* Compare has bit 25 clear while test has it set. */
14025 if (!INSTR (25, 25))
14026 dexCompareBranchImmediate (cpu);
14028 dexTestBranchImmediate (cpu);
14032 /* Unconditional branch reg has bit 25 set. */
14033 if (INSTR (25, 25))
14034 dexBranchRegister (cpu);
14036 /* This includes both Excpn Gen, System and unalloc operations.
14037 We need to decode the Excpn Gen operation BRK so we can plant
14038 debugger entry points.
14039 Excpn Gen operations have instr [24] = 0.
14040 we need to decode at least one of the System operations NOP
14041 which is an alias for HINT #0.
14042 System operations have instr [24,22] = 100. */
14043 else if (INSTR (24, 24) == 0)
14046 else if (INSTR (24, 22) == 4)
14054 case BR_UNALLOC_111:
14058 /* Should never reach here. */
/* Top-level decode: dispatch the current instruction on its primary
   group (bits [28,25], see dispatchGroup) to the per-group decoders.
   PC is the address the instruction was fetched from.  */
14064 aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
14066 /* We need to check if gdb wants an in here. */
14067 /* checkBreak (cpu);. */
14069 uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
14073 case GROUP_PSEUDO_0000: dexPseudo (cpu); break;
14074 case GROUP_LDST_0100: dexLdSt (cpu); break;
14075 case GROUP_DPREG_0101: dexDPReg (cpu); break;
14076 case GROUP_LDST_0110: dexLdSt (cpu); break;
14077 case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break;
14078 case GROUP_DPIMM_1000: dexDPImm (cpu); break;
14079 case GROUP_DPIMM_1001: dexDPImm (cpu); break;
14080 case GROUP_BREXSYS_1010: dexBr (cpu); break;
14081 case GROUP_BREXSYS_1011: dexBr (cpu); break;
14082 case GROUP_LDST_1100: dexLdSt (cpu); break;
14083 case GROUP_DPREG_1101: dexDPReg (cpu); break;
14084 case GROUP_LDST_1110: dexLdSt (cpu); break;
14085 case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break;
14087 case GROUP_UNALLOC_0001:
14088 case GROUP_UNALLOC_0010:
14089 case GROUP_UNALLOC_0011:
14093 /* Should never reach here. */
/* aarch64_step: fetch, decode and execute one instruction.  Returns
   FALSE when the PC reaches the sentinel top-level return address.
   NOTE(review): listing is line-sampled; the return statements are in
   elided lines.  */
14099 aarch64_step (sim_cpu *cpu)
14101 uint64_t pc = aarch64_get_PC (cpu);
14103 if (pc == TOP_LEVEL_RETURN_PC)
14106 aarch64_set_next_PC (cpu, pc + 4);
14108 /* Code is always little-endian. */
14109 sim_core_read_buffer (CPU_STATE (cpu), cpu, read_map,
14110 & aarch64_get_instr (cpu), pc, 4);
14111 aarch64_get_instr (cpu) = endian_le2h_4 (aarch64_get_instr (cpu));
14113 TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
14114 aarch64_get_instr (cpu));
14115 TRACE_DISASM (cpu, pc);
14117 aarch64_decode_and_execute (cpu, pc);
/* aarch64_run: main simulation loop -- step until the program returns
   to top level, servicing event-queue ticks, then report the exit
   status from R0.  */
14123 aarch64_run (SIM_DESC sd)
14125 sim_cpu *cpu = STATE_CPU (sd, 0);
14127 while (aarch64_step (cpu))
14129 aarch64_update_PC (cpu);
14131 if (sim_events_tick (sd))
14132 sim_events_process (sd);
14135 sim_engine_halt (sd, cpu, NULL, aarch64_get_PC (cpu),
14136 sim_exited, aarch64_get_reg_s32 (cpu, R0, NO_SP));
/* aarch64_init: set up SP/FP at the top of the stack, plant the
   sentinel return address in LR so aarch64_step can detect a
   top-level return, and start execution at PC.  */
14140 aarch64_init (sim_cpu *cpu, uint64_t pc)
14142 uint64_t sp = aarch64_get_stack_start (cpu);
14144 /* Install SP, FP and PC and set LR to -20
14145 so we can detect a top-level return. */
14146 aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
14147 aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
14148 aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
14149 aarch64_set_next_PC (cpu, pc);
14150 aarch64_update_PC (cpu);
14151 aarch64_init_LIT_table ();