sim/aarch64/simulator.c

   1 /* simulator.c -- Interface for the AArch64 simulator.
   2
   3    Copyright (C) 2015-2016 Free Software Foundation, Inc.
   4
   5    Contributed by Red Hat.
   6
   7    This file is part of GDB.
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 3 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include <stdlib.h>
  24 #include <stdio.h>
  25 #include <string.h>
  26 #include <sys/types.h>
  27 #include <math.h>
  28 #include <time.h>
  29 #include <limits.h>
  30
  31 #include "simulator.h"
  32 #include "cpustate.h"
  33 #include "memory.h"
  34
/* Values passed as the third argument of the aarch64_get_reg_* and
   aarch64_set_reg_* accessors used throughout this file.  SP_OK is
   used where the register field may name the stack pointer, NO_SP
   where it may not.
   NOTE(review): presumably register 31 then denotes the zero register
   when NO_SP is given -- confirm against the accessors.  */
#define NO_SP 0
#define SP_OK 1
  37
/* CPSR flag tests.  All three expand to expressions that reference a
   variable named `cpu', which must be in scope at the point of use.
   TST yields whatever aarch64_test_CPSR_bit returns for _flag;
   IS_SET and IS_CLEAR normalise that result to exactly 1 or 0.  */
#define TST(_flag)   (aarch64_test_CPSR_bit (cpu, _flag))
#define IS_SET(_X)   (TST (( _X )) ? 1 : 0)
#define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)
  41
/* Space saver macro.  Applies uimm to the instruction word returned by
   aarch64_get_instr with the bounds HIGH and LOW; the decoders in this
   file use it to read the field instr[HIGH,LOW].  Requires a variable
   named `cpu' to be in scope.  */
#define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))
  44
/* Stop simulation because the current instruction is unallocated.
   Emits the disassembly trace and an instruction trace message naming
   the simulator source line and the PC, then calls sim_engine_halt
   with sim_stopped / SIM_SIGILL.  Requires `cpu' to be in scope.  */
#define HALT_UNALLOC                                                    \
  do                                                                    \
    {                                                                   \
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));                         \
      TRACE_INSN (cpu,                                                  \
                  "Unallocated instruction detected at sim line %d,"    \
                  " exe addr %" PRIx64,                                 \
                  __LINE__, aarch64_get_PC (cpu));                      \
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
                       sim_stopped, SIM_SIGILL);                        \
    }                                                                   \
  while (0)
  57
/* Stop simulation because the current instruction is not yet
   implemented by the simulator.  Emits the same traces as an
   unallocated instruction would; in addition, when TRACE_ANY_P reports
   that no tracing is active, an error line and the disassembly are
   printed so the failure is still visible.  Finally calls
   sim_engine_halt with sim_stopped / SIM_SIGABRT.  Requires `cpu' to
   be in scope.  */
#define HALT_NYI                                                        \
  do                                                                    \
    {                                                                   \
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));                         \
      TRACE_INSN (cpu,                                                  \
                  "Unimplemented instruction detected at sim line %d,"  \
                  " exe addr %" PRIx64,                                 \
                  __LINE__, aarch64_get_PC (cpu));                      \
      if (! TRACE_ANY_P (cpu))                                          \
        {                                                               \
          sim_io_eprintf (CPU_STATE (cpu), "SIM Error: Unimplemented instruction: "); \
          trace_disasm (CPU_STATE (cpu), cpu, aarch64_get_PC (cpu));    \
        }                                                               \
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
                       sim_stopped, SIM_SIGABRT);                       \
    }                                                                   \
  while (0)
  75
/* Check that the instruction field instr[HI,LO] holds EXPECTED; if it
   does not, the encoding is one this simulator does not implement and
   HALT_NYI is invoked.  Requires `cpu' to be in scope.  */
#define NYI_assert(HI, LO, EXPECTED)                                    \
  do                                                                    \
    {                                                                   \
      if (INSTR ((HI), (LO)) != (EXPECTED))                             \
        HALT_NYI;                                                       \
    }                                                                   \
  while (0)
  83
/* Helper functions used by expand_logical_immediate.  */
  85
  86 /* for i = 1, ... N result<i-1> = 1 other bits are zero  */
  87 static inline uint64_t
  88 ones (int N)
  89 {
  90   return (N == 64 ? (uint64_t)-1UL : ((1UL << N) - 1));
  91 }
  92
  93 /* result<0> to val<N>  */
  94 static inline uint64_t
  95 pickbit (uint64_t val, int N)
  96 {
  97   return pickbits64 (val, N, N);
  98 }
  99
 100 static uint64_t
 101 expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
 102 {
 103   uint64_t mask;
 104   uint64_t imm;
 105   unsigned simd_size;
 106
 107   /* The immediate value is S+1 bits to 1, left rotated by SIMDsize - R
 108      (in other words, right rotated by R), then replicated. */
 109   if (N != 0)
 110     {
 111       simd_size = 64;
 112       mask = 0xffffffffffffffffull;
 113     }
 114   else
 115     {
 116       switch (S)
 117         {
 118         case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32;           break;
 119         case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
 120         case 0x30 ... 0x37: /* 110xxx */ simd_size =  8; S &= 0x7; break;
 121         case 0x38 ... 0x3b: /* 1110xx */ simd_size =  4; S &= 0x3; break;
 122         case 0x3c ... 0x3d: /* 11110x */ simd_size =  2; S &= 0x1; break;
 123         default: return 0;
 124         }
 125       mask = (1ull << simd_size) - 1;
 126       /* Top bits are IGNORED.  */
 127       R &= simd_size - 1;
 128     }
 129
 130   /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected.  */
 131   if (S == simd_size - 1)
 132     return 0;
 133
 134   /* S+1 consecutive bits to 1.  */
 135   /* NOTE: S can't be 63 due to detection above.  */
 136   imm = (1ull << (S + 1)) - 1;
 137
 138   /* Rotate to the left by simd_size - R.  */
 139   if (R != 0)
 140     imm = ((imm << (simd_size - R)) & mask) | (imm >> R);
 141
 142   /* Replicate the value according to SIMD size.  */
 143   switch (simd_size)
 144     {
 145     case  2: imm = (imm <<  2) | imm;
 146     case  4: imm = (imm <<  4) | imm;
 147     case  8: imm = (imm <<  8) | imm;
 148     case 16: imm = (imm << 16) | imm;
 149     case 32: imm = (imm << 32) | imm;
 150     case 64: break;
 151     default: return 0;
 152     }
 153
 154   return imm;
 155 }
 156
/* Instr[22,10] encodes N, immr and imms.  We want a lookup table with
   one entry for each possible combination, i.e. 2^13 entries, indexed
   by that 13 bit field.  The table is filled in by
   aarch64_init_LIT_table.  */
#define  LI_TABLE_SIZE  (1 << 13)
static uint64_t LITable[LI_TABLE_SIZE];
 161
 162 void
 163 aarch64_init_LIT_table (void)
 164 {
 165   unsigned index;
 166
 167   for (index = 0; index < LI_TABLE_SIZE; index++)
 168     {
 169       uint32_t N    = uimm (index, 12, 12);
 170       uint32_t immr = uimm (index, 11, 6);
 171       uint32_t imms = uimm (index, 5, 0);
 172
 173       LITable [index] = expand_logical_immediate (imms, immr, N);
 174     }
 175 }
 176
 177 static void
 178 dexNotify (sim_cpu *cpu)
 179 {
 180   /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
 181                            2 ==> exit Java, 3 ==> start next bytecode.  */
 182   uint32_t type = INSTR (14, 0);
 183
 184   TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);
 185
 186   switch (type)
 187     {
 188     case 0:
 189       /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
 190          aarch64_get_reg_u64 (cpu, R22, 0));  */
 191       break;
 192     case 1:
 193       /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
 194          aarch64_get_reg_u64 (cpu, R22, 0));  */
 195       break;
 196     case 2:
 197       /* aarch64_notifyMethodExit ();  */
 198       break;
 199     case 3:
 200       /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
 201          aarch64_get_reg_u64 (cpu, R22, 0));  */
 202       break;
 203     }
 204 }
 205
 206 /* secondary decode within top level groups  */
 207
 208 static void
 209 dexPseudo (sim_cpu *cpu)
 210 {
 211   /* assert instr[28,27] = 00
 212
 213      We provide 2 pseudo instructions:
 214
 215      HALT stops execution of the simulator causing an immediate
 216      return to the x86 code which entered it.
 217
 218      CALLOUT initiates recursive entry into x86 code.  A register
 219      argument holds the address of the x86 routine.  Immediate
 220      values in the instruction identify the number of general
 221      purpose and floating point register arguments to be passed
 222      and the type of any value to be returned.  */
 223
 224   uint32_t PSEUDO_HALT      =  0xE0000000U;
 225   uint32_t PSEUDO_CALLOUT   =  0x00018000U;
 226   uint32_t PSEUDO_CALLOUTR  =  0x00018001U;
 227   uint32_t PSEUDO_NOTIFY    =  0x00014000U;
 228   uint32_t dispatch;
 229
 230   if (aarch64_get_instr (cpu) == PSEUDO_HALT)
 231     {
 232       TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
 233       sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
 234                        sim_stopped, SIM_SIGTRAP);
 235     }
 236
 237   dispatch = INSTR (31, 15);
 238
 239   /* We do not handle callouts at the moment.  */
 240   if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
 241     {
 242       TRACE_EVENTS (cpu, " Callout");
 243       sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
 244                        sim_stopped, SIM_SIGABRT);
 245     }
 246
 247   else if (dispatch == PSEUDO_NOTIFY)
 248     dexNotify (cpu);
 249
 250   else
 251     HALT_UNALLOC;
 252 }
 253
 254 /* Load-store single register (unscaled offset)
 255    These instructions employ a base register plus an unscaled signed
 256    9 bit offset.
 257
 258    N.B. the base register (source) can be Xn or SP. all other
 259    registers may not be SP.  */
 260
 261 /* 32 bit load 32 bit unscaled signed 9 bit.  */
 262 static void
 263 ldur32 (sim_cpu *cpu, int32_t offset)
 264 {
 265   unsigned rn = INSTR (9, 5);
 266   unsigned rt = INSTR (4, 0);
 267
 268   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
 269                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
 270                         + offset));
 271 }
 272
 273 /* 64 bit load 64 bit unscaled signed 9 bit.  */
 274 static void
 275 ldur64 (sim_cpu *cpu, int32_t offset)
 276 {
 277   unsigned rn = INSTR (9, 5);
 278   unsigned rt = INSTR (4, 0);
 279
 280   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
 281                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
 282                         + offset));
 283 }
 284
 285 /* 32 bit load zero-extended byte unscaled signed 9 bit.  */
 286 static void
 287 ldurb32 (sim_cpu *cpu, int32_t offset)
 288 {
 289   unsigned rn = INSTR (9, 5);
 290   unsigned rt = INSTR (4, 0);
 291
 292   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
 293                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
 294                         + offset));
 295 }
 296
 297 /* 32 bit load sign-extended byte unscaled signed 9 bit.  */
 298 static void
 299 ldursb32 (sim_cpu *cpu, int32_t offset)
 300 {
 301   unsigned rn = INSTR (9, 5);
 302   unsigned rt = INSTR (4, 0);
 303
 304   aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
 305                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
 306                         + offset));
 307 }
 308
 309 /* 64 bit load sign-extended byte unscaled signed 9 bit.  */
 310 static void
 311 ldursb64 (sim_cpu *cpu, int32_t offset)
 312 {
 313   unsigned rn = INSTR (9, 5);
 314   unsigned rt = INSTR (4, 0);
 315
 316   aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
 317                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
 318                         + offset));
 319 }
 320
 321 /* 32 bit load zero-extended short unscaled signed 9 bit  */
 322 static void
 323 ldurh32 (sim_cpu *cpu, int32_t offset)
 324 {
 325   unsigned rn = INSTR (9, 5);
 326   unsigned rd = INSTR (4, 0);
 327
 328   aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
 329                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
 330                         + offset));
 331 }
 332
 333 /* 32 bit load sign-extended short unscaled signed 9 bit  */
 334 static void
 335 ldursh32 (sim_cpu *cpu, int32_t offset)
 336 {
 337   unsigned rn = INSTR (9, 5);
 338   unsigned rd = INSTR (4, 0);
 339
 340   aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
 341                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
 342                         + offset));
 343 }
 344
 345 /* 64 bit load sign-extended short unscaled signed 9 bit  */
 346 static void
 347 ldursh64 (sim_cpu *cpu, int32_t offset)
 348 {
 349   unsigned rn = INSTR (9, 5);
 350   unsigned rt = INSTR (4, 0);
 351
 352   aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
 353                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
 354                         + offset));
 355 }
 356
 357 /* 64 bit load sign-extended word unscaled signed 9 bit  */
 358 static void
 359 ldursw (sim_cpu *cpu, int32_t offset)
 360 {
 361   unsigned rn = INSTR (9, 5);
 362   unsigned rd = INSTR (4, 0);
 363
 364   aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s32
 365                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
 366                         + offset));
 367 }
 368
 369 /* N.B. with stores the value in source is written to the address
 370    identified by source2 modified by offset.  */
 371
 372 /* 32 bit store 32 bit unscaled signed 9 bit.  */
 373 static void
 374 stur32 (sim_cpu *cpu, int32_t offset)
 375 {
 376   unsigned rn = INSTR (9, 5);
 377   unsigned rd = INSTR (4, 0);
 378
 379   aarch64_set_mem_u32 (cpu,
 380                        aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
 381                        aarch64_get_reg_u32 (cpu, rd, NO_SP));
 382 }
 383
 384 /* 64 bit store 64 bit unscaled signed 9 bit  */
 385 static void
 386 stur64 (sim_cpu *cpu, int32_t offset)
 387 {
 388   unsigned rn = INSTR (9, 5);
 389   unsigned rd = INSTR (4, 0);
 390
 391   aarch64_set_mem_u64 (cpu,
 392                        aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
 393                        aarch64_get_reg_u64 (cpu, rd, NO_SP));
 394 }
 395
 396 /* 32 bit store byte unscaled signed 9 bit  */
 397 static void
 398 sturb (sim_cpu *cpu, int32_t offset)
 399 {
 400   unsigned rn = INSTR (9, 5);
 401   unsigned rd = INSTR (4, 0);
 402
 403   aarch64_set_mem_u8 (cpu,
 404                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
 405                       aarch64_get_reg_u8 (cpu, rd, NO_SP));
 406 }
 407
 408 /* 32 bit store short unscaled signed 9 bit  */
 409 static void
 410 sturh (sim_cpu *cpu, int32_t offset)
 411 {
 412   unsigned rn = INSTR (9, 5);
 413   unsigned rd = INSTR (4, 0);
 414
 415   aarch64_set_mem_u16 (cpu,
 416                        aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
 417                        aarch64_get_reg_u16 (cpu, rd, NO_SP));
 418 }
 419
 420 /* Load single register pc-relative label
 421    Offset is a signed 19 bit immediate count in words
 422    rt may not be SP.  */
 423
 424 /* 32 bit pc-relative load  */
 425 static void
 426 ldr32_pcrel (sim_cpu *cpu, int32_t offset)
 427 {
 428   unsigned rd = INSTR (4, 0);
 429
 430   aarch64_set_reg_u64 (cpu, rd, NO_SP,
 431                        aarch64_get_mem_u32
 432                        (cpu, aarch64_get_PC (cpu) + offset * 4));
 433 }
 434
 435 /* 64 bit pc-relative load  */
 436 static void
 437 ldr_pcrel (sim_cpu *cpu, int32_t offset)
 438 {
 439   unsigned rd = INSTR (4, 0);
 440
 441   aarch64_set_reg_u64 (cpu, rd, NO_SP,
 442                        aarch64_get_mem_u64
 443                        (cpu, aarch64_get_PC (cpu) + offset * 4));
 444 }
 445
 446 /* sign extended 32 bit pc-relative load  */
 447 static void
 448 ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
 449 {
 450   unsigned rd = INSTR (4, 0);
 451
 452   aarch64_set_reg_u64 (cpu, rd, NO_SP,
 453                        aarch64_get_mem_s32
 454                        (cpu, aarch64_get_PC (cpu) + offset * 4));
 455 }
 456
 457 /* float pc-relative load  */
 458 static void
 459 fldrs_pcrel (sim_cpu *cpu, int32_t offset)
 460 {
 461   unsigned int rd = INSTR (4, 0);
 462
 463   aarch64_set_vec_u32 (cpu, rd, 0,
 464                        aarch64_get_mem_u32
 465                        (cpu, aarch64_get_PC (cpu) + offset * 4));
 466 }
 467
 468 /* double pc-relative load  */
 469 static void
 470 fldrd_pcrel (sim_cpu *cpu, int32_t offset)
 471 {
 472   unsigned int st = INSTR (4, 0);
 473
 474   aarch64_set_vec_u64 (cpu, st, 0,
 475                        aarch64_get_mem_u64
 476                        (cpu, aarch64_get_PC (cpu) + offset * 4));
 477 }
 478
 479 /* long double pc-relative load.  */
 480 static void
 481 fldrq_pcrel (sim_cpu *cpu, int32_t offset)
 482 {
 483   unsigned int st = INSTR (4, 0);
 484   uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
 485   FRegister a;
 486
 487   aarch64_get_mem_long_double (cpu, addr, & a);
 488   aarch64_set_FP_long_double (cpu, st, a);
 489 }
 490
/* This can be used to scale an offset by applying the requisite
   shift.  The second argument is pasted onto the token ScaleShift, so
   it must complete the name of a ScaleShift<size> constant; the uses
   in this part of the file pass 16, 32, 64 and 128.  */

#define SCALE(_offset, _elementSize) \
    ((_offset) << ScaleShift ## _elementSize)
 497
/* This can be used to optionally scale a register derived offset.
   The second argument is pasted onto the token ScaleShift, so it must
   complete the name of a ScaleShift<size> constant; the uses in this
   part of the file pass 32, 64 and 128.  The third argument is the
   Scaling selector: when it is non-zero the offset is shifted left by
   that constant, otherwise it is shifted by 0, i.e. left unchanged.
   NOTE(review): callers pass a signed int64_t offset; left-shifting a
   negative value is undefined in ISO C, so this relies on the
   compiler's usual two's complement behaviour.  */

#define OPT_SCALE(_offset, _elementType, _Scaling) \
  ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))
 507
 508 /* This can be used to zero or sign extend a 32 bit register derived
 509    value to a 64 bit value.  the first argument must be the value as
 510    a uint32_t and the second must be either UXTW or SXTW. The result
 511    is returned as an int64_t.  */
 512
 513 static inline int64_t
 514 extend (uint32_t value, Extension extension)
 515 {
 516   union
 517   {
 518     uint32_t u;
 519     int32_t   n;
 520   } x;
 521
 522   /* A branchless variant of this ought to be possible.  */
 523   if (extension == UXTW || extension == NoExtension)
 524     return value;
 525
 526   x.u = value;
 527   return x.n;
 528 }
 529
 530 /* Scalar Floating Point
 531
 532    FP load/store single register (4 addressing modes)
 533
 534    N.B. the base register (source) can be the stack pointer.
 535    The secondary source register (source2) can only be an Xn register.  */
 536
 537 /* Load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
 538 static void
 539 fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
 540 {
 541   unsigned rn = INSTR (9, 5);
 542   unsigned st = INSTR (4, 0);
 543   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 544
 545   if (wb != Post)
 546     address += offset;
 547
 548   aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));
 549   if (wb == Post)
 550     address += offset;
 551
 552   if (wb != NoWriteBack)
 553     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
 554 }
 555
 556 /* Load 8 bit with unsigned 12 bit offset.  */
 557 static void
 558 fldrb_abs (sim_cpu *cpu, uint32_t offset)
 559 {
 560   unsigned rd = INSTR (4, 0);
 561   unsigned rn = INSTR (9, 5);
 562   uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
 563
 564   aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
 565 }
 566
 567 /* Load 16 bit scaled unsigned 12 bit.  */
 568 static void
 569 fldrh_abs (sim_cpu *cpu, uint32_t offset)
 570 {
 571   unsigned rd = INSTR (4, 0);
 572   unsigned rn = INSTR (9, 5);
 573   uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);
 574
 575   aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
 576 }
 577
 578 /* Load 32 bit scaled unsigned 12 bit.  */
 579 static void
 580 fldrs_abs (sim_cpu *cpu, uint32_t offset)
 581 {
 582   unsigned rd = INSTR (4, 0);
 583   unsigned rn = INSTR (9, 5);
 584   uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);
 585
 586   aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
 587 }
 588
 589 /* Load 64 bit scaled unsigned 12 bit.  */
 590 static void
 591 fldrd_abs (sim_cpu *cpu, uint32_t offset)
 592 {
 593   unsigned rd = INSTR (4, 0);
 594   unsigned rn = INSTR (9, 5);
 595   uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);
 596
 597   aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
 598 }
 599
 600 /* Load 128 bit scaled unsigned 12 bit.  */
 601 static void
 602 fldrq_abs (sim_cpu *cpu, uint32_t offset)
 603 {
 604   unsigned rd = INSTR (4, 0);
 605   unsigned rn = INSTR (9, 5);
 606   uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
 607
 608   aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
 609   aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
 610 }
 611
 612 /* Load 32 bit scaled or unscaled zero- or sign-extended
 613    32-bit register offset.  */
 614 static void
 615 fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
 616 {
 617   unsigned rm = INSTR (20, 16);
 618   unsigned rn = INSTR (9, 5);
 619   unsigned st = INSTR (4, 0);
 620   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 621   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
 622   uint64_t displacement = OPT_SCALE (extended, 32, scaling);
 623
 624   aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
 625                        (cpu, address + displacement));
 626 }
 627
 628 /* Load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
 629 static void
 630 fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
 631 {
 632   unsigned rn = INSTR (9, 5);
 633   unsigned st = INSTR (4, 0);
 634   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 635
 636   if (wb != Post)
 637     address += offset;
 638
 639   aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));
 640
 641   if (wb == Post)
 642     address += offset;
 643
 644   if (wb != NoWriteBack)
 645     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
 646 }
 647
 648 /* Load 64 bit scaled or unscaled zero- or sign-extended 32-bit register offset.  */
 649 static void
 650 fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
 651 {
 652   unsigned rm = INSTR (20, 16);
 653   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
 654   uint64_t displacement = OPT_SCALE (extended, 64, scaling);
 655
 656   fldrd_wb (cpu, displacement, NoWriteBack);
 657 }
 658
 659 /* Load 128 bit unscaled signed 9 bit with pre- or post-writeback.  */
 660 static void
 661 fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
 662 {
 663   FRegister a;
 664   unsigned rn = INSTR (9, 5);
 665   unsigned st = INSTR (4, 0);
 666   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 667
 668   if (wb != Post)
 669     address += offset;
 670
 671   aarch64_get_mem_long_double (cpu, address, & a);
 672   aarch64_set_FP_long_double (cpu, st, a);
 673
 674   if (wb == Post)
 675     address += offset;
 676
 677   if (wb != NoWriteBack)
 678     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
 679 }
 680
 681 /* Load 128 bit scaled or unscaled zero- or sign-extended 32-bit register offset  */
 682 static void
 683 fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
 684 {
 685   unsigned rm = INSTR (20, 16);
 686   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
 687   uint64_t displacement = OPT_SCALE (extended, 128, scaling);
 688
 689   fldrq_wb (cpu, displacement, NoWriteBack);
 690 }
 691
 692 /* Memory Access
 693
 694    load-store single register
 695    There are four addressing modes available here which all employ a
 696    64 bit source (base) register.
 697
 698    N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.
 700
 701    Scaled, 12-bit, unsigned immediate offset, without pre- and
 702    post-index options.
 703    Unscaled, 9-bit, signed immediate offset with pre- or post-index
 704    writeback.
 705    scaled or unscaled 64-bit register offset.
 706    scaled or unscaled 32-bit extended register offset.
 707
   All offsets are assumed to be raw from the decode, i.e. the
   simulator is expected to adjust scaled offsets based on the
   accessed data size.  With register or extended register offset
   versions the same applies, except that in the latter case the
   operation may also require a sign extend.
 713
 714    A separate method is provided for each possible addressing mode.  */
 715
 716 /* 32 bit load 32 bit scaled unsigned 12 bit  */
 717 static void
 718 ldr32_abs (sim_cpu *cpu, uint32_t offset)
 719 {
 720   unsigned rn = INSTR (9, 5);
 721   unsigned rt = INSTR (4, 0);
 722
 723   /* The target register may not be SP but the source may be.  */
 724   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
 725                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
 726                         + SCALE (offset, 32)));
 727 }
 728
 729 /* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
 730 static void
 731 ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
 732 {
 733   unsigned rn = INSTR (9, 5);
 734   unsigned rt = INSTR (4, 0);
 735   uint64_t address;
 736
 737   if (rn == rt && wb != NoWriteBack)
 738     HALT_UNALLOC;
 739
 740   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 741
 742   if (wb != Post)
 743     address += offset;
 744
 745   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
 746
 747   if (wb == Post)
 748     address += offset;
 749
 750   if (wb != NoWriteBack)
 751     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
 752 }
 753
 754 /* 32 bit load 32 bit scaled or unscaled
 755    zero- or sign-extended 32-bit register offset  */
 756 static void
 757 ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
 758 {
 759   unsigned rm = INSTR (20, 16);
 760   unsigned rn = INSTR (9, 5);
 761   unsigned rt = INSTR (4, 0);
 762   /* rn may reference SP, rm and rt must reference ZR  */
 763
 764   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 765   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
 766   uint64_t displacement =  OPT_SCALE (extended, 32, scaling);
 767
 768   aarch64_set_reg_u64 (cpu, rt, NO_SP,
 769                        aarch64_get_mem_u32 (cpu, address + displacement));
 770 }
 771
 772 /* 64 bit load 64 bit scaled unsigned 12 bit  */
 773 static void
 774 ldr_abs (sim_cpu *cpu, uint32_t offset)
 775 {
 776   unsigned rn = INSTR (9, 5);
 777   unsigned rt = INSTR (4, 0);
 778
 779   /* The target register may not be SP but the source may be.  */
 780   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
 781                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
 782                         + SCALE (offset, 64)));
 783 }
 784
 785 /* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
 786 static void
 787 ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
 788 {
 789   unsigned rn = INSTR (9, 5);
 790   unsigned rt = INSTR (4, 0);
 791   uint64_t address;
 792
 793   if (rn == rt && wb != NoWriteBack)
 794     HALT_UNALLOC;
 795
 796   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 797
 798   if (wb != Post)
 799     address += offset;
 800
 801   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
 802
 803   if (wb == Post)
 804     address += offset;
 805
 806   if (wb != NoWriteBack)
 807     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
 808 }
 809
 810 /* 64 bit load 64 bit scaled or unscaled zero-
 811    or sign-extended 32-bit register offset.  */
 812 static void
 813 ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
 814 {
 815   unsigned rm = INSTR (20, 16);
 816   unsigned rn = INSTR (9, 5);
 817   unsigned rt = INSTR (4, 0);
 818   /* rn may reference SP, rm and rt must reference ZR  */
 819
 820   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 821   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
 822   uint64_t displacement =  OPT_SCALE (extended, 64, scaling);
 823
 824   aarch64_set_reg_u64 (cpu, rt, NO_SP,
 825                        aarch64_get_mem_u64 (cpu, address + displacement));
 826 }
 827
 828 /* 32 bit load zero-extended byte scaled unsigned 12 bit.  */
 829 static void
 830 ldrb32_abs (sim_cpu *cpu, uint32_t offset)
 831 {
 832   unsigned rn = INSTR (9, 5);
 833   unsigned rt = INSTR (4, 0);
 834
 835   /* The target register may not be SP but the source may be
 836      there is no scaling required for a byte load.  */
 837   aarch64_set_reg_u64 (cpu, rt, NO_SP,
 838                        aarch64_get_mem_u8
 839                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
 840 }
 841
 842 /* 32 bit load zero-extended byte unscaled signed 9 bit with pre- or post-writeback.  */
 843 static void
 844 ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
 845 {
 846   unsigned rn = INSTR (9, 5);
 847   unsigned rt = INSTR (4, 0);
 848   uint64_t address;
 849
 850   if (rn == rt && wb != NoWriteBack)
 851     HALT_UNALLOC;
 852
 853   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 854
 855   if (wb != Post)
 856     address += offset;
 857
 858   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
 859
 860   if (wb == Post)
 861     address += offset;
 862
 863   if (wb != NoWriteBack)
 864     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
 865 }
 866
 867 /* 32 bit load zero-extended byte scaled or unscaled zero-
 868    or sign-extended 32-bit register offset.  */
 869 static void
 870 ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
 871 {
 872   unsigned rm = INSTR (20, 16);
 873   unsigned rn = INSTR (9, 5);
 874   unsigned rt = INSTR (4, 0);
 875   /* rn may reference SP, rm and rt must reference ZR  */
 876
 877   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 878   int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
 879                                  extension);
 880
 881   /* There is no scaling required for a byte load.  */
 882   aarch64_set_reg_u64 (cpu, rt, NO_SP,
 883                        aarch64_get_mem_u8 (cpu, address + displacement));
 884 }
 885
 886 /* 64 bit load sign-extended byte unscaled signed 9 bit
 887    with pre- or post-writeback.  */
 888 static void
 889 ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
 890 {
 891   unsigned rn = INSTR (9, 5);
 892   unsigned rt = INSTR (4, 0);
 893   uint64_t address;
 894
 895   if (rn == rt && wb != NoWriteBack)
 896     HALT_UNALLOC;
 897
 898   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 899
 900   if (wb != Post)
 901     address += offset;
 902
 903   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_s8 (cpu, address));
 904
 905   if (wb == Post)
 906     address += offset;
 907
 908   if (wb != NoWriteBack)
 909     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
 910 }
 911
 912 /* 64 bit load sign-extended byte scaled unsigned 12 bit.  */
 913 static void
 914 ldrsb_abs (sim_cpu *cpu, uint32_t offset)
 915 {
 916   ldrsb_wb (cpu, offset, NoWriteBack);
 917 }
 918
 919 /* 64 bit load sign-extended byte scaled or unscaled zero-
 920    or sign-extended 32-bit register offset.  */
 921 static void
 922 ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
 923 {
 924   unsigned rm = INSTR (20, 16);
 925   unsigned rn = INSTR (9, 5);
 926   unsigned rt = INSTR (4, 0);
 927   /* rn may reference SP, rm and rt must reference ZR  */
 928
 929   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 930   int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
 931                                  extension);
 932   /* There is no scaling required for a byte load.  */
 933   aarch64_set_reg_u64 (cpu, rt, NO_SP,
 934                        aarch64_get_mem_s8 (cpu, address + displacement));
 935 }
 936
 937 /* 32 bit load zero-extended short scaled unsigned 12 bit.  */
 938 static void
 939 ldrh32_abs (sim_cpu *cpu, uint32_t offset)
 940 {
 941   unsigned rn = INSTR (9, 5);
 942   unsigned rt = INSTR (4, 0);
 943
 944   /* The target register may not be SP but the source may be.  */
 945   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16
 946                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
 947                         + SCALE (offset, 16)));
 948 }
 949
 950 /* 32 bit load zero-extended short unscaled signed 9 bit
 951    with pre- or post-writeback.  */
 952 static void
 953 ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
 954 {
 955   unsigned rn = INSTR (9, 5);
 956   unsigned rt = INSTR (4, 0);
 957   uint64_t address;
 958
 959   if (rn == rt && wb != NoWriteBack)
 960     HALT_UNALLOC;
 961
 962   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 963
 964   if (wb != Post)
 965     address += offset;
 966
 967   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
 968
 969   if (wb == Post)
 970     address += offset;
 971
 972   if (wb != NoWriteBack)
 973     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
 974 }
 975
 976 /* 32 bit load zero-extended short scaled or unscaled zero-
 977    or sign-extended 32-bit register offset.  */
 978 static void
 979 ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
 980 {
 981   unsigned rm = INSTR (20, 16);
 982   unsigned rn = INSTR (9, 5);
 983   unsigned rt = INSTR (4, 0);
 984   /* rn may reference SP, rm and rt must reference ZR  */
 985
 986   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 987   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
 988   uint64_t displacement =  OPT_SCALE (extended, 16, scaling);
 989
 990   aarch64_set_reg_u64 (cpu, rt, NO_SP,
 991                        aarch64_get_mem_u16 (cpu, address + displacement));
 992 }
 993
 994 /* 32 bit load sign-extended short scaled unsigned 12 bit.  */
 995 static void
 996 ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
 997 {
 998   unsigned rn = INSTR (9, 5);
 999   unsigned rt = INSTR (4, 0);
1000
1001   /* The target register may not be SP but the source may be.  */
1002   aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s16
1003                        (cpu,
1004                         aarch64_get_reg_u64 (cpu, rn, SP_OK)
1005                         + SCALE (offset, 16)));
1006 }
1007
1008 /* 32 bit load sign-extended short unscaled signed 9 bit
1009    with pre- or post-writeback.  */
1010 static void
1011 ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1012 {
1013   unsigned rn = INSTR (9, 5);
1014   unsigned rt = INSTR (4, 0);
1015   uint64_t address;
1016
1017   if (rn == rt && wb != NoWriteBack)
1018     HALT_UNALLOC;
1019
1020   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1021
1022   if (wb != Post)
1023     address += offset;
1024
1025   aarch64_set_reg_u64 (cpu, rt, NO_SP,
1026                        (uint32_t) aarch64_get_mem_s16 (cpu, address));
1027
1028   if (wb == Post)
1029     address += offset;
1030
1031   if (wb != NoWriteBack)
1032     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1033 }
1034
1035 /* 32 bit load sign-extended short scaled or unscaled zero-
1036    or sign-extended 32-bit register offset.  */
1037 static void
1038 ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1039 {
1040   unsigned rm = INSTR (20, 16);
1041   unsigned rn = INSTR (9, 5);
1042   unsigned rt = INSTR (4, 0);
1043   /* rn may reference SP, rm and rt must reference ZR  */
1044
1045   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1046   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1047   uint64_t displacement =  OPT_SCALE (extended, 16, scaling);
1048
1049   aarch64_set_reg_u64 (cpu, rt, NO_SP,
1050                        (uint32_t) aarch64_get_mem_s16
1051                        (cpu, address + displacement));
1052 }
1053
1054 /* 64 bit load sign-extended short scaled unsigned 12 bit.  */
1055 static void
1056 ldrsh_abs (sim_cpu *cpu, uint32_t offset)
1057 {
1058   unsigned rn = INSTR (9, 5);
1059   unsigned rt = INSTR (4, 0);
1060
1061   /* The target register may not be SP but the source may be.  */
1062   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_s16
1063                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1064                         + SCALE (offset, 16)));
1065 }
1066
1067 /* 64 bit load sign-extended short unscaled signed 9 bit
1068    with pre- or post-writeback.  */
1069 static void
1070 ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1071 {
1072   unsigned rn = INSTR (9, 5);
1073   unsigned rt = INSTR (4, 0);
1074   uint64_t address;
1075
1076   if (rn == rt && wb != NoWriteBack)
1077     HALT_UNALLOC;
1078
1079   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1080
1081   if (wb != Post)
1082     address += offset;
1083
1084   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_s16 (cpu, address));
1085
1086   if (wb == Post)
1087     address += offset;
1088
1089   if (wb != NoWriteBack)
1090     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1091 }
1092
1093 /* 64 bit load sign-extended short scaled or unscaled zero-
1094    or sign-extended 32-bit register offset.  */
1095 static void
1096 ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1097 {
1098   unsigned rm = INSTR (20, 16);
1099   unsigned rn = INSTR (9, 5);
1100   unsigned rt = INSTR (4, 0);
1101   /* rn may reference SP, rm and rt must reference ZR  */
1102
1103   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1104   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1105   uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1106
1107   aarch64_set_reg_u64 (cpu, rt, NO_SP,
1108                        aarch64_get_mem_s16 (cpu, address + displacement));
1109 }
1110
1111 /* 64 bit load sign-extended 32 bit scaled unsigned 12 bit.  */
1112 static void
1113 ldrsw_abs (sim_cpu *cpu, uint32_t offset)
1114 {
1115   unsigned rn = INSTR (9, 5);
1116   unsigned rt = INSTR (4, 0);
1117
1118   /* The target register may not be SP but the source may be.  */
1119   return aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32
1120                               (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1121                                + SCALE (offset, 32)));
1122 }
1123
1124 /* 64 bit load sign-extended 32 bit unscaled signed 9 bit
1125    with pre- or post-writeback.  */
1126 static void
1127 ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1128 {
1129   unsigned rn = INSTR (9, 5);
1130   unsigned rt = INSTR (4, 0);
1131   uint64_t address;
1132
1133   if (rn == rt && wb != NoWriteBack)
1134     HALT_UNALLOC;
1135
1136   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1137
1138   if (wb != Post)
1139     address += offset;
1140
1141   aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));
1142
1143   if (wb == Post)
1144     address += offset;
1145
1146   if (wb != NoWriteBack)
1147     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1148 }
1149
1150 /* 64 bit load sign-extended 32 bit scaled or unscaled zero-
1151    or sign-extended 32-bit register offset.  */
1152 static void
1153 ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1154 {
1155   unsigned rm = INSTR (20, 16);
1156   unsigned rn = INSTR (9, 5);
1157   unsigned rt = INSTR (4, 0);
1158   /* rn may reference SP, rm and rt must reference ZR  */
1159
1160   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1161   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1162   uint64_t displacement =  OPT_SCALE (extended, 32, scaling);
1163
1164   aarch64_set_reg_s64 (cpu, rt, NO_SP,
1165                        aarch64_get_mem_s32 (cpu, address + displacement));
1166 }
1167
/* N.B. with stores the value in the source register (rt) is written
   to the address formed from the base register (rn) modified by an
   offset register (rm) or an immediate offset.  */
1170
1171 /* 32 bit store scaled unsigned 12 bit.  */
1172 static void
1173 str32_abs (sim_cpu *cpu, uint32_t offset)
1174 {
1175   unsigned rn = INSTR (9, 5);
1176   unsigned rt = INSTR (4, 0);
1177
1178   /* The target register may not be SP but the source may be.  */
1179   aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
1180                              + SCALE (offset, 32)),
1181                        aarch64_get_reg_u32 (cpu, rt, NO_SP));
1182 }
1183
1184 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback.  */
1185 static void
1186 str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1187 {
1188   unsigned rn = INSTR (9, 5);
1189   unsigned rt = INSTR (4, 0);
1190   uint64_t address;
1191
1192   if (rn == rt && wb != NoWriteBack)
1193     HALT_UNALLOC;
1194
1195   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1196   if (wb != Post)
1197     address += offset;
1198
1199   aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));
1200
1201   if (wb == Post)
1202     address += offset;
1203
1204   if (wb != NoWriteBack)
1205     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1206 }
1207
1208 /* 32 bit store scaled or unscaled zero- or
1209    sign-extended 32-bit register offset.  */
1210 static void
1211 str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1212 {
1213   unsigned rm = INSTR (20, 16);
1214   unsigned rn = INSTR (9, 5);
1215   unsigned rt = INSTR (4, 0);
1216
1217   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1218   int64_t  extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1219   uint64_t displacement = OPT_SCALE (extended, 32, scaling);
1220
1221   aarch64_set_mem_u32 (cpu, address + displacement,
1222                        aarch64_get_reg_u64 (cpu, rt, NO_SP));
1223 }
1224
1225 /* 64 bit store scaled unsigned 12 bit.  */
1226 static void
1227 str_abs (sim_cpu *cpu, uint32_t offset)
1228 {
1229   unsigned rn = INSTR (9, 5);
1230   unsigned rt = INSTR (4, 0);
1231
1232   aarch64_set_mem_u64 (cpu,
1233                        aarch64_get_reg_u64 (cpu, rn, SP_OK)
1234                        + SCALE (offset, 64),
1235                        aarch64_get_reg_u64 (cpu, rt, NO_SP));
1236 }
1237
1238 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback.  */
1239 static void
1240 str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1241 {
1242   unsigned rn = INSTR (9, 5);
1243   unsigned rt = INSTR (4, 0);
1244   uint64_t address;
1245
1246   if (rn == rt && wb != NoWriteBack)
1247     HALT_UNALLOC;
1248
1249   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1250
1251   if (wb != Post)
1252     address += offset;
1253
1254   aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));
1255
1256   if (wb == Post)
1257     address += offset;
1258
1259   if (wb != NoWriteBack)
1260     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1261 }
1262
1263 /* 64 bit store scaled or unscaled zero-
1264    or sign-extended 32-bit register offset.  */
1265 static void
1266 str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1267 {
1268   unsigned rm = INSTR (20, 16);
1269   unsigned rn = INSTR (9, 5);
1270   unsigned rt = INSTR (4, 0);
1271   /* rn may reference SP, rm and rt must reference ZR  */
1272
1273   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1274   int64_t   extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1275                                extension);
1276   uint64_t displacement = OPT_SCALE (extended, 64, scaling);
1277
1278   aarch64_set_mem_u64 (cpu, address + displacement,
1279                        aarch64_get_reg_u64 (cpu, rt, NO_SP));
1280 }
1281
1282 /* 32 bit store byte scaled unsigned 12 bit.  */
1283 static void
1284 strb_abs (sim_cpu *cpu, uint32_t offset)
1285 {
1286   unsigned rn = INSTR (9, 5);
1287   unsigned rt = INSTR (4, 0);
1288
1289   /* The target register may not be SP but the source may be.
1290      There is no scaling required for a byte load.  */
1291   aarch64_set_mem_u8 (cpu,
1292                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
1293                       aarch64_get_reg_u8 (cpu, rt, NO_SP));
1294 }
1295
1296 /* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback.  */
1297 static void
1298 strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1299 {
1300   unsigned rn = INSTR (9, 5);
1301   unsigned rt = INSTR (4, 0);
1302   uint64_t address;
1303
1304   if (rn == rt && wb != NoWriteBack)
1305     HALT_UNALLOC;
1306
1307   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1308
1309   if (wb != Post)
1310     address += offset;
1311
1312   aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));
1313
1314   if (wb == Post)
1315     address += offset;
1316
1317   if (wb != NoWriteBack)
1318     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1319 }
1320
1321 /* 32 bit store byte scaled or unscaled zero-
1322    or sign-extended 32-bit register offset.  */
1323 static void
1324 strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1325 {
1326   unsigned rm = INSTR (20, 16);
1327   unsigned rn = INSTR (9, 5);
1328   unsigned rt = INSTR (4, 0);
1329   /* rn may reference SP, rm and rt must reference ZR  */
1330
1331   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1332   int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1333                                  extension);
1334
1335   /* There is no scaling required for a byte load.  */
1336   aarch64_set_mem_u8 (cpu, address + displacement,
1337                       aarch64_get_reg_u8 (cpu, rt, NO_SP));
1338 }
1339
1340 /* 32 bit store short scaled unsigned 12 bit.  */
1341 static void
1342 strh_abs (sim_cpu *cpu, uint32_t offset)
1343 {
1344   unsigned rn = INSTR (9, 5);
1345   unsigned rt = INSTR (4, 0);
1346
1347   /* The target register may not be SP but the source may be.  */
1348   aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1349                        + SCALE (offset, 16),
1350                        aarch64_get_reg_u16 (cpu, rt, NO_SP));
1351 }
1352
1353 /* 32 bit store short unscaled signed 9 bit with pre- or post-writeback.  */
1354 static void
1355 strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1356 {
1357   unsigned rn = INSTR (9, 5);
1358   unsigned rt = INSTR (4, 0);
1359   uint64_t address;
1360
1361   if (rn == rt && wb != NoWriteBack)
1362     HALT_UNALLOC;
1363
1364   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1365
1366   if (wb != Post)
1367     address += offset;
1368
1369   aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));
1370
1371   if (wb == Post)
1372     address += offset;
1373
1374   if (wb != NoWriteBack)
1375     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1376 }
1377
1378 /* 32 bit store short scaled or unscaled zero-
1379    or sign-extended 32-bit register offset.  */
1380 static void
1381 strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1382 {
1383   unsigned rm = INSTR (20, 16);
1384   unsigned rn = INSTR (9, 5);
1385   unsigned rt = INSTR (4, 0);
1386   /* rn may reference SP, rm and rt must reference ZR  */
1387
1388   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1389   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1390   uint64_t displacement =  OPT_SCALE (extended, 16, scaling);
1391
1392   aarch64_set_mem_u16 (cpu, address + displacement,
1393                        aarch64_get_reg_u16 (cpu, rt, NO_SP));
1394 }
1395
1396 /* Prefetch unsigned 12 bit.  */
1397 static void
1398 prfm_abs (sim_cpu *cpu, uint32_t offset)
1399 {
1400   /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1401                           00010 ==> PLDL2KEEP, 00001 ==> PLDL2STRM,
1402                           00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1403                           10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1404                           10010 ==> PSTL2KEEP, 10001 ==> PSTL2STRM,
1405                           10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1406                           ow ==> UNALLOC
1407      PrfOp prfop = prfop (instr, 4, 0);
1408      uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
1409      + SCALE (offset, 64).  */
1410
1411   /* TODO : implement prefetch of address.  */
1412 }
1413
1414 /* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset.  */
1415 static void
1416 prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1417 {
1418   /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1419                           00010 ==> PLDL2KEEP, 00001 ==> PLDL2STRM,
1420                           00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1421                           10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1422                           10010 ==> PSTL2KEEP, 10001 ==> PSTL2STRM,
1423                           10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1424                           ow ==> UNALLOC
1425      rn may reference SP, rm may only reference ZR
1426      PrfOp prfop = prfop (instr, 4, 0);
1427      uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1428      int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1429                                 extension);
1430      uint64_t displacement =  OPT_SCALE (extended, 64, scaling);
1431      uint64_t address = base + displacement.  */
1432
1433   /* TODO : implement prefetch of address  */
1434 }
1435
1436 /* 64 bit pc-relative prefetch.  */
1437 static void
1438 prfm_pcrel (sim_cpu *cpu, int32_t offset)
1439 {
1440   /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1441                           00010 ==> PLDL2KEEP, 00001 ==> PLDL2STRM,
1442                           00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1443                           10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1444                           10010 ==> PSTL2KEEP, 10001 ==> PSTL2STRM,
1445                           10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1446                           ow ==> UNALLOC
1447      PrfOp prfop = prfop (instr, 4, 0);
1448      uint64_t address = aarch64_get_PC (cpu) + offset.  */
1449
1450   /* TODO : implement this  */
1451 }
1452
1453 /* Load-store exclusive.  */
1454
1455 static void
1456 ldxr (sim_cpu *cpu)
1457 {
1458   unsigned rn = INSTR (9, 5);
1459   unsigned rt = INSTR (4, 0);
1460   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1461   int size = INSTR (31, 30);
1462   /* int ordered = INSTR (15, 15);  */
1463   /* int exclusive = ! INSTR (23, 23);  */
1464
1465   switch (size)
1466     {
1467     case 0:
1468       aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
1469       break;
1470     case 1:
1471       aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
1472       break;
1473     case 2:
1474       aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
1475       break;
1476     case 3:
1477       aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
1478       break;
1479     }
1480 }
1481
1482 static void
1483 stxr (sim_cpu *cpu)
1484 {
1485   unsigned rn = INSTR (9, 5);
1486   unsigned rt = INSTR (4, 0);
1487   unsigned rs = INSTR (20, 16);
1488   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1489   int      size = INSTR (31, 30);
1490   uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);
1491
1492   switch (size)
1493     {
1494     case 0: aarch64_set_mem_u8 (cpu, address, data); break;
1495     case 1: aarch64_set_mem_u16 (cpu, address, data); break;
1496     case 2: aarch64_set_mem_u32 (cpu, address, data); break;
1497     case 3: aarch64_set_mem_u64 (cpu, address, data); break;
1498     }
1499
1500   aarch64_set_reg_u64 (cpu, rs, NO_SP, 0); /* Always exclusive...  */
1501 }
1502
1503 static void
1504 dexLoadLiteral (sim_cpu *cpu)
1505 {
1506   /* instr[29,27] == 011
1507      instr[25,24] == 00
1508      instr[31,30:26] = opc: 000 ==> LDRW,  001 ==> FLDRS
1509                             010 ==> LDRX,  011 ==> FLDRD
1510                             100 ==> LDRSW, 101 ==> FLDRQ
1511                             110 ==> PRFM, 111 ==> UNALLOC
1512      instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
1513      instr[23, 5] == simm19  */
1514
1515   /* unsigned rt = INSTR (4, 0);  */
1516   uint32_t dispatch = ( (INSTR (31, 30) << 1)
1517                         | INSTR (26, 26));
1518   int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);
1519
1520   switch (dispatch)
1521     {
1522     case 0: ldr32_pcrel (cpu, imm); break;
1523     case 1: fldrs_pcrel (cpu, imm); break;
1524     case 2: ldr_pcrel   (cpu, imm); break;
1525     case 3: fldrd_pcrel (cpu, imm); break;
1526     case 4: ldrsw_pcrel (cpu, imm); break;
1527     case 5: fldrq_pcrel (cpu, imm); break;
1528     case 6: prfm_pcrel  (cpu, imm); break;
1529     case 7:
1530     default:
1531       HALT_UNALLOC;
1532     }
1533 }
1534
1535 /* Immediate arithmetic
1536    The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
1537    value left shifted by 12 bits (done at decode).
1538
1539    N.B. the register args (dest, source) can normally be Xn or SP.
1540    the exception occurs for flag setting instructions which may
1541    only use Xn for the output (dest).  */
1542
1543 /* 32 bit add immediate.  */
1544 static void
1545 add32 (sim_cpu *cpu, uint32_t aimm)
1546 {
1547   unsigned rn = INSTR (9, 5);
1548   unsigned rd = INSTR (4, 0);
1549
1550   aarch64_set_reg_u64 (cpu, rd, SP_OK,
1551                        aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
1552 }
1553
1554 /* 64 bit add immediate.  */
1555 static void
1556 add64 (sim_cpu *cpu, uint32_t aimm)
1557 {
1558   unsigned rn = INSTR (9, 5);
1559   unsigned rd = INSTR (4, 0);
1560
1561   aarch64_set_reg_u64 (cpu, rd, SP_OK,
1562                        aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
1563 }
1564
1565 static void
1566 set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
1567 {
1568   int32_t   result = value1 + value2;
1569   int64_t   sresult = (int64_t) value1 + (int64_t) value2;
1570   uint64_t  uresult = (uint64_t)(uint32_t) value1
1571     + (uint64_t)(uint32_t) value2;
1572   uint32_t  flags = 0;
1573
1574   if (result == 0)
1575     flags |= Z;
1576
1577   if (result & (1 << 31))
1578     flags |= N;
1579
1580   if (uresult != result)
1581     flags |= C;
1582
1583   if (sresult != result)
1584     flags |= V;
1585
1586   aarch64_set_CPSR (cpu, flags);
1587 }
1588
1589 static void
1590 set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1591 {
1592   int64_t   sval1 = value1;
1593   int64_t   sval2 = value2;
1594   uint64_t  result = value1 + value2;
1595   int64_t   sresult = sval1 + sval2;
1596   uint32_t  flags = 0;
1597
1598   if (result == 0)
1599     flags |= Z;
1600
1601   if (result & (1ULL << 63))
1602     flags |= N;
1603
1604   if (sval1 < 0)
1605     {
1606       if (sval2 < 0)
1607         {
1608           /* Negative plus a negative.  Overflow happens if
1609              the result is greater than either of the operands.  */
1610           if (sresult > sval1 || sresult > sval2)
1611             flags |= V;
1612         }
1613       /* else Negative plus a positive.  Overflow cannot happen.  */
1614     }
1615   else /* value1 is +ve.  */
1616     {
1617       if (sval2 < 0)
1618         {
1619           /* Overflow can only occur if we computed "0 - MININT".  */
1620           if (sval1 == 0 && sval2 == (1LL << 63))
1621             flags |= V;
1622         }
1623       else
1624         {
1625           /* Postive plus positive - overflow has happened if the
1626              result is smaller than either of the operands.  */
1627           if (result < value1 || result < value2)
1628             flags |= V | C;
1629         }
1630     }
1631
1632   aarch64_set_CPSR (cpu, flags);
1633 }
1634
1635 #define NEG(a) (((a) & signbit) == signbit)
1636 #define POS(a) (((a) & signbit) == 0)
1637
1638 static void
1639 set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
1640 {
1641   uint32_t result = value1 - value2;
1642   uint32_t flags = 0;
1643   uint32_t signbit = 1U << 31;
1644
1645   if (result == 0)
1646     flags |= Z;
1647
1648   if (NEG (result))
1649     flags |= N;
1650
1651   if (   (NEG (value1) && POS (value2))
1652       || (NEG (value1) && POS (result))
1653       || (POS (value2) && POS (result)))
1654     flags |= C;
1655
1656   if (   (NEG (value1) && POS (value2) && POS (result))
1657       || (POS (value1) && NEG (value2) && NEG (result)))
1658     flags |= V;
1659
1660   aarch64_set_CPSR (cpu, flags);
1661 }
1662
1663 static void
1664 set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1665 {
1666   uint64_t result = value1 - value2;
1667   uint32_t flags = 0;
1668   uint64_t signbit = 1ULL << 63;
1669
1670   if (result == 0)
1671     flags |= Z;
1672
1673   if (NEG (result))
1674     flags |= N;
1675
1676   if (   (NEG (value1) && POS (value2))
1677       || (NEG (value1) && POS (result))
1678       || (POS (value2) && POS (result)))
1679     flags |= C;
1680
1681   if (   (NEG (value1) && POS (value2) && POS (result))
1682       || (POS (value1) && NEG (value2) && NEG (result)))
1683     flags |= V;
1684
1685   aarch64_set_CPSR (cpu, flags);
1686 }
1687
1688 static void
1689 set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
1690 {
1691   uint32_t flags = 0;
1692
1693   if (result == 0)
1694     flags |= Z;
1695   else
1696     flags &= ~ Z;
1697
1698   if (result & (1 << 31))
1699     flags |= N;
1700   else
1701     flags &= ~ N;
1702
1703   aarch64_set_CPSR (cpu, flags);
1704 }
1705
1706 static void
1707 set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
1708 {
1709   uint32_t flags = 0;
1710
1711   if (result == 0)
1712     flags |= Z;
1713   else
1714     flags &= ~ Z;
1715
1716   if (result & (1ULL << 63))
1717     flags |= N;
1718   else
1719     flags &= ~ N;
1720
1721   aarch64_set_CPSR (cpu, flags);
1722 }
1723
1724 /* 32 bit add immediate set flags.  */
1725 static void
1726 adds32 (sim_cpu *cpu, uint32_t aimm)
1727 {
1728   unsigned rn = INSTR (9, 5);
1729   unsigned rd = INSTR (4, 0);
1730   /* TODO : do we need to worry about signs here?  */
1731   int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);
1732
1733   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
1734   set_flags_for_add32 (cpu, value1, aimm);
1735 }
1736
1737 /* 64 bit add immediate set flags.  */
1738 static void
1739 adds64 (sim_cpu *cpu, uint32_t aimm)
1740 {
1741   unsigned rn = INSTR (9, 5);
1742   unsigned rd = INSTR (4, 0);
1743   uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1744   uint64_t value2 = aimm;
1745
1746   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1747   set_flags_for_add64 (cpu, value1, value2);
1748 }
1749
1750 /* 32 bit sub immediate.  */
1751 static void
1752 sub32 (sim_cpu *cpu, uint32_t aimm)
1753 {
1754   unsigned rn = INSTR (9, 5);
1755   unsigned rd = INSTR (4, 0);
1756
1757   aarch64_set_reg_u64 (cpu, rd, SP_OK,
1758                        aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
1759 }
1760
1761 /* 64 bit sub immediate.  */
1762 static void
1763 sub64 (sim_cpu *cpu, uint32_t aimm)
1764 {
1765   unsigned rn = INSTR (9, 5);
1766   unsigned rd = INSTR (4, 0);
1767
1768   aarch64_set_reg_u64 (cpu, rd, SP_OK,
1769                        aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
1770 }
1771
1772 /* 32 bit sub immediate set flags.  */
1773 static void
1774 subs32 (sim_cpu *cpu, uint32_t aimm)
1775 {
1776   unsigned rn = INSTR (9, 5);
1777   unsigned rd = INSTR (4, 0);
1778   uint32_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1779   uint32_t value2 = aimm;
1780
1781   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1782   set_flags_for_sub32 (cpu, value1, value2);
1783 }
1784
1785 /* 64 bit sub immediate set flags.  */
1786 static void
1787 subs64 (sim_cpu *cpu, uint32_t aimm)
1788 {
1789   unsigned rn = INSTR (9, 5);
1790   unsigned rd = INSTR (4, 0);
1791   uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1792   uint32_t value2 = aimm;
1793
1794   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1795   set_flags_for_sub64 (cpu, value1, value2);
1796 }
1797
1798 /* Data Processing Register.  */
1799
1800 /* First two helpers to perform the shift operations.  */
1801
1802 static inline uint32_t
1803 shifted32 (uint32_t value, Shift shift, uint32_t count)
1804 {
1805   switch (shift)
1806     {
1807     default:
1808     case LSL:
1809       return (value << count);
1810     case LSR:
1811       return (value >> count);
1812     case ASR:
1813       {
1814         int32_t svalue = value;
1815         return (svalue >> count);
1816       }
1817     case ROR:
1818       {
1819         uint32_t top = value >> count;
1820         uint32_t bottom = value << (32 - count);
1821         return (bottom | top);
1822       }
1823     }
1824 }
1825
1826 static inline uint64_t
1827 shifted64 (uint64_t value, Shift shift, uint32_t count)
1828 {
1829   switch (shift)
1830     {
1831     default:
1832     case LSL:
1833       return (value << count);
1834     case LSR:
1835       return (value >> count);
1836     case ASR:
1837       {
1838         int64_t svalue = value;
1839         return (svalue >> count);
1840       }
1841     case ROR:
1842       {
1843         uint64_t top = value >> count;
1844         uint64_t bottom = value << (64 - count);
1845         return (bottom | top);
1846       }
1847     }
1848 }
1849
1850 /* Arithmetic shifted register.
1851    These allow an optional LSL, ASR or LSR to the second source
1852    register with a count up to the register bit count.
1853
   N.B. register args may not be SP.  */
1855
1856 /* 32 bit ADD shifted register.  */
1857 static void
1858 add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1859 {
1860   unsigned rm = INSTR (20, 16);
1861   unsigned rn = INSTR (9, 5);
1862   unsigned rd = INSTR (4, 0);
1863
1864   aarch64_set_reg_u64 (cpu, rd, NO_SP,
1865                        aarch64_get_reg_u32 (cpu, rn, NO_SP)
1866                        + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1867                                     shift, count));
1868 }
1869
1870 /* 64 bit ADD shifted register.  */
1871 static void
1872 add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1873 {
1874   unsigned rm = INSTR (20, 16);
1875   unsigned rn = INSTR (9, 5);
1876   unsigned rd = INSTR (4, 0);
1877
1878   aarch64_set_reg_u64 (cpu, rd, NO_SP,
1879                        aarch64_get_reg_u64 (cpu, rn, NO_SP)
1880                        + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1881                                     shift, count));
1882 }
1883
1884 /* 32 bit ADD shifted register setting flags.  */
1885 static void
1886 adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1887 {
1888   unsigned rm = INSTR (20, 16);
1889   unsigned rn = INSTR (9, 5);
1890   unsigned rd = INSTR (4, 0);
1891
1892   uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1893   uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1894                                shift, count);
1895
1896   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1897   set_flags_for_add32 (cpu, value1, value2);
1898 }
1899
1900 /* 64 bit ADD shifted register setting flags.  */
1901 static void
1902 adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1903 {
1904   unsigned rm = INSTR (20, 16);
1905   unsigned rn = INSTR (9, 5);
1906   unsigned rd = INSTR (4, 0);
1907
1908   uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1909   uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1910                                shift, count);
1911
1912   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1913   set_flags_for_add64 (cpu, value1, value2);
1914 }
1915
1916 /* 32 bit SUB shifted register.  */
1917 static void
1918 sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1919 {
1920   unsigned rm = INSTR (20, 16);
1921   unsigned rn = INSTR (9, 5);
1922   unsigned rd = INSTR (4, 0);
1923
1924   aarch64_set_reg_u64 (cpu, rd, NO_SP,
1925                        aarch64_get_reg_u32 (cpu, rn, NO_SP)
1926                        - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1927                                     shift, count));
1928 }
1929
1930 /* 64 bit SUB shifted register.  */
1931 static void
1932 sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1933 {
1934   unsigned rm = INSTR (20, 16);
1935   unsigned rn = INSTR (9, 5);
1936   unsigned rd = INSTR (4, 0);
1937
1938   aarch64_set_reg_u64 (cpu, rd, NO_SP,
1939                        aarch64_get_reg_u64 (cpu, rn, NO_SP)
1940                        - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1941                                     shift, count));
1942 }
1943
1944 /* 32 bit SUB shifted register setting flags.  */
1945 static void
1946 subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1947 {
1948   unsigned rm = INSTR (20, 16);
1949   unsigned rn = INSTR (9, 5);
1950   unsigned rd = INSTR (4, 0);
1951
1952   uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1953   uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1954                               shift, count);
1955
1956   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1957   set_flags_for_sub32 (cpu, value1, value2);
1958 }
1959
1960 /* 64 bit SUB shifted register setting flags.  */
1961 static void
1962 subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1963 {
1964   unsigned rm = INSTR (20, 16);
1965   unsigned rn = INSTR (9, 5);
1966   unsigned rd = INSTR (4, 0);
1967
1968   uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1969   uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1970                                shift, count);
1971
1972   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1973   set_flags_for_sub64 (cpu, value1, value2);
1974 }
1975
1976 /* First a couple more helpers to fetch the
1977    relevant source register element either
1978    sign or zero extended as required by the
1979    extension value.  */
1980
1981 static uint32_t
1982 extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
1983 {
1984   switch (extension)
1985     {
1986     case UXTB: return aarch64_get_reg_u8  (cpu, lo, NO_SP);
1987     case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
1988     case UXTW: /* Fall through.  */
1989     case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
1990     case SXTB: return aarch64_get_reg_s8  (cpu, lo, NO_SP);
1991     case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
1992     case SXTW: /* Fall through.  */
1993     case SXTX: /* Fall through.  */
1994     default:   return aarch64_get_reg_s32 (cpu, lo, NO_SP);
1995   }
1996 }
1997
1998 static uint64_t
1999 extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
2000 {
2001   switch (extension)
2002     {
2003     case UXTB: return aarch64_get_reg_u8  (cpu, lo, NO_SP);
2004     case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2005     case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2006     case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
2007     case SXTB: return aarch64_get_reg_s8  (cpu, lo, NO_SP);
2008     case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2009     case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2010     case SXTX:
2011     default:   return aarch64_get_reg_s64 (cpu, lo, NO_SP);
2012     }
2013 }
2014
/* Arithmetic extending register
   These allow an optional sign or zero extension of some portion of
   the second source register followed by an optional left shift of
   between 0 and 4 bits.

   N.B output (dest) and first input arg (source) may normally be Xn
   or SP. However, for flag setting operations dest can only be
   Xn. Second input registers are always Xn.  */
2023
2024 /* 32 bit ADD extending register.  */
2025 static void
2026 add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2027 {
2028   unsigned rm = INSTR (20, 16);
2029   unsigned rn = INSTR (9, 5);
2030   unsigned rd = INSTR (4, 0);
2031
2032   aarch64_set_reg_u64 (cpu, rd, SP_OK,
2033                        aarch64_get_reg_u32 (cpu, rn, SP_OK)
2034                        + (extreg32 (cpu, rm, extension) << shift));
2035 }
2036
2037 /* 64 bit ADD extending register.
2038    N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0.  */
2039 static void
2040 add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2041 {
2042   unsigned rm = INSTR (20, 16);
2043   unsigned rn = INSTR (9, 5);
2044   unsigned rd = INSTR (4, 0);
2045
2046   aarch64_set_reg_u64 (cpu, rd, SP_OK,
2047                        aarch64_get_reg_u64 (cpu, rn, SP_OK)
2048                        + (extreg64 (cpu, rm, extension) << shift));
2049 }
2050
2051 /* 32 bit ADD extending register setting flags.  */
2052 static void
2053 adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2054 {
2055   unsigned rm = INSTR (20, 16);
2056   unsigned rn = INSTR (9, 5);
2057   unsigned rd = INSTR (4, 0);
2058
2059   uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2060   uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2061
2062   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2063   set_flags_for_add32 (cpu, value1, value2);
2064 }
2065
2066 /* 64 bit ADD extending register setting flags  */
2067 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0  */
2068 static void
2069 adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2070 {
2071   unsigned rm = INSTR (20, 16);
2072   unsigned rn = INSTR (9, 5);
2073   unsigned rd = INSTR (4, 0);
2074
2075   uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2076   uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2077
2078   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2079   set_flags_for_add64 (cpu, value1, value2);
2080 }
2081
2082 /* 32 bit SUB extending register.  */
2083 static void
2084 sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2085 {
2086   unsigned rm = INSTR (20, 16);
2087   unsigned rn = INSTR (9, 5);
2088   unsigned rd = INSTR (4, 0);
2089
2090   aarch64_set_reg_u64 (cpu, rd, SP_OK,
2091                        aarch64_get_reg_u32 (cpu, rn, SP_OK)
2092                        - (extreg32 (cpu, rm, extension) << shift));
2093 }
2094
2095 /* 64 bit SUB extending register.  */
2096 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0.  */
2097 static void
2098 sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2099 {
2100   unsigned rm = INSTR (20, 16);
2101   unsigned rn = INSTR (9, 5);
2102   unsigned rd = INSTR (4, 0);
2103
2104   aarch64_set_reg_u64 (cpu, rd, SP_OK,
2105                        aarch64_get_reg_u64 (cpu, rn, SP_OK)
2106                        - (extreg64 (cpu, rm, extension) << shift));
2107 }
2108
2109 /* 32 bit SUB extending register setting flags.  */
2110 static void
2111 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2112 {
2113   unsigned rm = INSTR (20, 16);
2114   unsigned rn = INSTR (9, 5);
2115   unsigned rd = INSTR (4, 0);
2116
2117   uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2118   uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2119
2120   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2121   set_flags_for_sub32 (cpu, value1, value2);
2122 }
2123
2124 /* 64 bit SUB extending register setting flags  */
2125 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0  */
2126 static void
2127 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2128 {
2129   unsigned rm = INSTR (20, 16);
2130   unsigned rn = INSTR (9, 5);
2131   unsigned rd = INSTR (4, 0);
2132
2133   uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2134   uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2135
2136   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2137   set_flags_for_sub64 (cpu, value1, value2);
2138 }
2139
2140 static void
2141 dexAddSubtractImmediate (sim_cpu *cpu)
2142 {
2143   /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
2144      instr[30]    = op : 0 ==> ADD, 1 ==> SUB
2145      instr[29]    = set : 0 ==> no flags, 1 ==> set flags
2146      instr[28,24] = 10001
2147      instr[23,22] = shift : 00 == LSL#0, 01 = LSL#12 1x = UNALLOC
2148      instr[21,10] = uimm12
2149      instr[9,5]   = Rn
2150      instr[4,0]   = Rd  */
2151
2152   /* N.B. the shift is applied at decode before calling the add/sub routine.  */
2153   uint32_t shift = INSTR (23, 22);
2154   uint32_t imm = INSTR (21, 10);
2155   uint32_t dispatch = INSTR (31, 29);
2156
2157   NYI_assert (28, 24, 0x11);
2158
2159   if (shift > 1)
2160     HALT_UNALLOC;
2161
2162   if (shift)
2163     imm <<= 12;
2164
2165   switch (dispatch)
2166     {
2167     case 0: add32 (cpu, imm); break;
2168     case 1: adds32 (cpu, imm); break;
2169     case 2: sub32 (cpu, imm); break;
2170     case 3: subs32 (cpu, imm); break;
2171     case 4: add64 (cpu, imm); break;
2172     case 5: adds64 (cpu, imm); break;
2173     case 6: sub64 (cpu, imm); break;
2174     case 7: subs64 (cpu, imm); break;
2175     }
2176 }
2177
2178 static void
2179 dexAddSubtractShiftedRegister (sim_cpu *cpu)
2180 {
2181   /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
2182      instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
2183      instr[28,24] = 01011
2184      instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
2185      instr[21]    = 0
2186      instr[20,16] = Rm
2187      instr[15,10] = count : must be 0xxxxx for 32 bit
2188      instr[9,5]   = Rn
2189      instr[4,0]   = Rd  */
2190
2191   uint32_t size = INSTR (31, 31);
2192   uint32_t count = INSTR (15, 10);
2193   Shift shiftType = INSTR (23, 22);
2194
2195   NYI_assert (28, 24, 0x0B);
2196   NYI_assert (21, 21, 0);
2197
2198   /* Shift encoded as ROR is unallocated.  */
2199   if (shiftType == ROR)
2200     HALT_UNALLOC;
2201
2202   /* 32 bit operations must have count[5] = 0
2203      or else we have an UNALLOC.  */
2204   if (size == 0 && uimm (count, 5, 5))
2205     HALT_UNALLOC;
2206
2207   /* Dispatch on size:op i.e instr [31,29].  */
2208   switch (INSTR (31, 29))
2209     {
2210     case 0: add32_shift  (cpu, shiftType, count); break;
2211     case 1: adds32_shift (cpu, shiftType, count); break;
2212     case 2: sub32_shift  (cpu, shiftType, count); break;
2213     case 3: subs32_shift (cpu, shiftType, count); break;
2214     case 4: add64_shift  (cpu, shiftType, count); break;
2215     case 5: adds64_shift (cpu, shiftType, count); break;
2216     case 6: sub64_shift  (cpu, shiftType, count); break;
2217     case 7: subs64_shift (cpu, shiftType, count); break;
2218     }
2219 }
2220
2221 static void
2222 dexAddSubtractExtendedRegister (sim_cpu *cpu)
2223 {
2224   /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
2225      instr[30]    = op : 0 ==> ADD, 1 ==> SUB
2226      instr[29]    = set? : 0 ==> no flags, 1 ==> set flags
2227      instr[28,24] = 01011
2228      instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
2229      instr[21]    = 1
2230      instr[20,16] = Rm
2231      instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
2232                              000 ==> LSL|UXTW, 001 ==> UXTZ,
2233                              000 ==> SXTB, 001 ==> SXTH,
2234                              000 ==> SXTW, 001 ==> SXTX,
2235      instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
2236      instr[9,5]   = Rn
2237      instr[4,0]   = Rd  */
2238
2239   Extension extensionType = INSTR (15, 13);
2240   uint32_t shift = INSTR (12, 10);
2241
2242   NYI_assert (28, 24, 0x0B);
2243   NYI_assert (21, 21, 1);
2244
2245   /* Shift may not exceed 4.  */
2246   if (shift > 4)
2247     HALT_UNALLOC;
2248
2249   /* Dispatch on size:op:set?.  */
2250   switch (INSTR (31, 29))
2251     {
2252     case 0: add32_ext  (cpu, extensionType, shift); break;
2253     case 1: adds32_ext (cpu, extensionType, shift); break;
2254     case 2: sub32_ext  (cpu, extensionType, shift); break;
2255     case 3: subs32_ext (cpu, extensionType, shift); break;
2256     case 4: add64_ext  (cpu, extensionType, shift); break;
2257     case 5: adds64_ext (cpu, extensionType, shift); break;
2258     case 6: sub64_ext  (cpu, extensionType, shift); break;
2259     case 7: subs64_ext (cpu, extensionType, shift); break;
2260     }
2261 }
2262
2263 /* Conditional data processing
2264    Condition register is implicit 3rd source.  */
2265
2266 /* 32 bit add with carry.  */
2267 /* N.B register args may not be SP.  */
2268
2269 static void
2270 adc32 (sim_cpu *cpu)
2271 {
2272   unsigned rm = INSTR (20, 16);
2273   unsigned rn = INSTR (9, 5);
2274   unsigned rd = INSTR (4, 0);
2275
2276   aarch64_set_reg_u64 (cpu, rd, NO_SP,
2277                        aarch64_get_reg_u32 (cpu, rn, NO_SP)
2278                        + aarch64_get_reg_u32 (cpu, rm, NO_SP)
2279                        + IS_SET (C));
2280 }
2281
2282 /* 64 bit add with carry  */
2283 static void
2284 adc64 (sim_cpu *cpu)
2285 {
2286   unsigned rm = INSTR (20, 16);
2287   unsigned rn = INSTR (9, 5);
2288   unsigned rd = INSTR (4, 0);
2289
2290   aarch64_set_reg_u64 (cpu, rd, NO_SP,
2291                        aarch64_get_reg_u64 (cpu, rn, NO_SP)
2292                        + aarch64_get_reg_u64 (cpu, rm, NO_SP)
2293                        + IS_SET (C));
2294 }
2295
2296 /* 32 bit add with carry setting flags.  */
2297 static void
2298 adcs32 (sim_cpu *cpu)
2299 {
2300   unsigned rm = INSTR (20, 16);
2301   unsigned rn = INSTR (9, 5);
2302   unsigned rd = INSTR (4, 0);
2303
2304   uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2305   uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2306   uint32_t carry = IS_SET (C);
2307
2308   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2309   set_flags_for_add32 (cpu, value1, value2 + carry);
2310 }
2311
2312 /* 64 bit add with carry setting flags.  */
2313 static void
2314 adcs64 (sim_cpu *cpu)
2315 {
2316   unsigned rm = INSTR (20, 16);
2317   unsigned rn = INSTR (9, 5);
2318   unsigned rd = INSTR (4, 0);
2319
2320   uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2321   uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2322   uint64_t carry = IS_SET (C);
2323
2324   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2325   set_flags_for_add64 (cpu, value1, value2 + carry);
2326 }
2327
2328 /* 32 bit sub with carry.  */
2329 static void
2330 sbc32 (sim_cpu *cpu)
2331 {
2332   unsigned rm = INSTR (20, 16);
2333   unsigned rn = INSTR (9, 5); /* ngc iff rn == 31.  */
2334   unsigned rd = INSTR (4, 0);
2335
2336   aarch64_set_reg_u64 (cpu, rd, NO_SP,
2337                        aarch64_get_reg_u32 (cpu, rn, NO_SP)
2338                        - aarch64_get_reg_u32 (cpu, rm, NO_SP)
2339                        - 1 + IS_SET (C));
2340 }
2341
2342 /* 64 bit sub with carry  */
2343 static void
2344 sbc64 (sim_cpu *cpu)
2345 {
2346   unsigned rm = INSTR (20, 16);
2347   unsigned rn = INSTR (9, 5);
2348   unsigned rd = INSTR (4, 0);
2349
2350   aarch64_set_reg_u64 (cpu, rd, NO_SP,
2351                        aarch64_get_reg_u64 (cpu, rn, NO_SP)
2352                        - aarch64_get_reg_u64 (cpu, rm, NO_SP)
2353                        - 1 + IS_SET (C));
2354 }
2355
2356 /* 32 bit sub with carry setting flags  */
2357 static void
2358 sbcs32 (sim_cpu *cpu)
2359 {
2360   unsigned rm = INSTR (20, 16);
2361   unsigned rn = INSTR (9, 5);
2362   unsigned rd = INSTR (4, 0);
2363
2364   uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2365   uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2366   uint32_t carry  = IS_SET (C);
2367   uint32_t result = value1 - value2 + 1 - carry;
2368
2369   aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2370   set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
2371 }
2372
2373 /* 64 bit sub with carry setting flags  */
2374 static void
2375 sbcs64 (sim_cpu *cpu)
2376 {
2377   unsigned rm = INSTR (20, 16);
2378   unsigned rn = INSTR (9, 5);
2379   unsigned rd = INSTR (4, 0);
2380
2381   uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2382   uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2383   uint64_t carry  = IS_SET (C);
2384   uint64_t result = value1 - value2 + 1 - carry;
2385
2386   aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2387   set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
2388 }
2389
2390 static void
2391 dexAddSubtractWithCarry (sim_cpu *cpu)
2392 {
2393   /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
2394      instr[30]    = op : 0 ==> ADC, 1 ==> SBC
2395      instr[29]    = set? : 0 ==> no flags, 1 ==> set flags
2396      instr[28,21] = 1 1010 000
2397      instr[20,16] = Rm
2398      instr[15,10] = op2 : 00000 ==> ok, ow ==> UNALLOC
2399      instr[9,5]   = Rn
2400      instr[4,0]   = Rd  */
2401
2402   uint32_t op2 = INSTR (15, 10);
2403
2404   NYI_assert (28, 21, 0xD0);
2405
2406   if (op2 != 0)
2407     HALT_UNALLOC;
2408
2409   /* Dispatch on size:op:set?.  */
2410   switch (INSTR (31, 29))
2411     {
2412     case 0: adc32 (cpu); break;
2413     case 1: adcs32 (cpu); break;
2414     case 2: sbc32 (cpu); break;
2415     case 3: sbcs32 (cpu); break;
2416     case 4: adc64 (cpu); break;
2417     case 5: adcs64 (cpu); break;
2418     case 6: sbc64 (cpu); break;
2419     case 7: sbcs64 (cpu); break;
2420     }
2421 }
2422
2423 static uint32_t
2424 testConditionCode (sim_cpu *cpu, CondCode cc)
2425 {
2426   /* This should be reduceable to branchless logic
2427      by some careful testing of bits in CC followed
2428      by the requisite masking and combining of bits
2429      from the flag register.
2430
2431      For now we do it with a switch.  */
2432   int res;
2433
2434   switch (cc)
2435     {
2436     case EQ:  res = IS_SET (Z);    break;
2437     case NE:  res = IS_CLEAR (Z);  break;
2438     case CS:  res = IS_SET (C);    break;
2439     case CC:  res = IS_CLEAR (C);  break;
2440     case MI:  res = IS_SET (N);    break;
2441     case PL:  res = IS_CLEAR (N);  break;
2442     case VS:  res = IS_SET (V);    break;
2443     case VC:  res = IS_CLEAR (V);  break;
2444     case HI:  res = IS_SET (C) && IS_CLEAR (Z);  break;
2445     case LS:  res = IS_CLEAR (C) || IS_SET (Z);  break;
2446     case GE:  res = IS_SET (N) == IS_SET (V);    break;
2447     case LT:  res = IS_SET (N) != IS_SET (V);    break;
2448     case GT:  res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V));  break;
2449     case LE:  res = IS_SET (Z) || (IS_SET (N) != IS_SET (V));    break;
2450     case AL:
2451     case NV:
2452     default:
2453       res = 1;
2454       break;
2455     }
2456   return res;
2457 }
2458
2459 static void
2460 CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn  */
2461 {
2462   /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
2463      instr[30]    = compare with positive (1) or negative value (0)
2464      instr[29,21] = 1 1101 0010
2465      instr[20,16] = Rm or const
2466      instr[15,12] = cond
2467      instr[11]    = compare reg (0) or const (1)
2468      instr[10]    = 0
2469      instr[9,5]   = Rn
2470      instr[4]     = 0
2471      instr[3,0]   = value for CPSR bits if the comparison does not take place.  */
2472   signed int negate;
2473   unsigned rm;
2474   unsigned rn;
2475
2476   NYI_assert (29, 21, 0x1d2);
2477   NYI_assert (10, 10, 0);
2478   NYI_assert (4, 4, 0);
2479
2480   if (! testConditionCode (cpu, INSTR (15, 12)))
2481     {
2482       aarch64_set_CPSR (cpu, INSTR (3, 0));
2483       return;
2484     }
2485
2486   negate = INSTR (30, 30) ? 1 : -1;
2487   rm = INSTR (20, 16);
2488   rn = INSTR ( 9,  5);
2489
2490   if (INSTR (31, 31))
2491     {
2492       if (INSTR (11, 11))
2493         set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2494                              negate * (uint64_t) rm);
2495       else
2496         set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2497                              negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
2498     }
2499   else
2500     {
2501       if (INSTR (11, 11))
2502         set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2503                              negate * rm);
2504       else
2505         set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2506                              negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
2507     }
2508 }
2509
2510 static void
2511 do_vec_MOV_whole_vector (sim_cpu *cpu)
2512 {
2513   /* MOV Vd.T, Vs.T  (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)
2514
2515      instr[31]    = 0
2516      instr[30]    = half(0)/full(1)
2517      instr[29,21] = 001110101
2518      instr[20,16] = Vs
2519      instr[15,10] = 000111
2520      instr[9,5]   = Vs
2521      instr[4,0]   = Vd  */
2522
2523   unsigned vs = INSTR (9, 5);
2524   unsigned vd = INSTR (4, 0);
2525
2526   NYI_assert (29, 21, 0x075);
2527   NYI_assert (15, 10, 0x07);
2528
2529   if (INSTR (20, 16) != vs)
2530     HALT_NYI;
2531
2532   if (INSTR (30, 30))
2533     aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));
2534
2535   aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
2536 }
2537
2538 static void
2539 do_vec_MOV_into_scalar (sim_cpu *cpu)
2540 {
2541   /* instr[31]    = 0
2542      instr[30]    = word(0)/long(1)
2543      instr[29,21] = 00 1110 000
2544      instr[20,18] = element size and index
2545      instr[17,10] = 00 0011 11
2546      instr[9,5]   = V source
2547      instr[4,0]   = R dest  */
2548
2549   unsigned vs = INSTR (9, 5);
2550   unsigned rd = INSTR (4, 0);
2551
2552   NYI_assert (29, 21, 0x070);
2553   NYI_assert (17, 10, 0x0F);
2554
2555   switch (INSTR (20, 18))
2556     {
2557     case 0x2:
2558       aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 0));
2559       break;
2560
2561     case 0x6:
2562       aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 1));
2563       break;
2564
2565     case 0x1:
2566     case 0x3:
2567     case 0x5:
2568     case 0x7:
2569       aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u32
2570                            (cpu, vs, INSTR (20, 19)));
2571       break;
2572
2573     default:
2574       HALT_NYI;
2575     }
2576 }
2577
2578 static void
2579 do_vec_INS (sim_cpu *cpu)
2580 {
2581   /* instr[31,21] = 01001110000
2582      instr[20,16] = element size and index
2583      instr[15,10] = 000111
2584      instr[9,5]   = W source
2585      instr[4,0]   = V dest  */
2586
2587   int index;
2588   unsigned rs = INSTR (9, 5);
2589   unsigned vd = INSTR (4, 0);
2590
2591   NYI_assert (31, 21, 0x270);
2592   NYI_assert (15, 10, 0x07);
2593
2594   if (INSTR (16, 16))
2595     {
2596       index = INSTR (20, 17);
2597       aarch64_set_vec_u8 (cpu, vd, index,
2598                           aarch64_get_reg_u8 (cpu, rs, NO_SP));
2599     }
2600   else if (INSTR (17, 17))
2601     {
2602       index = INSTR (20, 18);
2603       aarch64_set_vec_u16 (cpu, vd, index,
2604                            aarch64_get_reg_u16 (cpu, rs, NO_SP));
2605     }
2606   else if (INSTR (18, 18))
2607     {
2608       index = INSTR (20, 19);
2609       aarch64_set_vec_u32 (cpu, vd, index,
2610                            aarch64_get_reg_u32 (cpu, rs, NO_SP));
2611     }
2612   else if (INSTR (19, 19))
2613     {
2614       index = INSTR (20, 20);
2615       aarch64_set_vec_u64 (cpu, vd, index,
2616                            aarch64_get_reg_u64 (cpu, rs, NO_SP));
2617     }
2618   else
2619     HALT_NYI;
2620 }
2621
2622 static void
2623 do_vec_DUP_vector_into_vector (sim_cpu *cpu)
2624 {
2625   /* instr[31]    = 0
2626      instr[30]    = half(0)/full(1)
2627      instr[29,21] = 00 1110 000
2628      instr[20,16] = element size and index
2629      instr[15,10] = 0000 01
2630      instr[9,5]   = V source
2631      instr[4,0]   = V dest.  */
2632
2633   unsigned full = INSTR (30, 30);
2634   unsigned vs = INSTR (9, 5);
2635   unsigned vd = INSTR (4, 0);
2636   int i, index;
2637
2638   NYI_assert (29, 21, 0x070);
2639   NYI_assert (15, 10, 0x01);
2640
2641   if (INSTR (16, 16))
2642     {
2643       index = INSTR (20, 17);
2644
2645       for (i = 0; i < (full ? 16 : 8); i++)
2646         aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index));
2647     }
2648   else if (INSTR (17, 17))
2649     {
2650       index = INSTR (20, 18);
2651
2652       for (i = 0; i < (full ? 8 : 4); i++)
2653         aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index));
2654     }
2655   else if (INSTR (18, 18))
2656     {
2657       index = INSTR (20, 19);
2658
2659       for (i = 0; i < (full ? 4 : 2); i++)
2660         aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index));
2661     }
2662   else
2663     {
2664       if (INSTR (19, 19) == 0)
2665         HALT_UNALLOC;
2666
2667       if (! full)
2668         HALT_UNALLOC;
2669
2670       index = INSTR (20, 20);
2671
2672       for (i = 0; i < 2; i++)
2673         aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index));
2674     }
2675 }
2676
2677 static void
2678 do_vec_TBL (sim_cpu *cpu)
2679 {
2680   /* instr[31]    = 0
2681      instr[30]    = half(0)/full(1)
2682      instr[29,21] = 00 1110 000
2683      instr[20,16] = Vm
2684      instr[15]    = 0
2685      instr[14,13] = vec length
2686      instr[12,10] = 000
2687      instr[9,5]   = V start
2688      instr[4,0]   = V dest  */
2689
2690   int full    = INSTR (30, 30);
2691   int len     = INSTR (14, 13) + 1;
2692   unsigned vm = INSTR (20, 16);
2693   unsigned vn = INSTR (9, 5);
2694   unsigned vd = INSTR (4, 0);
2695   unsigned i;
2696
2697   NYI_assert (29, 21, 0x070);
2698   NYI_assert (12, 10, 0);
2699
2700   for (i = 0; i < (full ? 16 : 8); i++)
2701     {
2702       unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i);
2703       uint8_t val;
2704
2705       if (selector < 16)
2706         val = aarch64_get_vec_u8 (cpu, vn, selector);
2707       else if (selector < 32)
2708         val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16);
2709       else if (selector < 48)
2710         val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32);
2711       else if (selector < 64)
2712         val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48);
2713       else
2714         val = 0;
2715
2716       aarch64_set_vec_u8 (cpu, vd, i, val);
2717     }
2718 }
2719
2720 static void
2721 do_vec_TRN (sim_cpu *cpu)
2722 {
2723   /* instr[31]    = 0
2724      instr[30]    = half(0)/full(1)
2725      instr[29,24] = 00 1110
2726      instr[23,22] = size
2727      instr[21]    = 0
2728      instr[20,16] = Vm
2729      instr[15]    = 0
2730      instr[14]    = TRN1 (0) / TRN2 (1)
2731      instr[13,10] = 1010
2732      instr[9,5]   = V source
2733      instr[4,0]   = V dest.  */
2734
2735   int full    = INSTR (30, 30);
2736   int second  = INSTR (14, 14);
2737   unsigned vm = INSTR (20, 16);
2738   unsigned vn = INSTR (9, 5);
2739   unsigned vd = INSTR (4, 0);
2740   unsigned i;
2741
2742   NYI_assert (29, 24, 0x0E);
2743   NYI_assert (13, 10, 0xA);
2744
2745   switch (INSTR (23, 22))
2746     {
2747     case 0:
2748       for (i = 0; i < (full ? 8 : 4); i++)
2749         {
2750           aarch64_set_vec_u8
2751             (cpu, vd, i * 2,
2752              aarch64_get_vec_u8 (cpu, second ? vm : vn, i * 2));
2753           aarch64_set_vec_u8
2754             (cpu, vd, 1 * 2 + 1,
2755              aarch64_get_vec_u8 (cpu, second ? vn : vm, i * 2 + 1));
2756         }
2757       break;
2758
2759     case 1:
2760       for (i = 0; i < (full ? 4 : 2); i++)
2761         {
2762           aarch64_set_vec_u16
2763             (cpu, vd, i * 2,
2764              aarch64_get_vec_u16 (cpu, second ? vm : vn, i * 2));
2765           aarch64_set_vec_u16
2766             (cpu, vd, 1 * 2 + 1,
2767              aarch64_get_vec_u16 (cpu, second ? vn : vm, i * 2 + 1));
2768         }
2769       break;
2770
2771     case 2:
2772       aarch64_set_vec_u32
2773         (cpu, vd, 0, aarch64_get_vec_u32 (cpu, second ? vm : vn, 0));
2774       aarch64_set_vec_u32
2775         (cpu, vd, 1, aarch64_get_vec_u32 (cpu, second ? vn : vm, 1));
2776       aarch64_set_vec_u32
2777         (cpu, vd, 2, aarch64_get_vec_u32 (cpu, second ? vm : vn, 2));
2778       aarch64_set_vec_u32
2779         (cpu, vd, 3, aarch64_get_vec_u32 (cpu, second ? vn : vm, 3));
2780       break;
2781
2782     case 3:
2783       if (! full)
2784         HALT_UNALLOC;
2785
2786       aarch64_set_vec_u64 (cpu, vd, 0,
2787                            aarch64_get_vec_u64 (cpu, second ? vm : vn, 0));
2788       aarch64_set_vec_u64 (cpu, vd, 1,
2789                            aarch64_get_vec_u64 (cpu, second ? vn : vm, 1));
2790       break;
2791     }
2792 }
2793
2794 static void
2795 do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
2796 {
2797   /* instr[31]    = 0
2798      instr[30]    = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
2799                     [must be 1 for 64-bit xfer]
2800      instr[29,20] = 00 1110 0000
2801      instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
2802                                   0100=> 32-bits. 1000=>64-bits
2803      instr[15,10] = 0000 11
2804      instr[9,5]   = W source
2805      instr[4,0]   = V dest.  */
2806
2807   unsigned i;
2808   unsigned Vd = INSTR (4, 0);
2809   unsigned Rs = INSTR (9, 5);
2810   int both    = INSTR (30, 30);
2811
2812   NYI_assert (29, 20, 0x0E0);
2813   NYI_assert (15, 10, 0x03);
2814
2815   switch (INSTR (19, 16))
2816     {
2817     case 1:
2818       for (i = 0; i < (both ? 16 : 8); i++)
2819         aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
2820       break;
2821
2822     case 2:
2823       for (i = 0; i < (both ? 8 : 4); i++)
2824         aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
2825       break;
2826
2827     case 4:
2828       for (i = 0; i < (both ? 4 : 2); i++)
2829         aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
2830       break;
2831
2832     case 8:
2833       if (!both)
2834         HALT_NYI;
2835       aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2836       aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2837       break;
2838
2839     default:
2840       HALT_NYI;
2841     }
2842 }
2843
2844 static void
2845 do_vec_UZP (sim_cpu *cpu)
2846 {
2847   /* instr[31]    = 0
2848      instr[30]    = half(0)/full(1)
2849      instr[29,24] = 00 1110
2850      instr[23,22] = size: byte(00), half(01), word (10), long (11)
2851      instr[21]    = 0
2852      instr[20,16] = Vm
2853      instr[15]    = 0
2854      instr[14]    = lower (0) / upper (1)
2855      instr[13,10] = 0110
2856      instr[9,5]   = Vn
2857      instr[4,0]   = Vd.  */
2858
2859   int full = INSTR (30, 30);
2860   int upper = INSTR (14, 14);
2861
2862   unsigned vm = INSTR (20, 16);
2863   unsigned vn = INSTR (9, 5);
2864   unsigned vd = INSTR (4, 0);
2865
2866   uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
2867   uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
2868   uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
2869   uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
2870
2871   uint64_t val1 = 0;
2872   uint64_t val2 = 0;
2873
2874   uint64_t input1 = upper ? val_n1 : val_m1;
2875   uint64_t input2 = upper ? val_n2 : val_m2;
2876   unsigned i;
2877
2878   NYI_assert (29, 24, 0x0E);
2879   NYI_assert (21, 21, 0);
2880   NYI_assert (15, 15, 0);
2881   NYI_assert (13, 10, 6);
2882
2883   switch (INSTR (23, 23))
2884     {
2885     case 0:
2886       for (i = 0; i < 8; i++)
2887         {
2888           val1 |= (input1 >> (i * 8)) & (0xFFULL << (i * 8));
2889           val2 |= (input2 >> (i * 8)) & (0xFFULL << (i * 8));
2890         }
2891       break;
2892
2893     case 1:
2894       for (i = 0; i < 4; i++)
2895         {
2896           val1 |= (input1 >> (i * 16)) & (0xFFFFULL << (i * 16));
2897           val2 |= (input2 >> (i * 16)) & (0xFFFFULL << (i * 16));
2898         }
2899       break;
2900
2901     case 2:
2902       val1 = ((input1 & 0xFFFFFFFF) | ((input1 >> 32) & 0xFFFFFFFF00000000ULL));
2903       val2 = ((input2 & 0xFFFFFFFF) | ((input2 >> 32) & 0xFFFFFFFF00000000ULL));
2904
2905     case 3:
2906       val1 = input1;
2907       val2 = input2;
2908            break;
2909     }
2910
2911   aarch64_set_vec_u64 (cpu, vd, 0, val1);
2912   if (full)
2913     aarch64_set_vec_u64 (cpu, vd, 1, val2);
2914 }
2915
2916 static void
2917 do_vec_ZIP (sim_cpu *cpu)
2918 {
2919   /* instr[31]    = 0
2920      instr[30]    = half(0)/full(1)
2921      instr[29,24] = 00 1110
2922      instr[23,22] = size: byte(00), hald(01), word (10), long (11)
2923      instr[21]    = 0
2924      instr[20,16] = Vm
2925      instr[15]    = 0
2926      instr[14]    = lower (0) / upper (1)
2927      instr[13,10] = 1110
2928      instr[9,5]   = Vn
2929      instr[4,0]   = Vd.  */
2930
2931   int full = INSTR (30, 30);
2932   int upper = INSTR (14, 14);
2933
2934   unsigned vm = INSTR (20, 16);
2935   unsigned vn = INSTR (9, 5);
2936   unsigned vd = INSTR (4, 0);
2937
2938   uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
2939   uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
2940   uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
2941   uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
2942
2943   uint64_t val1 = 0;
2944   uint64_t val2 = 0;
2945
2946   uint64_t input1 = upper ? val_n1 : val_m1;
2947   uint64_t input2 = upper ? val_n2 : val_m2;
2948
2949   NYI_assert (29, 24, 0x0E);
2950   NYI_assert (21, 21, 0);
2951   NYI_assert (15, 15, 0);
2952   NYI_assert (13, 10, 0xE);
2953
2954   switch (INSTR (23, 23))
2955     {
2956     case 0:
2957       val1 =
2958           ((input1 <<  0) & (0xFF    <<  0))
2959         | ((input2 <<  8) & (0xFF    <<  8))
2960         | ((input1 <<  8) & (0xFF    << 16))
2961         | ((input2 << 16) & (0xFF    << 24))
2962         | ((input1 << 16) & (0xFFULL << 32))
2963         | ((input2 << 24) & (0xFFULL << 40))
2964         | ((input1 << 24) & (0xFFULL << 48))
2965         | ((input2 << 32) & (0xFFULL << 56));
2966
2967       val2 =
2968           ((input1 >> 32) & (0xFF    <<  0))
2969         | ((input2 >> 24) & (0xFF    <<  8))
2970         | ((input1 >> 24) & (0xFF    << 16))
2971         | ((input2 >> 16) & (0xFF    << 24))
2972         | ((input1 >> 16) & (0xFFULL << 32))
2973         | ((input2 >>  8) & (0xFFULL << 40))
2974         | ((input1 >>  8) & (0xFFULL << 48))
2975         | ((input2 >>  0) & (0xFFULL << 56));
2976       break;
2977
2978     case 1:
2979       val1 =
2980           ((input1 <<  0) & (0xFFFF    <<  0))
2981         | ((input2 << 16) & (0xFFFF    << 16))
2982         | ((input1 << 16) & (0xFFFFULL << 32))
2983         | ((input2 << 32) & (0xFFFFULL << 48));
2984
2985       val2 =
2986           ((input1 >> 32) & (0xFFFF    <<  0))
2987         | ((input2 >> 16) & (0xFFFF    << 16))
2988         | ((input1 >> 16) & (0xFFFFULL << 32))
2989         | ((input2 >>  0) & (0xFFFFULL << 48));
2990       break;
2991
2992     case 2:
2993       val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
2994       val2 = (input2 & 0xFFFFFFFFULL) | (input1 << 32);
2995       break;
2996
2997     case 3:
2998       val1 = input1;
2999       val2 = input2;
3000       break;
3001     }
3002
3003   aarch64_set_vec_u64 (cpu, vd, 0, val1);
3004   if (full)
3005     aarch64_set_vec_u64 (cpu, vd, 1, val2);
3006 }
3007
3008 /* Floating point immediates are encoded in 8 bits.
3009    fpimm[7] = sign bit.
3010    fpimm[6:4] = signed exponent.
3011    fpimm[3:0] = fraction (assuming leading 1).
3012    i.e. F = s * 1.f * 2^(e - b).  */
3013
static float
fp_immediate_for_encoding_32 (uint32_t imm8)
{
  /* Expand the 8-bit floating point immediate IMM8 into a float.
     Bit 7 is the sign, bits 6-4 the exponent field and bits 3-0 the
     fraction.  */
  const uint32_t negative = (imm8 >> 7) & 0x1;
  const uint32_t exponent = (imm8 >> 4) & 0x7;
  const uint32_t fraction = imm8 & 0xf;
  uint32_t steps;

  /* The mantissa is 1.f, i.e. (16 + f) / 16.  */
  float value = (16.0 + fraction) / 16.0;

  if (exponent < 4)
    {
      /* Exponent fields 0..3 scale the mantissa up by 2^(e + 1).  */
      for (steps = exponent + 1; steps > 0; steps--)
        value *= 2.0;
    }
  else
    {
      /* Exponent fields 4..7 scale the mantissa down by 2^(7 - e).  */
      for (steps = 7 - exponent; steps > 0; steps--)
        value /= 2.0;
    }

  return negative ? - value : value;
}
3048
static double
fp_immediate_for_encoding_64 (uint32_t imm8)
{
  /* Expand the 8-bit floating point immediate IMM8 into a double.
     Bit 7 is the sign, bits 6-4 the exponent field and bits 3-0 the
     fraction.  */
  const uint32_t negative = (imm8 >> 7) & 0x1;
  const uint32_t exponent = (imm8 >> 4) & 0x7;
  const uint32_t fraction = imm8 & 0xf;
  uint32_t steps;

  /* The mantissa is 1.f, i.e. (16 + f) / 16.  */
  double value = (16.0 + fraction) / 16.0;

  if (exponent < 4)
    {
      /* Exponent fields 0..3 scale the mantissa up by 2^(e + 1).  */
      for (steps = exponent + 1; steps > 0; steps--)
        value *= 2.0;
    }
  else
    {
      /* Exponent fields 4..7 scale the mantissa down by 2^(7 - e).  */
      for (steps = 7 - exponent; steps > 0; steps--)
        value /= 2.0;
    }

  return negative ? - value : value;
}
3083
3084 static void
3085 do_vec_MOV_immediate (sim_cpu *cpu)
3086 {
3087   /* instr[31]    = 0
3088      instr[30]    = full/half selector
3089      instr[29,19] = 00111100000
3090      instr[18,16] = high 3 bits of uimm8
3091      instr[15,12] = size & shift:
3092                                   0000 => 32-bit
3093                                   0010 => 32-bit + LSL#8
3094                                   0100 => 32-bit + LSL#16
3095                                   0110 => 32-bit + LSL#24
3096                                   1010 => 16-bit + LSL#8
3097                                   1000 => 16-bit
3098                                   1101 => 32-bit + MSL#16
3099                                   1100 => 32-bit + MSL#8
3100                                   1110 => 8-bit
3101                                   1111 => double
3102      instr[11,10] = 01
3103      instr[9,5]   = low 5-bits of uimm8
3104      instr[4,0]   = Vd.  */
3105
3106   int full     = INSTR (30, 30);
3107   unsigned vd  = INSTR (4, 0);
3108   unsigned val = INSTR (18, 16) << 5
3109     | INSTR (9, 5);
3110   unsigned i;
3111
3112   NYI_assert (29, 19, 0x1E0);
3113   NYI_assert (11, 10, 1);
3114
3115   switch (INSTR (15, 12))
3116     {
3117     case 0x0: /* 32-bit, no shift.  */
3118     case 0x2: /* 32-bit, shift by 8.  */
3119     case 0x4: /* 32-bit, shift by 16.  */
3120     case 0x6: /* 32-bit, shift by 24.  */
3121       val <<= (8 * INSTR (14, 13));
3122       for (i = 0; i < (full ? 4 : 2); i++)
3123         aarch64_set_vec_u32 (cpu, vd, i, val);
3124       break;
3125
3126     case 0xa: /* 16-bit, shift by 8.  */
3127       val <<= 8;
3128       /* Fall through.  */
3129     case 0x8: /* 16-bit, no shift.  */
3130       for (i = 0; i < (full ? 8 : 4); i++)
3131         aarch64_set_vec_u16 (cpu, vd, i, val);
3132       /* Fall through.  */
3133     case 0xd: /* 32-bit, mask shift by 16.  */
3134       val <<= 8;
3135       val |= 0xFF;
3136       /* Fall through.  */
3137     case 0xc: /* 32-bit, mask shift by 8. */
3138       val <<= 8;
3139       val |= 0xFF;
3140       for (i = 0; i < (full ? 4 : 2); i++)
3141         aarch64_set_vec_u32 (cpu, vd, i, val);
3142       break;
3143
3144     case 0xe: /* 8-bit, no shift.  */
3145       for (i = 0; i < (full ? 16 : 8); i++)
3146         aarch64_set_vec_u8 (cpu, vd, i, val);
3147       break;
3148
3149     case 0xf: /* FMOV Vs.{2|4}S, #fpimm.  */
3150       {
3151         float u = fp_immediate_for_encoding_32 (val);
3152         for (i = 0; i < (full ? 4 : 2); i++)
3153           aarch64_set_vec_float (cpu, vd, i, u);
3154         break;
3155       }
3156
3157     default:
3158       HALT_NYI;
3159     }
3160 }
3161
3162 static void
3163 do_vec_MVNI (sim_cpu *cpu)
3164 {
3165   /* instr[31]    = 0
3166      instr[30]    = full/half selector
3167      instr[29,19] = 10111100000
3168      instr[18,16] = high 3 bits of uimm8
3169      instr[15,12] = selector
3170      instr[11,10] = 01
3171      instr[9,5]   = low 5-bits of uimm8
3172      instr[4,0]   = Vd.  */
3173
3174   int full     = INSTR (30, 30);
3175   unsigned vd  = INSTR (4, 0);
3176   unsigned val = INSTR (18, 16) << 5
3177     | INSTR (9, 5);
3178   unsigned i;
3179
3180   NYI_assert (29, 19, 0x5E0);
3181   NYI_assert (11, 10, 1);
3182
3183   switch (INSTR (15, 12))
3184     {
3185     case 0x0: /* 32-bit, no shift.  */
3186     case 0x2: /* 32-bit, shift by 8.  */
3187     case 0x4: /* 32-bit, shift by 16.  */
3188     case 0x6: /* 32-bit, shift by 24.  */
3189       val <<= (8 * INSTR (14, 13));
3190       val = ~ val;
3191       for (i = 0; i < (full ? 4 : 2); i++)
3192         aarch64_set_vec_u32 (cpu, vd, i, val);
3193       return;
3194
3195     case 0xa: /* 16-bit, 8 bit shift. */
3196       val <<= 8;
3197     case 0x8: /* 16-bit, no shift. */
3198       val = ~ val;
3199       for (i = 0; i < (full ? 8 : 4); i++)
3200         aarch64_set_vec_u16 (cpu, vd, i, val);
3201       return;
3202
3203     case 0xd: /* 32-bit, mask shift by 16.  */
3204       val <<= 8;
3205       val |= 0xFF;
3206     case 0xc: /* 32-bit, mask shift by 8. */
3207       val <<= 8;
3208       val |= 0xFF;
3209       val = ~ val;
3210       for (i = 0; i < (full ? 4 : 2); i++)
3211         aarch64_set_vec_u32 (cpu, vd, i, val);
3212       return;
3213
3214     case 0xE: /* MOVI Dn, #mask64 */
3215       {
3216         uint64_t mask = 0;
3217
3218         for (i = 0; i < 8; i++)
3219           if (val & (1 << i))
3220             mask |= (0xF << (i * 4));
3221         aarch64_set_vec_u64 (cpu, vd, 0, mask);
3222         aarch64_set_vec_u64 (cpu, vd, 1, 0);
3223         return;
3224       }
3225
3226     case 0xf: /* FMOV Vd.2D, #fpimm.  */
3227       {
3228         double u = fp_immediate_for_encoding_64 (val);
3229
3230         if (! full)
3231           HALT_UNALLOC;
3232
3233         aarch64_set_vec_double (cpu, vd, 0, u);
3234         aarch64_set_vec_double (cpu, vd, 1, u);
3235         return;
3236       }
3237
3238     default:
3239       HALT_NYI;
3240     }
3241 }
3242
/* Absolute value of VALUE.  The argument is evaluated more than once,
   so it must not have side effects.  */
#define ABS(VALUE) (((VALUE) < 0) ? (- (VALUE)) : (VALUE))
3244
3245 static void
3246 do_vec_ABS (sim_cpu *cpu)
3247 {
3248   /* instr[31]    = 0
3249      instr[30]    = half(0)/full(1)
3250      instr[29,24] = 00 1110
3251      instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3252      instr[21,10] = 10 0000 1011 10
3253      instr[9,5]   = Vn
3254      instr[4.0]   = Vd.  */
3255
3256   unsigned vn = INSTR (9, 5);
3257   unsigned vd = INSTR (4, 0);
3258   unsigned full = INSTR (30, 30);
3259   unsigned i;
3260
3261   NYI_assert (29, 24, 0x0E);
3262   NYI_assert (21, 10, 0x82E);
3263
3264   switch (INSTR (23, 22))
3265     {
3266     case 0:
3267       for (i = 0; i < (full ? 16 : 8); i++)
3268         aarch64_set_vec_s8 (cpu, vd, i,
3269                             ABS (aarch64_get_vec_s8 (cpu, vn, i)));
3270       break;
3271
3272     case 1:
3273       for (i = 0; i < (full ? 8 : 4); i++)
3274         aarch64_set_vec_s16 (cpu, vd, i,
3275                              ABS (aarch64_get_vec_s16 (cpu, vn, i)));
3276       break;
3277
3278     case 2:
3279       for (i = 0; i < (full ? 4 : 2); i++)
3280         aarch64_set_vec_s32 (cpu, vd, i,
3281                              ABS (aarch64_get_vec_s32 (cpu, vn, i)));
3282       break;
3283
3284     case 3:
3285       if (! full)
3286         HALT_NYI;
3287       for (i = 0; i < 2; i++)
3288         aarch64_set_vec_s64 (cpu, vd, i,
3289                              ABS (aarch64_get_vec_s64 (cpu, vn, i)));
3290       break;
3291     }
3292 }
3293
3294 static void
3295 do_vec_ADDV (sim_cpu *cpu)
3296 {
3297   /* instr[31]    = 0
3298      instr[30]    = full/half selector
3299      instr[29,24] = 00 1110
3300      instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3301      instr[21,10] = 11 0001 1011 10
3302      instr[9,5]   = Vm
3303      instr[4.0]   = Rd.  */
3304
3305   unsigned vm = INSTR (9, 5);
3306   unsigned rd = INSTR (4, 0);
3307   unsigned i;
3308   uint64_t val = 0;
3309   int      full = INSTR (30, 30);
3310
3311   NYI_assert (29, 24, 0x0E);
3312   NYI_assert (21, 10, 0xC6E);
3313
3314   switch (INSTR (23, 22))
3315     {
3316     case 0:
3317       for (i = 0; i < (full ? 16 : 8); i++)
3318         val += aarch64_get_vec_u8 (cpu, vm, i);
3319       aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3320       return;
3321
3322     case 1:
3323       for (i = 0; i < (full ? 8 : 4); i++)
3324         val += aarch64_get_vec_u16 (cpu, vm, i);
3325       aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3326       return;
3327
3328     case 2:
3329       for (i = 0; i < (full ? 4 : 2); i++)
3330         val += aarch64_get_vec_u32 (cpu, vm, i);
3331       aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3332       return;
3333
3334     case 3:
3335       if (! full)
3336         HALT_UNALLOC;
3337       val = aarch64_get_vec_u64 (cpu, vm, 0);
3338       val += aarch64_get_vec_u64 (cpu, vm, 1);
3339       aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3340       return;
3341     }
3342 }
3343
3344 static void
3345 do_vec_ins_2 (sim_cpu *cpu)
3346 {
3347   /* instr[31,21] = 01001110000
3348      instr[20,18] = size & element selector
3349      instr[17,14] = 0000
3350      instr[13]    = direction: to vec(0), from vec (1)
3351      instr[12,10] = 111
3352      instr[9,5]   = Vm
3353      instr[4,0]   = Vd.  */
3354
3355   unsigned elem;
3356   unsigned vm = INSTR (9, 5);
3357   unsigned vd = INSTR (4, 0);
3358
3359   NYI_assert (31, 21, 0x270);
3360   NYI_assert (17, 14, 0);
3361   NYI_assert (12, 10, 7);
3362
3363   if (INSTR (13, 13) == 1)
3364     {
3365       if (INSTR (18, 18) == 1)
3366         {
3367           /* 32-bit moves.  */
3368           elem = INSTR (20, 19);
3369           aarch64_set_reg_u64 (cpu, vd, NO_SP,
3370                                aarch64_get_vec_u32 (cpu, vm, elem));
3371         }
3372       else
3373         {
3374           /* 64-bit moves.  */
3375           if (INSTR (19, 19) != 1)
3376             HALT_NYI;
3377
3378           elem = INSTR (20, 20);
3379           aarch64_set_reg_u64 (cpu, vd, NO_SP,
3380                                aarch64_get_vec_u64 (cpu, vm, elem));
3381         }
3382     }
3383   else
3384     {
3385       if (INSTR (18, 18) == 1)
3386         {
3387           /* 32-bit moves.  */
3388           elem = INSTR (20, 19);
3389           aarch64_set_vec_u32 (cpu, vd, elem,
3390                                aarch64_get_reg_u32 (cpu, vm, NO_SP));
3391         }
3392       else
3393         {
3394           /* 64-bit moves.  */
3395           if (INSTR (19, 19) != 1)
3396             HALT_NYI;
3397
3398           elem = INSTR (20, 20);
3399           aarch64_set_vec_u64 (cpu, vd, elem,
3400                                aarch64_get_reg_u64 (cpu, vm, NO_SP));
3401         }
3402     }
3403 }
3404
3405 static void
3406 do_vec_mull (sim_cpu *cpu)
3407 {
3408   /* instr[31]    = 0
3409      instr[30]    = lower(0)/upper(1) selector
3410      instr[29]    = signed(0)/unsigned(1)
3411      instr[28,24] = 0 1110
3412      instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
3413      instr[21]    = 1
3414      instr[20,16] = Vm
3415      instr[15,10] = 11 0000
3416      instr[9,5]   = Vn
3417      instr[4.0]   = Vd.  */
3418
3419   int    unsign = INSTR (29, 29);
3420   int    bias = INSTR (30, 30);
3421   unsigned vm = INSTR (20, 16);
3422   unsigned vn = INSTR ( 9,  5);
3423   unsigned vd = INSTR ( 4,  0);
3424   unsigned i;
3425
3426   NYI_assert (28, 24, 0x0E);
3427   NYI_assert (15, 10, 0x30);
3428
3429   switch (INSTR (23, 22))
3430     {
3431     case 0:
3432       if (bias)
3433         bias = 8;
3434       if (unsign)
3435         for (i = 0; i < 8; i++)
3436           aarch64_set_vec_u16 (cpu, vd, i,
3437                                aarch64_get_vec_u8 (cpu, vn, i + bias)
3438                                * aarch64_get_vec_u8 (cpu, vm, i + bias));
3439       else
3440         for (i = 0; i < 8; i++)
3441           aarch64_set_vec_s16 (cpu, vd, i,
3442                                aarch64_get_vec_s8 (cpu, vn, i + bias)
3443                                * aarch64_get_vec_s8 (cpu, vm, i + bias));
3444       return;
3445
3446     case 1:
3447       if (bias)
3448         bias = 4;
3449       if (unsign)
3450         for (i = 0; i < 4; i++)
3451           aarch64_set_vec_u32 (cpu, vd, i,
3452                                aarch64_get_vec_u16 (cpu, vn, i + bias)
3453                                * aarch64_get_vec_u16 (cpu, vm, i + bias));
3454       else
3455         for (i = 0; i < 4; i++)
3456           aarch64_set_vec_s32 (cpu, vd, i,
3457                                aarch64_get_vec_s16 (cpu, vn, i + bias)
3458                                * aarch64_get_vec_s16 (cpu, vm, i + bias));
3459       return;
3460
3461     case 2:
3462       if (bias)
3463         bias = 2;
3464       if (unsign)
3465         for (i = 0; i < 2; i++)
3466           aarch64_set_vec_u64 (cpu, vd, i,
3467                                (uint64_t) aarch64_get_vec_u32 (cpu, vn,
3468                                                                i + bias)
3469                                * (uint64_t) aarch64_get_vec_u32 (cpu, vm,
3470                                                                  i + bias));
3471       else
3472         for (i = 0; i < 2; i++)
3473           aarch64_set_vec_s64 (cpu, vd, i,
3474                                aarch64_get_vec_s32 (cpu, vn, i + bias)
3475                                * aarch64_get_vec_s32 (cpu, vm, i + bias));
3476       return;
3477
3478     case 3:
3479       HALT_NYI;
3480     }
3481 }
3482
3483 static void
3484 do_vec_fadd (sim_cpu *cpu)
3485 {
3486   /* instr[31]    = 0
3487      instr[30]    = half(0)/full(1)
3488      instr[29,24] = 001110
3489      instr[23]    = FADD(0)/FSUB(1)
3490      instr[22]    = float (0)/double(1)
3491      instr[21]    = 1
3492      instr[20,16] = Vm
3493      instr[15,10] = 110101
3494      instr[9,5]   = Vn
3495      instr[4.0]   = Vd.  */
3496
3497   unsigned vm = INSTR (20, 16);
3498   unsigned vn = INSTR (9, 5);
3499   unsigned vd = INSTR (4, 0);
3500   unsigned i;
3501   int      full = INSTR (30, 30);
3502
3503   NYI_assert (29, 24, 0x0E);
3504   NYI_assert (21, 21, 1);
3505   NYI_assert (15, 10, 0x35);
3506
3507   if (INSTR (23, 23))
3508     {
3509       if (INSTR (22, 22))
3510         {
3511           if (! full)
3512             HALT_NYI;
3513
3514           for (i = 0; i < 2; i++)
3515             aarch64_set_vec_double (cpu, vd, i,
3516                                     aarch64_get_vec_double (cpu, vn, i)
3517                                     - aarch64_get_vec_double (cpu, vm, i));
3518         }
3519       else
3520         {
3521           for (i = 0; i < (full ? 4 : 2); i++)
3522             aarch64_set_vec_float (cpu, vd, i,
3523                                    aarch64_get_vec_float (cpu, vn, i)
3524                                    - aarch64_get_vec_float (cpu, vm, i));
3525         }
3526     }
3527   else
3528     {
3529       if (INSTR (22, 22))
3530         {
3531           if (! full)
3532             HALT_NYI;
3533
3534           for (i = 0; i < 2; i++)
3535             aarch64_set_vec_double (cpu, vd, i,
3536                                     aarch64_get_vec_double (cpu, vm, i)
3537                                     + aarch64_get_vec_double (cpu, vn, i));
3538         }
3539       else
3540         {
3541           for (i = 0; i < (full ? 4 : 2); i++)
3542             aarch64_set_vec_float (cpu, vd, i,
3543                                    aarch64_get_vec_float (cpu, vm, i)
3544                                    + aarch64_get_vec_float (cpu, vn, i));
3545         }
3546     }
3547 }
3548
3549 static void
3550 do_vec_add (sim_cpu *cpu)
3551 {
3552   /* instr[31]    = 0
3553      instr[30]    = full/half selector
3554      instr[29,24] = 001110
3555      instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3556      instr[21]    = 1
3557      instr[20,16] = Vn
3558      instr[15,10] = 100001
3559      instr[9,5]   = Vm
3560      instr[4.0]   = Vd.  */
3561
3562   unsigned vm = INSTR (20, 16);
3563   unsigned vn = INSTR (9, 5);
3564   unsigned vd = INSTR (4, 0);
3565   unsigned i;
3566   int      full = INSTR (30, 30);
3567
3568   NYI_assert (29, 24, 0x0E);
3569   NYI_assert (21, 21, 1);
3570   NYI_assert (15, 10, 0x21);
3571
3572   switch (INSTR (23, 22))
3573     {
3574     case 0:
3575       for (i = 0; i < (full ? 16 : 8); i++)
3576         aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
3577                             + aarch64_get_vec_u8 (cpu, vm, i));
3578       return;
3579
3580     case 1:
3581       for (i = 0; i < (full ? 8 : 4); i++)
3582         aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
3583                              + aarch64_get_vec_u16 (cpu, vm, i));
3584       return;
3585
3586     case 2:
3587       for (i = 0; i < (full ? 4 : 2); i++)
3588         aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
3589                              + aarch64_get_vec_u32 (cpu, vm, i));
3590       return;
3591
3592     case 3:
3593       if (! full)
3594         HALT_UNALLOC;
3595       aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0)
3596                            + aarch64_get_vec_u64 (cpu, vm, 0));
3597       aarch64_set_vec_u64 (cpu, vd, 1,
3598                            aarch64_get_vec_u64 (cpu, vn, 1)
3599                            + aarch64_get_vec_u64 (cpu, vm, 1));
3600       return;
3601     }
3602 }
3603
3604 static void
3605 do_vec_mul (sim_cpu *cpu)
3606 {
3607   /* instr[31]    = 0
3608      instr[30]    = full/half selector
3609      instr[29,24] = 00 1110
3610      instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3611      instr[21]    = 1
3612      instr[20,16] = Vn
3613      instr[15,10] = 10 0111
3614      instr[9,5]   = Vm
3615      instr[4.0]   = Vd.  */
3616
3617   unsigned vm = INSTR (20, 16);
3618   unsigned vn = INSTR (9, 5);
3619   unsigned vd = INSTR (4, 0);
3620   unsigned i;
3621   int      full = INSTR (30, 30);
3622
3623   NYI_assert (29, 24, 0x0E);
3624   NYI_assert (21, 21, 1);
3625   NYI_assert (15, 10, 0x27);
3626
3627   switch (INSTR (23, 22))
3628     {
3629     case 0:
3630       for (i = 0; i < (full ? 16 : 8); i++)
3631         {
3632           uint16_t val = aarch64_get_vec_u8 (cpu, vn, i);
3633           val *= aarch64_get_vec_u8 (cpu, vm, i);
3634
3635           aarch64_set_vec_u16 (cpu, vd, i, val);
3636         }
3637       return;
3638
3639     case 1:
3640       for (i = 0; i < (full ? 8 : 4); i++)
3641         {
3642           uint32_t val = aarch64_get_vec_u16 (cpu, vn, i);
3643           val *= aarch64_get_vec_u16 (cpu, vm, i);
3644
3645           aarch64_set_vec_u32 (cpu, vd, i, val);
3646         }
3647       return;
3648
3649     case 2:
3650       for (i = 0; i < (full ? 4 : 2); i++)
3651         {
3652           uint64_t val = aarch64_get_vec_u32 (cpu, vn, i);
3653           val *= aarch64_get_vec_u32 (cpu, vm, i);
3654
3655           aarch64_set_vec_u64 (cpu, vd, i, val);
3656         }
3657       return;
3658
3659     case 3:
3660       HALT_UNALLOC;
3661     }
3662 }
3663
3664 static void
3665 do_vec_MLA (sim_cpu *cpu)
3666 {
3667   /* instr[31]    = 0
3668      instr[30]    = full/half selector
3669      instr[29,24] = 00 1110
3670      instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3671      instr[21]    = 1
3672      instr[20,16] = Vn
3673      instr[15,10] = 1001 01
3674      instr[9,5]   = Vm
3675      instr[4.0]   = Vd.  */
3676
3677   unsigned vm = INSTR (20, 16);
3678   unsigned vn = INSTR (9, 5);
3679   unsigned vd = INSTR (4, 0);
3680   unsigned i;
3681   int      full = INSTR (30, 30);
3682
3683   NYI_assert (29, 24, 0x0E);
3684   NYI_assert (21, 21, 1);
3685   NYI_assert (15, 10, 0x25);
3686
3687   switch (INSTR (23, 22))
3688     {
3689     case 0:
3690       for (i = 0; i < (full ? 16 : 8); i++)
3691         {
3692           uint16_t val = aarch64_get_vec_u8 (cpu, vn, i);
3693           val *= aarch64_get_vec_u8 (cpu, vm, i);
3694           val += aarch64_get_vec_u8 (cpu, vd, i);
3695
3696           aarch64_set_vec_u16 (cpu, vd, i, val);
3697         }
3698       return;
3699
3700     case 1:
3701       for (i = 0; i < (full ? 8 : 4); i++)
3702         {
3703           uint32_t val = aarch64_get_vec_u16 (cpu, vn, i);
3704           val *= aarch64_get_vec_u16 (cpu, vm, i);
3705           val += aarch64_get_vec_u16 (cpu, vd, i);
3706
3707           aarch64_set_vec_u32 (cpu, vd, i, val);
3708         }
3709       return;
3710
3711     case 2:
3712       for (i = 0; i < (full ? 4 : 2); i++)
3713         {
3714           uint64_t val = aarch64_get_vec_u32 (cpu, vn, i);
3715           val *= aarch64_get_vec_u32 (cpu, vm, i);
3716           val += aarch64_get_vec_u32 (cpu, vd, i);
3717
3718           aarch64_set_vec_u64 (cpu, vd, i, val);
3719         }
3720       return;
3721
3722     case 3:
3723       HALT_UNALLOC;
3724     }
3725 }
3726
/* Return the larger of A and B.  If exactly one operand is a NaN the
   other operand is returned; if both are NaNs A is returned.  Zeros,
   subnormals and infinities are ordinary numbers here.  */

static float
fmaxnm (float a, float b)
{
  if (isnan (b))
    return a;
  if (isnan (a))
    return b;
  return a > b ? a : b;
}
3740
/* Return the smaller of A and B.  If exactly one operand is a NaN the
   other operand is returned; if both are NaNs A is returned.  Zeros,
   subnormals and infinities are ordinary numbers here.  */

static float
fminnm (float a, float b)
{
  if (isnan (b))
    return a;
  if (isnan (a))
    return b;
  return a < b ? a : b;
}
3754
/* Return the larger of A and B.  If exactly one operand is a NaN the
   other operand is returned; if both are NaNs A is returned.  Zeros,
   subnormals and infinities are ordinary numbers here.  */

static double
dmaxnm (double a, double b)
{
  if (isnan (b))
    return a;
  if (isnan (a))
    return b;
  return a > b ? a : b;
}
3768
/* Return the smaller of A and B.  If exactly one operand is a NaN the
   other operand is returned; if both are NaNs A is returned.  Zeros,
   subnormals and infinities are ordinary numbers here.  */

static double
dminnm (double a, double b)
{
  if (isnan (b))
    return a;
  if (isnan (a))
    return b;
  return a < b ? a : b;
}
3782
3783 static void
3784 do_vec_FminmaxNMP (sim_cpu *cpu)
3785 {
3786   /* instr [31]    = 0
3787      instr [30]    = half (0)/full (1)
3788      instr [29,24] = 10 1110
3789      instr [23]    = max(0)/min(1)
3790      instr [22]    = float (0)/double (1)
3791      instr [21]    = 1
3792      instr [20,16] = Vn
3793      instr [15,10] = 1100 01
3794      instr [9,5]   = Vm
3795      instr [4.0]   = Vd.  */
3796
3797   unsigned vm = INSTR (20, 16);
3798   unsigned vn = INSTR (9, 5);
3799   unsigned vd = INSTR (4, 0);
3800   int      full = INSTR (30, 30);
3801
3802   NYI_assert (29, 24, 0x2E);
3803   NYI_assert (21, 21, 1);
3804   NYI_assert (15, 10, 0x31);
3805
3806   if (INSTR (22, 22))
3807     {
3808       double (* fn)(double, double) = INSTR (23, 23)
3809         ? dminnm : dmaxnm;
3810
3811       if (! full)
3812         HALT_NYI;
3813       aarch64_set_vec_double (cpu, vd, 0,
3814                               fn (aarch64_get_vec_double (cpu, vn, 0),
3815                                   aarch64_get_vec_double (cpu, vn, 1)));
3816       aarch64_set_vec_double (cpu, vd, 0,
3817                               fn (aarch64_get_vec_double (cpu, vm, 0),
3818                                   aarch64_get_vec_double (cpu, vm, 1)));
3819     }
3820   else
3821     {
3822       float (* fn)(float, float) = INSTR (23, 23)
3823         ? fminnm : fmaxnm;
3824
3825       aarch64_set_vec_float (cpu, vd, 0,
3826                              fn (aarch64_get_vec_float (cpu, vn, 0),
3827                                  aarch64_get_vec_float (cpu, vn, 1)));
3828       if (full)
3829         aarch64_set_vec_float (cpu, vd, 1,
3830                                fn (aarch64_get_vec_float (cpu, vn, 2),
3831                                    aarch64_get_vec_float (cpu, vn, 3)));
3832
3833       aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
3834                              fn (aarch64_get_vec_float (cpu, vm, 0),
3835                                  aarch64_get_vec_float (cpu, vm, 1)));
3836       if (full)
3837         aarch64_set_vec_float (cpu, vd, 3,
3838                                fn (aarch64_get_vec_float (cpu, vm, 2),
3839                                    aarch64_get_vec_float (cpu, vm, 3)));
3840     }
3841 }
3842
3843 static void
3844 do_vec_AND (sim_cpu *cpu)
3845 {
3846   /* instr[31]    = 0
3847      instr[30]    = half (0)/full (1)
3848      instr[29,21] = 001110001
3849      instr[20,16] = Vm
3850      instr[15,10] = 000111
3851      instr[9,5]   = Vn
3852      instr[4.0]   = Vd.  */
3853
3854   unsigned vm = INSTR (20, 16);
3855   unsigned vn = INSTR (9, 5);
3856   unsigned vd = INSTR (4, 0);
3857   unsigned i;
3858   int      full = INSTR (30, 30);
3859
3860   NYI_assert (29, 21, 0x071);
3861   NYI_assert (15, 10, 0x07);
3862
3863   for (i = 0; i < (full ? 4 : 2); i++)
3864     aarch64_set_vec_u32 (cpu, vd, i,
3865                          aarch64_get_vec_u32 (cpu, vn, i)
3866                          & aarch64_get_vec_u32 (cpu, vm, i));
3867 }
3868
3869 static void
3870 do_vec_BSL (sim_cpu *cpu)
3871 {
3872   /* instr[31]    = 0
3873      instr[30]    = half (0)/full (1)
3874      instr[29,21] = 101110011
3875      instr[20,16] = Vm
3876      instr[15,10] = 000111
3877      instr[9,5]   = Vn
3878      instr[4.0]   = Vd.  */
3879
3880   unsigned vm = INSTR (20, 16);
3881   unsigned vn = INSTR (9, 5);
3882   unsigned vd = INSTR (4, 0);
3883   unsigned i;
3884   int      full = INSTR (30, 30);
3885
3886   NYI_assert (29, 21, 0x173);
3887   NYI_assert (15, 10, 0x07);
3888
3889   for (i = 0; i < (full ? 16 : 8); i++)
3890     aarch64_set_vec_u8 (cpu, vd, i,
3891                         (    aarch64_get_vec_u8 (cpu, vd, i)
3892                            & aarch64_get_vec_u8 (cpu, vn, i))
3893                         | ((~ aarch64_get_vec_u8 (cpu, vd, i))
3894                            & aarch64_get_vec_u8 (cpu, vm, i)));
3895 }
3896
3897 static void
3898 do_vec_EOR (sim_cpu *cpu)
3899 {
3900   /* instr[31]    = 0
3901      instr[30]    = half (0)/full (1)
3902      instr[29,21] = 10 1110 001
3903      instr[20,16] = Vm
3904      instr[15,10] = 000111
3905      instr[9,5]   = Vn
3906      instr[4.0]   = Vd.  */
3907
3908   unsigned vm = INSTR (20, 16);
3909   unsigned vn = INSTR (9, 5);
3910   unsigned vd = INSTR (4, 0);
3911   unsigned i;
3912   int      full = INSTR (30, 30);
3913
3914   NYI_assert (29, 21, 0x171);
3915   NYI_assert (15, 10, 0x07);
3916
3917   for (i = 0; i < (full ? 4 : 2); i++)
3918     aarch64_set_vec_u32 (cpu, vd, i,
3919                          aarch64_get_vec_u32 (cpu, vn, i)
3920                          ^ aarch64_get_vec_u32 (cpu, vm, i));
3921 }
3922
3923 static void
3924 do_vec_bit (sim_cpu *cpu)
3925 {
3926   /* instr[31]    = 0
3927      instr[30]    = half (0)/full (1)
3928      instr[29,23] = 10 1110 1
3929      instr[22]    = BIT (0) / BIF (1)
3930      instr[21]    = 1
3931      instr[20,16] = Vm
3932      instr[15,10] = 0001 11
3933      instr[9,5]   = Vn
3934      instr[4.0]   = Vd.  */
3935
3936   unsigned vm = INSTR (20, 16);
3937   unsigned vn = INSTR (9, 5);
3938   unsigned vd = INSTR (4, 0);
3939   unsigned full = INSTR (30, 30);
3940   unsigned test_false = INSTR (22, 22);
3941   unsigned i;
3942
3943   NYI_assert (29, 23, 0x5D);
3944   NYI_assert (21, 21, 1);
3945   NYI_assert (15, 10, 0x07);
3946
3947   if (test_false)
3948     {
3949       for (i = 0; i < (full ? 16 : 8); i++)
3950         if (aarch64_get_vec_u32 (cpu, vn, i) == 0)
3951           aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vm, i));
3952     }
3953   else
3954     {
3955       for (i = 0; i < (full ? 16 : 8); i++)
3956         if (aarch64_get_vec_u32 (cpu, vn, i) != 0)
3957           aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vm, i));
3958     }
3959 }
3960
3961 static void
3962 do_vec_ORN (sim_cpu *cpu)
3963 {
3964   /* instr[31]    = 0
3965      instr[30]    = half (0)/full (1)
3966      instr[29,21] = 00 1110 111
3967      instr[20,16] = Vm
3968      instr[15,10] = 00 0111
3969      instr[9,5]   = Vn
3970      instr[4.0]   = Vd.  */
3971
3972   unsigned vm = INSTR (20, 16);
3973   unsigned vn = INSTR (9, 5);
3974   unsigned vd = INSTR (4, 0);
3975   unsigned i;
3976   int      full = INSTR (30, 30);
3977
3978   NYI_assert (29, 21, 0x077);
3979   NYI_assert (15, 10, 0x07);
3980
3981   for (i = 0; i < (full ? 16 : 8); i++)
3982     aarch64_set_vec_u8 (cpu, vd, i,
3983                         aarch64_get_vec_u8 (cpu, vn, i)
3984                         | ~ aarch64_get_vec_u8 (cpu, vm, i));
3985 }
3986
3987 static void
3988 do_vec_ORR (sim_cpu *cpu)
3989 {
3990   /* instr[31]    = 0
3991      instr[30]    = half (0)/full (1)
3992      instr[29,21] = 00 1110 101
3993      instr[20,16] = Vm
3994      instr[15,10] = 0001 11
3995      instr[9,5]   = Vn
3996      instr[4.0]   = Vd.  */
3997
3998   unsigned vm = INSTR (20, 16);
3999   unsigned vn = INSTR (9, 5);
4000   unsigned vd = INSTR (4, 0);
4001   unsigned i;
4002   int      full = INSTR (30, 30);
4003
4004   NYI_assert (29, 21, 0x075);
4005   NYI_assert (15, 10, 0x07);
4006
4007   for (i = 0; i < (full ? 16 : 8); i++)
4008     aarch64_set_vec_u8 (cpu, vd, i,
4009                         aarch64_get_vec_u8 (cpu, vn, i)
4010                         | aarch64_get_vec_u8 (cpu, vm, i));
4011 }
4012
4013 static void
4014 do_vec_BIC (sim_cpu *cpu)
4015 {
4016   /* instr[31]    = 0
4017      instr[30]    = half (0)/full (1)
4018      instr[29,21] = 00 1110 011
4019      instr[20,16] = Vm
4020      instr[15,10] = 00 0111
4021      instr[9,5]   = Vn
4022      instr[4.0]   = Vd.  */
4023
4024   unsigned vm = INSTR (20, 16);
4025   unsigned vn = INSTR (9, 5);
4026   unsigned vd = INSTR (4, 0);
4027   unsigned i;
4028   int      full = INSTR (30, 30);
4029
4030   NYI_assert (29, 21, 0x073);
4031   NYI_assert (15, 10, 0x07);
4032
4033   for (i = 0; i < (full ? 16 : 8); i++)
4034     aarch64_set_vec_u8 (cpu, vd, i,
4035                         aarch64_get_vec_u8 (cpu, vn, i)
4036                         & ~ aarch64_get_vec_u8 (cpu, vm, i));
4037 }
4038
4039 static void
4040 do_vec_XTN (sim_cpu *cpu)
4041 {
4042   /* instr[31]    = 0
4043      instr[30]    = first part (0)/ second part (1)
4044      instr[29,24] = 00 1110
4045      instr[23,22] = size: byte(00), half(01), word (10)
4046      instr[21,10] = 1000 0100 1010
4047      instr[9,5]   = Vs
4048      instr[4,0]   = Vd.  */
4049
4050   unsigned vs = INSTR (9, 5);
4051   unsigned vd = INSTR (4, 0);
4052   unsigned bias = INSTR (30, 30);
4053   unsigned i;
4054
4055   NYI_assert (29, 24, 0x0E);
4056   NYI_assert (21, 10, 0x84A);
4057
4058   switch (INSTR (23, 22))
4059     {
4060     case 0:
4061       if (bias)
4062         for (i = 0; i < 8; i++)
4063           aarch64_set_vec_u8 (cpu, vd, i + 8,
4064                               aarch64_get_vec_u16 (cpu, vs, i) >> 8);
4065       else
4066         for (i = 0; i < 8; i++)
4067           aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, i));
4068       return;
4069
4070     case 1:
4071       if (bias)
4072         for (i = 0; i < 4; i++)
4073           aarch64_set_vec_u16 (cpu, vd, i + 4,
4074                                aarch64_get_vec_u32 (cpu, vs, i) >> 16);
4075       else
4076         for (i = 0; i < 4; i++)
4077           aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, i));
4078       return;
4079
4080     case 2:
4081       if (bias)
4082         for (i = 0; i < 2; i++)
4083           aarch64_set_vec_u32 (cpu, vd, i + 4,
4084                                aarch64_get_vec_u64 (cpu, vs, i) >> 32);
4085       else
4086         for (i = 0; i < 2; i++)
4087           aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, i));
4088       return;
4089     }
4090 }
4091
4092 static void
4093 do_vec_maxv (sim_cpu *cpu)
4094 {
4095   /* instr[31]    = 0
4096      instr[30]    = half(0)/full(1)
4097      instr[29]    = signed (0)/unsigned(1)
4098      instr[28,24] = 0 1110
4099      instr[23,22] = size: byte(00), half(01), word (10)
4100      instr[21]    = 1
4101      instr[20,17] = 1 000
4102      instr[16]    = max(0)/min(1)
4103      instr[15,10] = 1010 10
4104      instr[9,5]   = V source
4105      instr[4.0]   = R dest.  */
4106
4107   unsigned vs = INSTR (9, 5);
4108   unsigned rd = INSTR (4, 0);
4109   unsigned full = INSTR (30, 30);
4110   unsigned i;
4111
4112   NYI_assert (28, 24, 0x0E);
4113   NYI_assert (21, 21, 1);
4114   NYI_assert (20, 17, 8);
4115   NYI_assert (15, 10, 0x2A);
4116
4117   switch ((INSTR (29, 29) << 1)
4118           | INSTR (16, 16))
4119     {
4120     case 0: /* SMAXV.  */
4121        {
4122         int64_t smax;
4123         switch (INSTR (23, 22))
4124           {
4125           case 0:
4126             smax = aarch64_get_vec_s8 (cpu, vs, 0);
4127             for (i = 1; i < (full ? 16 : 8); i++)
4128               smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i));
4129             break;
4130           case 1:
4131             smax = aarch64_get_vec_s16 (cpu, vs, 0);
4132             for (i = 1; i < (full ? 8 : 4); i++)
4133               smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i));
4134             break;
4135           case 2:
4136             smax = aarch64_get_vec_s32 (cpu, vs, 0);
4137             for (i = 1; i < (full ? 4 : 2); i++)
4138               smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i));
4139             break;
4140           case 3:
4141             HALT_UNALLOC;
4142           }
4143         aarch64_set_reg_s64 (cpu, rd, NO_SP, smax);
4144         return;
4145       }
4146
4147     case 1: /* SMINV.  */
4148       {
4149         int64_t smin;
4150         switch (INSTR (23, 22))
4151           {
4152           case 0:
4153             smin = aarch64_get_vec_s8 (cpu, vs, 0);
4154             for (i = 1; i < (full ? 16 : 8); i++)
4155               smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i));
4156             break;
4157           case 1:
4158             smin = aarch64_get_vec_s16 (cpu, vs, 0);
4159             for (i = 1; i < (full ? 8 : 4); i++)
4160               smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i));
4161             break;
4162           case 2:
4163             smin = aarch64_get_vec_s32 (cpu, vs, 0);
4164             for (i = 1; i < (full ? 4 : 2); i++)
4165               smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i));
4166             break;
4167
4168           case 3:
4169             HALT_UNALLOC;
4170           }
4171         aarch64_set_reg_s64 (cpu, rd, NO_SP, smin);
4172         return;
4173       }
4174
4175     case 2: /* UMAXV.  */
4176       {
4177         uint64_t umax;
4178         switch (INSTR (23, 22))
4179           {
4180           case 0:
4181             umax = aarch64_get_vec_u8 (cpu, vs, 0);
4182             for (i = 1; i < (full ? 16 : 8); i++)
4183               umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i));
4184             break;
4185           case 1:
4186             umax = aarch64_get_vec_u16 (cpu, vs, 0);
4187             for (i = 1; i < (full ? 8 : 4); i++)
4188               umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i));
4189             break;
4190           case 2:
4191             umax = aarch64_get_vec_u32 (cpu, vs, 0);
4192             for (i = 1; i < (full ? 4 : 2); i++)
4193               umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i));
4194             break;
4195
4196           case 3:
4197             HALT_UNALLOC;
4198           }
4199         aarch64_set_reg_u64 (cpu, rd, NO_SP, umax);
4200         return;
4201       }
4202
4203     case 3: /* UMINV.  */
4204       {
4205         uint64_t umin;
4206         switch (INSTR (23, 22))
4207           {
4208           case 0:
4209             umin = aarch64_get_vec_u8 (cpu, vs, 0);
4210             for (i = 1; i < (full ? 16 : 8); i++)
4211               umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i));
4212             break;
4213           case 1:
4214             umin = aarch64_get_vec_u16 (cpu, vs, 0);
4215             for (i = 1; i < (full ? 8 : 4); i++)
4216               umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i));
4217             break;
4218           case 2:
4219             umin = aarch64_get_vec_u32 (cpu, vs, 0);
4220             for (i = 1; i < (full ? 4 : 2); i++)
4221               umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i));
4222             break;
4223
4224           case 3:
4225             HALT_UNALLOC;
4226           }
4227         aarch64_set_reg_u64 (cpu, rd, NO_SP, umin);
4228         return;
4229       }
4230     }
4231 }
4232
4233 static void
4234 do_vec_fminmaxV (sim_cpu *cpu)
4235 {
4236   /* instr[31,24] = 0110 1110
4237      instr[23]    = max(0)/min(1)
4238      instr[22,14] = 011 0000 11
4239      instr[13,12] = nm(00)/normal(11)
4240      instr[11,10] = 10
4241      instr[9,5]   = V source
4242      instr[4.0]   = R dest.  */
4243
4244   unsigned vs = INSTR (9, 5);
4245   unsigned rd = INSTR (4, 0);
4246   unsigned i;
4247   float res   = aarch64_get_vec_float (cpu, vs, 0);
4248
4249   NYI_assert (31, 24, 0x6E);
4250   NYI_assert (22, 14, 0x0C3);
4251   NYI_assert (11, 10, 2);
4252
4253   if (INSTR (23, 23))
4254     {
4255       switch (INSTR (13, 12))
4256         {
4257         case 0: /* FMNINNMV.  */
4258           for (i = 1; i < 4; i++)
4259             res = fminnm (res, aarch64_get_vec_float (cpu, vs, i));
4260           break;
4261
4262         case 3: /* FMINV.  */
4263           for (i = 1; i < 4; i++)
4264             res = min (res, aarch64_get_vec_float (cpu, vs, i));
4265           break;
4266
4267         default:
4268           HALT_NYI;
4269         }
4270     }
4271   else
4272     {
4273       switch (INSTR (13, 12))
4274         {
4275         case 0: /* FMNAXNMV.  */
4276           for (i = 1; i < 4; i++)
4277             res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
4278           break;
4279
4280         case 3: /* FMAXV.  */
4281           for (i = 1; i < 4; i++)
4282             res = max (res, aarch64_get_vec_float (cpu, vs, i));
4283           break;
4284
4285         default:
4286           HALT_NYI;
4287         }
4288     }
4289
4290   aarch64_set_FP_float (cpu, rd, res);
4291 }
4292
4293 static void
4294 do_vec_Fminmax (sim_cpu *cpu)
4295 {
4296   /* instr[31]    = 0
4297      instr[30]    = half(0)/full(1)
4298      instr[29,24] = 00 1110
4299      instr[23]    = max(0)/min(1)
4300      instr[22]    = float(0)/double(1)
4301      instr[21]    = 1
4302      instr[20,16] = Vm
4303      instr[15,14] = 11
4304      instr[13,12] = nm(00)/normal(11)
4305      instr[11,10] = 01
4306      instr[9,5]   = Vn
4307      instr[4,0]   = Vd.  */
4308
4309   unsigned vm = INSTR (20, 16);
4310   unsigned vn = INSTR (9, 5);
4311   unsigned vd = INSTR (4, 0);
4312   unsigned full = INSTR (30, 30);
4313   unsigned min = INSTR (23, 23);
4314   unsigned i;
4315
4316   NYI_assert (29, 24, 0x0E);
4317   NYI_assert (21, 21, 1);
4318   NYI_assert (15, 14, 3);
4319   NYI_assert (11, 10, 1);
4320
4321   if (INSTR (22, 22))
4322     {
4323       double (* func)(double, double);
4324
4325       if (! full)
4326         HALT_NYI;
4327
4328       if (INSTR (13, 12) == 0)
4329         func = min ? dminnm : dmaxnm;
4330       else if (INSTR (13, 12) == 3)
4331         func = min ? fmin : fmax;
4332       else
4333         HALT_NYI;
4334
4335       for (i = 0; i < 2; i++)
4336         aarch64_set_vec_double (cpu, vd, i,
4337                                 func (aarch64_get_vec_double (cpu, vn, i),
4338                                       aarch64_get_vec_double (cpu, vm, i)));
4339     }
4340   else
4341     {
4342       float (* func)(float, float);
4343
4344       if (INSTR (13, 12) == 0)
4345         func = min ? fminnm : fmaxnm;
4346       else if (INSTR (13, 12) == 3)
4347         func = min ? fminf : fmaxf;
4348       else
4349         HALT_NYI;
4350
4351       for (i = 0; i < (full ? 4 : 2); i++)
4352         aarch64_set_vec_float (cpu, vd, i,
4353                                func (aarch64_get_vec_float (cpu, vn, i),
4354                                      aarch64_get_vec_float (cpu, vm, i)));
4355     }
4356 }
4357
4358 static void
4359 do_vec_SCVTF (sim_cpu *cpu)
4360 {
4361   /* instr[31]    = 0
4362      instr[30]    = Q
4363      instr[29,23] = 00 1110 0
4364      instr[22]    = float(0)/double(1)
4365      instr[21,10] = 10 0001 1101 10
4366      instr[9,5]   = Vn
4367      instr[4,0]   = Vd.  */
4368
4369   unsigned vn = INSTR (9, 5);
4370   unsigned vd = INSTR (4, 0);
4371   unsigned full = INSTR (30, 30);
4372   unsigned size = INSTR (22, 22);
4373   unsigned i;
4374
4375   NYI_assert (29, 23, 0x1C);
4376   NYI_assert (21, 10, 0x876);
4377
4378   if (size)
4379     {
4380       if (! full)
4381         HALT_UNALLOC;
4382
4383       for (i = 0; i < 2; i++)
4384         {
4385           double val = (double) aarch64_get_vec_u64 (cpu, vn, i);
4386           aarch64_set_vec_double (cpu, vd, i, val);
4387         }
4388     }
4389   else
4390     {
4391       for (i = 0; i < (full ? 4 : 2); i++)
4392         {
4393           float val = (float) aarch64_get_vec_u32 (cpu, vn, i);
4394           aarch64_set_vec_float (cpu, vd, i, val);
4395         }
4396     }
4397 }
4398
/* Lane-wise integer comparison of Vn against Vm.  SOURCE picks the
   signed (s) or unsigned (u) element accessors and CMP is the
   relational operator.  A destination lane is set to all ones when the
   comparison holds and to zero otherwise.  The expansion relies on the
   enclosing function providing cpu, size, full, vd, vn, vm and i, and
   it returns from that function for every size from 0 to 3.  */
#define VEC_CMP(SOURCE, CMP)                                            \
  do                                                                    \
    {                                                                   \
      switch (size)                                                     \
        {                                                               \
        case 0:                                                         \
          for (i = 0; i < (full ? 16 : 8); i++)                         \
            {                                                           \
              int hit = aarch64_get_vec_##SOURCE##8 (cpu, vn, i)        \
                CMP aarch64_get_vec_##SOURCE##8 (cpu, vm, i);           \
              aarch64_set_vec_u8 (cpu, vd, i, hit ? -1 : 0);            \
            }                                                           \
          return;                                                       \
        case 1:                                                         \
          for (i = 0; i < (full ? 8 : 4); i++)                          \
            {                                                           \
              int hit = aarch64_get_vec_##SOURCE##16 (cpu, vn, i)       \
                CMP aarch64_get_vec_##SOURCE##16 (cpu, vm, i);          \
              aarch64_set_vec_u16 (cpu, vd, i, hit ? -1 : 0);           \
            }                                                           \
          return;                                                       \
        case 2:                                                         \
          for (i = 0; i < (full ? 4 : 2); i++)                          \
            {                                                           \
              int hit = aarch64_get_vec_##SOURCE##32 (cpu, vn, i)       \
                CMP aarch64_get_vec_##SOURCE##32 (cpu, vm, i);          \
              aarch64_set_vec_u32 (cpu, vd, i, hit ? -1 : 0);           \
            }                                                           \
          return;                                                       \
        case 3:                                                         \
          if (! full)                                                   \
            HALT_UNALLOC;                                               \
          for (i = 0; i < 2; i++)                                       \
            {                                                           \
              int hit = aarch64_get_vec_##SOURCE##64 (cpu, vn, i)       \
                CMP aarch64_get_vec_##SOURCE##64 (cpu, vm, i);          \
              aarch64_set_vec_u64 (cpu, vd, i, hit ? -1ULL : 0);        \
            }                                                           \
          return;                                                       \
        }                                                               \
    }                                                                   \
  while (0)
4441
/* Lane-wise integer comparison of Vn against zero.  SOURCE picks the
   signed (s) or unsigned (u) element accessors and CMP is the
   relational operator.  A destination lane is set to all ones when the
   comparison holds and to zero otherwise.  The expansion relies on the
   enclosing function providing cpu, size, full, vd, vn and i, and it
   returns from that function for every size from 0 to 3.  */
#define VEC_CMP0(SOURCE, CMP)                                           \
  do                                                                    \
    {                                                                   \
      switch (size)                                                     \
        {                                                               \
        case 0:                                                         \
          for (i = 0; i < (full ? 16 : 8); i++)                         \
            {                                                           \
              int hit = aarch64_get_vec_##SOURCE##8 (cpu, vn, i)        \
                CMP 0;                                                  \
              aarch64_set_vec_u8 (cpu, vd, i, hit ? -1 : 0);            \
            }                                                           \
          return;                                                       \
        case 1:                                                         \
          for (i = 0; i < (full ? 8 : 4); i++)                          \
            {                                                           \
              int hit = aarch64_get_vec_##SOURCE##16 (cpu, vn, i)       \
                CMP 0;                                                  \
              aarch64_set_vec_u16 (cpu, vd, i, hit ? -1 : 0);           \
            }                                                           \
          return;                                                       \
        case 2:                                                         \
          for (i = 0; i < (full ? 4 : 2); i++)                          \
            {                                                           \
              int hit = aarch64_get_vec_##SOURCE##32 (cpu, vn, i)       \
                CMP 0;                                                  \
              aarch64_set_vec_u32 (cpu, vd, i, hit ? -1 : 0);           \
            }                                                           \
          return;                                                       \
        case 3:                                                         \
          if (! full)                                                   \
            HALT_UNALLOC;                                               \
          for (i = 0; i < 2; i++)                                       \
            {                                                           \
              int hit = aarch64_get_vec_##SOURCE##64 (cpu, vn, i)       \
                CMP 0;                                                  \
              aarch64_set_vec_u64 (cpu, vd, i, hit ? -1ULL : 0);        \
            }                                                           \
          return;                                                       \
        }                                                               \
    }                                                                   \
  while (0)
4476
/* Lane-wise floating point comparison of Vn against 0.0 using the
   relational operator CMP.  instr[22] selects double (1) or single (0)
   precision.  A destination lane is set to all ones when the comparison
   holds and to zero otherwise.  A non-zero vm, or the half-width double
   form, halts as not yet implemented.  The expansion relies on the
   enclosing function providing cpu, full, vd, vn, vm and i, and it
   always returns from that function.  */
#define VEC_FCMP0(CMP)                                                  \
  do                                                                    \
    {                                                                   \
      if (vm != 0)                                                      \
        HALT_NYI;                                                       \
      if (INSTR (22, 22))                                               \
        {                                                               \
          if (! full)                                                   \
            HALT_NYI;                                                   \
          for (i = 0; i < 2; i++)                                       \
            {                                                           \
              int hit = aarch64_get_vec_double (cpu, vn, i) CMP 0.0;    \
              aarch64_set_vec_u64 (cpu, vd, i, hit ? -1 : 0);           \
            }                                                           \
        }                                                               \
      else                                                              \
        {                                                               \
          for (i = 0; i < (full ? 4 : 2); i++)                          \
            {                                                           \
              int hit = aarch64_get_vec_float (cpu, vn, i) CMP 0.0;     \
              aarch64_set_vec_u32 (cpu, vd, i, hit ? -1 : 0);           \
            }                                                           \
        }                                                               \
      return;                                                           \
    }                                                                   \
  while (0)
4501
/* Lane-wise floating point comparison of Vn against Vm using the
   relational operator CMP.  instr[22] selects double (1) or single (0)
   precision.  A destination lane is set to all ones when the comparison
   holds and to zero otherwise.  The half-width double form halts as not
   yet implemented.  The expansion relies on the enclosing function
   providing cpu, full, vd, vn, vm and i, and it always returns from
   that function.  */
#define VEC_FCMP(CMP)                                                   \
  do                                                                    \
    {                                                                   \
      if (INSTR (22, 22))                                               \
        {                                                               \
          if (! full)                                                   \
            HALT_NYI;                                                   \
          for (i = 0; i < 2; i++)                                       \
            {                                                           \
              int hit = aarch64_get_vec_double (cpu, vn, i)             \
                CMP aarch64_get_vec_double (cpu, vm, i);                \
              aarch64_set_vec_u64 (cpu, vd, i, hit ? -1 : 0);           \
            }                                                           \
        }                                                               \
      else                                                              \
        {                                                               \
          for (i = 0; i < (full ? 4 : 2); i++)                          \
            {                                                           \
              int hit = aarch64_get_vec_float (cpu, vn, i)              \
                CMP aarch64_get_vec_float (cpu, vm, i);                 \
              aarch64_set_vec_u32 (cpu, vd, i, hit ? -1 : 0);           \
            }                                                           \
        }                                                               \
      return;                                                           \
    }                                                                   \
  while (0)
4528
4529 static void
4530 do_vec_compare (sim_cpu *cpu)
4531 {
4532   /* instr[31]    = 0
4533      instr[30]    = half(0)/full(1)
4534      instr[29]    = part-of-comparison-type
4535      instr[28,24] = 0 1110
4536      instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
4537                     type of float compares: single (-0) / double (-1)
4538      instr[21]    = 1
4539      instr[20,16] = Vm or 00000 (compare vs 0)
4540      instr[15,10] = part-of-comparison-type
4541      instr[9,5]   = Vn
4542      instr[4.0]   = Vd.  */
4543
4544   int full = INSTR (30, 30);
4545   int size = INSTR (23, 22);
4546   unsigned vm = INSTR (20, 16);
4547   unsigned vn = INSTR (9, 5);
4548   unsigned vd = INSTR (4, 0);
4549   unsigned i;
4550
4551   NYI_assert (28, 24, 0x0E);
4552   NYI_assert (21, 21, 1);
4553
4554   if ((INSTR (11, 11)
4555        && INSTR (14, 14))
4556       || ((INSTR (11, 11) == 0
4557            && INSTR (10, 10) == 0)))
4558     {
4559       /* A compare vs 0.  */
4560       if (vm != 0)
4561         {
4562           if (INSTR (15, 10) == 0x2A)
4563             do_vec_maxv (cpu);
4564           else if (INSTR (15, 10) == 0x32
4565                    || INSTR (15, 10) == 0x3E)
4566             do_vec_fminmaxV (cpu);
4567           else if (INSTR (29, 23) == 0x1C
4568                    && INSTR (21, 10) == 0x876)
4569             do_vec_SCVTF (cpu);
4570           else
4571             HALT_NYI;
4572           return;
4573         }
4574     }
4575
4576   if (INSTR (14, 14))
4577     {
4578       /* A floating point compare.  */
4579       unsigned decode = (INSTR (29, 29) << 5)
4580         | (INSTR (23, 23) << 4)
4581         | INSTR (13, 10);
4582
4583       NYI_assert (15, 15, 1);
4584
4585       switch (decode)
4586         {
4587         case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
4588         case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
4589         case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
4590         case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
4591         case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
4592         case /* 0b111001: GT */   0x39: VEC_FCMP  (>);
4593         case /* 0b101001: GE */   0x29: VEC_FCMP  (>=);
4594         case /* 0b001001: EQ */   0x09: VEC_FCMP  (==);
4595
4596         default:
4597           HALT_NYI;
4598         }
4599     }
4600   else
4601     {
4602       unsigned decode = (INSTR (29, 29) << 6)
4603         | INSTR (15, 10);
4604
4605       switch (decode)
4606         {
4607         case 0x0D: /* 0001101 GT */     VEC_CMP  (s, > );
4608         case 0x0F: /* 0001111 GE */     VEC_CMP  (s, >= );
4609         case 0x22: /* 0100010 GT #0 */  VEC_CMP0 (s, > );
4610         case 0x26: /* 0100110 EQ #0 */  VEC_CMP0 (s, == );
4611         case 0x2A: /* 0101010 LT #0 */  VEC_CMP0 (s, < );
4612         case 0x4D: /* 1001101 HI */     VEC_CMP  (u, > );
4613         case 0x4F: /* 1001111 HS */     VEC_CMP  (u, >= );
4614         case 0x62: /* 1100010 GE #0 */  VEC_CMP0 (s, >= );
4615         case 0x63: /* 1100011 EQ */     VEC_CMP  (u, == );
4616         case 0x66: /* 1100110 LE #0 */  VEC_CMP0 (s, <= );
4617         default:
4618           if (vm == 0)
4619             HALT_NYI;
4620           do_vec_maxv (cpu);
4621         }
4622     }
4623 }
4624
4625 static void
4626 do_vec_SSHL (sim_cpu *cpu)
4627 {
4628   /* instr[31]    = 0
4629      instr[30]    = first part (0)/ second part (1)
4630      instr[29,24] = 00 1110
4631      instr[23,22] = size: byte(00), half(01), word (10), long (11)
4632      instr[21]    = 1
4633      instr[20,16] = Vm
4634      instr[15,10] = 0100 01
4635      instr[9,5]   = Vn
4636      instr[4,0]   = Vd.  */
4637
4638   unsigned full = INSTR (30, 30);
4639   unsigned vm = INSTR (20, 16);
4640   unsigned vn = INSTR (9, 5);
4641   unsigned vd = INSTR (4, 0);
4642   unsigned i;
4643   signed int shift;
4644
4645   NYI_assert (29, 24, 0x0E);
4646   NYI_assert (21, 21, 1);
4647   NYI_assert (15, 10, 0x11);
4648
4649   /* FIXME: What is a signed shift left in this context ?.  */
4650
4651   switch (INSTR (23, 22))
4652     {
4653     case 0:
4654       for (i = 0; i < (full ? 16 : 8); i++)
4655         {
4656           shift = aarch64_get_vec_s8 (cpu, vm, i);
4657           if (shift >= 0)
4658             aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4659                                 << shift);
4660           else
4661             aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4662                                 >> - shift);
4663         }
4664       return;
4665
4666     case 1:
4667       for (i = 0; i < (full ? 8 : 4); i++)
4668         {
4669           shift = aarch64_get_vec_s8 (cpu, vm, i);
4670           if (shift >= 0)
4671             aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4672                                  << shift);
4673           else
4674             aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4675                                  >> - shift);
4676         }
4677       return;
4678
4679     case 2:
4680       for (i = 0; i < (full ? 4 : 2); i++)
4681         {
4682           shift = aarch64_get_vec_s8 (cpu, vm, i);
4683           if (shift >= 0)
4684             aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4685                                  << shift);
4686           else
4687             aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4688                                  >> - shift);
4689         }
4690       return;
4691
4692     case 3:
4693       if (! full)
4694         HALT_UNALLOC;
4695       for (i = 0; i < 2; i++)
4696         {
4697           shift = aarch64_get_vec_s8 (cpu, vm, i);
4698           if (shift >= 0)
4699             aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4700                                  << shift);
4701           else
4702             aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4703                                  >> - shift);
4704         }
4705       return;
4706     }
4707 }
4708
4709 static void
4710 do_vec_USHL (sim_cpu *cpu)
4711 {
4712   /* instr[31]    = 0
4713      instr[30]    = first part (0)/ second part (1)
4714      instr[29,24] = 10 1110
4715      instr[23,22] = size: byte(00), half(01), word (10), long (11)
4716      instr[21]    = 1
4717      instr[20,16] = Vm
4718      instr[15,10] = 0100 01
4719      instr[9,5]   = Vn
4720      instr[4,0]   = Vd  */
4721
4722   unsigned full = INSTR (30, 30);
4723   unsigned vm = INSTR (20, 16);
4724   unsigned vn = INSTR (9, 5);
4725   unsigned vd = INSTR (4, 0);
4726   unsigned i;
4727   signed int shift;
4728
4729   NYI_assert (29, 24, 0x2E);
4730   NYI_assert (15, 10, 0x11);
4731
4732   switch (INSTR (23, 22))
4733     {
4734     case 0:
4735         for (i = 0; i < (full ? 16 : 8); i++)
4736           {
4737             shift = aarch64_get_vec_s8 (cpu, vm, i);
4738             if (shift >= 0)
4739               aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4740                                   << shift);
4741             else
4742               aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4743                                   >> - shift);
4744           }
4745       return;
4746
4747     case 1:
4748       for (i = 0; i < (full ? 8 : 4); i++)
4749         {
4750           shift = aarch64_get_vec_s8 (cpu, vm, i);
4751           if (shift >= 0)
4752             aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4753                                  << shift);
4754           else
4755             aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4756                                  >> - shift);
4757         }
4758       return;
4759
4760     case 2:
4761       for (i = 0; i < (full ? 4 : 2); i++)
4762         {
4763           shift = aarch64_get_vec_s8 (cpu, vm, i);
4764           if (shift >= 0)
4765             aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4766                                  << shift);
4767           else
4768             aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4769                                  >> - shift);
4770         }
4771       return;
4772
4773     case 3:
4774       if (! full)
4775         HALT_UNALLOC;
4776       for (i = 0; i < 2; i++)
4777         {
4778           shift = aarch64_get_vec_s8 (cpu, vm, i);
4779           if (shift >= 0)
4780             aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4781                                  << shift);
4782           else
4783             aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4784                                  >> - shift);
4785         }
4786       return;
4787     }
4788 }
4789
4790 static void
4791 do_vec_FMLA (sim_cpu *cpu)
4792 {
4793   /* instr[31]    = 0
4794      instr[30]    = full/half selector
4795      instr[29,23] = 0011100
4796      instr[22]    = size: 0=>float, 1=>double
4797      instr[21]    = 1
4798      instr[20,16] = Vn
4799      instr[15,10] = 1100 11
4800      instr[9,5]   = Vm
4801      instr[4.0]   = Vd.  */
4802
4803   unsigned vm = INSTR (20, 16);
4804   unsigned vn = INSTR (9, 5);
4805   unsigned vd = INSTR (4, 0);
4806   unsigned i;
4807   int      full = INSTR (30, 30);
4808
4809   NYI_assert (29, 23, 0x1C);
4810   NYI_assert (21, 21, 1);
4811   NYI_assert (15, 10, 0x33);
4812
4813   if (INSTR (22, 22))
4814     {
4815       if (! full)
4816         HALT_UNALLOC;
4817       for (i = 0; i < 2; i++)
4818         aarch64_set_vec_double (cpu, vd, i,
4819                                 aarch64_get_vec_double (cpu, vn, i) *
4820                                 aarch64_get_vec_double (cpu, vm, i) +
4821                                 aarch64_get_vec_double (cpu, vd, i));
4822     }
4823   else
4824     {
4825       for (i = 0; i < (full ? 4 : 2); i++)
4826         aarch64_set_vec_float (cpu, vd, i,
4827                                aarch64_get_vec_float (cpu, vn, i) *
4828                                aarch64_get_vec_float (cpu, vm, i) +
4829                                aarch64_get_vec_float (cpu, vd, i));
4830     }
4831 }
4832
4833 static void
4834 do_vec_max (sim_cpu *cpu)
4835 {
4836   /* instr[31]    = 0
4837      instr[30]    = full/half selector
4838      instr[29]    = SMAX (0) / UMAX (1)
4839      instr[28,24] = 0 1110
4840      instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
4841      instr[21]    = 1
4842      instr[20,16] = Vn
4843      instr[15,10] = 0110 01
4844      instr[9,5]   = Vm
4845      instr[4.0]   = Vd.  */
4846
4847   unsigned vm = INSTR (20, 16);
4848   unsigned vn = INSTR (9, 5);
4849   unsigned vd = INSTR (4, 0);
4850   unsigned i;
4851   int      full = INSTR (30, 30);
4852
4853   NYI_assert (28, 24, 0x0E);
4854   NYI_assert (21, 21, 1);
4855   NYI_assert (15, 10, 0x19);
4856
4857   if (INSTR (29, 29))
4858     {
4859       switch (INSTR (23, 22))
4860         {
4861         case 0:
4862           for (i = 0; i < (full ? 16 : 8); i++)
4863             aarch64_set_vec_u8 (cpu, vd, i,
4864                                 aarch64_get_vec_u8 (cpu, vn, i)
4865                                 > aarch64_get_vec_u8 (cpu, vm, i)
4866                                 ? aarch64_get_vec_u8 (cpu, vn, i)
4867                                 : aarch64_get_vec_u8 (cpu, vm, i));
4868           return;
4869
4870         case 1:
4871           for (i = 0; i < (full ? 8 : 4); i++)
4872             aarch64_set_vec_u16 (cpu, vd, i,
4873                                  aarch64_get_vec_u16 (cpu, vn, i)
4874                                  > aarch64_get_vec_u16 (cpu, vm, i)
4875                                  ? aarch64_get_vec_u16 (cpu, vn, i)
4876                                  : aarch64_get_vec_u16 (cpu, vm, i));
4877           return;
4878
4879         case 2:
4880           for (i = 0; i < (full ? 4 : 2); i++)
4881             aarch64_set_vec_u32 (cpu, vd, i,
4882                                  aarch64_get_vec_u32 (cpu, vn, i)
4883                                  > aarch64_get_vec_u32 (cpu, vm, i)
4884                                  ? aarch64_get_vec_u32 (cpu, vn, i)
4885                                  : aarch64_get_vec_u32 (cpu, vm, i));
4886           return;
4887
4888         case 3:
4889           HALT_UNALLOC;
4890         }
4891     }
4892   else
4893     {
4894       switch (INSTR (23, 22))
4895         {
4896         case 0:
4897           for (i = 0; i < (full ? 16 : 8); i++)
4898             aarch64_set_vec_s8 (cpu, vd, i,
4899                                 aarch64_get_vec_s8 (cpu, vn, i)
4900                                 > aarch64_get_vec_s8 (cpu, vm, i)
4901                                 ? aarch64_get_vec_s8 (cpu, vn, i)
4902                                 : aarch64_get_vec_s8 (cpu, vm, i));
4903           return;
4904
4905         case 1:
4906           for (i = 0; i < (full ? 8 : 4); i++)
4907             aarch64_set_vec_s16 (cpu, vd, i,
4908                                  aarch64_get_vec_s16 (cpu, vn, i)
4909                                  > aarch64_get_vec_s16 (cpu, vm, i)
4910                                  ? aarch64_get_vec_s16 (cpu, vn, i)
4911                                  : aarch64_get_vec_s16 (cpu, vm, i));
4912           return;
4913
4914         case 2:
4915           for (i = 0; i < (full ? 4 : 2); i++)
4916             aarch64_set_vec_s32 (cpu, vd, i,
4917                                  aarch64_get_vec_s32 (cpu, vn, i)
4918                                  > aarch64_get_vec_s32 (cpu, vm, i)
4919                                  ? aarch64_get_vec_s32 (cpu, vn, i)
4920                                  : aarch64_get_vec_s32 (cpu, vm, i));
4921           return;
4922
4923         case 3:
4924           HALT_UNALLOC;
4925         }
4926     }
4927 }
4928
4929 static void
4930 do_vec_min (sim_cpu *cpu)
4931 {
4932   /* instr[31]    = 0
4933      instr[30]    = full/half selector
4934      instr[29]    = SMIN (0) / UMIN (1)
4935      instr[28,24] = 0 1110
4936      instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
4937      instr[21]    = 1
4938      instr[20,16] = Vn
4939      instr[15,10] = 0110 11
4940      instr[9,5]   = Vm
4941      instr[4.0]   = Vd.  */
4942
4943   unsigned vm = INSTR (20, 16);
4944   unsigned vn = INSTR (9, 5);
4945   unsigned vd = INSTR (4, 0);
4946   unsigned i;
4947   int      full = INSTR (30, 30);
4948
4949   NYI_assert (28, 24, 0x0E);
4950   NYI_assert (21, 21, 1);
4951   NYI_assert (15, 10, 0x1B);
4952
4953   if (INSTR (29, 29))
4954     {
4955       switch (INSTR (23, 22))
4956         {
4957         case 0:
4958           for (i = 0; i < (full ? 16 : 8); i++)
4959             aarch64_set_vec_u8 (cpu, vd, i,
4960                                 aarch64_get_vec_u8 (cpu, vn, i)
4961                                 < aarch64_get_vec_u8 (cpu, vm, i)
4962                                 ? aarch64_get_vec_u8 (cpu, vn, i)
4963                                 : aarch64_get_vec_u8 (cpu, vm, i));
4964           return;
4965
4966         case 1:
4967           for (i = 0; i < (full ? 8 : 4); i++)
4968             aarch64_set_vec_u16 (cpu, vd, i,
4969                                  aarch64_get_vec_u16 (cpu, vn, i)
4970                                  < aarch64_get_vec_u16 (cpu, vm, i)
4971                                  ? aarch64_get_vec_u16 (cpu, vn, i)
4972                                  : aarch64_get_vec_u16 (cpu, vm, i));
4973           return;
4974
4975         case 2:
4976           for (i = 0; i < (full ? 4 : 2); i++)
4977             aarch64_set_vec_u32 (cpu, vd, i,
4978                                  aarch64_get_vec_u32 (cpu, vn, i)
4979                                  < aarch64_get_vec_u32 (cpu, vm, i)
4980                                  ? aarch64_get_vec_u32 (cpu, vn, i)
4981                                  : aarch64_get_vec_u32 (cpu, vm, i));
4982           return;
4983
4984         case 3:
4985           HALT_UNALLOC;
4986         }
4987     }
4988   else
4989     {
4990       switch (INSTR (23, 22))
4991         {
4992         case 0:
4993           for (i = 0; i < (full ? 16 : 8); i++)
4994             aarch64_set_vec_s8 (cpu, vd, i,
4995                                 aarch64_get_vec_s8 (cpu, vn, i)
4996                                 < aarch64_get_vec_s8 (cpu, vm, i)
4997                                 ? aarch64_get_vec_s8 (cpu, vn, i)
4998                                 : aarch64_get_vec_s8 (cpu, vm, i));
4999           return;
5000
5001         case 1:
5002           for (i = 0; i < (full ? 8 : 4); i++)
5003             aarch64_set_vec_s16 (cpu, vd, i,
5004                                  aarch64_get_vec_s16 (cpu, vn, i)
5005                                  < aarch64_get_vec_s16 (cpu, vm, i)
5006                                  ? aarch64_get_vec_s16 (cpu, vn, i)
5007                                  : aarch64_get_vec_s16 (cpu, vm, i));
5008           return;
5009
5010         case 2:
5011           for (i = 0; i < (full ? 4 : 2); i++)
5012             aarch64_set_vec_s32 (cpu, vd, i,
5013                                  aarch64_get_vec_s32 (cpu, vn, i)
5014                                  < aarch64_get_vec_s32 (cpu, vm, i)
5015                                  ? aarch64_get_vec_s32 (cpu, vn, i)
5016                                  : aarch64_get_vec_s32 (cpu, vm, i));
5017           return;
5018
5019         case 3:
5020           HALT_UNALLOC;
5021         }
5022     }
5023 }
5024
5025 static void
5026 do_vec_sub_long (sim_cpu *cpu)
5027 {
5028   /* instr[31]    = 0
5029      instr[30]    = lower (0) / upper (1)
5030      instr[29]    = signed (0) / unsigned (1)
5031      instr[28,24] = 0 1110
5032      instr[23,22] = size: bytes (00), half (01), word (10)
5033      instr[21]    = 1
5034      insrt[20,16] = Vm
5035      instr[15,10] = 0010 00
5036      instr[9,5]   = Vn
5037      instr[4,0]   = V dest.  */
5038
5039   unsigned size = INSTR (23, 22);
5040   unsigned vm = INSTR (20, 16);
5041   unsigned vn = INSTR (9, 5);
5042   unsigned vd = INSTR (4, 0);
5043   unsigned bias = 0;
5044   unsigned i;
5045
5046   NYI_assert (28, 24, 0x0E);
5047   NYI_assert (21, 21, 1);
5048   NYI_assert (15, 10, 0x08);
5049
5050   if (size == 3)
5051     HALT_UNALLOC;
5052
5053   switch (INSTR (30, 29))
5054     {
5055     case 2: /* SSUBL2.  */
5056       bias = 2;
5057     case 0: /* SSUBL.  */
5058       switch (size)
5059         {
5060         case 0:
5061           bias *= 3;
5062           for (i = 0; i < 8; i++)
5063             aarch64_set_vec_s16 (cpu, vd, i,
5064                                  aarch64_get_vec_s8 (cpu, vn, i + bias)
5065                                  - aarch64_get_vec_s8 (cpu, vm, i + bias));
5066           break;
5067
5068         case 1:
5069           bias *= 2;
5070           for (i = 0; i < 4; i++)
5071             aarch64_set_vec_s32 (cpu, vd, i,
5072                                  aarch64_get_vec_s16 (cpu, vn, i + bias)
5073                                  - aarch64_get_vec_s16 (cpu, vm, i + bias));
5074           break;
5075
5076         case 2:
5077           for (i = 0; i < 2; i++)
5078             aarch64_set_vec_s64 (cpu, vd, i,
5079                                  aarch64_get_vec_s32 (cpu, vn, i + bias)
5080                                  - aarch64_get_vec_s32 (cpu, vm, i + bias));
5081           break;
5082
5083         default:
5084           HALT_UNALLOC;
5085         }
5086       break;
5087
5088     case 3: /* USUBL2.  */
5089       bias = 2;
5090     case 1: /* USUBL.  */
5091       switch (size)
5092         {
5093         case 0:
5094           bias *= 3;
5095           for (i = 0; i < 8; i++)
5096             aarch64_set_vec_u16 (cpu, vd, i,
5097                                  aarch64_get_vec_u8 (cpu, vn, i + bias)
5098                                  - aarch64_get_vec_u8 (cpu, vm, i + bias));
5099           break;
5100
5101         case 1:
5102           bias *= 2;
5103           for (i = 0; i < 4; i++)
5104             aarch64_set_vec_u32 (cpu, vd, i,
5105                                  aarch64_get_vec_u16 (cpu, vn, i + bias)
5106                                  - aarch64_get_vec_u16 (cpu, vm, i + bias));
5107           break;
5108
5109         case 2:
5110           for (i = 0; i < 2; i++)
5111             aarch64_set_vec_u64 (cpu, vd, i,
5112                                  aarch64_get_vec_u32 (cpu, vn, i + bias)
5113                                  - aarch64_get_vec_u32 (cpu, vm, i + bias));
5114           break;
5115
5116         default:
5117           HALT_UNALLOC;
5118         }
5119       break;
5120     }
5121 }
5122
5123 static void
5124 do_vec_ADDP (sim_cpu *cpu)
5125 {
5126   /* instr[31]    = 0
5127      instr[30]    = half(0)/full(1)
5128      instr[29,24] = 00 1110
5129      instr[23,22] = size: bytes (00), half (01), word (10), long (11)
5130      instr[21]    = 1
5131      insrt[20,16] = Vm
5132      instr[15,10] = 1011 11
5133      instr[9,5]   = Vn
5134      instr[4,0]   = V dest.  */
5135
5136   FRegister copy_vn;
5137   FRegister copy_vm;
5138   unsigned full = INSTR (30, 30);
5139   unsigned size = INSTR (23, 22);
5140   unsigned vm = INSTR (20, 16);
5141   unsigned vn = INSTR (9, 5);
5142   unsigned vd = INSTR (4, 0);
5143   unsigned i, range;
5144
5145   NYI_assert (29, 24, 0x0E);
5146   NYI_assert (21, 21, 1);
5147   NYI_assert (15, 10, 0x2F);
5148
5149   /* Make copies of the source registers in case vd == vn/vm.  */
5150   copy_vn = cpu->fr[vn];
5151   copy_vm = cpu->fr[vm];
5152
5153   switch (size)
5154     {
5155     case 0:
5156       range = full ? 8 : 4;
5157       for (i = 0; i < range; i++)
5158         {
5159           aarch64_set_vec_u8 (cpu, vd, i,
5160                               copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
5161           aarch64_set_vec_u8 (cpu, vd, i + range,
5162                               copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);
5163         }
5164       return;
5165
5166     case 1:
5167       range = full ? 4 : 2;
5168       for (i = 0; i < range; i++)
5169         {
5170           aarch64_set_vec_u16 (cpu, vd, i,
5171                                copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
5172           aarch64_set_vec_u16 (cpu, vd, i + range,
5173                                copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);
5174         }
5175       return;
5176
5177     case 2:
5178       range = full ? 2 : 1;
5179       for (i = 0; i < range; i++)
5180         {
5181           aarch64_set_vec_u32 (cpu, vd, i,
5182                                copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
5183           aarch64_set_vec_u32 (cpu, vd, i + range,
5184                                copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);
5185         }
5186       return;
5187
5188     case 3:
5189       if (! full)
5190         HALT_UNALLOC;
5191       aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
5192       aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
5193       return;
5194     }
5195 }
5196
5197 static void
5198 do_vec_UMOV (sim_cpu *cpu)
5199 {
5200   /* instr[31]    = 0
5201      instr[30]    = 32-bit(0)/64-bit(1)
5202      instr[29,21] = 00 1110 000
5203      insrt[20,16] = size & index
5204      instr[15,10] = 0011 11
5205      instr[9,5]   = V source
5206      instr[4,0]   = R dest.  */
5207
5208   unsigned vs = INSTR (9, 5);
5209   unsigned rd = INSTR (4, 0);
5210   unsigned index;
5211
5212   NYI_assert (29, 21, 0x070);
5213   NYI_assert (15, 10, 0x0F);
5214
5215   if (INSTR (16, 16))
5216     {
5217       /* Byte transfer.  */
5218       index = INSTR (20, 17);
5219       aarch64_set_reg_u64 (cpu, rd, NO_SP,
5220                            aarch64_get_vec_u8 (cpu, vs, index));
5221     }
5222   else if (INSTR (17, 17))
5223     {
5224       index = INSTR (20, 18);
5225       aarch64_set_reg_u64 (cpu, rd, NO_SP,
5226                            aarch64_get_vec_u16 (cpu, vs, index));
5227     }
5228   else if (INSTR (18, 18))
5229     {
5230       index = INSTR (20, 19);
5231       aarch64_set_reg_u64 (cpu, rd, NO_SP,
5232                            aarch64_get_vec_u32 (cpu, vs, index));
5233     }
5234   else
5235     {
5236       if (INSTR (30, 30) != 1)
5237         HALT_UNALLOC;
5238
5239       index = INSTR (20, 20);
5240       aarch64_set_reg_u64 (cpu, rd, NO_SP,
5241                            aarch64_get_vec_u64 (cpu, vs, index));
5242     }
5243 }
5244
5245 static void
5246 do_vec_FABS (sim_cpu *cpu)
5247 {
5248   /* instr[31]    = 0
5249      instr[30]    = half(0)/full(1)
5250      instr[29,23] = 00 1110 1
5251      instr[22]    = float(0)/double(1)
5252      instr[21,16] = 10 0000
5253      instr[15,10] = 1111 10
5254      instr[9,5]   = Vn
5255      instr[4,0]   = Vd.  */
5256
5257   unsigned vn = INSTR (9, 5);
5258   unsigned vd = INSTR (4, 0);
5259   unsigned full = INSTR (30, 30);
5260   unsigned i;
5261
5262   NYI_assert (29, 23, 0x1D);
5263   NYI_assert (21, 10, 0x83E);
5264
5265   if (INSTR (22, 22))
5266     {
5267       if (! full)
5268         HALT_NYI;
5269
5270       for (i = 0; i < 2; i++)
5271         aarch64_set_vec_double (cpu, vd, i,
5272                                 fabs (aarch64_get_vec_double (cpu, vn, i)));
5273     }
5274   else
5275     {
5276       for (i = 0; i < (full ? 4 : 2); i++)
5277         aarch64_set_vec_float (cpu, vd, i,
5278                                fabsf (aarch64_get_vec_float (cpu, vn, i)));
5279     }
5280 }
5281
5282 static void
5283 do_vec_FCVTZS (sim_cpu *cpu)
5284 {
5285   /* instr[31]    = 0
5286      instr[30]    = half (0) / all (1)
5287      instr[29,23] = 00 1110 1
5288      instr[22]    = single (0) / double (1)
5289      instr[21,10] = 10 0001 1011 10
5290      instr[9,5]   = Rn
5291      instr[4,0]   = Rd.  */
5292
5293   unsigned rn = INSTR (9, 5);
5294   unsigned rd = INSTR (4, 0);
5295   unsigned full = INSTR (30, 30);
5296   unsigned i;
5297
5298   NYI_assert (31, 31, 0);
5299   NYI_assert (29, 23, 0x1D);
5300   NYI_assert (21, 10, 0x86E);
5301
5302   if (INSTR (22, 22))
5303     {
5304       if (! full)
5305         HALT_UNALLOC;
5306
5307       for (i = 0; i < 2; i++)
5308         aarch64_set_vec_s64 (cpu, rd, i,
5309                              (int64_t) aarch64_get_vec_double (cpu, rn, i));
5310     }
5311   else
5312     for (i = 0; i < (full ? 4 : 2); i++)
5313       aarch64_set_vec_s32 (cpu, rd, i,
5314                            (int32_t) aarch64_get_vec_float (cpu, rn, i));
5315 }
5316
5317 static void
5318 do_vec_op1 (sim_cpu *cpu)
5319 {
5320   /* instr[31]    = 0
5321      instr[30]    = half/full
5322      instr[29,24] = 00 1110
5323      instr[23,21] = ???
5324      instr[20,16] = Vm
5325      instr[15,10] = sub-opcode
5326      instr[9,5]   = Vn
5327      instr[4,0]   = Vd  */
5328   NYI_assert (29, 24, 0x0E);
5329
5330   if (INSTR (21, 21) == 0)
5331     {
5332       if (INSTR (23, 22) == 0)
5333         {
5334           if (INSTR (30, 30) == 1
5335               && INSTR (17, 14) == 0
5336               && INSTR (12, 10) == 7)
5337             return do_vec_ins_2 (cpu);
5338
5339           switch (INSTR (15, 10))
5340             {
5341             case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
5342             case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
5343             case 0x07: do_vec_INS (cpu); return;
5344             case 0x0A: do_vec_TRN (cpu); return;
5345
5346             case 0x0F:
5347               if (INSTR (17, 16) == 0)
5348                 {
5349                   do_vec_MOV_into_scalar (cpu);
5350                   return;
5351                 }
5352               break;
5353
5354             case 0x00:
5355             case 0x08:
5356             case 0x10:
5357             case 0x18:
5358               do_vec_TBL (cpu); return;
5359
5360             case 0x06:
5361             case 0x16:
5362               do_vec_UZP (cpu); return;
5363
5364             case 0x0E:
5365             case 0x1E:
5366               do_vec_ZIP (cpu); return;
5367
5368             default:
5369               HALT_NYI;
5370             }
5371         }
5372
5373       switch (INSTR (13, 10))
5374         {
5375         case 0x6: do_vec_UZP (cpu); return;
5376         case 0xE: do_vec_ZIP (cpu); return;
5377         case 0xA: do_vec_TRN (cpu); return;
5378         case 0xF: do_vec_UMOV (cpu); return;
5379         default:  HALT_NYI;
5380         }
5381     }
5382
5383   switch (INSTR (15, 10))
5384     {
5385     case 0x07:
5386       switch (INSTR (23, 21))
5387         {
5388         case 1: do_vec_AND (cpu); return;
5389         case 3: do_vec_BIC (cpu); return;
5390         case 5: do_vec_ORR (cpu); return;
5391         case 7: do_vec_ORN (cpu); return;
5392         default: HALT_NYI;
5393         }
5394
5395     case 0x08: do_vec_sub_long (cpu); return;
5396     case 0x0a: do_vec_XTN (cpu); return;
5397     case 0x11: do_vec_SSHL (cpu); return;
5398     case 0x19: do_vec_max (cpu); return;
5399     case 0x1B: do_vec_min (cpu); return;
5400     case 0x21: do_vec_add (cpu); return;
5401     case 0x25: do_vec_MLA (cpu); return;
5402     case 0x27: do_vec_mul (cpu); return;
5403     case 0x2F: do_vec_ADDP (cpu); return;
5404     case 0x30: do_vec_mull (cpu); return;
5405     case 0x33: do_vec_FMLA (cpu); return;
5406     case 0x35: do_vec_fadd (cpu); return;
5407
5408     case 0x2E:
5409       switch (INSTR (20, 16))
5410         {
5411         case 0x00: do_vec_ABS (cpu); return;
5412         case 0x01: do_vec_FCVTZS (cpu); return;
5413         case 0x11: do_vec_ADDV (cpu); return;
5414         default: HALT_NYI;
5415         }
5416
5417     case 0x31:
5418     case 0x3B:
5419       do_vec_Fminmax (cpu); return;
5420
5421     case 0x0D:
5422     case 0x0F:
5423     case 0x22:
5424     case 0x23:
5425     case 0x26:
5426     case 0x2A:
5427     case 0x32:
5428     case 0x36:
5429     case 0x39:
5430     case 0x3A:
5431       do_vec_compare (cpu); return;
5432
5433     case 0x3E:
5434       do_vec_FABS (cpu); return;
5435
5436     default:
5437       HALT_NYI;
5438     }
5439 }
5440
5441 static void
5442 do_vec_xtl (sim_cpu *cpu)
5443 {
5444   /* instr[31]    = 0
5445      instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
5446      instr[28,22] = 0 1111 00
5447      instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
5448      instr[15,10] = 1010 01
5449      instr[9,5]   = V source
5450      instr[4,0]   = V dest.  */
5451
5452   unsigned vs = INSTR (9, 5);
5453   unsigned vd = INSTR (4, 0);
5454   unsigned i, shift, bias = 0;
5455
5456   NYI_assert (28, 22, 0x3C);
5457   NYI_assert (15, 10, 0x29);
5458
5459   switch (INSTR (30, 29))
5460     {
5461     case 2: /* SXTL2, SSHLL2.  */
5462       bias = 2;
5463     case 0: /* SXTL, SSHLL.  */
5464       if (INSTR (21, 21))
5465         {
5466           shift = INSTR (20, 16);
5467           aarch64_set_vec_s64
5468             (cpu, vd, 0, aarch64_get_vec_s32 (cpu, vs, bias) << shift);
5469           aarch64_set_vec_s64
5470             (cpu, vd, 1, aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift);
5471         }
5472       else if (INSTR (20, 20))
5473         {
5474           shift = INSTR (19, 16);
5475           bias *= 2;
5476           for (i = 0; i < 4; i++)
5477             aarch64_set_vec_s32
5478               (cpu, vd, i, aarch64_get_vec_s16 (cpu, vs, i + bias) << shift);
5479         }
5480       else
5481         {
5482           NYI_assert (19, 19, 1);
5483
5484           shift = INSTR (18, 16);
5485           bias *= 3;
5486           for (i = 0; i < 8; i++)
5487             aarch64_set_vec_s16
5488               (cpu, vd, i, aarch64_get_vec_s8 (cpu, vs, i + bias) << shift);
5489         }
5490       return;
5491
5492     case 3: /* UXTL2, USHLL2.  */
5493       bias = 2;
5494     case 1: /* UXTL, USHLL.  */
5495       if (INSTR (21, 21))
5496         {
5497           shift = INSTR (20, 16);
5498           aarch64_set_vec_u64
5499             (cpu, vd, 0, aarch64_get_vec_u32 (cpu, vs, bias) << shift);
5500           aarch64_set_vec_u64
5501             (cpu, vd, 1, aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift);
5502         }
5503       else if (INSTR (20, 20))
5504         {
5505           shift = INSTR (19, 16);
5506           bias *= 2;
5507           for (i = 0; i < 4; i++)
5508             aarch64_set_vec_u32
5509               (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, i + bias) << shift);
5510         }
5511       else
5512         {
5513           NYI_assert (19, 19, 1);
5514
5515           shift = INSTR (18, 16);
5516           bias *= 3;
5517           for (i = 0; i < 8; i++)
5518             aarch64_set_vec_u16
5519               (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, i + bias) << shift);
5520         }
5521       return;
5522     }
5523 }
5524
5525 static void
5526 do_vec_SHL (sim_cpu *cpu)
5527 {
5528   /* instr [31]    = 0
5529      instr [30]    = half(0)/full(1)
5530      instr [29,23] = 001 1110
5531      instr [22,16] = size and shift amount
5532      instr [15,10] = 01 0101
5533      instr [9, 5]  = Vs
5534      instr [4, 0]  = Vd.  */
5535
5536   int shift;
5537   int full    = INSTR (30, 30);
5538   unsigned vs = INSTR (9, 5);
5539   unsigned vd = INSTR (4, 0);
5540   unsigned i;
5541
5542   NYI_assert (29, 23, 0x1E);
5543   NYI_assert (15, 10, 0x15);
5544
5545   if (INSTR (22, 22))
5546     {
5547       shift = INSTR (21, 16);
5548
5549       if (full == 0)
5550         HALT_UNALLOC;
5551
5552       for (i = 0; i < 2; i++)
5553         {
5554           uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5555           aarch64_set_vec_u64 (cpu, vd, i, val << shift);
5556         }
5557
5558       return;
5559     }
5560
5561   if (INSTR (21, 21))
5562     {
5563       shift = INSTR (20, 16);
5564
5565       for (i = 0; i < (full ? 4 : 2); i++)
5566         {
5567           uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5568           aarch64_set_vec_u32 (cpu, vd, i, val << shift);
5569         }
5570
5571       return;
5572     }
5573
5574   if (INSTR (20, 20))
5575     {
5576       shift = INSTR (19, 16);
5577
5578       for (i = 0; i < (full ? 8 : 4); i++)
5579         {
5580           uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5581           aarch64_set_vec_u16 (cpu, vd, i, val << shift);
5582         }
5583
5584       return;
5585     }
5586
5587   if (INSTR (19, 19) == 0)
5588     HALT_UNALLOC;
5589
5590   shift = INSTR (18, 16);
5591
5592   for (i = 0; i < (full ? 16 : 8); i++)
5593     {
5594       uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5595       aarch64_set_vec_u8 (cpu, vd, i, val << shift);
5596     }
5597 }
5598
5599 static void
5600 do_vec_SSHR_USHR (sim_cpu *cpu)
5601 {
5602   /* instr [31]    = 0
5603      instr [30]    = half(0)/full(1)
5604      instr [29]    = signed(0)/unsigned(1)
5605      instr [28,23] = 0 1111 0
5606      instr [22,16] = size and shift amount
5607      instr [15,10] = 0000 01
5608      instr [9, 5]  = Vs
5609      instr [4, 0]  = Vd.  */
5610
5611   int full       = INSTR (30, 30);
5612   int sign       = ! INSTR (29, 29);
5613   unsigned shift = INSTR (22, 16);
5614   unsigned vs    = INSTR (9, 5);
5615   unsigned vd    = INSTR (4, 0);
5616   unsigned i;
5617
5618   NYI_assert (28, 23, 0x1E);
5619   NYI_assert (15, 10, 0x01);
5620
5621   if (INSTR (22, 22))
5622     {
5623       shift = 128 - shift;
5624
5625       if (full == 0)
5626         HALT_UNALLOC;
5627
5628       if (sign)
5629         for (i = 0; i < 2; i++)
5630           {
5631             int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
5632             aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
5633           }
5634       else
5635         for (i = 0; i < 2; i++)
5636           {
5637             uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5638             aarch64_set_vec_u64 (cpu, vd, i, val >> shift);
5639           }
5640
5641       return;
5642     }
5643
5644   if (INSTR (21, 21))
5645     {
5646       shift = 64 - shift;
5647
5648       if (sign)
5649         for (i = 0; i < (full ? 4 : 2); i++)
5650           {
5651             int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
5652             aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
5653           }
5654       else
5655         for (i = 0; i < (full ? 4 : 2); i++)
5656           {
5657             uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5658             aarch64_set_vec_u32 (cpu, vd, i, val >> shift);
5659           }
5660
5661       return;
5662     }
5663
5664   if (INSTR (20, 20))
5665     {
5666       shift = 32 - shift;
5667
5668       if (sign)
5669         for (i = 0; i < (full ? 8 : 4); i++)
5670           {
5671             int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
5672             aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
5673           }
5674       else
5675         for (i = 0; i < (full ? 8 : 4); i++)
5676           {
5677             uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5678             aarch64_set_vec_u16 (cpu, vd, i, val >> shift);
5679           }
5680
5681       return;
5682     }
5683
5684   if (INSTR (19, 19) == 0)
5685     HALT_UNALLOC;
5686
5687   shift = 16 - shift;
5688
5689   if (sign)
5690     for (i = 0; i < (full ? 16 : 8); i++)
5691       {
5692         int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
5693         aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
5694       }
5695   else
5696     for (i = 0; i < (full ? 16 : 8); i++)
5697       {
5698         uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5699         aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
5700       }
5701 }
5702
5703 static void
5704 do_vec_MUL_by_element (sim_cpu *cpu)
5705 {
5706   /* instr[31]    = 0
5707      instr[30]    = half/full
5708      instr[29,24] = 00 1111
5709      instr[23,22] = size
5710      instr[21]    = L
5711      instr[20]    = M
5712      instr[19,16] = m
5713      instr[15,12] = 1000
5714      instr[11]    = H
5715      instr[10]    = 0
5716      instr[9,5]   = Vn
5717      instr[4,0]   = Vd  */
5718
5719   unsigned full     = INSTR (30, 30);
5720   unsigned L        = INSTR (21, 21);
5721   unsigned H        = INSTR (11, 11);
5722   unsigned vn       = INSTR (9, 5);
5723   unsigned vd       = INSTR (4, 0);
5724   unsigned size     = INSTR (23, 22);
5725   unsigned index;
5726   unsigned vm;
5727   unsigned e;
5728
5729   NYI_assert (29, 24, 0x0F);
5730   NYI_assert (15, 12, 0x8);
5731   NYI_assert (10, 10, 0);
5732
5733   switch (size)
5734     {
5735     case 1:
5736       {
5737         /* 16 bit products.  */
5738         uint16_t product;
5739         uint16_t element1;
5740         uint16_t element2;
5741
5742         index = (H << 2) | (L << 1) | INSTR (20, 20);
5743         vm = INSTR (19, 16);
5744         element2 = aarch64_get_vec_u16 (cpu, vm, index);
5745
5746         for (e = 0; e < (full ? 8 : 4); e ++)
5747           {
5748             element1 = aarch64_get_vec_u16 (cpu, vn, e);
5749             product  = element1 * element2;
5750             aarch64_set_vec_u16 (cpu, vd, e, product);
5751           }
5752       }
5753       break;
5754
5755     case 2:
5756       {
5757         /* 32 bit products.  */
5758         uint32_t product;
5759         uint32_t element1;
5760         uint32_t element2;
5761
5762         index = (H << 1) | L;
5763         vm = INSTR (20, 16);
5764         element2 = aarch64_get_vec_u32 (cpu, vm, index);
5765
5766         for (e = 0; e < (full ? 4 : 2); e ++)
5767           {
5768             element1 = aarch64_get_vec_u32 (cpu, vn, e);
5769             product  = element1 * element2;
5770             aarch64_set_vec_u32 (cpu, vd, e, product);
5771           }
5772       }
5773       break;
5774
5775     default:
5776       HALT_UNALLOC;
5777     }
5778 }
5779
5780 static void
5781 do_vec_op2 (sim_cpu *cpu)
5782 {
5783   /* instr[31]    = 0
5784      instr[30]    = half/full
5785      instr[29,24] = 00 1111
5786      instr[23]    = ?
5787      instr[22,16] = element size & index
5788      instr[15,10] = sub-opcode
5789      instr[9,5]   = Vm
5790      instr[4,0]   = Vd  */
5791
5792   NYI_assert (29, 24, 0x0F);
5793
5794   if (INSTR (23, 23) != 0)
5795     {
5796       switch (INSTR (15, 10))
5797         {
5798         case 0x20:
5799         case 0x22: do_vec_MUL_by_element (cpu); return;
5800         default:   HALT_NYI;
5801         }
5802     }
5803   else
5804     {
5805       switch (INSTR (15, 10))
5806         {
5807         case 0x01: do_vec_SSHR_USHR (cpu); return;
5808         case 0x15: do_vec_SHL (cpu); return;
5809         case 0x20:
5810         case 0x22: do_vec_MUL_by_element (cpu); return;
5811         case 0x29: do_vec_xtl (cpu); return;
5812         default:   HALT_NYI;
5813         }
5814     }
5815 }
5816
5817 static void
5818 do_vec_neg (sim_cpu *cpu)
5819 {
5820   /* instr[31]    = 0
5821      instr[30]    = full(1)/half(0)
5822      instr[29,24] = 10 1110
5823      instr[23,22] = size: byte(00), half (01), word (10), long (11)
5824      instr[21,10] = 1000 0010 1110
5825      instr[9,5]   = Vs
5826      instr[4,0]   = Vd  */
5827
5828   int    full = INSTR (30, 30);
5829   unsigned vs = INSTR (9, 5);
5830   unsigned vd = INSTR (4, 0);
5831   unsigned i;
5832
5833   NYI_assert (29, 24, 0x2E);
5834   NYI_assert (21, 10, 0x82E);
5835
5836   switch (INSTR (23, 22))
5837     {
5838     case 0:
5839       for (i = 0; i < (full ? 16 : 8); i++)
5840         aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));
5841       return;
5842
5843     case 1:
5844       for (i = 0; i < (full ? 8 : 4); i++)
5845         aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));
5846       return;
5847
5848     case 2:
5849       for (i = 0; i < (full ? 4 : 2); i++)
5850         aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));
5851       return;
5852
5853     case 3:
5854       if (! full)
5855         HALT_NYI;
5856       for (i = 0; i < 2; i++)
5857         aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
5858       return;
5859     }
5860 }
5861
5862 static void
5863 do_vec_sqrt (sim_cpu *cpu)
5864 {
5865   /* instr[31]    = 0
5866      instr[30]    = full(1)/half(0)
5867      instr[29,23] = 101 1101
5868      instr[22]    = single(0)/double(1)
5869      instr[21,10] = 1000 0111 1110
5870      instr[9,5]   = Vs
5871      instr[4,0]   = Vd.  */
5872
5873   int    full = INSTR (30, 30);
5874   unsigned vs = INSTR (9, 5);
5875   unsigned vd = INSTR (4, 0);
5876   unsigned i;
5877
5878   NYI_assert (29, 23, 0x5B);
5879   NYI_assert (21, 10, 0x87E);
5880
5881   if (INSTR (22, 22) == 0)
5882     for (i = 0; i < (full ? 4 : 2); i++)
5883       aarch64_set_vec_float (cpu, vd, i,
5884                              sqrtf (aarch64_get_vec_float (cpu, vs, i)));
5885   else
5886     for (i = 0; i < 2; i++)
5887       aarch64_set_vec_double (cpu, vd, i,
5888                               sqrt (aarch64_get_vec_double (cpu, vs, i)));
5889 }
5890
5891 static void
5892 do_vec_mls_indexed (sim_cpu *cpu)
5893 {
5894   /* instr[31]       = 0
5895      instr[30]       = half(0)/full(1)
5896      instr[29,24]    = 10 1111
5897      instr[23,22]    = 16-bit(01)/32-bit(10)
5898      instr[21,20+11] = index (if 16-bit)
5899      instr[21+11]    = index (if 32-bit)
5900      instr[20,16]    = Vm
5901      instr[15,12]    = 0100
5902      instr[11]       = part of index
5903      instr[10]       = 0
5904      instr[9,5]      = Vs
5905      instr[4,0]      = Vd.  */
5906
5907   int    full = INSTR (30, 30);
5908   unsigned vs = INSTR (9, 5);
5909   unsigned vd = INSTR (4, 0);
5910   unsigned vm = INSTR (20, 16);
5911   unsigned i;
5912
5913   NYI_assert (15, 12, 4);
5914   NYI_assert (10, 10, 0);
5915
5916   switch (INSTR (23, 22))
5917     {
5918     case 1:
5919       {
5920         unsigned elem;
5921         uint32_t val;
5922
5923         if (vm > 15)
5924           HALT_NYI;
5925
5926         elem = (INSTR (21, 20) << 1)
5927           | INSTR (11, 11);
5928         val = aarch64_get_vec_u16 (cpu, vm, elem);
5929
5930         for (i = 0; i < (full ? 8 : 4); i++)
5931           aarch64_set_vec_u32 (cpu, vd, i,
5932                                aarch64_get_vec_u32 (cpu, vd, i) -
5933                                (aarch64_get_vec_u32 (cpu, vs, i) * val));
5934         return;
5935       }
5936
5937     case 2:
5938       {
5939         unsigned elem = (INSTR (21, 21) << 1)
5940           | INSTR (11, 11);
5941         uint64_t val = aarch64_get_vec_u32 (cpu, vm, elem);
5942
5943         for (i = 0; i < (full ? 4 : 2); i++)
5944           aarch64_set_vec_u64 (cpu, vd, i,
5945                                aarch64_get_vec_u64 (cpu, vd, i) -
5946                                (aarch64_get_vec_u64 (cpu, vs, i) * val));
5947         return;
5948       }
5949
5950     case 0:
5951     case 3:
5952     default:
5953       HALT_NYI;
5954     }
5955 }
5956
5957 static void
5958 do_vec_SUB (sim_cpu *cpu)
5959 {
5960   /* instr [31]    = 0
5961      instr [30]    = half(0)/full(1)
5962      instr [29,24] = 10 1110
5963      instr [23,22] = size: byte(00, half(01), word (10), long (11)
5964      instr [21]    = 1
5965      instr [20,16] = Vm
5966      instr [15,10] = 10 0001
5967      instr [9, 5]  = Vn
5968      instr [4, 0]  = Vd.  */
5969
5970   unsigned full = INSTR (30, 30);
5971   unsigned vm = INSTR (20, 16);
5972   unsigned vn = INSTR (9, 5);
5973   unsigned vd = INSTR (4, 0);
5974   unsigned i;
5975
5976   NYI_assert (29, 24, 0x2E);
5977   NYI_assert (21, 21, 1);
5978   NYI_assert (15, 10, 0x21);
5979
5980   switch (INSTR (23, 22))
5981     {
5982     case 0:
5983       for (i = 0; i < (full ? 16 : 8); i++)
5984         aarch64_set_vec_s8 (cpu, vd, i,
5985                             aarch64_get_vec_s8 (cpu, vn, i)
5986                             - aarch64_get_vec_s8 (cpu, vm, i));
5987       return;
5988
5989     case 1:
5990       for (i = 0; i < (full ? 8 : 4); i++)
5991         aarch64_set_vec_s16 (cpu, vd, i,
5992                              aarch64_get_vec_s16 (cpu, vn, i)
5993                              - aarch64_get_vec_s16 (cpu, vm, i));
5994       return;
5995
5996     case 2:
5997       for (i = 0; i < (full ? 4 : 2); i++)
5998         aarch64_set_vec_s32 (cpu, vd, i,
5999                              aarch64_get_vec_s32 (cpu, vn, i)
6000                              - aarch64_get_vec_s32 (cpu, vm, i));
6001       return;
6002
6003     case 3:
6004       if (full == 0)
6005         HALT_UNALLOC;
6006
6007       for (i = 0; i < 2; i++)
6008         aarch64_set_vec_s64 (cpu, vd, i,
6009                              aarch64_get_vec_s64 (cpu, vn, i)
6010                              - aarch64_get_vec_s64 (cpu, vm, i));
6011       return;
6012     }
6013 }
6014
6015 static void
6016 do_vec_MLS (sim_cpu *cpu)
6017 {
6018   /* instr [31]    = 0
6019      instr [30]    = half(0)/full(1)
6020      instr [29,24] = 10 1110
6021      instr [23,22] = size: byte(00, half(01), word (10)
6022      instr [21]    = 1
6023      instr [20,16] = Vm
6024      instr [15,10] = 10 0101
6025      instr [9, 5]  = Vn
6026      instr [4, 0]  = Vd.  */
6027
6028   unsigned full = INSTR (30, 30);
6029   unsigned vm = INSTR (20, 16);
6030   unsigned vn = INSTR (9, 5);
6031   unsigned vd = INSTR (4, 0);
6032   unsigned i;
6033
6034   NYI_assert (29, 24, 0x2E);
6035   NYI_assert (21, 21, 1);
6036   NYI_assert (15, 10, 0x25);
6037
6038   switch (INSTR (23, 22))
6039     {
6040     case 0:
6041       for (i = 0; i < (full ? 16 : 8); i++)
6042         aarch64_set_vec_u8 (cpu, vd, i,
6043                             (aarch64_get_vec_u8 (cpu, vn, i)
6044                              * aarch64_get_vec_u8 (cpu, vm, i))
6045                             - aarch64_get_vec_u8 (cpu, vd, i));
6046       return;
6047
6048     case 1:
6049       for (i = 0; i < (full ? 8 : 4); i++)
6050         aarch64_set_vec_u16 (cpu, vd, i,
6051                              (aarch64_get_vec_u16 (cpu, vn, i)
6052                               * aarch64_get_vec_u16 (cpu, vm, i))
6053                              - aarch64_get_vec_u16 (cpu, vd, i));
6054       return;
6055
6056     case 2:
6057       for (i = 0; i < (full ? 4 : 2); i++)
6058         aarch64_set_vec_u32 (cpu, vd, i,
6059                              (aarch64_get_vec_u32 (cpu, vn, i)
6060                               * aarch64_get_vec_u32 (cpu, vm, i))
6061                              - aarch64_get_vec_u32 (cpu, vd, i));
6062       return;
6063
6064     default:
6065       HALT_UNALLOC;
6066     }
6067 }
6068
6069 static void
6070 do_vec_FDIV (sim_cpu *cpu)
6071 {
6072   /* instr [31]    = 0
6073      instr [30]    = half(0)/full(1)
6074      instr [29,23] = 10 1110 0
6075      instr [22]    = float()/double(1)
6076      instr [21]    = 1
6077      instr [20,16] = Vm
6078      instr [15,10] = 1111 11
6079      instr [9, 5]  = Vn
6080      instr [4, 0]  = Vd.  */
6081
6082   unsigned full = INSTR (30, 30);
6083   unsigned vm = INSTR (20, 16);
6084   unsigned vn = INSTR (9, 5);
6085   unsigned vd = INSTR (4, 0);
6086   unsigned i;
6087
6088   NYI_assert (29, 23, 0x5C);
6089   NYI_assert (21, 21, 1);
6090   NYI_assert (15, 10, 0x3F);
6091
6092   if (INSTR (22, 22))
6093     {
6094       if (! full)
6095         HALT_UNALLOC;
6096
6097       for (i = 0; i < 2; i++)
6098         aarch64_set_vec_double (cpu, vd, i,
6099                                 aarch64_get_vec_double (cpu, vn, i)
6100                                 / aarch64_get_vec_double (cpu, vm, i));
6101     }
6102   else
6103     for (i = 0; i < (full ? 4 : 2); i++)
6104       aarch64_set_vec_float (cpu, vd, i,
6105                              aarch64_get_vec_float (cpu, vn, i)
6106                              / aarch64_get_vec_float (cpu, vm, i));
6107 }
6108
6109 static void
6110 do_vec_FMUL (sim_cpu *cpu)
6111 {
6112   /* instr [31]    = 0
6113      instr [30]    = half(0)/full(1)
6114      instr [29,23] = 10 1110 0
6115      instr [22]    = float(0)/double(1)
6116      instr [21]    = 1
6117      instr [20,16] = Vm
6118      instr [15,10] = 1101 11
6119      instr [9, 5]  = Vn
6120      instr [4, 0]  = Vd.  */
6121
6122   unsigned full = INSTR (30, 30);
6123   unsigned vm = INSTR (20, 16);
6124   unsigned vn = INSTR (9, 5);
6125   unsigned vd = INSTR (4, 0);
6126   unsigned i;
6127
6128   NYI_assert (29, 23, 0x5C);
6129   NYI_assert (21, 21, 1);
6130   NYI_assert (15, 10, 0x37);
6131
6132   if (INSTR (22, 22))
6133     {
6134       if (! full)
6135         HALT_UNALLOC;
6136
6137       for (i = 0; i < 2; i++)
6138         aarch64_set_vec_double (cpu, vd, i,
6139                                 aarch64_get_vec_double (cpu, vn, i)
6140                                 * aarch64_get_vec_double (cpu, vm, i));
6141     }
6142   else
6143     for (i = 0; i < (full ? 4 : 2); i++)
6144       aarch64_set_vec_float (cpu, vd, i,
6145                              aarch64_get_vec_float (cpu, vn, i)
6146                              * aarch64_get_vec_float (cpu, vm, i));
6147 }
6148
6149 static void
6150 do_vec_FADDP (sim_cpu *cpu)
6151 {
6152   /* instr [31]    = 0
6153      instr [30]    = half(0)/full(1)
6154      instr [29,23] = 10 1110 0
6155      instr [22]    = float(0)/double(1)
6156      instr [21]    = 1
6157      instr [20,16] = Vm
6158      instr [15,10] = 1101 01
6159      instr [9, 5]  = Vn
6160      instr [4, 0]  = Vd.  */
6161
6162   unsigned full = INSTR (30, 30);
6163   unsigned vm = INSTR (20, 16);
6164   unsigned vn = INSTR (9, 5);
6165   unsigned vd = INSTR (4, 0);
6166
6167   NYI_assert (29, 23, 0x5C);
6168   NYI_assert (21, 21, 1);
6169   NYI_assert (15, 10, 0x35);
6170
6171   if (INSTR (22, 22))
6172     {
6173       /* Extract values before adding them incase vd == vn/vm.  */
6174       double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
6175       double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
6176       double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
6177       double tmp4 = aarch64_get_vec_double (cpu, vm, 1);
6178
6179       if (! full)
6180         HALT_UNALLOC;
6181
6182       aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
6183       aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
6184     }
6185   else
6186     {
6187       /* Extract values before adding them incase vd == vn/vm.  */
6188       float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
6189       float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
6190       float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
6191       float tmp6 = aarch64_get_vec_float (cpu, vm, 1);
6192
6193       if (full)
6194         {
6195           float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
6196           float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
6197           float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
6198           float tmp8 = aarch64_get_vec_float (cpu, vm, 3);
6199
6200           aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6201           aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
6202           aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
6203           aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
6204         }
6205       else
6206         {
6207           aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6208           aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
6209         }
6210     }
6211 }
6212
6213 static void
6214 do_vec_FSQRT (sim_cpu *cpu)
6215 {
6216   /* instr[31]    = 0
6217      instr[30]    = half(0)/full(1)
6218      instr[29,23] = 10 1110 1
6219      instr[22]    = single(0)/double(1)
6220      instr[21,10] = 10 0001 1111 10
6221      instr[9,5]   = Vsrc
6222      instr[4,0]   = Vdest.  */
6223
6224   unsigned vn = INSTR (9, 5);
6225   unsigned vd = INSTR (4, 0);
6226   unsigned full = INSTR (30, 30);
6227   int i;
6228
6229   NYI_assert (29, 23, 0x5D);
6230   NYI_assert (21, 10, 0x87E);
6231
6232   if (INSTR (22, 22))
6233     {
6234       if (! full)
6235         HALT_UNALLOC;
6236
6237       for (i = 0; i < 2; i++)
6238         aarch64_set_vec_double (cpu, vd, i,
6239                                 sqrt (aarch64_get_vec_double (cpu, vn, i)));
6240     }
6241   else
6242     {
6243       for (i = 0; i < (full ? 4 : 2); i++)
6244         aarch64_set_vec_float (cpu, vd, i,
6245                                sqrtf (aarch64_get_vec_float (cpu, vn, i)));
6246     }
6247 }
6248
6249 static void
6250 do_vec_FNEG (sim_cpu *cpu)
6251 {
6252   /* instr[31]    = 0
6253      instr[30]    = half (0)/full (1)
6254      instr[29,23] = 10 1110 1
6255      instr[22]    = single (0)/double (1)
6256      instr[21,10] = 10 0000 1111 10
6257      instr[9,5]   = Vsrc
6258      instr[4,0]   = Vdest.  */
6259
6260   unsigned vn = INSTR (9, 5);
6261   unsigned vd = INSTR (4, 0);
6262   unsigned full = INSTR (30, 30);
6263   int i;
6264
6265   NYI_assert (29, 23, 0x5D);
6266   NYI_assert (21, 10, 0x83E);
6267
6268   if (INSTR (22, 22))
6269     {
6270       if (! full)
6271         HALT_UNALLOC;
6272
6273       for (i = 0; i < 2; i++)
6274         aarch64_set_vec_double (cpu, vd, i,
6275                                 - aarch64_get_vec_double (cpu, vn, i));
6276     }
6277   else
6278     {
6279       for (i = 0; i < (full ? 4 : 2); i++)
6280         aarch64_set_vec_float (cpu, vd, i,
6281                                - aarch64_get_vec_float (cpu, vn, i));
6282     }
6283 }
6284
6285 static void
6286 do_vec_NOT (sim_cpu *cpu)
6287 {
6288   /* instr[31]    = 0
6289      instr[30]    = half (0)/full (1)
6290      instr[29,10] = 10 1110 0010 0000 0101 10
6291      instr[9,5]   = Vn
6292      instr[4.0]   = Vd.  */
6293
6294   unsigned vn = INSTR (9, 5);
6295   unsigned vd = INSTR (4, 0);
6296   unsigned i;
6297   int      full = INSTR (30, 30);
6298
6299   NYI_assert (29, 10, 0xB8816);
6300
6301   for (i = 0; i < (full ? 16 : 8); i++)
6302     aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
6303 }
6304
/* Count the leading zero bits of VAL when it is treated as a SIZE bit
   wide quantity.  The scan starts at bit SIZE - 1 and moves down; the
   result is SIZE when none of the low SIZE bits is set.  Bits at or
   above position SIZE are never examined.  The callers in this file
   pass a SIZE of 8, 16, 32 or 64.  */
static unsigned int
clz (uint64_t val, unsigned size)
{
  unsigned int zeros;

  for (zeros = 0; zeros < size; zeros++)
    if ((val >> (size - 1 - zeros)) & 1)
      break;

  return zeros;
}
6324
6325 static void
6326 do_vec_CLZ (sim_cpu *cpu)
6327 {
6328   /* instr[31]    = 0
6329      instr[30]    = half (0)/full (1)
6330      instr[29,24] = 10 1110
6331      instr[23,22] = size
6332      instr[21,10] = 10 0000 0100 10
6333      instr[9,5]   = Vn
6334      instr[4.0]   = Vd.  */
6335
6336   unsigned vn = INSTR (9, 5);
6337   unsigned vd = INSTR (4, 0);
6338   unsigned i;
6339   int      full = INSTR (30,30);
6340
6341   NYI_assert (29, 24, 0x2E);
6342   NYI_assert (21, 10, 0x812);
6343
6344   switch (INSTR (23, 22))
6345     {
6346     case 0:
6347       for (i = 0; i < (full ? 16 : 8); i++)
6348         aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8));
6349       break;
6350     case 1:
6351       for (i = 0; i < (full ? 8 : 4); i++)
6352         aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16));
6353       break;
6354     case 2:
6355       for (i = 0; i < (full ? 4 : 2); i++)
6356         aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32));
6357       break;
6358     case 3:
6359       if (! full)
6360         HALT_UNALLOC;
6361       aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
6362       aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
6363       break;
6364     }
6365 }
6366
6367 static void
6368 do_vec_MOV_element (sim_cpu *cpu)
6369 {
6370   /* instr[31,21] = 0110 1110 000
6371      instr[20,16] = size & dest index
6372      instr[15]    = 0
6373      instr[14,11] = source index
6374      instr[10]    = 1
6375      instr[9,5]   = Vs
6376      instr[4.0]   = Vd.  */
6377
6378   unsigned vs = INSTR (9, 5);
6379   unsigned vd = INSTR (4, 0);
6380   unsigned src_index;
6381   unsigned dst_index;
6382
6383   NYI_assert (31, 21, 0x370);
6384   NYI_assert (15, 15, 0);
6385   NYI_assert (10, 10, 1);
6386
6387   if (INSTR (16, 16))
6388     {
6389       /* Move a byte.  */
6390       src_index = INSTR (14, 11);
6391       dst_index = INSTR (20, 17);
6392       aarch64_set_vec_u8 (cpu, vd, dst_index,
6393                           aarch64_get_vec_u8 (cpu, vs, src_index));
6394     }
6395   else if (INSTR (17, 17))
6396     {
6397       /* Move 16-bits.  */
6398       NYI_assert (11, 11, 0);
6399       src_index = INSTR (14, 12);
6400       dst_index = INSTR (20, 18);
6401       aarch64_set_vec_u16 (cpu, vd, dst_index,
6402                            aarch64_get_vec_u16 (cpu, vs, src_index));
6403     }
6404   else if (INSTR (18, 18))
6405     {
6406       /* Move 32-bits.  */
6407       NYI_assert (12, 11, 0);
6408       src_index = INSTR (14, 13);
6409       dst_index = INSTR (20, 19);
6410       aarch64_set_vec_u32 (cpu, vd, dst_index,
6411                            aarch64_get_vec_u32 (cpu, vs, src_index));
6412     }
6413   else
6414     {
6415       NYI_assert (19, 19, 1);
6416       NYI_assert (13, 11, 0);
6417       src_index = INSTR (14, 14);
6418       dst_index = INSTR (20, 20);
6419       aarch64_set_vec_u64 (cpu, vd, dst_index,
6420                            aarch64_get_vec_u64 (cpu, vs, src_index));
6421     }
6422 }
6423
6424 static void
6425 dexAdvSIMD0 (sim_cpu *cpu)
6426 {
6427   /* instr [28,25] = 0 111.  */
6428   if (    INSTR (15, 10) == 0x07
6429       && (INSTR (9, 5) ==
6430           INSTR (20, 16)))
6431     {
6432       if (INSTR (31, 21) == 0x075
6433           || INSTR (31, 21) == 0x275)
6434         {
6435           do_vec_MOV_whole_vector (cpu);
6436           return;
6437         }
6438     }
6439
6440   if (INSTR (29, 19) == 0x1E0)
6441     {
6442       do_vec_MOV_immediate (cpu);
6443       return;
6444     }
6445
6446   if (INSTR (29, 19) == 0x5E0)
6447     {
6448       do_vec_MVNI (cpu);
6449       return;
6450     }
6451
6452   if (INSTR (29, 19) == 0x1C0
6453       || INSTR (29, 19) == 0x1C1)
6454     {
6455       if (INSTR (15, 10) == 0x03)
6456         {
6457           do_vec_DUP_scalar_into_vector (cpu);
6458           return;
6459         }
6460     }
6461
6462   switch (INSTR (29, 24))
6463     {
6464     case 0x0E: do_vec_op1 (cpu); return;
6465     case 0x0F: do_vec_op2 (cpu); return;
6466
6467     case 0x2f:
6468       switch (INSTR (15, 10))
6469         {
6470         case 0x01: do_vec_SSHR_USHR (cpu); return;
6471         case 0x10:
6472         case 0x12: do_vec_mls_indexed (cpu); return;
6473         case 0x29: do_vec_xtl (cpu); return;
6474         default:
6475           HALT_NYI;
6476         }
6477
6478     case 0x2E:
6479       if (INSTR (21, 21) == 1)
6480         {
6481           switch (INSTR (15, 10))
6482             {
6483             case 0x07:
6484               switch (INSTR (23, 22))
6485                 {
6486                 case 0: do_vec_EOR (cpu); return;
6487                 case 1: do_vec_BSL (cpu); return;
6488                 case 2:
6489                 case 3: do_vec_bit (cpu); return;
6490                 }
6491               break;
6492
6493             case 0x08: do_vec_sub_long (cpu); return;
6494             case 0x11: do_vec_USHL (cpu); return;
6495             case 0x12: do_vec_CLZ (cpu); return;
6496             case 0x16: do_vec_NOT (cpu); return;
6497             case 0x19: do_vec_max (cpu); return;
6498             case 0x1B: do_vec_min (cpu); return;
6499             case 0x21: do_vec_SUB (cpu); return;
6500             case 0x25: do_vec_MLS (cpu); return;
6501             case 0x31: do_vec_FminmaxNMP (cpu); return;
6502             case 0x35: do_vec_FADDP (cpu); return;
6503             case 0x37: do_vec_FMUL (cpu); return;
6504             case 0x3F: do_vec_FDIV (cpu); return;
6505
6506             case 0x3E:
6507               switch (INSTR (20, 16))
6508                 {
6509                 case 0x00: do_vec_FNEG (cpu); return;
6510                 case 0x01: do_vec_FSQRT (cpu); return;
6511                 default:   HALT_NYI;
6512                 }
6513
6514             case 0x0D:
6515             case 0x0F:
6516             case 0x22:
6517             case 0x23:
6518             case 0x26:
6519             case 0x2A:
6520             case 0x32:
6521             case 0x36:
6522             case 0x39:
6523             case 0x3A:
6524               do_vec_compare (cpu); return;
6525
6526             default:
6527               break;
6528             }
6529         }
6530
6531       if (INSTR (31, 21) == 0x370)
6532         {
6533           do_vec_MOV_element (cpu);
6534           return;
6535         }
6536
6537       switch (INSTR (21, 10))
6538         {
6539         case 0x82E: do_vec_neg (cpu); return;
6540         case 0x87E: do_vec_sqrt (cpu); return;
6541         default:
6542           if (INSTR (15, 10) == 0x30)
6543             {
6544               do_vec_mull (cpu);
6545               return;
6546             }
6547           break;
6548         }
6549       break;
6550
6551     default:
6552       break;
6553     }
6554
6555   HALT_NYI;
6556 }
6557
6558 /* 3 sources.  */
6559
6560 /* Float multiply add.  */
6561 static void
6562 fmadds (sim_cpu *cpu)
6563 {
6564   unsigned sa = INSTR (14, 10);
6565   unsigned sm = INSTR (20, 16);
6566   unsigned sn = INSTR ( 9,  5);
6567   unsigned sd = INSTR ( 4,  0);
6568
6569   aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
6570                         + aarch64_get_FP_float (cpu, sn)
6571                         * aarch64_get_FP_float (cpu, sm));
6572 }
6573
6574 /* Double multiply add.  */
6575 static void
6576 fmaddd (sim_cpu *cpu)
6577 {
6578   unsigned sa = INSTR (14, 10);
6579   unsigned sm = INSTR (20, 16);
6580   unsigned sn = INSTR ( 9,  5);
6581   unsigned sd = INSTR ( 4,  0);
6582
6583   aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
6584                          + aarch64_get_FP_double (cpu, sn)
6585                          * aarch64_get_FP_double (cpu, sm));
6586 }
6587
6588 /* Float multiply subtract.  */
6589 static void
6590 fmsubs (sim_cpu *cpu)
6591 {
6592   unsigned sa = INSTR (14, 10);
6593   unsigned sm = INSTR (20, 16);
6594   unsigned sn = INSTR ( 9,  5);
6595   unsigned sd = INSTR ( 4,  0);
6596
6597   aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
6598                         - aarch64_get_FP_float (cpu, sn)
6599                         * aarch64_get_FP_float (cpu, sm));
6600 }
6601
6602 /* Double multiply subtract.  */
6603 static void
6604 fmsubd (sim_cpu *cpu)
6605 {
6606   unsigned sa = INSTR (14, 10);
6607   unsigned sm = INSTR (20, 16);
6608   unsigned sn = INSTR ( 9,  5);
6609   unsigned sd = INSTR ( 4,  0);
6610
6611   aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
6612                          - aarch64_get_FP_double (cpu, sn)
6613                          * aarch64_get_FP_double (cpu, sm));
6614 }
6615
6616 /* Float negative multiply add.  */
6617 static void
6618 fnmadds (sim_cpu *cpu)
6619 {
6620   unsigned sa = INSTR (14, 10);
6621   unsigned sm = INSTR (20, 16);
6622   unsigned sn = INSTR ( 9,  5);
6623   unsigned sd = INSTR ( 4,  0);
6624
6625   aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
6626                         + (- aarch64_get_FP_float (cpu, sn))
6627                         * aarch64_get_FP_float (cpu, sm));
6628 }
6629
6630 /* Double negative multiply add.  */
6631 static void
6632 fnmaddd (sim_cpu *cpu)
6633 {
6634   unsigned sa = INSTR (14, 10);
6635   unsigned sm = INSTR (20, 16);
6636   unsigned sn = INSTR ( 9,  5);
6637   unsigned sd = INSTR ( 4,  0);
6638
6639   aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
6640                          + (- aarch64_get_FP_double (cpu, sn))
6641                          * aarch64_get_FP_double (cpu, sm));
6642 }
6643
6644 /* Float negative multiply subtract.  */
6645 static void
6646 fnmsubs (sim_cpu *cpu)
6647 {
6648   unsigned sa = INSTR (14, 10);
6649   unsigned sm = INSTR (20, 16);
6650   unsigned sn = INSTR ( 9,  5);
6651   unsigned sd = INSTR ( 4,  0);
6652
6653   aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
6654                         + aarch64_get_FP_float (cpu, sn)
6655                         * aarch64_get_FP_float (cpu, sm));
6656 }
6657
6658 /* Double negative multiply subtract.  */
6659 static void
6660 fnmsubd (sim_cpu *cpu)
6661 {
6662   unsigned sa = INSTR (14, 10);
6663   unsigned sm = INSTR (20, 16);
6664   unsigned sn = INSTR ( 9,  5);
6665   unsigned sd = INSTR ( 4,  0);
6666
6667   aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
6668                          + aarch64_get_FP_double (cpu, sn)
6669                          * aarch64_get_FP_double (cpu, sm));
6670 }
6671
6672 static void
6673 dexSimpleFPDataProc3Source (sim_cpu *cpu)
6674 {
6675   /* instr[31]    ==> M : 0 ==> OK, 1 ==> UNALLOC
6676      instr[30]    = 0
6677      instr[29]    ==> S :  0 ==> OK, 1 ==> UNALLOC
6678      instr[28,25] = 1111
6679      instr[24]    = 1
6680      instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
6681      instr[21]    ==> o1 : 0 ==> unnegated, 1 ==> negated
6682      instr[15]    ==> o2 : 0 ==> ADD, 1 ==> SUB  */
6683
6684   uint32_t M_S = (INSTR (31, 31) << 1)
6685     | INSTR (29, 29);
6686   /* dispatch on combined type:o1:o2.  */
6687   uint32_t dispatch = (INSTR (23, 21) << 1)
6688     | INSTR (15, 15);
6689
6690   if (M_S != 0)
6691     HALT_UNALLOC;
6692
6693   switch (dispatch)
6694     {
6695     case 0: fmadds (cpu); return;
6696     case 1: fmsubs (cpu); return;
6697     case 2: fnmadds (cpu); return;
6698     case 3: fnmsubs (cpu); return;
6699     case 4: fmaddd (cpu); return;
6700     case 5: fmsubd (cpu); return;
6701     case 6: fnmaddd (cpu); return;
6702     case 7: fnmsubd (cpu); return;
6703     default:
6704       /* type > 1 is currently unallocated.  */
6705       HALT_UNALLOC;
6706     }
6707 }
6708
6709 static void
6710 dexSimpleFPFixedConvert (sim_cpu *cpu)
6711 {
6712   HALT_NYI;
6713 }
6714
6715 static void
6716 dexSimpleFPCondCompare (sim_cpu *cpu)
6717 {
6718   /* instr [31,23] = 0001 1110 0
6719      instr [22]    = type
6720      instr [21]    = 1
6721      instr [20,16] = Rm
6722      instr [15,12] = condition
6723      instr [11,10] = 01
6724      instr [9,5]   = Rn
6725      instr [4]     = 0
6726      instr [3,0]   = nzcv  */
6727
6728   unsigned rm = INSTR (20, 16);
6729   unsigned rn = INSTR (9, 5);
6730
6731   NYI_assert (31, 23, 0x3C);
6732   NYI_assert (11, 10, 0x1);
6733   NYI_assert (4,  4,  0);
6734
6735   if (! testConditionCode (cpu, INSTR (15, 12)))
6736     {
6737       aarch64_set_CPSR (cpu, INSTR (3, 0));
6738       return;
6739     }
6740
6741   if (INSTR (22, 22))
6742     {
6743       /* Double precision.  */
6744       double val1 = aarch64_get_vec_double (cpu, rn, 0);
6745       double val2 = aarch64_get_vec_double (cpu, rm, 0);
6746
6747       /* FIXME: Check for NaNs.  */
6748       if (val1 == val2)
6749         aarch64_set_CPSR (cpu, (Z | C));
6750       else if (val1 < val2)
6751         aarch64_set_CPSR (cpu, N);
6752       else /* val1 > val2 */
6753         aarch64_set_CPSR (cpu, C);
6754     }
6755   else
6756     {
6757       /* Single precision.  */
6758       float val1 = aarch64_get_vec_float (cpu, rn, 0);
6759       float val2 = aarch64_get_vec_float (cpu, rm, 0);
6760
6761       /* FIXME: Check for NaNs.  */
6762       if (val1 == val2)
6763         aarch64_set_CPSR (cpu, (Z | C));
6764       else if (val1 < val2)
6765         aarch64_set_CPSR (cpu, N);
6766       else /* val1 > val2 */
6767         aarch64_set_CPSR (cpu, C);
6768     }
6769 }
6770
6771 /* 2 sources.  */
6772
6773 /* Float add.  */
6774 static void
6775 fadds (sim_cpu *cpu)
6776 {
6777   unsigned sm = INSTR (20, 16);
6778   unsigned sn = INSTR ( 9,  5);
6779   unsigned sd = INSTR ( 4,  0);
6780
6781   aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
6782                         + aarch64_get_FP_float (cpu, sm));
6783 }
6784
6785 /* Double add.  */
6786 static void
6787 faddd (sim_cpu *cpu)
6788 {
6789   unsigned sm = INSTR (20, 16);
6790   unsigned sn = INSTR ( 9,  5);
6791   unsigned sd = INSTR ( 4,  0);
6792
6793   aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
6794                          + aarch64_get_FP_double (cpu, sm));
6795 }
6796
6797 /* Float divide.  */
6798 static void
6799 fdivs (sim_cpu *cpu)
6800 {
6801   unsigned sm = INSTR (20, 16);
6802   unsigned sn = INSTR ( 9,  5);
6803   unsigned sd = INSTR ( 4,  0);
6804
6805   aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
6806                         / aarch64_get_FP_float (cpu, sm));
6807 }
6808
6809 /* Double divide.  */
6810 static void
6811 fdivd (sim_cpu *cpu)
6812 {
6813   unsigned sm = INSTR (20, 16);
6814   unsigned sn = INSTR ( 9,  5);
6815   unsigned sd = INSTR ( 4,  0);
6816
6817   aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
6818                          / aarch64_get_FP_double (cpu, sm));
6819 }
6820
6821 /* Float multiply.  */
6822 static void
6823 fmuls (sim_cpu *cpu)
6824 {
6825   unsigned sm = INSTR (20, 16);
6826   unsigned sn = INSTR ( 9,  5);
6827   unsigned sd = INSTR ( 4,  0);
6828
6829   aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
6830                         * aarch64_get_FP_float (cpu, sm));
6831 }
6832
6833 /* Double multiply.  */
6834 static void
6835 fmuld (sim_cpu *cpu)
6836 {
6837   unsigned sm = INSTR (20, 16);
6838   unsigned sn = INSTR ( 9,  5);
6839   unsigned sd = INSTR ( 4,  0);
6840
6841   aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
6842                          * aarch64_get_FP_double (cpu, sm));
6843 }
6844
6845 /* Float negate and multiply.  */
6846 static void
6847 fnmuls (sim_cpu *cpu)
6848 {
6849   unsigned sm = INSTR (20, 16);
6850   unsigned sn = INSTR ( 9,  5);
6851   unsigned sd = INSTR ( 4,  0);
6852
6853   aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
6854                                     * aarch64_get_FP_float (cpu, sm)));
6855 }
6856
6857 /* Double negate and multiply.  */
6858 static void
6859 fnmuld (sim_cpu *cpu)
6860 {
6861   unsigned sm = INSTR (20, 16);
6862   unsigned sn = INSTR ( 9,  5);
6863   unsigned sd = INSTR ( 4,  0);
6864
6865   aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
6866                                      * aarch64_get_FP_double (cpu, sm)));
6867 }
6868
6869 /* Float subtract.  */
6870 static void
6871 fsubs (sim_cpu *cpu)
6872 {
6873   unsigned sm = INSTR (20, 16);
6874   unsigned sn = INSTR ( 9,  5);
6875   unsigned sd = INSTR ( 4,  0);
6876
6877   aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
6878                         - aarch64_get_FP_float (cpu, sm));
6879 }
6880
6881 /* Double subtract.  */
6882 static void
6883 fsubd (sim_cpu *cpu)
6884 {
6885   unsigned sm = INSTR (20, 16);
6886   unsigned sn = INSTR ( 9,  5);
6887   unsigned sd = INSTR ( 4,  0);
6888
6889   aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
6890                          - aarch64_get_FP_double (cpu, sm));
6891 }
6892
6893 static void
6894 do_FMINNM (sim_cpu *cpu)
6895 {
6896   /* instr[31,23] = 0 0011 1100
6897      instr[22]    = float(0)/double(1)
6898      instr[21]    = 1
6899      instr[20,16] = Sm
6900      instr[15,10] = 01 1110
6901      instr[9,5]   = Sn
6902      instr[4,0]   = Cpu  */
6903
6904   unsigned sm = INSTR (20, 16);
6905   unsigned sn = INSTR ( 9,  5);
6906   unsigned sd = INSTR ( 4,  0);
6907
6908   NYI_assert (31, 23, 0x03C);
6909   NYI_assert (15, 10, 0x1E);
6910
6911   if (INSTR (22, 22))
6912     aarch64_set_FP_double (cpu, sd,
6913                            dminnm (aarch64_get_FP_double (cpu, sn),
6914                                    aarch64_get_FP_double (cpu, sm)));
6915   else
6916     aarch64_set_FP_float (cpu, sd,
6917                           fminnm (aarch64_get_FP_float (cpu, sn),
6918                                   aarch64_get_FP_float (cpu, sm)));
6919 }
6920
6921 static void
6922 do_FMAXNM (sim_cpu *cpu)
6923 {
6924   /* instr[31,23] = 0 0011 1100
6925      instr[22]    = float(0)/double(1)
6926      instr[21]    = 1
6927      instr[20,16] = Sm
6928      instr[15,10] = 01 1010
6929      instr[9,5]   = Sn
6930      instr[4,0]   = Cpu  */
6931
6932   unsigned sm = INSTR (20, 16);
6933   unsigned sn = INSTR ( 9,  5);
6934   unsigned sd = INSTR ( 4,  0);
6935
6936   NYI_assert (31, 23, 0x03C);
6937   NYI_assert (15, 10, 0x1A);
6938
6939   if (INSTR (22, 22))
6940     aarch64_set_FP_double (cpu, sd,
6941                            dmaxnm (aarch64_get_FP_double (cpu, sn),
6942                                    aarch64_get_FP_double (cpu, sm)));
6943   else
6944     aarch64_set_FP_float (cpu, sd,
6945                           fmaxnm (aarch64_get_FP_float (cpu, sn),
6946                                   aarch64_get_FP_float (cpu, sm)));
6947 }
6948
6949 static void
6950 dexSimpleFPDataProc2Source (sim_cpu *cpu)
6951 {
6952   /* instr[31]    ==> M : 0 ==> OK, 1 ==> UNALLOC
6953      instr[30]    = 0
6954      instr[29]    ==> S :  0 ==> OK, 1 ==> UNALLOC
6955      instr[28,25] = 1111
6956      instr[24]    = 0
6957      instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
6958      instr[21]    = 1
6959      instr[20,16] = Vm
6960      instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
6961                                0010 ==> FADD, 0011 ==> FSUB,
6962                                0100 ==> FMAX, 0101 ==> FMIN
6963                                0110 ==> FMAXNM, 0111 ==> FMINNM
6964                                1000 ==> FNMUL, ow ==> UNALLOC
6965      instr[11,10] = 10
6966      instr[9,5]   = Vn
6967      instr[4,0]   = Vd  */
6968
6969   uint32_t M_S = (INSTR (31, 31) << 1)
6970     | INSTR (29, 29);
6971   uint32_t type = INSTR (23, 22);
6972   /* Dispatch on opcode.  */
6973   uint32_t dispatch = INSTR (15, 12);
6974
6975   if (type > 1)
6976     HALT_UNALLOC;
6977
6978   if (M_S != 0)
6979     HALT_UNALLOC;
6980
6981   if (type)
6982     switch (dispatch)
6983       {
6984       case 0: fmuld (cpu); return;
6985       case 1: fdivd (cpu); return;
6986       case 2: faddd (cpu); return;
6987       case 3: fsubd (cpu); return;
6988       case 6: do_FMAXNM (cpu); return;
6989       case 7: do_FMINNM (cpu); return;
6990       case 8: fnmuld (cpu); return;
6991
6992         /* Have not yet implemented fmax and fmin.  */
6993       case 4:
6994       case 5:
6995         HALT_NYI;
6996
6997       default:
6998         HALT_UNALLOC;
6999       }
7000   else /* type == 0 => floats.  */
7001     switch (dispatch)
7002       {
7003       case 0: fmuls (cpu); return;
7004       case 1: fdivs (cpu); return;
7005       case 2: fadds (cpu); return;
7006       case 3: fsubs (cpu); return;
7007       case 6: do_FMAXNM (cpu); return;
7008       case 7: do_FMINNM (cpu); return;
7009       case 8: fnmuls (cpu); return;
7010
7011       case 4:
7012       case 5:
7013         HALT_NYI;
7014
7015       default:
7016         HALT_UNALLOC;
7017       }
7018 }
7019
7020 static void
7021 dexSimpleFPCondSelect (sim_cpu *cpu)
7022 {
7023   /* FCSEL
7024      instr[31,23] = 0 0011 1100
7025      instr[22]    = 0=>single 1=>double
7026      instr[21]    = 1
7027      instr[20,16] = Sm
7028      instr[15,12] = cond
7029      instr[11,10] = 11
7030      instr[9,5]   = Sn
7031      instr[4,0]   = Cpu  */
7032   unsigned sm = INSTR (20, 16);
7033   unsigned sn = INSTR ( 9, 5);
7034   unsigned sd = INSTR ( 4, 0);
7035   uint32_t set = testConditionCode (cpu, INSTR (15, 12));
7036
7037   NYI_assert (31, 23, 0x03C);
7038   NYI_assert (11, 10, 0x3);
7039
7040   if (INSTR (22, 22))
7041     aarch64_set_FP_double (cpu, sd, set ? sn : sm);
7042   else
7043     aarch64_set_FP_float (cpu, sd, set ? sn : sm);
7044 }
7045
7046 /* Store 32 bit unscaled signed 9 bit.  */
7047 static void
7048 fsturs (sim_cpu *cpu, int32_t offset)
7049 {
7050   unsigned int rn = INSTR (9, 5);
7051   unsigned int st = INSTR (4, 0);
7052
7053   aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, st, 1) + offset,
7054                        aarch64_get_vec_u32 (cpu, rn, 0));
7055 }
7056
7057 /* Store 64 bit unscaled signed 9 bit.  */
7058 static void
7059 fsturd (sim_cpu *cpu, int32_t offset)
7060 {
7061   unsigned int rn = INSTR (9, 5);
7062   unsigned int st = INSTR (4, 0);
7063
7064   aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, st, 1) + offset,
7065                        aarch64_get_vec_u64 (cpu, rn, 0));
7066 }
7067
7068 /* Store 128 bit unscaled signed 9 bit.  */
7069 static void
7070 fsturq (sim_cpu *cpu, int32_t offset)
7071 {
7072   unsigned int rn = INSTR (9, 5);
7073   unsigned int st = INSTR (4, 0);
7074   FRegister a;
7075
7076   aarch64_get_FP_long_double (cpu, rn, & a);
7077   aarch64_set_mem_long_double (cpu,
7078                                aarch64_get_reg_u64 (cpu, st, 1)
7079                                + offset, a);
7080 }
7081
7082 /* TODO FP move register.  */
7083
7084 /* 32 bit fp to fp move register.  */
7085 static void
7086 ffmovs (sim_cpu *cpu)
7087 {
7088   unsigned int rn = INSTR (9, 5);
7089   unsigned int st = INSTR (4, 0);
7090
7091   aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
7092 }
7093
7094 /* 64 bit fp to fp move register.  */
7095 static void
7096 ffmovd (sim_cpu *cpu)
7097 {
7098   unsigned int rn = INSTR (9, 5);
7099   unsigned int st = INSTR (4, 0);
7100
7101   aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
7102 }
7103
7104 /* 32 bit GReg to Vec move register.  */
7105 static void
7106 fgmovs (sim_cpu *cpu)
7107 {
7108   unsigned int rn = INSTR (9, 5);
7109   unsigned int st = INSTR (4, 0);
7110
7111   aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
7112 }
7113
7114 /* 64 bit g to fp move register.  */
7115 static void
7116 fgmovd (sim_cpu *cpu)
7117 {
7118   unsigned int rn = INSTR (9, 5);
7119   unsigned int st = INSTR (4, 0);
7120
7121   aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7122 }
7123
7124 /* 32 bit fp to g move register.  */
7125 static void
7126 gfmovs (sim_cpu *cpu)
7127 {
7128   unsigned int rn = INSTR (9, 5);
7129   unsigned int st = INSTR (4, 0);
7130
7131   aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
7132 }
7133
7134 /* 64 bit fp to g move register.  */
7135 static void
7136 gfmovd (sim_cpu *cpu)
7137 {
7138   unsigned int rn = INSTR (9, 5);
7139   unsigned int st = INSTR (4, 0);
7140
7141   aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
7142 }
7143
7144 /* FP move immediate
7145
7146    These install an immediate 8 bit value in the target register
7147    where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
7148    bit exponent.  */
7149
7150 static void
7151 fmovs (sim_cpu *cpu)
7152 {
7153   unsigned int sd = INSTR (4, 0);
7154   uint32_t imm = INSTR (20, 13);
7155   float f = fp_immediate_for_encoding_32 (imm);
7156
7157   aarch64_set_FP_float (cpu, sd, f);
7158 }
7159
7160 static void
7161 fmovd (sim_cpu *cpu)
7162 {
7163   unsigned int sd = INSTR (4, 0);
7164   uint32_t imm = INSTR (20, 13);
7165   double d = fp_immediate_for_encoding_64 (imm);
7166
7167   aarch64_set_FP_double (cpu, sd, d);
7168 }
7169
7170 static void
7171 dexSimpleFPImmediate (sim_cpu *cpu)
7172 {
7173   /* instr[31,23] == 00111100
7174      instr[22]    == type : single(0)/double(1)
7175      instr[21]    == 1
7176      instr[20,13] == imm8
7177      instr[12,10] == 100
7178      instr[9,5]   == imm5 : 00000 ==> PK, ow ==> UNALLOC
7179      instr[4,0]   == Rd  */
7180   uint32_t imm5 = INSTR (9, 5);
7181
7182   NYI_assert (31, 23, 0x3C);
7183
7184   if (imm5 != 0)
7185     HALT_UNALLOC;
7186
7187   if (INSTR (22, 22))
7188     fmovd (cpu);
7189   else
7190     fmovs (cpu);
7191 }
7192
7193 /* TODO specific decode and execute for group Load Store.  */
7194
7195 /* TODO FP load/store single register (unscaled offset).  */
7196
7197 /* TODO load 8 bit unscaled signed 9 bit.  */
7198 /* TODO load 16 bit unscaled signed 9 bit.  */
7199
7200 /* Load 32 bit unscaled signed 9 bit.  */
7201 static void
7202 fldurs (sim_cpu *cpu, int32_t offset)
7203 {
7204   unsigned int rn = INSTR (9, 5);
7205   unsigned int st = INSTR (4, 0);
7206
7207   aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
7208                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7209 }
7210
7211 /* Load 64 bit unscaled signed 9 bit.  */
7212 static void
7213 fldurd (sim_cpu *cpu, int32_t offset)
7214 {
7215   unsigned int rn = INSTR (9, 5);
7216   unsigned int st = INSTR (4, 0);
7217
7218   aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
7219                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7220 }
7221
7222 /* Load 128 bit unscaled signed 9 bit.  */
7223 static void
7224 fldurq (sim_cpu *cpu, int32_t offset)
7225 {
7226   unsigned int rn = INSTR (9, 5);
7227   unsigned int st = INSTR (4, 0);
7228   FRegister a;
7229   uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
7230
7231   aarch64_get_mem_long_double (cpu, addr, & a);
7232   aarch64_set_FP_long_double (cpu, st, a);
7233 }
7234
7235 /* TODO store 8 bit unscaled signed 9 bit.  */
7236 /* TODO store 16 bit unscaled signed 9 bit.  */
7237
7238
7239 /* 1 source.  */
7240
7241 /* Float absolute value.  */
7242 static void
7243 fabss (sim_cpu *cpu)
7244 {
7245   unsigned sn = INSTR (9, 5);
7246   unsigned sd = INSTR (4, 0);
7247   float value = aarch64_get_FP_float (cpu, sn);
7248
7249   aarch64_set_FP_float (cpu, sd, fabsf (value));
7250 }
7251
7252 /* Double absolute value.  */
7253 static void
7254 fabcpu (sim_cpu *cpu)
7255 {
7256   unsigned sn = INSTR (9, 5);
7257   unsigned sd = INSTR (4, 0);
7258   double value = aarch64_get_FP_double (cpu, sn);
7259
7260   aarch64_set_FP_double (cpu, sd, fabs (value));
7261 }
7262
7263 /* Float negative value.  */
7264 static void
7265 fnegs (sim_cpu *cpu)
7266 {
7267   unsigned sn = INSTR (9, 5);
7268   unsigned sd = INSTR (4, 0);
7269
7270   aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
7271 }
7272
7273 /* Double negative value.  */
7274 static void
7275 fnegd (sim_cpu *cpu)
7276 {
7277   unsigned sn = INSTR (9, 5);
7278   unsigned sd = INSTR (4, 0);
7279
7280   aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
7281 }
7282
7283 /* Float square root.  */
7284 static void
7285 fsqrts (sim_cpu *cpu)
7286 {
7287   unsigned sn = INSTR (9, 5);
7288   unsigned sd = INSTR (4, 0);
7289
7290   aarch64_set_FP_float (cpu, sd, sqrt (aarch64_get_FP_float (cpu, sn)));
7291 }
7292
7293 /* Double square root.  */
7294 static void
7295 fsqrtd (sim_cpu *cpu)
7296 {
7297   unsigned sn = INSTR (9, 5);
7298   unsigned sd = INSTR (4, 0);
7299
7300   aarch64_set_FP_double (cpu, sd,
7301                          sqrt (aarch64_get_FP_double (cpu, sn)));
7302 }
7303
7304 /* Convert double to float.  */
7305 static void
7306 fcvtds (sim_cpu *cpu)
7307 {
7308   unsigned sn = INSTR (9, 5);
7309   unsigned sd = INSTR (4, 0);
7310
7311   aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
7312 }
7313
7314 /* Convert float to double.  */
7315 static void
7316 fcvtcpu (sim_cpu *cpu)
7317 {
7318   unsigned sn = INSTR (9, 5);
7319   unsigned sd = INSTR (4, 0);
7320
7321   aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
7322 }
7323
7324 static void
7325 do_FRINT (sim_cpu *cpu)
7326 {
7327   /* instr[31,23] = 0001 1110 0
7328      instr[22]    = single(0)/double(1)
7329      instr[21,18] = 1001
7330      instr[17,15] = rounding mode
7331      instr[14,10] = 10000
7332      instr[9,5]   = source
7333      instr[4,0]   = dest  */
7334
7335   float val;
7336   unsigned rs = INSTR (9, 5);
7337   unsigned rd = INSTR (4, 0);
7338   unsigned int rmode = INSTR (17, 15);
7339
7340   NYI_assert (31, 23, 0x03C);
7341   NYI_assert (21, 18, 0x9);
7342   NYI_assert (14, 10, 0x10);
7343
7344   if (rmode == 6 || rmode == 7)
7345     /* FIXME: Add support for rmode == 6 exactness check.  */
7346     rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
7347
7348   if (INSTR (22, 22))
7349     {
7350       double val = aarch64_get_FP_double (cpu, rs);
7351
7352       switch (rmode)
7353         {
7354         case 0: /* mode N: nearest or even.  */
7355           {
7356             double rval = round (val);
7357
7358             if (val - rval == 0.5)
7359               {
7360                 if (((rval / 2.0) * 2.0) != rval)
7361                   rval += 1.0;
7362               }
7363
7364             aarch64_set_FP_double (cpu, rd, round (val));
7365             return;
7366           }
7367
7368         case 1: /* mode P: towards +inf.  */
7369           if (val < 0.0)
7370             aarch64_set_FP_double (cpu, rd, trunc (val));
7371           else
7372             aarch64_set_FP_double (cpu, rd, round (val));
7373           return;
7374
7375         case 2: /* mode M: towards -inf.  */
7376           if (val < 0.0)
7377             aarch64_set_FP_double (cpu, rd, round (val));
7378           else
7379             aarch64_set_FP_double (cpu, rd, trunc (val));
7380           return;
7381
7382         case 3: /* mode Z: towards 0.  */
7383           aarch64_set_FP_double (cpu, rd, trunc (val));
7384           return;
7385
7386         case 4: /* mode A: away from 0.  */
7387           aarch64_set_FP_double (cpu, rd, round (val));
7388           return;
7389
7390         case 6: /* mode X: use FPCR with exactness check.  */
7391         case 7: /* mode I: use FPCR mode.  */
7392           HALT_NYI;
7393
7394         default:
7395           HALT_UNALLOC;
7396         }
7397     }
7398
7399   val = aarch64_get_FP_float (cpu, rs);
7400
7401   switch (rmode)
7402     {
7403     case 0: /* mode N: nearest or even.  */
7404       {
7405         float rval = roundf (val);
7406
7407         if (val - rval == 0.5)
7408           {
7409             if (((rval / 2.0) * 2.0) != rval)
7410               rval += 1.0;
7411           }
7412
7413         aarch64_set_FP_float (cpu, rd, rval);
7414         return;
7415       }
7416
7417     case 1: /* mode P: towards +inf.  */
7418       if (val < 0.0)
7419         aarch64_set_FP_float (cpu, rd, truncf (val));
7420       else
7421         aarch64_set_FP_float (cpu, rd, roundf (val));
7422       return;
7423
7424     case 2: /* mode M: towards -inf.  */
7425       if (val < 0.0)
7426         aarch64_set_FP_float (cpu, rd, truncf (val));
7427       else
7428         aarch64_set_FP_float (cpu, rd, roundf (val));
7429       return;
7430
7431     case 3: /* mode Z: towards 0.  */
7432       aarch64_set_FP_float (cpu, rd, truncf (val));
7433       return;
7434
7435     case 4: /* mode A: away from 0.  */
7436       aarch64_set_FP_float (cpu, rd, roundf (val));
7437       return;
7438
7439     case 6: /* mode X: use FPCR with exactness check.  */
7440     case 7: /* mode I: use FPCR mode.  */
7441       HALT_NYI;
7442
7443     default:
7444       HALT_UNALLOC;
7445     }
7446 }
7447
7448 /* Convert half to float.  */
7449 static void
7450 do_FCVT_half_to_single (sim_cpu *cpu)
7451 {
7452   unsigned rn = INSTR (9, 5);
7453   unsigned rd = INSTR (4, 0);
7454
7455   NYI_assert (31, 10, 0x7B890);
7456
7457   aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half  (cpu, rn));
7458 }
7459
7460 /* Convert half to float.  */
7461 static void
7462 do_FCVT_half_to_double (sim_cpu *cpu)
7463 {
7464   unsigned rn = INSTR (9, 5);
7465   unsigned rd = INSTR (4, 0);
7466
7467   NYI_assert (31, 10, 0x7B8B0);
7468
7469   aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half  (cpu, rn));
7470 }
7471
7472 static void
7473 do_FCVT_single_to_half (sim_cpu *cpu)
7474 {
7475   unsigned rn = INSTR (9, 5);
7476   unsigned rd = INSTR (4, 0);
7477
7478   NYI_assert (31, 10, 0x788F0);
7479
7480   aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float  (cpu, rn));
7481 }
7482
7483 /* Convert half to float.  */
7484 static void
7485 do_FCVT_double_to_half (sim_cpu *cpu)
7486 {
7487   unsigned rn = INSTR (9, 5);
7488   unsigned rd = INSTR (4, 0);
7489
7490   NYI_assert (31, 10, 0x798F0);
7491
7492   aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double  (cpu, rn));
7493 }
7494
7495 static void
7496 dexSimpleFPDataProc1Source (sim_cpu *cpu)
7497 {
7498   /* instr[31]    ==> M : 0 ==> OK, 1 ==> UNALLOC
7499      instr[30]    = 0
7500      instr[29]    ==> S :  0 ==> OK, 1 ==> UNALLOC
7501      instr[28,25] = 1111
7502      instr[24]    = 0
7503      instr[23,22] ==> type : 00 ==> source is single,
7504                              01 ==> source is double
7505                              10 ==> UNALLOC
7506                              11 ==> UNALLOC or source is half
7507      instr[21]    = 1
7508      instr[20,15] ==> opcode : with type 00 or 01
7509                                000000 ==> FMOV, 000001 ==> FABS,
7510                                000010 ==> FNEG, 000011 ==> FSQRT,
7511                                000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double)
7512                                000110 ==> UNALLOC, 000111 ==> FCVT (to half)
7513                                001000 ==> FRINTN, 001001 ==> FRINTP,
7514                                001010 ==> FRINTM, 001011 ==> FRINTZ,
7515                                001100 ==> FRINTA, 001101 ==> UNALLOC
7516                                001110 ==> FRINTX, 001111 ==> FRINTI
7517                                with type 11
7518                                000100 ==> FCVT (half-to-single)
7519                                000101 ==> FCVT (half-to-double)
7520                                instr[14,10] = 10000.  */
7521
7522   uint32_t M_S = (INSTR (31, 31) << 1)
7523     | INSTR (29, 29);
7524   uint32_t type   = INSTR (23, 22);
7525   uint32_t opcode = INSTR (20, 15);
7526
7527   if (M_S != 0)
7528     HALT_UNALLOC;
7529
7530   if (type == 3)
7531     {
7532       if (opcode == 4)
7533         do_FCVT_half_to_single (cpu);
7534       else if (opcode == 5)
7535         do_FCVT_half_to_double (cpu);
7536       else
7537         HALT_UNALLOC;
7538       return;
7539     }
7540
7541   if (type == 2)
7542     HALT_UNALLOC;
7543
7544   switch (opcode)
7545     {
7546     case 0:
7547       if (type)
7548         ffmovd (cpu);
7549       else
7550         ffmovs (cpu);
7551       return;
7552
7553     case 1:
7554       if (type)
7555         fabcpu (cpu);
7556       else
7557         fabss (cpu);
7558       return;
7559
7560     case 2:
7561       if (type)
7562         fnegd (cpu);
7563       else
7564         fnegs (cpu);
7565       return;
7566
7567     case 3:
7568       if (type)
7569         fsqrtd (cpu);
7570       else
7571         fsqrts (cpu);
7572       return;
7573
7574     case 4:
7575       if (type)
7576         fcvtds (cpu);
7577       else
7578         HALT_UNALLOC;
7579       return;
7580
7581     case 5:
7582       if (type)
7583         HALT_UNALLOC;
7584       fcvtcpu (cpu);
7585       return;
7586
7587     case 8:             /* FRINTN etc.  */
7588     case 9:
7589     case 10:
7590     case 11:
7591     case 12:
7592     case 14:
7593     case 15:
7594        do_FRINT (cpu);
7595        return;
7596
7597     case 7:
7598       if (INSTR (22, 22))
7599         do_FCVT_double_to_half (cpu);
7600       else
7601         do_FCVT_single_to_half (cpu);
7602       return;
7603
7604     case 13:
7605       HALT_NYI;
7606
7607     default:
7608       HALT_UNALLOC;
7609     }
7610 }
7611
7612 /* 32 bit signed int to float.  */
7613 static void
7614 scvtf32 (sim_cpu *cpu)
7615 {
7616   unsigned rn = INSTR (9, 5);
7617   unsigned sd = INSTR (4, 0);
7618
7619   aarch64_set_FP_float
7620     (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
7621 }
7622
7623 /* signed int to float.  */
7624 static void
7625 scvtf (sim_cpu *cpu)
7626 {
7627   unsigned rn = INSTR (9, 5);
7628   unsigned sd = INSTR (4, 0);
7629
7630   aarch64_set_FP_float
7631     (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
7632 }
7633
7634 /* 32 bit signed int to double.  */
7635 static void
7636 scvtd32 (sim_cpu *cpu)
7637 {
7638   unsigned rn = INSTR (9, 5);
7639   unsigned sd = INSTR (4, 0);
7640
7641   aarch64_set_FP_double
7642     (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
7643 }
7644
7645 /* signed int to double.  */
7646 static void
7647 scvtd (sim_cpu *cpu)
7648 {
7649   unsigned rn = INSTR (9, 5);
7650   unsigned sd = INSTR (4, 0);
7651
7652   aarch64_set_FP_double
7653     (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
7654 }
7655
/* Host integer limits converted to each FP format.  The names follow
   the pattern <FTYPE>_<ITYPE>_{MIN,MAX} because RAISE_EXCEPTIONS
   builds them by token pasting.  */

/* Limits of int.  */
static const float  FLOAT_INT_MIN   = (float)  INT_MIN;
static const float  FLOAT_INT_MAX   = (float)  INT_MAX;
static const double DOUBLE_INT_MIN  = (double) INT_MIN;
static const double DOUBLE_INT_MAX  = (double) INT_MAX;

/* Limits of long.  */
static const float  FLOAT_LONG_MIN  = (float)  LONG_MIN;
static const float  FLOAT_LONG_MAX  = (float)  LONG_MAX;
static const double DOUBLE_LONG_MIN = (double) LONG_MIN;
static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
7664
/* Check for FP exception conditions after an FP to signed integer
   conversion.  F is the FP source, VALUE the integer lvalue holding
   the converted result, FTYPE is FLOAT or DOUBLE and ITYPE is INT or
   LONG (they select the <FTYPE>_<ITYPE>_{MIN,MAX} limits).  Expects
   a variable named `cpu' to be in scope.

     NaN raises IO and converts to zero
     Infinity raises IO and saturates according to its sign
     Out of Range raises IO and IX and saturates value
     Denormal raises ID and IX and sets to zero.

   A normal value that is in range leaves VALUE untouched; note that
   a source exactly equal to the MIN limit is in range.  */
#define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE)        \
  do                                                    \
    {                                                   \
      switch (fpclassify (F))                           \
        {                                               \
        case FP_NAN:                                    \
          aarch64_set_FPSR (cpu, IO);                   \
          VALUE = 0;                                    \
          break;                                        \
                                                        \
        case FP_INFINITE:                               \
          aarch64_set_FPSR (cpu, IO);                   \
          if (signbit (F))                              \
            VALUE = ITYPE##_MIN;                        \
          else                                          \
            VALUE = ITYPE##_MAX;                        \
          break;                                        \
                                                        \
        case FP_NORMAL:                                 \
          if ((F) >= FTYPE##_##ITYPE##_MAX)             \
            {                                           \
              aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX);    \
              VALUE = ITYPE##_MAX;                      \
            }                                           \
          else if ((F) < FTYPE##_##ITYPE##_MIN)         \
            {                                           \
              aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX);    \
              VALUE = ITYPE##_MIN;                      \
            }                                           \
          break;                                        \
                                                        \
        case FP_SUBNORMAL:                              \
          aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID);   \
          VALUE = 0;                                    \
          break;                                        \
                                                        \
        default:                                        \
        case FP_ZERO:                                   \
          VALUE = 0;                                    \
          break;                                        \
        }                                               \
    }                                                   \
  while (0)
7709
7710 /* 32 bit convert float to signed int truncate towards zero.  */
7711 static void
7712 fcvtszs32 (sim_cpu *cpu)
7713 {
7714   unsigned sn = INSTR (9, 5);
7715   unsigned rd = INSTR (4, 0);
7716   /* TODO : check that this rounds toward zero.  */
7717   float   f = aarch64_get_FP_float (cpu, sn);
7718   int32_t value = (int32_t) f;
7719
7720   RAISE_EXCEPTIONS (f, value, FLOAT, INT);
7721
7722   /* Avoid sign extension to 64 bit.  */
7723   aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
7724 }
7725
7726 /* 64 bit convert float to signed int truncate towards zero.  */
7727 static void
7728 fcvtszs (sim_cpu *cpu)
7729 {
7730   unsigned sn = INSTR (9, 5);
7731   unsigned rd = INSTR (4, 0);
7732   float f = aarch64_get_FP_float (cpu, sn);
7733   int64_t value = (int64_t) f;
7734
7735   RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
7736
7737   aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
7738 }
7739
7740 /* 32 bit convert double to signed int truncate towards zero.  */
7741 static void
7742 fcvtszd32 (sim_cpu *cpu)
7743 {
7744   unsigned sn = INSTR (9, 5);
7745   unsigned rd = INSTR (4, 0);
7746   /* TODO : check that this rounds toward zero.  */
7747   double   d = aarch64_get_FP_double (cpu, sn);
7748   int32_t  value = (int32_t) d;
7749
7750   RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
7751
7752   /* Avoid sign extension to 64 bit.  */
7753   aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
7754 }
7755
7756 /* 64 bit convert double to signed int truncate towards zero.  */
7757 static void
7758 fcvtszd (sim_cpu *cpu)
7759 {
7760   unsigned sn = INSTR (9, 5);
7761   unsigned rd = INSTR (4, 0);
7762   /* TODO : check that this rounds toward zero.  */
7763   double  d = aarch64_get_FP_double (cpu, sn);
7764   int64_t value;
7765
7766   value = (int64_t) d;
7767
7768   RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
7769
7770   aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
7771 }
7772
7773 static void
7774 do_fcvtzu (sim_cpu *cpu)
7775 {
7776   /* instr[31]    = size: 32-bit (0), 64-bit (1)
7777      instr[30,23] = 00111100
7778      instr[22]    = type: single (0)/ double (1)
7779      instr[21]    = enable (0)/disable(1) precision
7780      instr[20,16] = 11001
7781      instr[15,10] = precision
7782      instr[9,5]   = Rs
7783      instr[4,0]   = Rd.  */
7784
7785   unsigned rs = INSTR (9, 5);
7786   unsigned rd = INSTR (4, 0);
7787
7788   NYI_assert (30, 23, 0x3C);
7789   NYI_assert (20, 16, 0x19);
7790
7791   if (INSTR (21, 21) != 1)
7792     /* Convert to fixed point.  */
7793     HALT_NYI;
7794
7795   if (INSTR (31, 31))
7796     {
7797       /* Convert to unsigned 64-bit integer.  */
7798       if (INSTR (22, 22))
7799         {
7800           double  d = aarch64_get_FP_double (cpu, rs);
7801           uint64_t value = (uint64_t) d;
7802
7803           /* Do not raise an exception if we have reached ULONG_MAX.  */
7804           if (value != (1UL << 63))
7805             RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
7806
7807           aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
7808         }
7809       else
7810         {
7811           float  f = aarch64_get_FP_float (cpu, rs);
7812           uint64_t value = (uint64_t) f;
7813
7814           /* Do not raise an exception if we have reached ULONG_MAX.  */
7815           if (value != (1UL << 63))
7816             RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
7817
7818           aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
7819         }
7820     }
7821   else
7822     {
7823       uint32_t value;
7824
7825       /* Convert to unsigned 32-bit integer.  */
7826       if (INSTR (22, 22))
7827         {
7828           double  d = aarch64_get_FP_double (cpu, rs);
7829
7830           value = (uint32_t) d;
7831           /* Do not raise an exception if we have reached UINT_MAX.  */
7832           if (value != (1UL << 31))
7833             RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
7834         }
7835       else
7836         {
7837           float  f = aarch64_get_FP_float (cpu, rs);
7838
7839           value = (uint32_t) f;
7840           /* Do not raise an exception if we have reached UINT_MAX.  */
7841           if (value != (1UL << 31))
7842             RAISE_EXCEPTIONS (f, value, FLOAT, INT);
7843         }
7844
7845       aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
7846     }
7847 }
7848
7849 static void
7850 do_UCVTF (sim_cpu *cpu)
7851 {
7852   /* instr[31]    = size: 32-bit (0), 64-bit (1)
7853      instr[30,23] = 001 1110 0
7854      instr[22]    = type: single (0)/ double (1)
7855      instr[21]    = enable (0)/disable(1) precision
7856      instr[20,16] = 0 0011
7857      instr[15,10] = precision
7858      instr[9,5]   = Rs
7859      instr[4,0]   = Rd.  */
7860
7861   unsigned rs = INSTR (9, 5);
7862   unsigned rd = INSTR (4, 0);
7863
7864   NYI_assert (30, 23, 0x3C);
7865   NYI_assert (20, 16, 0x03);
7866
7867   if (INSTR (21, 21) != 1)
7868     HALT_NYI;
7869
7870   /* FIXME: Add exception raising.  */
7871   if (INSTR (31, 31))
7872     {
7873       uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);
7874
7875       if (INSTR (22, 22))
7876         aarch64_set_FP_double (cpu, rd, (double) value);
7877       else
7878         aarch64_set_FP_float (cpu, rd, (float) value);
7879     }
7880   else
7881     {
7882       uint32_t value =  aarch64_get_reg_u32 (cpu, rs, NO_SP);
7883
7884       if (INSTR (22, 22))
7885         aarch64_set_FP_double (cpu, rd, (double) value);
7886       else
7887         aarch64_set_FP_float (cpu, rd, (float) value);
7888     }
7889 }
7890
7891 static void
7892 float_vector_move (sim_cpu *cpu)
7893 {
7894   /* instr[31,17] == 100 1111 0101 0111
7895      instr[16]    ==> direction 0=> to GR, 1=> from GR
7896      instr[15,10] => ???
7897      instr[9,5]   ==> source
7898      instr[4,0]   ==> dest.  */
7899
7900   unsigned rn = INSTR (9, 5);
7901   unsigned rd = INSTR (4, 0);
7902
7903   NYI_assert (31, 17, 0x4F57);
7904
7905   if (INSTR (15, 10) != 0)
7906     HALT_UNALLOC;
7907
7908   if (INSTR (16, 16))
7909     aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7910   else
7911     aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
7912 }
7913
7914 static void
7915 dexSimpleFPIntegerConvert (sim_cpu *cpu)
7916 {
7917   /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
7918      instr[30     = 0
7919      instr[29]    = S :  0 ==> OK, 1 ==> UNALLOC
7920      instr[28,25] = 1111
7921      instr[24]    = 0
7922      instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
7923      instr[21]    = 1
7924      instr[20,19] = rmode
7925      instr[18,16] = opcode
7926      instr[15,10] = 10 0000  */
7927
7928   uint32_t rmode_opcode;
7929   uint32_t size_type;
7930   uint32_t type;
7931   uint32_t size;
7932   uint32_t S;
7933
7934   if (INSTR (31, 17) == 0x4F57)
7935     {
7936       float_vector_move (cpu);
7937       return;
7938     }
7939
7940   size = INSTR (31, 31);
7941   S = INSTR (29, 29);
7942   if (S != 0)
7943     HALT_UNALLOC;
7944
7945   type = INSTR (23, 22);
7946   if (type > 1)
7947     HALT_UNALLOC;
7948
7949   rmode_opcode = INSTR (20, 16);
7950   size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d.  */
7951
7952   switch (rmode_opcode)
7953     {
7954     case 2:                     /* SCVTF.  */
7955       switch (size_type)
7956         {
7957         case 0: scvtf32 (cpu); return;
7958         case 1: scvtd32 (cpu); return;
7959         case 2: scvtf (cpu); return;
7960         case 3: scvtd (cpu); return;
7961         }
7962
7963     case 6:                     /* FMOV GR, Vec.  */
7964       switch (size_type)
7965         {
7966         case 0:  gfmovs (cpu); return;
7967         case 3:  gfmovd (cpu); return;
7968         default: HALT_UNALLOC;
7969         }
7970
7971     case 7:                     /* FMOV vec, GR.  */
7972       switch (size_type)
7973         {
7974         case 0:  fgmovs (cpu); return;
7975         case 3:  fgmovd (cpu); return;
7976         default: HALT_UNALLOC;
7977         }
7978
7979     case 24:                    /* FCVTZS.  */
7980       switch (size_type)
7981         {
7982         case 0: fcvtszs32 (cpu); return;
7983         case 1: fcvtszd32 (cpu); return;
7984         case 2: fcvtszs (cpu); return;
7985         case 3: fcvtszd (cpu); return;
7986         }
7987
7988     case 25: do_fcvtzu (cpu); return;
7989     case 3:  do_UCVTF (cpu); return;
7990
7991     case 0:     /* FCVTNS.  */
7992     case 1:     /* FCVTNU.  */
7993     case 4:     /* FCVTAS.  */
7994     case 5:     /* FCVTAU.  */
7995     case 8:     /* FCVPTS.  */
7996     case 9:     /* FCVTPU.  */
7997     case 16:    /* FCVTMS.  */
7998     case 17:    /* FCVTMU.  */
7999     default:
8000       HALT_NYI;
8001     }
8002 }
8003
8004 static void
8005 set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
8006 {
8007   uint32_t flags;
8008
8009   if (isnan (fvalue1) || isnan (fvalue2))
8010     flags = C|V;
8011   else
8012     {
8013       float result = fvalue1 - fvalue2;
8014
8015       if (result == 0.0)
8016         flags = Z|C;
8017       else if (result < 0)
8018         flags = N;
8019       else /* (result > 0).  */
8020         flags = C;
8021     }
8022
8023   aarch64_set_CPSR (cpu, flags);
8024 }
8025
8026 static void
8027 fcmps (sim_cpu *cpu)
8028 {
8029   unsigned sm = INSTR (20, 16);
8030   unsigned sn = INSTR ( 9,  5);
8031
8032   float fvalue1 = aarch64_get_FP_float (cpu, sn);
8033   float fvalue2 = aarch64_get_FP_float (cpu, sm);
8034
8035   set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8036 }
8037
8038 /* Float compare to zero -- Invalid Operation exception
8039    only on signaling NaNs.  */
8040 static void
8041 fcmpzs (sim_cpu *cpu)
8042 {
8043   unsigned sn = INSTR ( 9,  5);
8044   float fvalue1 = aarch64_get_FP_float (cpu, sn);
8045
8046   set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8047 }
8048
8049 /* Float compare -- Invalid Operation exception on all NaNs.  */
8050 static void
8051 fcmpes (sim_cpu *cpu)
8052 {
8053   unsigned sm = INSTR (20, 16);
8054   unsigned sn = INSTR ( 9,  5);
8055
8056   float fvalue1 = aarch64_get_FP_float (cpu, sn);
8057   float fvalue2 = aarch64_get_FP_float (cpu, sm);
8058
8059   set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8060 }
8061
8062 /* Float compare to zero -- Invalid Operation exception on all NaNs.  */
8063 static void
8064 fcmpzes (sim_cpu *cpu)
8065 {
8066   unsigned sn = INSTR ( 9,  5);
8067   float fvalue1 = aarch64_get_FP_float (cpu, sn);
8068
8069   set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8070 }
8071
8072 static void
8073 set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
8074 {
8075   uint32_t flags;
8076
8077   if (isnan (dval1) || isnan (dval2))
8078     flags = C|V;
8079   else
8080     {
8081       double result = dval1 - dval2;
8082
8083       if (result == 0.0)
8084         flags = Z|C;
8085       else if (result < 0)
8086         flags = N;
8087       else /* (result > 0).  */
8088         flags = C;
8089     }
8090
8091   aarch64_set_CPSR (cpu, flags);
8092 }
8093
8094 /* Double compare -- Invalid Operation exception only on signaling NaNs.  */
8095 static void
8096 fcmpd (sim_cpu *cpu)
8097 {
8098   unsigned sm = INSTR (20, 16);
8099   unsigned sn = INSTR ( 9,  5);
8100
8101   double dvalue1 = aarch64_get_FP_double (cpu, sn);
8102   double dvalue2 = aarch64_get_FP_double (cpu, sm);
8103
8104   set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8105 }
8106
8107 /* Double compare to zero -- Invalid Operation exception
8108    only on signaling NaNs.  */
8109 static void
8110 fcmpzd (sim_cpu *cpu)
8111 {
8112   unsigned sn = INSTR ( 9,  5);
8113   double dvalue1 = aarch64_get_FP_double (cpu, sn);
8114
8115   set_flags_for_double_compare (cpu, dvalue1, 0.0);
8116 }
8117
8118 /* Double compare -- Invalid Operation exception on all NaNs.  */
8119 static void
8120 fcmped (sim_cpu *cpu)
8121 {
8122   unsigned sm = INSTR (20, 16);
8123   unsigned sn = INSTR ( 9,  5);
8124
8125   double dvalue1 = aarch64_get_FP_double (cpu, sn);
8126   double dvalue2 = aarch64_get_FP_double (cpu, sm);
8127
8128   set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8129 }
8130
8131 /* Double compare to zero -- Invalid Operation exception on all NaNs.  */
8132 static void
8133 fcmpzed (sim_cpu *cpu)
8134 {
8135   unsigned sn = INSTR ( 9,  5);
8136   double dvalue1 = aarch64_get_FP_double (cpu, sn);
8137
8138   set_flags_for_double_compare (cpu, dvalue1, 0.0);
8139 }
8140
8141 static void
8142 dexSimpleFPCompare (sim_cpu *cpu)
8143 {
8144   /* assert instr[28,25] == 1111
8145      instr[30:24:21:13,10] = 0011000
8146      instr[31] = M : 0 ==> OK, 1 ==> UNALLOC
8147      instr[29] ==> S :  0 ==> OK, 1 ==> UNALLOC
8148      instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
8149      instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC
8150      instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
8151                               01000 ==> FCMPZ, 11000 ==> FCMPEZ,
8152                               ow ==> UNALLOC  */
8153   uint32_t dispatch;
8154   uint32_t M_S = (INSTR (31, 31) << 1)
8155     | INSTR (29, 29);
8156   uint32_t type = INSTR (23, 22);
8157   uint32_t op = INSTR (15, 14);
8158   uint32_t op2_2_0 = INSTR (2, 0);
8159
8160   if (op2_2_0 != 0)
8161     HALT_UNALLOC;
8162
8163   if (M_S != 0)
8164     HALT_UNALLOC;
8165
8166   if (type > 1)
8167     HALT_UNALLOC;
8168
8169   if (op != 0)
8170     HALT_UNALLOC;
8171
8172   /* dispatch on type and top 2 bits of opcode.  */
8173   dispatch = (type << 2) | INSTR (4, 3);
8174
8175   switch (dispatch)
8176     {
8177     case 0: fcmps (cpu); return;
8178     case 1: fcmpzs (cpu); return;
8179     case 2: fcmpes (cpu); return;
8180     case 3: fcmpzes (cpu); return;
8181     case 4: fcmpd (cpu); return;
8182     case 5: fcmpzd (cpu); return;
8183     case 6: fcmped (cpu); return;
8184     case 7: fcmpzed (cpu); return;
8185     }
8186 }
8187
8188 static void
8189 do_scalar_FADDP (sim_cpu *cpu)
8190 {
8191   /* instr [31,23] = 011111100
8192      instr [22]    = single(0)/double(1)
8193      instr [21,10] = 1100 0011 0110
8194      instr [9,5]   = Fn
8195      instr [4,0]   = Fd.  */
8196
8197   unsigned Fn = INSTR (9, 5);
8198   unsigned Fd = INSTR (4, 0);
8199
8200   NYI_assert (31, 23, 0x0FC);
8201   NYI_assert (21, 10, 0xC36);
8202
8203   if (INSTR (22, 22))
8204     {
8205       double val1 = aarch64_get_vec_double (cpu, Fn, 0);
8206       double val2 = aarch64_get_vec_double (cpu, Fn, 1);
8207
8208       aarch64_set_FP_double (cpu, Fd, val1 + val2);
8209     }
8210   else
8211     {
8212       float val1 = aarch64_get_vec_float (cpu, Fn, 0);
8213       float val2 = aarch64_get_vec_float (cpu, Fn, 1);
8214
8215       aarch64_set_FP_float (cpu, Fd, val1 + val2);
8216     }
8217 }
8218
8219 /* Floating point absolute difference.  */
8220
8221 static void
8222 do_scalar_FABD (sim_cpu *cpu)
8223 {
8224   /* instr [31,23] = 0111 1110 1
8225      instr [22]    = float(0)/double(1)
8226      instr [21]    = 1
8227      instr [20,16] = Rm
8228      instr [15,10] = 1101 01
8229      instr [9, 5]  = Rn
8230      instr [4, 0]  = Rd.  */
8231
8232   unsigned rm = INSTR (20, 16);
8233   unsigned rn = INSTR (9, 5);
8234   unsigned rd = INSTR (4, 0);
8235
8236   NYI_assert (31, 23, 0x0FD);
8237   NYI_assert (21, 21, 1);
8238   NYI_assert (15, 10, 0x35);
8239
8240   if (INSTR (22, 22))
8241     aarch64_set_FP_double (cpu, rd,
8242                            fabs (aarch64_get_FP_double (cpu, rn)
8243                                  - aarch64_get_FP_double (cpu, rm)));
8244   else
8245     aarch64_set_FP_float (cpu, rd,
8246                           fabsf (aarch64_get_FP_float (cpu, rn)
8247                                  - aarch64_get_FP_float (cpu, rm)));
8248 }
8249
8250 static void
8251 do_scalar_CMGT (sim_cpu *cpu)
8252 {
8253   /* instr [31,21] = 0101 1110 111
8254      instr [20,16] = Rm
8255      instr [15,10] = 00 1101
8256      instr [9, 5]  = Rn
8257      instr [4, 0]  = Rd.  */
8258
8259   unsigned rm = INSTR (20, 16);
8260   unsigned rn = INSTR (9, 5);
8261   unsigned rd = INSTR (4, 0);
8262
8263   NYI_assert (31, 21, 0x2F7);
8264   NYI_assert (15, 10, 0x0D);
8265
8266   aarch64_set_vec_u64 (cpu, rd, 0,
8267                        aarch64_get_vec_u64 (cpu, rn, 0) >
8268                        aarch64_get_vec_u64 (cpu, rm, 0) ? -1L : 0L);
8269 }
8270
8271 static void
8272 do_scalar_USHR (sim_cpu *cpu)
8273 {
8274   /* instr [31,23] = 0111 1111 0
8275      instr [22,16] = shift amount
8276      instr [15,10] = 0000 01
8277      instr [9, 5]  = Rn
8278      instr [4, 0]  = Rd.  */
8279
8280   unsigned amount = 128 - INSTR (22, 16);
8281   unsigned rn = INSTR (9, 5);
8282   unsigned rd = INSTR (4, 0);
8283
8284   NYI_assert (31, 23, 0x0FE);
8285   NYI_assert (15, 10, 0x01);
8286
8287   aarch64_set_vec_u64 (cpu, rd, 0,
8288                        aarch64_get_vec_u64 (cpu, rn, 0) >> amount);
8289 }
8290
8291 static void
8292 do_scalar_SSHL (sim_cpu *cpu)
8293 {
8294   /* instr [31,21] = 0101 1110 111
8295      instr [20,16] = Rm
8296      instr [15,10] = 0100 01
8297      instr [9, 5]  = Rn
8298      instr [4, 0]  = Rd.  */
8299
8300   unsigned rm = INSTR (20, 16);
8301   unsigned rn = INSTR (9, 5);
8302   unsigned rd = INSTR (4, 0);
8303   signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8304
8305   NYI_assert (31, 21, 0x2F7);
8306   NYI_assert (15, 10, 0x11);
8307
8308   if (shift >= 0)
8309     aarch64_set_vec_s64 (cpu, rd, 0,
8310                          aarch64_get_vec_s64 (cpu, rn, 0) << shift);
8311   else
8312     aarch64_set_vec_s64 (cpu, rd, 0,
8313                          aarch64_get_vec_s64 (cpu, rn, 0) >> - shift);
8314 }
8315
8316 static void
8317 do_scalar_shift (sim_cpu *cpu)
8318 {
8319   /* instr [31,23] = 0101 1111 0
8320      instr [22,16] = shift amount
8321      instr [15,10] = 0101 01   [SHL]
8322      instr [15,10] = 0000 01   [SSHR]
8323      instr [9, 5]  = Rn
8324      instr [4, 0]  = Rd.  */
8325
8326   unsigned rn = INSTR (9, 5);
8327   unsigned rd = INSTR (4, 0);
8328   unsigned amount;
8329
8330   NYI_assert (31, 23, 0x0BE);
8331
8332   if (INSTR (22, 22) == 0)
8333     HALT_UNALLOC;
8334
8335   switch (INSTR (15, 10))
8336     {
8337     case 0x01: /* SSHR */
8338       amount = 128 - INSTR (22, 16);
8339       aarch64_set_vec_s64 (cpu, rd, 0,
8340                            aarch64_get_vec_s64 (cpu, rn, 0) >> amount);
8341       return;
8342     case 0x15: /* SHL */
8343       amount = INSTR (22, 16) - 64;
8344       aarch64_set_vec_u64 (cpu, rd, 0,
8345                            aarch64_get_vec_u64 (cpu, rn, 0) << amount);
8346       return;
8347     default:
8348       HALT_NYI;
8349     }
8350 }
8351
8352 /* FCMEQ FCMGT FCMGE.  */
8353 static void
8354 do_scalar_FCM (sim_cpu *cpu)
8355 {
8356   /* instr [31,30] = 01
8357      instr [29]    = U
8358      instr [28,24] = 1 1110
8359      instr [23]    = E
8360      instr [22]    = size
8361      instr [21]    = 1
8362      instr [20,16] = Rm
8363      instr [15,12] = 1110
8364      instr [11]    = AC
8365      instr [10]    = 1
8366      instr [9, 5]  = Rn
8367      instr [4, 0]  = Rd.  */
8368
8369   unsigned rm = INSTR (20, 16);
8370   unsigned rn = INSTR (9, 5);
8371   unsigned rd = INSTR (4, 0);
8372   unsigned EUac = (INSTR (23, 23) << 2)
8373     | (INSTR (29, 29) << 1)
8374     | INSTR (11, 11);
8375   unsigned result;
8376   float val1;
8377   float val2;
8378
8379   NYI_assert (31, 30, 1);
8380   NYI_assert (28, 24, 0x1E);
8381   NYI_assert (21, 21, 1);
8382   NYI_assert (15, 12, 0xE);
8383   NYI_assert (10, 10, 1);
8384
8385   if (INSTR (22, 22))
8386     {
8387       double val1 = aarch64_get_FP_double (cpu, rn);
8388       double val2 = aarch64_get_FP_double (cpu, rm);
8389
8390       switch (EUac)
8391         {
8392         case 0: /* 000 */
8393           result = val1 == val2;
8394           break;
8395
8396         case 3: /* 011 */
8397           val1 = fabs (val1);
8398           val2 = fabs (val2);
8399           /* Fall through. */
8400         case 2: /* 010 */
8401           result = val1 >= val2;
8402           break;
8403
8404         case 7: /* 111 */
8405           val1 = fabs (val1);
8406           val2 = fabs (val2);
8407           /* Fall through. */
8408         case 6: /* 110 */
8409           result = val1 > val2;
8410           break;
8411
8412         default:
8413           HALT_UNALLOC;
8414         }
8415
8416       aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
8417       return;
8418     }
8419
8420   val1 = aarch64_get_FP_float (cpu, rn);
8421   val2 = aarch64_get_FP_float (cpu, rm);
8422
8423   switch (EUac)
8424     {
8425     case 0: /* 000 */
8426       result = val1 == val2;
8427       break;
8428
8429     case 3: /* 011 */
8430       val1 = fabsf (val1);
8431       val2 = fabsf (val2);
8432       /* Fall through. */
8433     case 2: /* 010 */
8434       result = val1 >= val2;
8435       break;
8436
8437     case 7: /* 111 */
8438       val1 = fabsf (val1);
8439       val2 = fabsf (val2);
8440       /* Fall through. */
8441     case 6: /* 110 */
8442       result = val1 > val2;
8443       break;
8444
8445     default:
8446       HALT_UNALLOC;
8447     }
8448
8449   aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
8450 }
8451
8452 /* An alias of DUP.  */
8453 static void
8454 do_scalar_MOV (sim_cpu *cpu)
8455 {
8456   /* instr [31,21] = 0101 1110 000
8457      instr [20,16] = imm5
8458      instr [15,10] = 0000 01
8459      instr [9, 5]  = Rn
8460      instr [4, 0]  = Rd.  */
8461
8462   unsigned rn = INSTR (9, 5);
8463   unsigned rd = INSTR (4, 0);
8464   unsigned index;
8465
8466   NYI_assert (31, 21, 0x2F0);
8467   NYI_assert (15, 10, 0x01);
8468
8469   if (INSTR (16, 16))
8470     {
8471       /* 8-bit.  */
8472       index = INSTR (20, 17);
8473       aarch64_set_vec_u8
8474         (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index));
8475     }
8476   else if (INSTR (17, 17))
8477     {
8478       /* 16-bit.  */
8479       index = INSTR (20, 18);
8480       aarch64_set_vec_u16
8481         (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index));
8482     }
8483   else if (INSTR (18, 18))
8484     {
8485       /* 32-bit.  */
8486       index = INSTR (20, 19);
8487       aarch64_set_vec_u32
8488         (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index));
8489     }
8490   else if (INSTR (19, 19))
8491     {
8492       /* 64-bit.  */
8493       index = INSTR (20, 20);
8494       aarch64_set_vec_u64
8495         (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index));
8496     }
8497   else
8498     HALT_UNALLOC;
8499 }
8500
8501 static void
8502 do_scalar_NEG (sim_cpu *cpu)
8503 {
8504   /* instr [31,10] = 0111 1110 1110 0000 1011 10
8505      instr [9, 5]  = Rn
8506      instr [4, 0]  = Rd.  */
8507
8508   unsigned rn = INSTR (9, 5);
8509   unsigned rd = INSTR (4, 0);
8510
8511   NYI_assert (31, 10, 0x1FB82E);
8512
8513   aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0));
8514 }
8515
8516 static void
8517 do_scalar_USHL (sim_cpu *cpu)
8518 {
8519   /* instr [31,21] = 0111 1110 111
8520      instr [20,16] = Rm
8521      instr [15,10] = 0100 01
8522      instr [9, 5]  = Rn
8523      instr [4, 0]  = Rd.  */
8524
8525   unsigned rm = INSTR (20, 16);
8526   unsigned rn = INSTR (9, 5);
8527   unsigned rd = INSTR (4, 0);
8528   signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8529
8530   NYI_assert (31, 21, 0x3F7);
8531   NYI_assert (15, 10, 0x11);
8532
8533   if (shift >= 0)
8534     aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift);
8535   else
8536     aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift);
8537 }
8538
8539 static void
8540 do_double_add (sim_cpu *cpu)
8541 {
8542   /* instr [31,21] = 0101 1110 111
8543      instr [20,16] = Fn
8544      instr [15,10] = 1000 01
8545      instr [9,5]   = Fm
8546      instr [4,0]   = Fd.  */
8547   unsigned Fd;
8548   unsigned Fm;
8549   unsigned Fn;
8550   double val1;
8551   double val2;
8552
8553   NYI_assert (31, 21, 0x2F7);
8554   NYI_assert (15, 10, 0x21);
8555
8556   Fd = INSTR (4, 0);
8557   Fm = INSTR (9, 5);
8558   Fn = INSTR (20, 16);
8559
8560   val1 = aarch64_get_FP_double (cpu, Fm);
8561   val2 = aarch64_get_FP_double (cpu, Fn);
8562
8563   aarch64_set_FP_double (cpu, Fd, val1 + val2);
8564 }
8565
8566 static void
8567 do_scalar_vec (sim_cpu *cpu)
8568 {
8569   /* instr [30] = 1.  */
8570   /* instr [28,25] = 1111.  */
8571   switch (INSTR (31, 23))
8572     {
8573     case 0xBC:
8574       switch (INSTR (15, 10))
8575         {
8576         case 0x01: do_scalar_MOV (cpu); return;
8577         case 0x39: do_scalar_FCM (cpu); return;
8578         case 0x3B: do_scalar_FCM (cpu); return;
8579         }
8580       break;
8581
8582     case 0xBE: do_scalar_shift (cpu); return;
8583
8584     case 0xFC:
8585       switch (INSTR (15, 10))
8586         {
8587         case 0x36: do_scalar_FADDP (cpu); return;
8588         case 0x39: do_scalar_FCM (cpu); return;
8589         case 0x3B: do_scalar_FCM (cpu); return;
8590         }
8591       break;
8592
8593     case 0xFD:
8594       switch (INSTR (15, 10))
8595         {
8596         case 0x0D: do_scalar_CMGT (cpu); return;
8597         case 0x11: do_scalar_USHL (cpu); return;
8598         case 0x2E: do_scalar_NEG (cpu); return;
8599         case 0x35: do_scalar_FABD (cpu); return;
8600         case 0x39: do_scalar_FCM (cpu); return;
8601         case 0x3B: do_scalar_FCM (cpu); return;
8602         default:
8603           HALT_NYI;
8604         }
8605
8606     case 0xFE: do_scalar_USHR (cpu); return;
8607
8608     case 0xBD:
8609       switch (INSTR (15, 10))
8610         {
8611         case 0x21: do_double_add (cpu); return;
8612         case 0x11: do_scalar_SSHL (cpu); return;
8613         default:
8614           HALT_NYI;
8615         }
8616
8617     default:
8618       HALT_NYI;
8619     }
8620 }
8621
8622 static void
8623 dexAdvSIMD1 (sim_cpu *cpu)
8624 {
8625   /* instr [28,25] = 1 111.  */
8626
8627   /* We are currently only interested in the basic
8628      scalar fp routines which all have bit 30 = 0.  */
8629   if (INSTR (30, 30))
8630     do_scalar_vec (cpu);
8631
8632   /* instr[24] is set for FP data processing 3-source and clear for
8633      all other basic scalar fp instruction groups.  */
8634   else if (INSTR (24, 24))
8635     dexSimpleFPDataProc3Source (cpu);
8636
8637   /* instr[21] is clear for floating <-> fixed conversions and set for
8638      all other basic scalar fp instruction groups.  */
8639   else if (!INSTR (21, 21))
8640     dexSimpleFPFixedConvert (cpu);
8641
8642   /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
8643      11 ==> cond select,  00 ==> other.  */
8644   else
8645     switch (INSTR (11, 10))
8646       {
8647       case 1: dexSimpleFPCondCompare (cpu); return;
8648       case 2: dexSimpleFPDataProc2Source (cpu); return;
8649       case 3: dexSimpleFPCondSelect (cpu); return;
8650
8651       default:
8652         /* Now an ordered cascade of tests.
8653            FP immediate has instr [12] == 1.
8654            FP compare has   instr [13] == 1.
8655            FP Data Proc 1 Source has instr [14] == 1.
8656            FP floating <--> integer conversions has instr [15] == 0.  */
8657         if (INSTR (12, 12))
8658           dexSimpleFPImmediate (cpu);
8659
8660         else if (INSTR (13, 13))
8661           dexSimpleFPCompare (cpu);
8662
8663         else if (INSTR (14, 14))
8664           dexSimpleFPDataProc1Source (cpu);
8665
8666         else if (!INSTR (15, 15))
8667           dexSimpleFPIntegerConvert (cpu);
8668
8669         else
8670           /* If we get here then instr[15] == 1 which means UNALLOC.  */
8671           HALT_UNALLOC;
8672       }
8673 }
8674
8675 /* PC relative addressing.  */
8676
8677 static void
8678 pcadr (sim_cpu *cpu)
8679 {
8680   /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP
8681      instr[30,29] = immlo
8682      instr[23,5] = immhi.  */
8683   uint64_t address;
8684   unsigned rd = INSTR (4, 0);
8685   uint32_t isPage = INSTR (31, 31);
8686   union { int64_t u64; uint64_t s64; } imm;
8687   uint64_t offset;
8688
8689   imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
8690   offset = imm.u64;
8691   offset = (offset << 2) | INSTR (30, 29);
8692
8693   address = aarch64_get_PC (cpu);
8694
8695   if (isPage)
8696     {
8697       offset <<= 12;
8698       address &= ~0xfff;
8699     }
8700
8701   aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
8702 }
8703
8704 /* Specific decode and execute for group Data Processing Immediate.  */
8705
8706 static void
8707 dexPCRelAddressing (sim_cpu *cpu)
8708 {
8709   /* assert instr[28,24] = 10000.  */
8710   pcadr (cpu);
8711 }
8712
/* Immediate logical.
   The bimm32/64 argument is constructed by replicating a 2, 4, 8,
   16, 32 or 64 bit sequence pulled out at decode and possibly
   inverting it.

   N.B. the output register (dest) can normally be Xn or SP.
   The exception is the flag setting instructions, which may
   only use Xn for the output (dest).  The input register can
   never be SP.  */
8722
8723 /* 32 bit and immediate.  */
8724 static void
8725 and32 (sim_cpu *cpu, uint32_t bimm)
8726 {
8727   unsigned rn = INSTR (9, 5);
8728   unsigned rd = INSTR (4, 0);
8729
8730   aarch64_set_reg_u64 (cpu, rd, SP_OK,
8731                        aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
8732 }
8733
8734 /* 64 bit and immediate.  */
8735 static void
8736 and64 (sim_cpu *cpu, uint64_t bimm)
8737 {
8738   unsigned rn = INSTR (9, 5);
8739   unsigned rd = INSTR (4, 0);
8740
8741   aarch64_set_reg_u64 (cpu, rd, SP_OK,
8742                        aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
8743 }
8744
8745 /* 32 bit and immediate set flags.  */
8746 static void
8747 ands32 (sim_cpu *cpu, uint32_t bimm)
8748 {
8749   unsigned rn = INSTR (9, 5);
8750   unsigned rd = INSTR (4, 0);
8751
8752   uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
8753   uint32_t value2 = bimm;
8754
8755   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
8756   set_flags_for_binop32 (cpu, value1 & value2);
8757 }
8758
8759 /* 64 bit and immediate set flags.  */
8760 static void
8761 ands64 (sim_cpu *cpu, uint64_t bimm)
8762 {
8763   unsigned rn = INSTR (9, 5);
8764   unsigned rd = INSTR (4, 0);
8765
8766   uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
8767   uint64_t value2 = bimm;
8768
8769   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
8770   set_flags_for_binop64 (cpu, value1 & value2);
8771 }
8772
8773 /* 32 bit exclusive or immediate.  */
8774 static void
8775 eor32 (sim_cpu *cpu, uint32_t bimm)
8776 {
8777   unsigned rn = INSTR (9, 5);
8778   unsigned rd = INSTR (4, 0);
8779
8780   aarch64_set_reg_u64 (cpu, rd, SP_OK,
8781                        aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
8782 }
8783
8784 /* 64 bit exclusive or immediate.  */
8785 static void
8786 eor64 (sim_cpu *cpu, uint64_t bimm)
8787 {
8788   unsigned rn = INSTR (9, 5);
8789   unsigned rd = INSTR (4, 0);
8790
8791   aarch64_set_reg_u64 (cpu, rd, SP_OK,
8792                        aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
8793 }
8794
8795 /* 32 bit or immediate.  */
8796 static void
8797 orr32 (sim_cpu *cpu, uint32_t bimm)
8798 {
8799   unsigned rn = INSTR (9, 5);
8800   unsigned rd = INSTR (4, 0);
8801
8802   aarch64_set_reg_u64 (cpu, rd, SP_OK,
8803                        aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm);
8804 }
8805
8806 /* 64 bit or immediate.  */
8807 static void
8808 orr64 (sim_cpu *cpu, uint64_t bimm)
8809 {
8810   unsigned rn = INSTR (9, 5);
8811   unsigned rd = INSTR (4, 0);
8812
8813   aarch64_set_reg_u64 (cpu, rd, SP_OK,
8814                        aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm);
8815 }
8816
8817 /* Logical shifted register.
8818    These allow an optional LSL, ASR, LSR or ROR to the second source
8819    register with a count up to the register bit count.
8820    N.B register args may not be SP.  */
8821
8822 /* 32 bit AND shifted register.  */
8823 static void
8824 and32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8825 {
8826   unsigned rm = INSTR (20, 16);
8827   unsigned rn = INSTR (9, 5);
8828   unsigned rd = INSTR (4, 0);
8829
8830   aarch64_set_reg_u64
8831     (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
8832      & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
8833 }
8834
8835 /* 64 bit AND shifted register.  */
8836 static void
8837 and64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8838 {
8839   unsigned rm = INSTR (20, 16);
8840   unsigned rn = INSTR (9, 5);
8841   unsigned rd = INSTR (4, 0);
8842
8843   aarch64_set_reg_u64
8844     (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
8845      & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
8846 }
8847
8848 /* 32 bit AND shifted register setting flags.  */
8849 static void
8850 ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8851 {
8852   unsigned rm = INSTR (20, 16);
8853   unsigned rn = INSTR (9, 5);
8854   unsigned rd = INSTR (4, 0);
8855
8856   uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
8857   uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
8858                                shift, count);
8859
8860   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
8861   set_flags_for_binop32 (cpu, value1 & value2);
8862 }
8863
8864 /* 64 bit AND shifted register setting flags.  */
8865 static void
8866 ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8867 {
8868   unsigned rm = INSTR (20, 16);
8869   unsigned rn = INSTR (9, 5);
8870   unsigned rd = INSTR (4, 0);
8871
8872   uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
8873   uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
8874                                shift, count);
8875
8876   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
8877   set_flags_for_binop64 (cpu, value1 & value2);
8878 }
8879
8880 /* 32 bit BIC shifted register.  */
8881 static void
8882 bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8883 {
8884   unsigned rm = INSTR (20, 16);
8885   unsigned rn = INSTR (9, 5);
8886   unsigned rd = INSTR (4, 0);
8887
8888   aarch64_set_reg_u64
8889     (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
8890      & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
8891 }
8892
8893 /* 64 bit BIC shifted register.  */
8894 static void
8895 bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8896 {
8897   unsigned rm = INSTR (20, 16);
8898   unsigned rn = INSTR (9, 5);
8899   unsigned rd = INSTR (4, 0);
8900
8901   aarch64_set_reg_u64
8902     (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
8903      & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
8904 }
8905
8906 /* 32 bit BIC shifted register setting flags.  */
8907 static void
8908 bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8909 {
8910   unsigned rm = INSTR (20, 16);
8911   unsigned rn = INSTR (9, 5);
8912   unsigned rd = INSTR (4, 0);
8913
8914   uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
8915   uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
8916                                  shift, count);
8917
8918   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
8919   set_flags_for_binop32 (cpu, value1 & value2);
8920 }
8921
8922 /* 64 bit BIC shifted register setting flags.  */
8923 static void
8924 bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8925 {
8926   unsigned rm = INSTR (20, 16);
8927   unsigned rn = INSTR (9, 5);
8928   unsigned rd = INSTR (4, 0);
8929
8930   uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
8931   uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
8932                                  shift, count);
8933
8934   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
8935   set_flags_for_binop64 (cpu, value1 & value2);
8936 }
8937
8938 /* 32 bit EON shifted register.  */
8939 static void
8940 eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8941 {
8942   unsigned rm = INSTR (20, 16);
8943   unsigned rn = INSTR (9, 5);
8944   unsigned rd = INSTR (4, 0);
8945
8946   aarch64_set_reg_u64
8947     (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
8948      ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
8949 }
8950
8951 /* 64 bit EON shifted register.  */
8952 static void
8953 eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8954 {
8955   unsigned rm = INSTR (20, 16);
8956   unsigned rn = INSTR (9, 5);
8957   unsigned rd = INSTR (4, 0);
8958
8959   aarch64_set_reg_u64
8960     (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
8961      ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
8962 }
8963
8964 /* 32 bit EOR shifted register.  */
8965 static void
8966 eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8967 {
8968   unsigned rm = INSTR (20, 16);
8969   unsigned rn = INSTR (9, 5);
8970   unsigned rd = INSTR (4, 0);
8971
8972   aarch64_set_reg_u64
8973     (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
8974      ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
8975 }
8976
8977 /* 64 bit EOR shifted register.  */
8978 static void
8979 eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8980 {
8981   unsigned rm = INSTR (20, 16);
8982   unsigned rn = INSTR (9, 5);
8983   unsigned rd = INSTR (4, 0);
8984
8985   aarch64_set_reg_u64
8986     (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
8987      ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
8988 }
8989
8990 /* 32 bit ORR shifted register.  */
8991 static void
8992 orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8993 {
8994   unsigned rm = INSTR (20, 16);
8995   unsigned rn = INSTR (9, 5);
8996   unsigned rd = INSTR (4, 0);
8997
8998   aarch64_set_reg_u64
8999     (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9000      | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9001 }
9002
9003 /* 64 bit ORR shifted register.  */
9004 static void
9005 orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9006 {
9007   unsigned rm = INSTR (20, 16);
9008   unsigned rn = INSTR (9, 5);
9009   unsigned rd = INSTR (4, 0);
9010
9011   aarch64_set_reg_u64
9012     (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9013      | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9014 }
9015
9016 /* 32 bit ORN shifted register.  */
9017 static void
9018 orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9019 {
9020   unsigned rm = INSTR (20, 16);
9021   unsigned rn = INSTR (9, 5);
9022   unsigned rd = INSTR (4, 0);
9023
9024   aarch64_set_reg_u64
9025     (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9026      | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9027 }
9028
9029 /* 64 bit ORN shifted register.  */
9030 static void
9031 orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9032 {
9033   unsigned rm = INSTR (20, 16);
9034   unsigned rn = INSTR (9, 5);
9035   unsigned rd = INSTR (4, 0);
9036
9037   aarch64_set_reg_u64
9038     (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9039      | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9040 }
9041
9042 static void
9043 dexLogicalImmediate (sim_cpu *cpu)
9044 {
9045   /* assert instr[28,23] = 1001000
9046      instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9047      instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
9048      instr[22] = N : used to construct immediate mask
9049      instr[21,16] = immr
9050      instr[15,10] = imms
9051      instr[9,5] = Rn
9052      instr[4,0] = Rd  */
9053
9054   /* 32 bit operations must have N = 0 or else we have an UNALLOC.  */
9055   uint32_t size = INSTR (31, 31);
9056   uint32_t N = INSTR (22, 22);
9057   /* uint32_t immr = INSTR (21, 16);.  */
9058   /* uint32_t imms = INSTR (15, 10);.  */
9059   uint32_t index = INSTR (22, 10);
9060   uint64_t bimm64 = LITable [index];
9061   uint32_t dispatch = INSTR (30, 29);
9062
9063   if (~size & N)
9064     HALT_UNALLOC;
9065
9066   if (!bimm64)
9067     HALT_UNALLOC;
9068
9069   if (size == 0)
9070     {
9071       uint32_t bimm = (uint32_t) bimm64;
9072
9073       switch (dispatch)
9074         {
9075         case 0: and32 (cpu, bimm); return;
9076         case 1: orr32 (cpu, bimm); return;
9077         case 2: eor32 (cpu, bimm); return;
9078         case 3: ands32 (cpu, bimm); return;
9079         }
9080     }
9081   else
9082     {
9083       switch (dispatch)
9084         {
9085         case 0: and64 (cpu, bimm64); return;
9086         case 1: orr64 (cpu, bimm64); return;
9087         case 2: eor64 (cpu, bimm64); return;
9088         case 3: ands64 (cpu, bimm64); return;
9089         }
9090     }
9091   HALT_UNALLOC;
9092 }
9093
/* Immediate move.
   The val argument is a 16 bit value to be inserted into the
   target register.  The pos argument locates the 16 bit word in the
   dest register, i.e. it is in {0, 1} for 32 bit and {0, 1, 2, 3}
   for 64 bit.
   N.B. the register argument may not be SP, so it should be
   accessed using the setGZRegisterXXX-style (NO_SP) accessors.  */
9101
9102 /* 32 bit move 16 bit immediate zero remaining shorts.  */
9103 static void
9104 movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9105 {
9106   unsigned rd = INSTR (4, 0);
9107
9108   aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
9109 }
9110
9111 /* 64 bit move 16 bit immediate zero remaining shorts.  */
9112 static void
9113 movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9114 {
9115   unsigned rd = INSTR (4, 0);
9116
9117   aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
9118 }
9119
9120 /* 32 bit move 16 bit immediate negated.  */
9121 static void
9122 movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9123 {
9124   unsigned rd = INSTR (4, 0);
9125
9126   aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
9127 }
9128
9129 /* 64 bit move 16 bit immediate negated.  */
9130 static void
9131 movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9132 {
9133   unsigned rd = INSTR (4, 0);
9134
9135   aarch64_set_reg_u64
9136     (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
9137                       ^ 0xffffffffffffffffULL));
9138 }
9139
9140 /* 32 bit move 16 bit immediate keep remaining shorts.  */
9141 static void
9142 movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9143 {
9144   unsigned rd = INSTR (4, 0);
9145   uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9146   uint32_t value = val << (pos * 16);
9147   uint32_t mask = ~(0xffffU << (pos * 16));
9148
9149   aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9150 }
9151
9152 /* 64 bit move 16 it immediate keep remaining shorts.  */
9153 static void
9154 movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9155 {
9156   unsigned rd = INSTR (4, 0);
9157   uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
9158   uint64_t value = (uint64_t) val << (pos * 16);
9159   uint64_t mask = ~(0xffffULL << (pos * 16));
9160
9161   aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9162 }
9163
9164 static void
9165 dexMoveWideImmediate (sim_cpu *cpu)
9166 {
9167   /* assert instr[28:23] = 100101
9168      instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9169      instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
9170      instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
9171      instr[20,5] = uimm16
9172      instr[4,0] = Rd  */
9173
9174   /* N.B. the (multiple of 16) shift is applied by the called routine,
9175      we just pass the multiplier.  */
9176
9177   uint32_t imm;
9178   uint32_t size = INSTR (31, 31);
9179   uint32_t op = INSTR (30, 29);
9180   uint32_t shift = INSTR (22, 21);
9181
9182   /* 32 bit can only shift 0 or 1 lot of 16.
9183      anything else is an unallocated instruction.  */
9184   if (size == 0 && (shift > 1))
9185     HALT_UNALLOC;
9186
9187   if (op == 1)
9188     HALT_UNALLOC;
9189
9190   imm = INSTR (20, 5);
9191
9192   if (size == 0)
9193     {
9194       if (op == 0)
9195         movn32 (cpu, imm, shift);
9196       else if (op == 2)
9197         movz32 (cpu, imm, shift);
9198       else
9199         movk32 (cpu, imm, shift);
9200     }
9201   else
9202     {
9203       if (op == 0)
9204         movn64 (cpu, imm, shift);
9205       else if (op == 2)
9206         movz64 (cpu, imm, shift);
9207       else
9208         movk64 (cpu, imm, shift);
9209     }
9210 }
9211
9212 /* Bitfield operations.
9213    These take a pair of bit positions r and s which are in {0..31}
9214    or {0..63} depending on the instruction word size.
9215    N.B register args may not be SP.  */
9216
9217 /* OK, we start with ubfm which just needs to pick
9218    some bits out of source zero the rest and write
9219    the result to dest.  Just need two logical shifts.  */
9220
9221 /* 32 bit bitfield move, left and right of affected zeroed
9222    if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>.  */
9223 static void
9224 ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9225 {
9226   unsigned rd;
9227   unsigned rn = INSTR (9, 5);
9228   uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9229
9230   /* Pick either s+1-r or s+1 consecutive bits out of the original word.  */
9231   if (r <= s)
9232     {
9233       /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9234          We want only bits s:xxx:r at the bottom of the word
9235          so we LSL bit s up to bit 31 i.e. by 31 - s
9236          and then we LSR to bring bit 31 down to bit s - r
9237          i.e. by 31 + r - s.  */
9238       value <<= 31 - s;
9239       value >>= 31 + r - s;
9240     }
9241   else
9242     {
9243       /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
9244          We want only bits s:xxx:0 starting at it 31-(r-1)
9245          so we LSL bit s up to bit 31 i.e. by 31 - s
9246          and then we LSL to bring bit 31 down to 31-(r-1)+s
9247          i.e. by r - (s + 1).  */
9248       value <<= 31 - s;
9249       value >>= r - (s + 1);
9250     }
9251
9252   rd = INSTR (4, 0);
9253   aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9254 }
9255
9256 /* 64 bit bitfield move, left and right of affected zeroed
9257    if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>.  */
9258 static void
9259 ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9260 {
9261   unsigned rd;
9262   unsigned rn = INSTR (9, 5);
9263   uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9264
9265   if (r <= s)
9266     {
9267       /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
9268          We want only bits s:xxx:r at the bottom of the word.
9269          So we LSL bit s up to bit 63 i.e. by 63 - s
9270          and then we LSR to bring bit 63 down to bit s - r
9271          i.e. by 63 + r - s.  */
9272       value <<= 63 - s;
9273       value >>= 63 + r - s;
9274     }
9275   else
9276     {
9277       /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
9278          We want only bits s:xxx:0 starting at it 63-(r-1).
9279          So we LSL bit s up to bit 63 i.e. by 63 - s
9280          and then we LSL to bring bit 63 down to 63-(r-1)+s
9281          i.e. by r - (s + 1).  */
9282       value <<= 63 - s;
9283       value >>= r - (s + 1);
9284     }
9285
9286   rd = INSTR (4, 0);
9287   aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9288 }
9289
/* The signed versions need to insert sign bits
   on the left of the inserted bit field, so we do
   much the same as the unsigned version except we
   use an arithmetic shift right -- this just means
   we need to operate on signed values.  */
9295
9296 /* 32 bit bitfield move, left of affected sign-extended, right zeroed.  */
9297 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>.  */
9298 static void
9299 sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9300 {
9301   unsigned rd;
9302   unsigned rn = INSTR (9, 5);
9303   /* as per ubfm32 but use an ASR instead of an LSR.  */
9304   int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);
9305
9306   if (r <= s)
9307     {
9308       value <<= 31 - s;
9309       value >>= 31 + r - s;
9310     }
9311   else
9312     {
9313       value <<= 31 - s;
9314       value >>= r - (s + 1);
9315     }
9316
9317   rd = INSTR (4, 0);
9318   aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
9319 }
9320
9321 /* 64 bit bitfield move, left of affected sign-extended, right zeroed.  */
9322 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>.  */
9323 static void
9324 sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9325 {
9326   unsigned rd;
9327   unsigned rn = INSTR (9, 5);
9328   /* acpu per ubfm but use an ASR instead of an LSR.  */
9329   int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);
9330
9331   if (r <= s)
9332     {
9333       value <<= 63 - s;
9334       value >>= 63 + r - s;
9335     }
9336   else
9337     {
9338       value <<= 63 - s;
9339       value >>= r - (s + 1);
9340     }
9341
9342   rd = INSTR (4, 0);
9343   aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
9344 }
9345
/* Finally, these versions leave non-affected bits
   as is, so we need to generate the bits as per
   ubfm and also generate a mask to pick the
   bits from the original and computed values.  */
9350
9351 /* 32 bit bitfield move, non-affected bits left as is.
9352    If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>.  */
9353 static void
9354 bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9355 {
9356   unsigned rn = INSTR (9, 5);
9357   uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9358   uint32_t mask = -1;
9359   unsigned rd;
9360   uint32_t value2;
9361
9362   /* Pick either s+1-r or s+1 consecutive bits out of the original word.  */
9363   if (r <= s)
9364     {
9365       /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9366          We want only bits s:xxx:r at the bottom of the word
9367          so we LSL bit s up to bit 31 i.e. by 31 - s
9368          and then we LSR to bring bit 31 down to bit s - r
9369          i.e. by 31 + r - s.  */
9370       value <<= 31 - s;
9371       value >>= 31 + r - s;
9372       /* the mask must include the same bits.  */
9373       mask <<= 31 - s;
9374       mask >>= 31 + r - s;
9375     }
9376   else
9377     {
9378       /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
9379          We want only bits s:xxx:0 starting at it 31-(r-1)
9380          so we LSL bit s up to bit 31 i.e. by 31 - s
9381          and then we LSL to bring bit 31 down to 31-(r-1)+s
9382          i.e. by r - (s + 1).  */
9383       value <<= 31 - s;
9384       value >>= r - (s + 1);
9385       /* The mask must include the same bits.  */
9386       mask <<= 31 - s;
9387       mask >>= r - (s + 1);
9388     }
9389
9390   rd = INSTR (4, 0);
9391   value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9392
9393   value2 &= ~mask;
9394   value2 |= value;
9395
9396   aarch64_set_reg_u64
9397     (cpu, rd, NO_SP, (aarch64_get_reg_u32 (cpu, rd, NO_SP) & ~mask) | value);
9398 }
9399
9400 /* 64 bit bitfield move, non-affected bits left as is.
9401    If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>.  */
9402 static void
9403 bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9404 {
9405   unsigned rd;
9406   unsigned rn = INSTR (9, 5);
9407   uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9408   uint64_t mask = 0xffffffffffffffffULL;
9409
9410   if (r <= s)
9411     {
9412       /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
9413          We want only bits s:xxx:r at the bottom of the word
9414          so we LSL bit s up to bit 63 i.e. by 63 - s
9415          and then we LSR to bring bit 63 down to bit s - r
9416          i.e. by 63 + r - s.  */
9417       value <<= 63 - s;
9418       value >>= 63 + r - s;
9419       /* The mask must include the same bits.  */
9420       mask <<= 63 - s;
9421       mask >>= 63 + r - s;
9422     }
9423   else
9424     {
9425       /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
9426          We want only bits s:xxx:0 starting at it 63-(r-1)
9427          so we LSL bit s up to bit 63 i.e. by 63 - s
9428          and then we LSL to bring bit 63 down to 63-(r-1)+s
9429          i.e. by r - (s + 1).  */
9430       value <<= 63 - s;
9431       value >>= r - (s + 1);
9432       /* The mask must include the same bits.  */
9433       mask <<= 63 - s;
9434       mask >>= r - (s + 1);
9435     }
9436
9437   rd = INSTR (4, 0);
9438   aarch64_set_reg_u64
9439     (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
9440 }
9441
9442 static void
9443 dexBitfieldImmediate (sim_cpu *cpu)
9444 {
9445   /* assert instr[28:23] = 100110
9446      instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9447      instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
9448      instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
9449      instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
9450      instr[15,10] = imms :  0xxxxx for 32 bit, xxxxxx for 64 bit
9451      instr[9,5] = Rn
9452      instr[4,0] = Rd  */
9453
9454   /* 32 bit operations must have N = 0 or else we have an UNALLOC.  */
9455   uint32_t dispatch;
9456   uint32_t imms;
9457   uint32_t size = INSTR (31, 31);
9458   uint32_t N = INSTR (22, 22);
9459   /* 32 bit operations must have immr[5] = 0 and imms[5] = 0.  */
9460   /* or else we have an UNALLOC.  */
9461   uint32_t immr = INSTR (21, 16);
9462
9463   if (~size & N)
9464     HALT_UNALLOC;
9465
9466   if (!size && uimm (immr, 5, 5))
9467     HALT_UNALLOC;
9468
9469   imms = INSTR (15, 10);
9470   if (!size && uimm (imms, 5, 5))
9471     HALT_UNALLOC;
9472
9473   /* Switch on combined size and op.  */
9474   dispatch = INSTR (31, 29);
9475   switch (dispatch)
9476     {
9477     case 0: sbfm32 (cpu, immr, imms); return;
9478     case 1: bfm32 (cpu, immr, imms); return;
9479     case 2: ubfm32 (cpu, immr, imms); return;
9480     case 4: sbfm (cpu, immr, imms); return;
9481     case 5: bfm (cpu, immr, imms); return;
9482     case 6: ubfm (cpu, immr, imms); return;
9483     default: HALT_UNALLOC;
9484     }
9485 }
9486
9487 static void
9488 do_EXTR_32 (sim_cpu *cpu)
9489 {
9490   /* instr[31:21] = 00010011100
9491      instr[20,16] = Rm
9492      instr[15,10] = imms :  0xxxxx for 32 bit
9493      instr[9,5]   = Rn
9494      instr[4,0]   = Rd  */
9495   unsigned rm   = INSTR (20, 16);
9496   unsigned imms = INSTR (15, 10) & 31;
9497   unsigned rn   = INSTR ( 9,  5);
9498   unsigned rd   = INSTR ( 4,  0);
9499   uint64_t val1;
9500   uint64_t val2;
9501
9502   val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
9503   val1 >>= imms;
9504   val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9505   val2 <<= (32 - imms);
9506
9507   aarch64_set_reg_u64 (cpu, rd, NO_SP, val1 | val2);
9508 }
9509
9510 static void
9511 do_EXTR_64 (sim_cpu *cpu)
9512 {
9513   /* instr[31:21] = 10010011100
9514      instr[20,16] = Rm
9515      instr[15,10] = imms
9516      instr[9,5]   = Rn
9517      instr[4,0]   = Rd  */
9518   unsigned rm   = INSTR (20, 16);
9519   unsigned imms = INSTR (15, 10) & 63;
9520   unsigned rn   = INSTR ( 9,  5);
9521   unsigned rd   = INSTR ( 4,  0);
9522   uint64_t val;
9523
9524   val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
9525   val >>= imms;
9526   val |= (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms));
9527
9528   aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
9529 }
9530
9531 static void
9532 dexExtractImmediate (sim_cpu *cpu)
9533 {
9534   /* assert instr[28:23] = 100111
9535      instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
9536      instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
9537      instr[22]    = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC
9538      instr[21]    = op0 : must be 0 or UNALLOC
9539      instr[20,16] = Rm
9540      instr[15,10] = imms :  0xxxxx for 32 bit, xxxxxx for 64 bit
9541      instr[9,5]   = Rn
9542      instr[4,0]   = Rd  */
9543
9544   /* 32 bit operations must have N = 0 or else we have an UNALLOC.  */
9545   /* 64 bit operations must have N = 1 or else we have an UNALLOC.  */
9546   uint32_t dispatch;
9547   uint32_t size = INSTR (31, 31);
9548   uint32_t N = INSTR (22, 22);
9549   /* 32 bit operations must have imms[5] = 0
9550      or else we have an UNALLOC.  */
9551   uint32_t imms = INSTR (15, 10);
9552
9553   if (size ^ N)
9554     HALT_UNALLOC;
9555
9556   if (!size && uimm (imms, 5, 5))
9557     HALT_UNALLOC;
9558
9559   /* Switch on combined size and op.  */
9560   dispatch = INSTR (31, 29);
9561
9562   if (dispatch == 0)
9563     do_EXTR_32 (cpu);
9564
9565   else if (dispatch == 4)
9566     do_EXTR_64 (cpu);
9567
9568   else if (dispatch == 1)
9569     HALT_NYI;
9570   else
9571     HALT_UNALLOC;
9572 }
9573
9574 static void
9575 dexDPImm (sim_cpu *cpu)
9576 {
9577   /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
9578      assert  group == GROUP_DPIMM_1000 || grpoup == GROUP_DPIMM_1001
9579      bits [25,23] of a DPImm are the secondary dispatch vector.  */
9580   uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));
9581
9582   switch (group2)
9583     {
9584     case DPIMM_PCADR_000:
9585     case DPIMM_PCADR_001:
9586       dexPCRelAddressing (cpu);
9587       return;
9588
9589     case DPIMM_ADDSUB_010:
9590     case DPIMM_ADDSUB_011:
9591       dexAddSubtractImmediate (cpu);
9592       return;
9593
9594     case DPIMM_LOG_100:
9595       dexLogicalImmediate (cpu);
9596       return;
9597
9598     case DPIMM_MOV_101:
9599       dexMoveWideImmediate (cpu);
9600       return;
9601
9602     case DPIMM_BITF_110:
9603       dexBitfieldImmediate (cpu);
9604       return;
9605
9606     case DPIMM_EXTR_111:
9607       dexExtractImmediate (cpu);
9608       return;
9609
9610     default:
9611       /* Should never reach here.  */
9612       HALT_NYI;
9613     }
9614 }
9615
9616 static void
9617 dexLoadUnscaledImmediate (sim_cpu *cpu)
9618 {
9619   /* instr[29,24] == 111_00
9620      instr[21] == 0
9621      instr[11,10] == 00
9622      instr[31,30] = size
9623      instr[26] = V
9624      instr[23,22] = opc
9625      instr[20,12] = simm9
9626      instr[9,5] = rn may be SP.  */
9627   /* unsigned rt = INSTR (4, 0);  */
9628   uint32_t V = INSTR (26, 26);
9629   uint32_t dispatch = ( (INSTR (31, 30) << 2)
9630                         | INSTR (23, 22));
9631   int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
9632
9633   if (!V)
9634     {
9635       /* GReg operations.  */
9636       switch (dispatch)
9637         {
9638         case 0:  sturb (cpu, imm); return;
9639         case 1:  ldurb32 (cpu, imm); return;
9640         case 2:  ldursb64 (cpu, imm); return;
9641         case 3:  ldursb32 (cpu, imm); return;
9642         case 4:  sturh (cpu, imm); return;
9643         case 5:  ldurh32 (cpu, imm); return;
9644         case 6:  ldursh64 (cpu, imm); return;
9645         case 7:  ldursh32 (cpu, imm); return;
9646         case 8:  stur32 (cpu, imm); return;
9647         case 9:  ldur32 (cpu, imm); return;
9648         case 10: ldursw (cpu, imm); return;
9649         case 12: stur64 (cpu, imm); return;
9650         case 13: ldur64 (cpu, imm); return;
9651
9652         case 14:
9653           /* PRFUM NYI.  */
9654           HALT_NYI;
9655
9656         default:
9657         case 11:
9658         case 15:
9659           HALT_UNALLOC;
9660         }
9661     }
9662
9663   /* FReg operations.  */
9664   switch (dispatch)
9665     {
9666     case 2:  fsturq (cpu, imm); return;
9667     case 3:  fldurq (cpu, imm); return;
9668     case 8:  fsturs (cpu, imm); return;
9669     case 9:  fldurs (cpu, imm); return;
9670     case 12: fsturd (cpu, imm); return;
9671     case 13: fldurd (cpu, imm); return;
9672
9673     case 0: /* STUR 8 bit FP.  */
9674     case 1: /* LDUR 8 bit FP.  */
9675     case 4: /* STUR 16 bit FP.  */
9676     case 5: /* LDUR 8 bit FP.  */
9677       HALT_NYI;
9678
9679     default:
9680     case 6:
9681     case 7:
9682     case 10:
9683     case 11:
9684     case 14:
9685     case 15:
9686       HALT_UNALLOC;
9687     }
9688 }
9689
/* N.B. A preliminary note regarding all the ldrs<x>32
   instructions.

   The signed value loaded by these instructions is cast to unsigned
   before being assigned to aarch64_get_reg_u64 (cpu, N), i.e. to the
   64 bit element of the GReg union.  This performs a 32 bit sign
   extension (as required) but avoids 64 bit sign extension, thus
   ensuring that the top half of the register word is zero.  This is
   what the spec demands when a 32 bit load occurs.  */
9699
9700 /* 32 bit load sign-extended byte scaled unsigned 12 bit.  */
9701 static void
9702 ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
9703 {
9704   unsigned int rn = INSTR (9, 5);
9705   unsigned int rt = INSTR (4, 0);
9706
9707   /* The target register may not be SP but the source may be
9708      there is no scaling required for a byte load.  */
9709   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
9710   aarch64_set_reg_u64 (cpu, rt, NO_SP,
9711                        (int64_t) aarch64_get_mem_s8 (cpu, address));
9712 }
9713
9714 /* 32 bit load sign-extended byte scaled or unscaled zero-
9715    or sign-extended 32-bit register offset.  */
9716 static void
9717 ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
9718 {
9719   unsigned int rm = INSTR (20, 16);
9720   unsigned int rn = INSTR (9, 5);
9721   unsigned int rt = INSTR (4, 0);
9722
9723   /* rn may reference SP, rm and rt must reference ZR.  */
9724
9725   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9726   int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9727                                  extension);
9728
9729   /* There is no scaling required for a byte load.  */
9730   aarch64_set_reg_u64
9731     (cpu, rt, NO_SP, (int64_t) aarch64_get_mem_s8 (cpu, address
9732                                                    + displacement));
9733 }
9734
9735 /* 32 bit load sign-extended byte unscaled signed 9 bit with
9736    pre- or post-writeback.  */
9737 static void
9738 ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
9739 {
9740   uint64_t address;
9741   unsigned int rn = INSTR (9, 5);
9742   unsigned int rt = INSTR (4, 0);
9743
9744   if (rn == rt && wb != NoWriteBack)
9745     HALT_UNALLOC;
9746
9747   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9748
9749   if (wb == Pre)
9750       address += offset;
9751
9752   aarch64_set_reg_u64 (cpu, rt, NO_SP,
9753                        (int64_t) aarch64_get_mem_s8 (cpu, address));
9754
9755   if (wb == Post)
9756     address += offset;
9757
9758   if (wb != NoWriteBack)
9759     aarch64_set_reg_u64 (cpu, rn, NO_SP, address);
9760 }
9761
9762 /* 8 bit store scaled.  */
9763 static void
9764 fstrb_abs (sim_cpu *cpu, uint32_t offset)
9765 {
9766   unsigned st = INSTR (4, 0);
9767   unsigned rn = INSTR (9, 5);
9768
9769   aarch64_set_mem_u8 (cpu,
9770                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
9771                       aarch64_get_vec_u8 (cpu, st, 0));
9772 }
9773
9774 /* 8 bit store scaled or unscaled zero- or
9775    sign-extended 8-bit register offset.  */
9776 static void
9777 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
9778 {
9779   unsigned rm = INSTR (20, 16);
9780   unsigned rn = INSTR (9, 5);
9781   unsigned st = INSTR (4, 0);
9782
9783   uint64_t  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9784   int64_t   extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9785                                extension);
9786   uint64_t  displacement = OPT_SCALE (extended, 32, scaling);
9787
9788   aarch64_set_mem_u8
9789     (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0));
9790 }
9791
9792 /* 16 bit store scaled.  */
9793 static void
9794 fstrh_abs (sim_cpu *cpu, uint32_t offset)
9795 {
9796   unsigned st = INSTR (4, 0);
9797   unsigned rn = INSTR (9, 5);
9798
9799   aarch64_set_mem_u16
9800     (cpu,
9801      aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16),
9802      aarch64_get_vec_u16 (cpu, st, 0));
9803 }
9804
9805 /* 16 bit store scaled or unscaled zero-
9806    or sign-extended 16-bit register offset.  */
9807 static void
9808 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
9809 {
9810   unsigned rm = INSTR (20, 16);
9811   unsigned rn = INSTR (9, 5);
9812   unsigned st = INSTR (4, 0);
9813
9814   uint64_t  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9815   int64_t   extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9816                                extension);
9817   uint64_t  displacement = OPT_SCALE (extended, 32, scaling);
9818
9819   aarch64_set_mem_u16
9820     (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0));
9821 }
9822
9823 /* 32 bit store scaled unsigned 12 bit.  */
9824 static void
9825 fstrs_abs (sim_cpu *cpu, uint32_t offset)
9826 {
9827   unsigned st = INSTR (4, 0);
9828   unsigned rn = INSTR (9, 5);
9829
9830   aarch64_set_mem_u32
9831     (cpu,
9832      aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32),
9833      aarch64_get_vec_u32 (cpu, st, 0));
9834 }
9835
9836 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback.  */
9837 static void
9838 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
9839 {
9840   unsigned rn = INSTR (9, 5);
9841   unsigned st = INSTR (4, 0);
9842
9843   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9844
9845   if (wb != Post)
9846     address += offset;
9847
9848   aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0));
9849
9850   if (wb == Post)
9851     address += offset;
9852
9853   if (wb != NoWriteBack)
9854     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
9855 }
9856
9857 /* 32 bit store scaled or unscaled zero-
9858    or sign-extended 32-bit register offset.  */
9859 static void
9860 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
9861 {
9862   unsigned rm = INSTR (20, 16);
9863   unsigned rn = INSTR (9, 5);
9864   unsigned st = INSTR (4, 0);
9865
9866   uint64_t  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9867   int64_t   extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9868                                extension);
9869   uint64_t  displacement = OPT_SCALE (extended, 32, scaling);
9870
9871   aarch64_set_mem_u32
9872     (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0));
9873 }
9874
9875 /* 64 bit store scaled unsigned 12 bit.  */
9876 static void
9877 fstrd_abs (sim_cpu *cpu, uint32_t offset)
9878 {
9879   unsigned st = INSTR (4, 0);
9880   unsigned rn = INSTR (9, 5);
9881
9882   aarch64_set_mem_u64
9883     (cpu,
9884      aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64),
9885      aarch64_get_vec_u64 (cpu, st, 0));
9886 }
9887
9888 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback.  */
9889 static void
9890 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
9891 {
9892   unsigned rn = INSTR (9, 5);
9893   unsigned st = INSTR (4, 0);
9894
9895   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9896
9897   if (wb != Post)
9898     address += offset;
9899
9900   aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0));
9901
9902   if (wb == Post)
9903     address += offset;
9904
9905   if (wb != NoWriteBack)
9906     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
9907 }
9908
9909 /* 64 bit store scaled or unscaled zero-
9910    or sign-extended 32-bit register offset.  */
9911 static void
9912 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
9913 {
9914   unsigned rm = INSTR (20, 16);
9915   unsigned rn = INSTR (9, 5);
9916   unsigned st = INSTR (4, 0);
9917
9918   uint64_t  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9919   int64_t   extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9920                                extension);
9921   uint64_t  displacement = OPT_SCALE (extended, 64, scaling);
9922
9923   aarch64_set_mem_u64
9924     (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0));
9925 }
9926
9927 /* 128 bit store scaled unsigned 12 bit.  */
9928 static void
9929 fstrq_abs (sim_cpu *cpu, uint32_t offset)
9930 {
9931   FRegister a;
9932   unsigned st = INSTR (4, 0);
9933   unsigned rn = INSTR (9, 5);
9934   uint64_t addr;
9935
9936   aarch64_get_FP_long_double (cpu, st, & a);
9937
9938   addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
9939   aarch64_set_mem_long_double (cpu, addr, a);
9940 }
9941
9942 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback.  */
9943 static void
9944 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
9945 {
9946   FRegister a;
9947   unsigned rn = INSTR (9, 5);
9948   unsigned st = INSTR (4, 0);
9949   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9950
9951   if (wb != Post)
9952     address += offset;
9953
9954   aarch64_get_FP_long_double (cpu, st, & a);
9955   aarch64_set_mem_long_double (cpu, address, a);
9956
9957   if (wb == Post)
9958     address += offset;
9959
9960   if (wb != NoWriteBack)
9961     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
9962 }
9963
9964 /* 128 bit store scaled or unscaled zero-
9965    or sign-extended 32-bit register offset.  */
9966 static void
9967 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
9968 {
9969   unsigned rm = INSTR (20, 16);
9970   unsigned rn = INSTR (9, 5);
9971   unsigned st = INSTR (4, 0);
9972
9973   uint64_t  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9974   int64_t   extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9975                                extension);
9976   uint64_t  displacement = OPT_SCALE (extended, 128, scaling);
9977
9978   FRegister a;
9979
9980   aarch64_get_FP_long_double (cpu, st, & a);
9981   aarch64_set_mem_long_double (cpu, address + displacement, a);
9982 }
9983
9984 static void
9985 dexLoadImmediatePrePost (sim_cpu *cpu)
9986 {
9987   /* instr[31,30] = size
9988      instr[29,27] = 111
9989      instr[26]    = V
9990      instr[25,24] = 00
9991      instr[23,22] = opc
9992      instr[21]    = 0
9993      instr[20,12] = simm9
9994      instr[11]    = wb : 0 ==> Post, 1 ==> Pre
9995      instr[10]    = 0
9996      instr[9,5]   = Rn may be SP.
9997      instr[4,0]   = Rt */
9998
9999   uint32_t  V        = INSTR (26, 26);
10000   uint32_t  dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10001   int32_t   imm      = simm32 (aarch64_get_instr (cpu), 20, 12);
10002   WriteBack wb       = INSTR (11, 11);
10003
10004   if (!V)
10005     {
10006       /* GReg operations.  */
10007       switch (dispatch)
10008         {
10009         case 0:  strb_wb (cpu, imm, wb); return;
10010         case 1:  ldrb32_wb (cpu, imm, wb); return;
10011         case 2:  ldrsb_wb (cpu, imm, wb); return;
10012         case 3:  ldrsb32_wb (cpu, imm, wb); return;
10013         case 4:  strh_wb (cpu, imm, wb); return;
10014         case 5:  ldrh32_wb (cpu, imm, wb); return;
10015         case 6:  ldrsh64_wb (cpu, imm, wb); return;
10016         case 7:  ldrsh32_wb (cpu, imm, wb); return;
10017         case 8:  str32_wb (cpu, imm, wb); return;
10018         case 9:  ldr32_wb (cpu, imm, wb); return;
10019         case 10: ldrsw_wb (cpu, imm, wb); return;
10020         case 12: str_wb (cpu, imm, wb); return;
10021         case 13: ldr_wb (cpu, imm, wb); return;
10022
10023         default:
10024         case 11:
10025         case 14:
10026         case 15:
10027           HALT_UNALLOC;
10028         }
10029     }
10030
10031   /* FReg operations.  */
10032   switch (dispatch)
10033     {
10034     case 2:  fstrq_wb (cpu, imm, wb); return;
10035     case 3:  fldrq_wb (cpu, imm, wb); return;
10036     case 8:  fstrs_wb (cpu, imm, wb); return;
10037     case 9:  fldrs_wb (cpu, imm, wb); return;
10038     case 12: fstrd_wb (cpu, imm, wb); return;
10039     case 13: fldrd_wb (cpu, imm, wb); return;
10040
10041     case 0:       /* STUR 8 bit FP.  */
10042     case 1:       /* LDUR 8 bit FP.  */
10043     case 4:       /* STUR 16 bit FP.  */
10044     case 5:       /* LDUR 8 bit FP.  */
10045       HALT_NYI;
10046
10047     default:
10048     case 6:
10049     case 7:
10050     case 10:
10051     case 11:
10052     case 14:
10053     case 15:
10054       HALT_UNALLOC;
10055     }
10056 }
10057
10058 static void
10059 dexLoadRegisterOffset (sim_cpu *cpu)
10060 {
10061   /* instr[31,30] = size
10062      instr[29,27] = 111
10063      instr[26]    = V
10064      instr[25,24] = 00
10065      instr[23,22] = opc
10066      instr[21]    = 1
10067      instr[20,16] = rm
10068      instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10069                              110 ==> SXTW, 111 ==> SXTX,
10070                              ow ==> RESERVED
10071      instr[12]    = scaled
10072      instr[11,10] = 10
10073      instr[9,5]   = rn
10074      instr[4,0]   = rt.  */
10075
10076   uint32_t  V = INSTR (26, 26);
10077   uint32_t  dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10078   Scaling   scale = INSTR (12, 12);
10079   Extension extensionType = INSTR (15, 13);
10080
10081   /* Check for illegal extension types.  */
10082   if (uimm (extensionType, 1, 1) == 0)
10083     HALT_UNALLOC;
10084
10085   if (extensionType == UXTX || extensionType == SXTX)
10086     extensionType = NoExtension;
10087
10088   if (!V)
10089     {
10090       /* GReg operations.  */
10091       switch (dispatch)
10092         {
10093         case 0:  strb_scale_ext (cpu, scale, extensionType); return;
10094         case 1:  ldrb32_scale_ext (cpu, scale, extensionType); return;
10095         case 2:  ldrsb_scale_ext (cpu, scale, extensionType); return;
10096         case 3:  ldrsb32_scale_ext (cpu, scale, extensionType); return;
10097         case 4:  strh_scale_ext (cpu, scale, extensionType); return;
10098         case 5:  ldrh32_scale_ext (cpu, scale, extensionType); return;
10099         case 6:  ldrsh_scale_ext (cpu, scale, extensionType); return;
10100         case 7:  ldrsh32_scale_ext (cpu, scale, extensionType); return;
10101         case 8:  str32_scale_ext (cpu, scale, extensionType); return;
10102         case 9:  ldr32_scale_ext (cpu, scale, extensionType); return;
10103         case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10104         case 12: str_scale_ext (cpu, scale, extensionType); return;
10105         case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10106         case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10107
10108         default:
10109         case 11:
10110         case 15:
10111           HALT_UNALLOC;
10112         }
10113     }
10114
10115   /* FReg operations.  */
10116   switch (dispatch)
10117     {
10118     case 1: /* LDUR 8 bit FP.  */
10119       HALT_NYI;
10120     case 3:  fldrq_scale_ext (cpu, scale, extensionType); return;
10121     case 5: /* LDUR 8 bit FP.  */
10122       HALT_NYI;
10123     case 9:  fldrs_scale_ext (cpu, scale, extensionType); return;
10124     case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
10125
10126     case 0:  fstrb_scale_ext (cpu, scale, extensionType); return;
10127     case 2:  fstrq_scale_ext (cpu, scale, extensionType); return;
10128     case 4:  fstrh_scale_ext (cpu, scale, extensionType); return;
10129     case 8:  fstrs_scale_ext (cpu, scale, extensionType); return;
10130     case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
10131
10132     default:
10133     case 6:
10134     case 7:
10135     case 10:
10136     case 11:
10137     case 14:
10138     case 15:
10139       HALT_UNALLOC;
10140     }
10141 }
10142
10143 static void
10144 dexLoadUnsignedImmediate (sim_cpu *cpu)
10145 {
10146   /* instr[29,24] == 111_01
10147      instr[31,30] = size
10148      instr[26]    = V
10149      instr[23,22] = opc
10150      instr[21,10] = uimm12 : unsigned immediate offset
10151      instr[9,5]   = rn may be SP.
10152      instr[4,0]   = rt.  */
10153
10154   uint32_t V = INSTR (26,26);
10155   uint32_t dispatch = ( (INSTR (31, 30) << 2)
10156                        | INSTR (23, 22));
10157   uint32_t imm = INSTR (21, 10);
10158
10159   if (!V)
10160     {
10161       /* GReg operations.  */
10162       switch (dispatch)
10163         {
10164         case 0:  strb_abs (cpu, imm); return;
10165         case 1:  ldrb32_abs (cpu, imm); return;
10166         case 2:  ldrsb_abs (cpu, imm); return;
10167         case 3:  ldrsb32_abs (cpu, imm); return;
10168         case 4:  strh_abs (cpu, imm); return;
10169         case 5:  ldrh32_abs (cpu, imm); return;
10170         case 6:  ldrsh_abs (cpu, imm); return;
10171         case 7:  ldrsh32_abs (cpu, imm); return;
10172         case 8:  str32_abs (cpu, imm); return;
10173         case 9:  ldr32_abs (cpu, imm); return;
10174         case 10: ldrsw_abs (cpu, imm); return;
10175         case 12: str_abs (cpu, imm); return;
10176         case 13: ldr_abs (cpu, imm); return;
10177         case 14: prfm_abs (cpu, imm); return;
10178
10179         default:
10180         case 11:
10181         case 15:
10182           HALT_UNALLOC;
10183         }
10184     }
10185
10186   /* FReg operations.  */
10187   switch (dispatch)
10188     {
10189     case 0:  fstrb_abs (cpu, imm); return;
10190     case 4:  fstrh_abs (cpu, imm); return;
10191     case 8:  fstrs_abs (cpu, imm); return;
10192     case 12: fstrd_abs (cpu, imm); return;
10193     case 2:  fstrq_abs (cpu, imm); return;
10194
10195     case 1:  fldrb_abs (cpu, imm); return;
10196     case 5:  fldrh_abs (cpu, imm); return;
10197     case 9:  fldrs_abs (cpu, imm); return;
10198     case 13: fldrd_abs (cpu, imm); return;
10199     case 3:  fldrq_abs (cpu, imm); return;
10200
10201     default:
10202     case 6:
10203     case 7:
10204     case 10:
10205     case 11:
10206     case 14:
10207     case 15:
10208       HALT_UNALLOC;
10209     }
10210 }
10211
10212 static void
10213 dexLoadExclusive (sim_cpu *cpu)
10214 {
10215   /* assert instr[29:24] = 001000;
10216      instr[31,30] = size
10217      instr[23] = 0 if exclusive
10218      instr[22] = L : 1 if load, 0 if store
10219      instr[21] = 1 if pair
10220      instr[20,16] = Rs
10221      instr[15] = o0 : 1 if ordered
10222      instr[14,10] = Rt2
10223      instr[9,5] = Rn
10224      instr[4.0] = Rt.  */
10225
10226   switch (INSTR (22, 21))
10227     {
10228     case 2:   ldxr (cpu); return;
10229     case 0:   stxr (cpu); return;
10230     default:  HALT_NYI;
10231     }
10232 }
10233
10234 static void
10235 dexLoadOther (sim_cpu *cpu)
10236 {
10237   uint32_t dispatch;
10238
10239   /* instr[29,25] = 111_0
10240      instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
10241      instr[21:11,10] is the secondary dispatch.  */
10242   if (INSTR (24, 24))
10243     {
10244       dexLoadUnsignedImmediate (cpu);
10245       return;
10246     }
10247
10248   dispatch = ( (INSTR (21, 21) << 2)
10249               | INSTR (11, 10));
10250   switch (dispatch)
10251     {
10252     case 0: dexLoadUnscaledImmediate (cpu); return;
10253     case 1: dexLoadImmediatePrePost (cpu); return;
10254     case 3: dexLoadImmediatePrePost (cpu); return;
10255     case 6: dexLoadRegisterOffset (cpu); return;
10256
10257     default:
10258     case 2:
10259     case 4:
10260     case 5:
10261     case 7:
10262       HALT_NYI;
10263     }
10264 }
10265
10266 static void
10267 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10268 {
10269   unsigned rn = INSTR (14, 10);
10270   unsigned rd = INSTR (9, 5);
10271   unsigned rm = INSTR (4, 0);
10272   uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10273
10274   if ((rn == rd || rm == rd) && wb != NoWriteBack)
10275     HALT_UNALLOC; /* ??? */
10276
10277   offset <<= 2;
10278
10279   if (wb != Post)
10280     address += offset;
10281
10282   aarch64_set_mem_u32 (cpu, address,
10283                        aarch64_get_reg_u32 (cpu, rm, NO_SP));
10284   aarch64_set_mem_u32 (cpu, address + 4,
10285                        aarch64_get_reg_u32 (cpu, rn, NO_SP));
10286
10287   if (wb == Post)
10288     address += offset;
10289
10290   if (wb != NoWriteBack)
10291     aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10292 }
10293
10294 static void
10295 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10296 {
10297   unsigned rn = INSTR (14, 10);
10298   unsigned rd = INSTR (9, 5);
10299   unsigned rm = INSTR (4, 0);
10300   uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10301
10302   if ((rn == rd || rm == rd) && wb != NoWriteBack)
10303     HALT_UNALLOC; /* ??? */
10304
10305   offset <<= 3;
10306
10307   if (wb != Post)
10308     address += offset;
10309
10310   aarch64_set_mem_u64 (cpu, address,
10311                        aarch64_get_reg_u64 (cpu, rm, SP_OK));
10312   aarch64_set_mem_u64 (cpu, address + 8,
10313                        aarch64_get_reg_u64 (cpu, rn, SP_OK));
10314
10315   if (wb == Post)
10316     address += offset;
10317
10318   if (wb != NoWriteBack)
10319     aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10320 }
10321
10322 static void
10323 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10324 {
10325   unsigned rn = INSTR (14, 10);
10326   unsigned rd = INSTR (9, 5);
10327   unsigned rm = INSTR (4, 0);
10328   uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10329
10330   /* treat this as unalloc to make sure we don't do it.  */
10331   if (rn == rm)
10332     HALT_UNALLOC;
10333
10334   offset <<= 2;
10335
10336   if (wb != Post)
10337     address += offset;
10338
10339   aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address));
10340   aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4));
10341
10342   if (wb == Post)
10343     address += offset;
10344
10345   if (wb != NoWriteBack)
10346     aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10347 }
10348
10349 static void
10350 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10351 {
10352   unsigned rn = INSTR (14, 10);
10353   unsigned rd = INSTR (9, 5);
10354   unsigned rm = INSTR (4, 0);
10355   uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10356
10357   /* Treat this as unalloc to make sure we don't do it.  */
10358   if (rn == rm)
10359     HALT_UNALLOC;
10360
10361   offset <<= 2;
10362
10363   if (wb != Post)
10364     address += offset;
10365
10366   aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address));
10367   aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4));
10368
10369   if (wb == Post)
10370     address += offset;
10371
10372   if (wb != NoWriteBack)
10373     aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10374 }
10375
10376 static void
10377 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10378 {
10379   unsigned rn = INSTR (14, 10);
10380   unsigned rd = INSTR (9, 5);
10381   unsigned rm = INSTR (4, 0);
10382   uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10383
10384   /* Treat this as unalloc to make sure we don't do it.  */
10385   if (rn == rm)
10386     HALT_UNALLOC;
10387
10388   offset <<= 3;
10389
10390   if (wb != Post)
10391     address += offset;
10392
10393   aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address));
10394   aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8));
10395
10396   if (wb == Post)
10397     address += offset;
10398
10399   if (wb != NoWriteBack)
10400     aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10401 }
10402
10403 static void
10404 dex_load_store_pair_gr (sim_cpu *cpu)
10405 {
10406   /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
10407      instr[29,25] = instruction encoding: 101_0
10408      instr[26]    = V : 1 if fp 0 if gp
10409      instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
10410      instr[22]    = load/store (1=> load)
10411      instr[21,15] = signed, scaled, offset
10412      instr[14,10] = Rn
10413      instr[ 9, 5] = Rd
10414      instr[ 4, 0] = Rm.  */
10415
10416   uint32_t dispatch = ((INSTR (31, 30) << 3)
10417                         | INSTR (24, 22));
10418   int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
10419
10420   switch (dispatch)
10421     {
10422     case 2: store_pair_u32 (cpu, offset, Post); return;
10423     case 3: load_pair_u32  (cpu, offset, Post); return;
10424     case 4: store_pair_u32 (cpu, offset, NoWriteBack); return;
10425     case 5: load_pair_u32  (cpu, offset, NoWriteBack); return;
10426     case 6: store_pair_u32 (cpu, offset, Pre); return;
10427     case 7: load_pair_u32  (cpu, offset, Pre); return;
10428
10429     case 11: load_pair_s32  (cpu, offset, Post); return;
10430     case 13: load_pair_s32  (cpu, offset, NoWriteBack); return;
10431     case 15: load_pair_s32  (cpu, offset, Pre); return;
10432
10433     case 18: store_pair_u64 (cpu, offset, Post); return;
10434     case 19: load_pair_u64  (cpu, offset, Post); return;
10435     case 20: store_pair_u64 (cpu, offset, NoWriteBack); return;
10436     case 21: load_pair_u64  (cpu, offset, NoWriteBack); return;
10437     case 22: store_pair_u64 (cpu, offset, Pre); return;
10438     case 23: load_pair_u64  (cpu, offset, Pre); return;
10439
10440     default:
10441       HALT_UNALLOC;
10442     }
10443 }
10444
10445 static void
10446 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
10447 {
10448   unsigned rn = INSTR (14, 10);
10449   unsigned rd = INSTR (9, 5);
10450   unsigned rm = INSTR (4, 0);
10451   uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10452
10453   offset <<= 2;
10454
10455   if (wb != Post)
10456     address += offset;
10457
10458   aarch64_set_mem_u32 (cpu, address,     aarch64_get_vec_u32 (cpu, rm, 0));
10459   aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0));
10460
10461   if (wb == Post)
10462     address += offset;
10463
10464   if (wb != NoWriteBack)
10465     aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10466 }
10467
10468 static void
10469 store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
10470 {
10471   unsigned rn = INSTR (14, 10);
10472   unsigned rd = INSTR (9, 5);
10473   unsigned rm = INSTR (4, 0);
10474   uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10475
10476   offset <<= 3;
10477
10478   if (wb != Post)
10479     address += offset;
10480
10481   aarch64_set_mem_u64 (cpu, address,     aarch64_get_vec_u64 (cpu, rm, 0));
10482   aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0));
10483
10484   if (wb == Post)
10485     address += offset;
10486
10487   if (wb != NoWriteBack)
10488     aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10489 }
10490
10491 static void
10492 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
10493 {
10494   FRegister a;
10495   unsigned rn = INSTR (14, 10);
10496   unsigned rd = INSTR (9, 5);
10497   unsigned rm = INSTR (4, 0);
10498   uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10499
10500   offset <<= 4;
10501
10502   if (wb != Post)
10503     address += offset;
10504
10505   aarch64_get_FP_long_double (cpu, rm, & a);
10506   aarch64_set_mem_long_double (cpu, address, a);
10507   aarch64_get_FP_long_double (cpu, rn, & a);
10508   aarch64_set_mem_long_double (cpu, address + 16, a);
10509
10510   if (wb == Post)
10511     address += offset;
10512
10513   if (wb != NoWriteBack)
10514     aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10515 }
10516
10517 static void
10518 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
10519 {
10520   unsigned rn = INSTR (14, 10);
10521   unsigned rd = INSTR (9, 5);
10522   unsigned rm = INSTR (4, 0);
10523   uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10524
10525   if (rm == rn)
10526     HALT_UNALLOC;
10527
10528   offset <<= 2;
10529
10530   if (wb != Post)
10531     address += offset;
10532
10533   aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address));
10534   aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4));
10535
10536   if (wb == Post)
10537     address += offset;
10538
10539   if (wb != NoWriteBack)
10540     aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10541 }
10542
10543 static void
10544 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
10545 {
10546   unsigned rn = INSTR (14, 10);
10547   unsigned rd = INSTR (9, 5);
10548   unsigned rm = INSTR (4, 0);
10549   uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10550
10551   if (rm == rn)
10552     HALT_UNALLOC;
10553
10554   offset <<= 3;
10555
10556   if (wb != Post)
10557     address += offset;
10558
10559   aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address));
10560   aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8));
10561
10562   if (wb == Post)
10563     address += offset;
10564
10565   if (wb != NoWriteBack)
10566     aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10567 }
10568
10569 static void
10570 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
10571 {
10572   FRegister a;
10573   unsigned rn = INSTR (14, 10);
10574   unsigned rd = INSTR (9, 5);
10575   unsigned rm = INSTR (4, 0);
10576   uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10577
10578   if (rm == rn)
10579     HALT_UNALLOC;
10580
10581   offset <<= 4;
10582
10583   if (wb != Post)
10584     address += offset;
10585
10586   aarch64_get_mem_long_double (cpu, address, & a);
10587   aarch64_set_FP_long_double (cpu, rm, a);
10588   aarch64_get_mem_long_double (cpu, address + 16, & a);
10589   aarch64_set_FP_long_double (cpu, rn, a);
10590
10591   if (wb == Post)
10592     address += offset;
10593
10594   if (wb != NoWriteBack)
10595     aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10596 }
10597
10598 static void
10599 dex_load_store_pair_fp (sim_cpu *cpu)
10600 {
10601   /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
10602      instr[29,25] = instruction encoding
10603      instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
10604      instr[22]    = load/store (1=> load)
10605      instr[21,15] = signed, scaled, offset
10606      instr[14,10] = Rn
10607      instr[ 9, 5] = Rd
10608      instr[ 4, 0] = Rm  */
10609
10610   uint32_t dispatch = ((INSTR (31, 30) << 3)
10611                         | INSTR (24, 22));
10612   int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
10613
10614   switch (dispatch)
10615     {
10616     case 2: store_pair_float (cpu, offset, Post); return;
10617     case 3: load_pair_float  (cpu, offset, Post); return;
10618     case 4: store_pair_float (cpu, offset, NoWriteBack); return;
10619     case 5: load_pair_float  (cpu, offset, NoWriteBack); return;
10620     case 6: store_pair_float (cpu, offset, Pre); return;
10621     case 7: load_pair_float  (cpu, offset, Pre); return;
10622
10623     case 10: store_pair_double (cpu, offset, Post); return;
10624     case 11: load_pair_double  (cpu, offset, Post); return;
10625     case 12: store_pair_double (cpu, offset, NoWriteBack); return;
10626     case 13: load_pair_double  (cpu, offset, NoWriteBack); return;
10627     case 14: store_pair_double (cpu, offset, Pre); return;
10628     case 15: load_pair_double  (cpu, offset, Pre); return;
10629
10630     case 18: store_pair_long_double (cpu, offset, Post); return;
10631     case 19: load_pair_long_double  (cpu, offset, Post); return;
10632     case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
10633     case 21: load_pair_long_double  (cpu, offset, NoWriteBack); return;
10634     case 22: store_pair_long_double (cpu, offset, Pre); return;
10635     case 23: load_pair_long_double  (cpu, offset, Pre); return;
10636
10637     default:
10638       HALT_UNALLOC;
10639     }
10640 }
10641
/* Return the number of the vector register O places after register V
   in a register list.  There are 32 vector registers and a list wraps
   from V31 back to V0, so the sum is reduced modulo 32.  (The previous
   mask of 0x3F allowed results of 32..63, which do not name a
   register.)  */
static inline unsigned
vec_reg (unsigned v, unsigned o)
{
  return (v + o) & 0x1F;
}
10647
10648 /* Load multiple N-element structures to N consecutive registers.  */
10649 static void
10650 vec_load (sim_cpu *cpu, uint64_t address, unsigned N)
10651 {
10652   int      all  = INSTR (30, 30);
10653   unsigned size = INSTR (11, 10);
10654   unsigned vd   = INSTR (4, 0);
10655   unsigned i;
10656
10657   switch (size)
10658     {
10659     case 0: /* 8-bit operations.  */
10660       if (all)
10661         for (i = 0; i < (16 * N); i++)
10662           aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15,
10663                               aarch64_get_mem_u8 (cpu, address + i));
10664       else
10665         for (i = 0; i < (8 * N); i++)
10666           aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7,
10667                               aarch64_get_mem_u8 (cpu, address + i));
10668       return;
10669
10670     case 1: /* 16-bit operations.  */
10671       if (all)
10672         for (i = 0; i < (8 * N); i++)
10673           aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7,
10674                                aarch64_get_mem_u16 (cpu, address + i * 2));
10675       else
10676         for (i = 0; i < (4 * N); i++)
10677           aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3,
10678                                aarch64_get_mem_u16 (cpu, address + i * 2));
10679       return;
10680
10681     case 2: /* 32-bit operations.  */
10682       if (all)
10683         for (i = 0; i < (4 * N); i++)
10684           aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3,
10685                                aarch64_get_mem_u32 (cpu, address + i * 4));
10686       else
10687         for (i = 0; i < (2 * N); i++)
10688           aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1,
10689                                aarch64_get_mem_u32 (cpu, address + i * 4));
10690       return;
10691
10692     case 3: /* 64-bit operations.  */
10693       if (all)
10694         for (i = 0; i < (2 * N); i++)
10695           aarch64_set_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1,
10696                                aarch64_get_mem_u64 (cpu, address + i * 8));
10697       else
10698         for (i = 0; i < N; i++)
10699           aarch64_set_vec_u64 (cpu, vec_reg (vd, i), 0,
10700                                aarch64_get_mem_u64 (cpu, address + i * 8));
10701       return;
10702     }
10703 }
10704
10705 /* LD4: load multiple 4-element to four consecutive registers.  */
10706 static void
10707 LD4 (sim_cpu *cpu, uint64_t address)
10708 {
10709   vec_load (cpu, address, 4);
10710 }
10711
10712 /* LD3: load multiple 3-element structures to three consecutive registers.  */
10713 static void
10714 LD3 (sim_cpu *cpu, uint64_t address)
10715 {
10716   vec_load (cpu, address, 3);
10717 }
10718
10719 /* LD2: load multiple 2-element structures to two consecutive registers.  */
10720 static void
10721 LD2 (sim_cpu *cpu, uint64_t address)
10722 {
10723   vec_load (cpu, address, 2);
10724 }
10725
10726 /* Load multiple 1-element structures into one register.  */
10727 static void
10728 LD1_1 (sim_cpu *cpu, uint64_t address)
10729 {
10730   int      all  = INSTR (30, 30);
10731   unsigned size = INSTR (11, 10);
10732   unsigned vd   = INSTR (4, 0);
10733   unsigned i;
10734
10735   switch (size)
10736     {
10737     case 0:
10738       /* LD1 {Vd.16b}, addr, #16 */
10739       /* LD1 {Vd.8b}, addr, #8 */
10740       for (i = 0; i < (all ? 16 : 8); i++)
10741         aarch64_set_vec_u8 (cpu, vd, i,
10742                             aarch64_get_mem_u8 (cpu, address + i));
10743       return;
10744
10745     case 1:
10746       /* LD1 {Vd.8h}, addr, #16 */
10747       /* LD1 {Vd.4h}, addr, #8 */
10748       for (i = 0; i < (all ? 8 : 4); i++)
10749         aarch64_set_vec_u16 (cpu, vd, i,
10750                              aarch64_get_mem_u16 (cpu, address + i * 2));
10751       return;
10752
10753     case 2:
10754       /* LD1 {Vd.4s}, addr, #16 */
10755       /* LD1 {Vd.2s}, addr, #8 */
10756       for (i = 0; i < (all ? 4 : 2); i++)
10757         aarch64_set_vec_u32 (cpu, vd, i,
10758                              aarch64_get_mem_u32 (cpu, address + i * 4));
10759       return;
10760
10761     case 3:
10762       /* LD1 {Vd.2d}, addr, #16 */
10763       /* LD1 {Vd.1d}, addr, #8 */
10764       for (i = 0; i < (all ? 2 : 1); i++)
10765         aarch64_set_vec_u64 (cpu, vd, i,
10766                              aarch64_get_mem_u64 (cpu, address + i * 8));
10767       return;
10768     }
10769 }
10770
10771 /* Load multiple 1-element structures into two registers.  */
10772 static void
10773 LD1_2 (sim_cpu *cpu, uint64_t address)
10774 {
10775   /* FIXME: This algorithm is *exactly* the same as the LD2 version.
10776      So why have two different instructions ?  There must be something
10777      wrong somewhere.  */
10778   vec_load (cpu, address, 2);
10779 }
10780
10781 /* Load multiple 1-element structures into three registers.  */
10782 static void
10783 LD1_3 (sim_cpu *cpu, uint64_t address)
10784 {
10785   /* FIXME: This algorithm is *exactly* the same as the LD3 version.
10786      So why have two different instructions ?  There must be something
10787      wrong somewhere.  */
10788   vec_load (cpu, address, 3);
10789 }
10790
10791 /* Load multiple 1-element structures into four registers.  */
10792 static void
10793 LD1_4 (sim_cpu *cpu, uint64_t address)
10794 {
10795   /* FIXME: This algorithm is *exactly* the same as the LD4 version.
10796      So why have two different instructions ?  There must be something
10797      wrong somewhere.  */
10798   vec_load (cpu, address, 4);
10799 }
10800
10801 /* Store multiple N-element structures to N consecutive registers.  */
10802 static void
10803 vec_store (sim_cpu *cpu, uint64_t address, unsigned N)
10804 {
10805   int      all  = INSTR (30, 30);
10806   unsigned size = INSTR (11, 10);
10807   unsigned vd   = INSTR (4, 0);
10808   unsigned i;
10809
10810   switch (size)
10811     {
10812     case 0: /* 8-bit operations.  */
10813       if (all)
10814         for (i = 0; i < (16 * N); i++)
10815           aarch64_set_mem_u8
10816             (cpu, address + i,
10817              aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15));
10818       else
10819         for (i = 0; i < (8 * N); i++)
10820           aarch64_set_mem_u8
10821             (cpu, address + i,
10822              aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7));
10823       return;
10824
10825     case 1: /* 16-bit operations.  */
10826       if (all)
10827         for (i = 0; i < (8 * N); i++)
10828           aarch64_set_mem_u16
10829             (cpu, address + i * 2,
10830              aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7));
10831       else
10832         for (i = 0; i < (4 * N); i++)
10833           aarch64_set_mem_u16
10834             (cpu, address + i * 2,
10835              aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3));
10836       return;
10837
10838     case 2: /* 32-bit operations.  */
10839       if (all)
10840         for (i = 0; i < (4 * N); i++)
10841           aarch64_set_mem_u32
10842             (cpu, address + i * 4,
10843              aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3));
10844       else
10845         for (i = 0; i < (2 * N); i++)
10846           aarch64_set_mem_u32
10847             (cpu, address + i * 4,
10848              aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1));
10849       return;
10850
10851     case 3: /* 64-bit operations.  */
10852       if (all)
10853         for (i = 0; i < (2 * N); i++)
10854           aarch64_set_mem_u64
10855             (cpu, address + i * 8,
10856              aarch64_get_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1));
10857       else
10858         for (i = 0; i < N; i++)
10859           aarch64_set_mem_u64
10860             (cpu, address + i * 8,
10861              aarch64_get_vec_u64 (cpu, vec_reg (vd, i), 0));
10862       return;
10863     }
10864 }
10865
10866 /* Store multiple 4-element structure to four consecutive registers.  */
10867 static void
10868 ST4 (sim_cpu *cpu, uint64_t address)
10869 {
10870   vec_store (cpu, address, 4);
10871 }
10872
10873 /* Store multiple 3-element structures to three consecutive registers.  */
10874 static void
10875 ST3 (sim_cpu *cpu, uint64_t address)
10876 {
10877   vec_store (cpu, address, 3);
10878 }
10879
10880 /* Store multiple 2-element structures to two consecutive registers.  */
10881 static void
10882 ST2 (sim_cpu *cpu, uint64_t address)
10883 {
10884   vec_store (cpu, address, 2);
10885 }
10886
10887 /* Store multiple 1-element structures into one register.  */
10888 static void
10889 ST1_1 (sim_cpu *cpu, uint64_t address)
10890 {
10891   int      all  = INSTR (30, 30);
10892   unsigned size = INSTR (11, 10);
10893   unsigned vd   = INSTR (4, 0);
10894   unsigned i;
10895
10896   switch (size)
10897     {
10898     case 0:
10899       for (i = 0; i < (all ? 16 : 8); i++)
10900         aarch64_set_mem_u8 (cpu, address + i,
10901                             aarch64_get_vec_u8 (cpu, vd, i));
10902       return;
10903
10904     case 1:
10905       for (i = 0; i < (all ? 8 : 4); i++)
10906         aarch64_set_mem_u16 (cpu, address + i * 2,
10907                              aarch64_get_vec_u16 (cpu, vd, i));
10908       return;
10909
10910     case 2:
10911       for (i = 0; i < (all ? 4 : 2); i++)
10912         aarch64_set_mem_u32 (cpu, address + i * 4,
10913                              aarch64_get_vec_u32 (cpu, vd, i));
10914       return;
10915
10916     case 3:
10917       for (i = 0; i < (all ? 2 : 1); i++)
10918         aarch64_set_mem_u64 (cpu, address + i * 8,
10919                              aarch64_get_vec_u64 (cpu, vd, i));
10920       return;
10921     }
10922 }
10923
10924 /* Store multiple 1-element structures into two registers.  */
10925 static void
10926 ST1_2 (sim_cpu *cpu, uint64_t address)
10927 {
10928   /* FIXME: This algorithm is *exactly* the same as the ST2 version.
10929      So why have two different instructions ?  There must be
10930      something wrong somewhere.  */
10931   vec_store (cpu, address, 2);
10932 }
10933
10934 /* Store multiple 1-element structures into three registers.  */
10935 static void
10936 ST1_3 (sim_cpu *cpu, uint64_t address)
10937 {
10938   /* FIXME: This algorithm is *exactly* the same as the ST3 version.
10939      So why have two different instructions ?  There must be
10940      something wrong somewhere.  */
10941   vec_store (cpu, address, 3);
10942 }
10943
10944 /* Store multiple 1-element structures into four registers.  */
10945 static void
10946 ST1_4 (sim_cpu *cpu, uint64_t address)
10947 {
10948   /* FIXME: This algorithm is *exactly* the same as the ST4 version.
10949      So why have two different instructions ?  There must be
10950      something wrong somewhere.  */
10951   vec_store (cpu, address, 4);
10952 }
10953
10954 static void
10955 do_vec_LDnR (sim_cpu *cpu, uint64_t address)
10956 {
10957   /* instr[31]    = 0
10958      instr[30]    = element selector 0=>half, 1=>all elements
10959      instr[29,24] = 00 1101
10960      instr[23]    = 0=>simple, 1=>post
10961      instr[22]    = 1
10962      instr[21]    = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
10963      instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
10964                       11111 (immediate post inc)
10965      instr[15,14] = 11
10966      instr[13]    = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
10967      instr[12]    = 0
10968      instr[11,10] = element size 00=> byte(b), 01=> half(h),
10969                                  10=> word(s), 11=> double(d)
10970      instr[9,5]   = address
10971      instr[4,0]   = Vd  */
10972
10973   unsigned full = INSTR (30, 30);
10974   unsigned vd = INSTR (4, 0);
10975   unsigned size = INSTR (11, 10);
10976   int i;
10977
10978   NYI_assert (29, 24, 0x0D);
10979   NYI_assert (22, 22, 1);
10980   NYI_assert (15, 14, 3);
10981   NYI_assert (12, 12, 0);
10982
10983   switch ((INSTR (13, 13) << 1)
10984           | INSTR (21, 21))
10985     {
10986     case 0: /* LD1R.  */
10987       switch (size)
10988         {
10989         case 0:
10990           {
10991             uint8_t val = aarch64_get_mem_u8 (cpu, address);
10992             for (i = 0; i < (full ? 16 : 8); i++)
10993               aarch64_set_vec_u8 (cpu, vd, i, val);
10994             break;
10995           }
10996
10997         case 1:
10998           {
10999             uint16_t val = aarch64_get_mem_u16 (cpu, address);
11000             for (i = 0; i < (full ? 8 : 4); i++)
11001               aarch64_set_vec_u16 (cpu, vd, i, val);
11002             break;
11003           }
11004
11005         case 2:
11006           {
11007             uint32_t val = aarch64_get_mem_u32 (cpu, address);
11008             for (i = 0; i < (full ? 4 : 2); i++)
11009               aarch64_set_vec_u32 (cpu, vd, i, val);
11010             break;
11011           }
11012
11013         case 3:
11014           {
11015             uint64_t val = aarch64_get_mem_u64 (cpu, address);
11016             for (i = 0; i < (full ? 2 : 1); i++)
11017               aarch64_set_vec_u64 (cpu, vd, i, val);
11018             break;
11019           }
11020
11021         default:
11022           HALT_UNALLOC;
11023         }
11024       break;
11025
11026     case 1: /* LD2R.  */
11027       switch (size)
11028         {
11029         case 0:
11030           {
11031             uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11032             uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11033
11034             for (i = 0; i < (full ? 16 : 8); i++)
11035               {
11036                 aarch64_set_vec_u8 (cpu, vd, 0, val1);
11037                 aarch64_set_vec_u8 (cpu, vd + 1, 0, val2);
11038               }
11039             break;
11040           }
11041
11042         case 1:
11043           {
11044             uint16_t val1 = aarch64_get_mem_u16 (cpu, address);
11045             uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11046
11047             for (i = 0; i < (full ? 8 : 4); i++)
11048               {
11049                 aarch64_set_vec_u16 (cpu, vd, 0, val1);
11050                 aarch64_set_vec_u16 (cpu, vd + 1, 0, val2);
11051               }
11052             break;
11053           }
11054
11055         case 2:
11056           {
11057             uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11058             uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11059
11060             for (i = 0; i < (full ? 4 : 2); i++)
11061               {
11062                 aarch64_set_vec_u32 (cpu, vd, 0, val1);
11063                 aarch64_set_vec_u32 (cpu, vd + 1, 0, val2);
11064               }
11065             break;
11066           }
11067
11068         case 3:
11069           {
11070             uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11071             uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11072
11073             for (i = 0; i < (full ? 2 : 1); i++)
11074               {
11075                 aarch64_set_vec_u64 (cpu, vd, 0, val1);
11076                 aarch64_set_vec_u64 (cpu, vd + 1, 0, val2);
11077               }
11078             break;
11079           }
11080
11081         default:
11082           HALT_UNALLOC;
11083         }
11084       break;
11085
11086     case 2: /* LD3R.  */
11087       switch (size)
11088         {
11089         case 0:
11090           {
11091             uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11092             uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11093             uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2);
11094
11095             for (i = 0; i < (full ? 16 : 8); i++)
11096               {
11097                 aarch64_set_vec_u8 (cpu, vd, 0, val1);
11098                 aarch64_set_vec_u8 (cpu, vd + 1, 0, val2);
11099                 aarch64_set_vec_u8 (cpu, vd + 2, 0, val3);
11100               }
11101           }
11102           break;
11103
11104         case 1:
11105           {
11106             uint32_t val1 = aarch64_get_mem_u16 (cpu, address);
11107             uint32_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11108             uint32_t val3 = aarch64_get_mem_u16 (cpu, address + 4);
11109
11110             for (i = 0; i < (full ? 8 : 4); i++)
11111               {
11112                 aarch64_set_vec_u16 (cpu, vd, 0, val1);
11113                 aarch64_set_vec_u16 (cpu, vd + 1, 0, val2);
11114                 aarch64_set_vec_u16 (cpu, vd + 2, 0, val3);
11115               }
11116           }
11117           break;
11118
11119         case 2:
11120           {
11121             uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11122             uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11123             uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8);
11124
11125             for (i = 0; i < (full ? 4 : 2); i++)
11126               {
11127                 aarch64_set_vec_u32 (cpu, vd, 0, val1);
11128                 aarch64_set_vec_u32 (cpu, vd + 1, 0, val2);
11129                 aarch64_set_vec_u32 (cpu, vd + 2, 0, val3);
11130               }
11131           }
11132           break;
11133
11134         case 3:
11135           {
11136             uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11137             uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11138             uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16);
11139
11140             for (i = 0; i < (full ? 2 : 1); i++)
11141               {
11142                 aarch64_set_vec_u64 (cpu, vd, 0, val1);
11143                 aarch64_set_vec_u64 (cpu, vd + 1, 0, val2);
11144                 aarch64_set_vec_u64 (cpu, vd + 2, 0, val3);
11145               }
11146           }
11147           break;
11148
11149         default:
11150           HALT_UNALLOC;
11151         }
11152       break;
11153
11154     case 3: /* LD4R.  */
11155       switch (size)
11156         {
11157         case 0:
11158           {
11159             uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11160             uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11161             uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2);
11162             uint8_t val4 = aarch64_get_mem_u8 (cpu, address + 3);
11163
11164             for (i = 0; i < (full ? 16 : 8); i++)
11165               {
11166                 aarch64_set_vec_u8 (cpu, vd, 0, val1);
11167                 aarch64_set_vec_u8 (cpu, vd + 1, 0, val2);
11168                 aarch64_set_vec_u8 (cpu, vd + 2, 0, val3);
11169                 aarch64_set_vec_u8 (cpu, vd + 3, 0, val4);
11170               }
11171           }
11172           break;
11173
11174         case 1:
11175           {
11176             uint32_t val1 = aarch64_get_mem_u16 (cpu, address);
11177             uint32_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11178             uint32_t val3 = aarch64_get_mem_u16 (cpu, address + 4);
11179             uint32_t val4 = aarch64_get_mem_u16 (cpu, address + 6);
11180
11181             for (i = 0; i < (full ? 8 : 4); i++)
11182               {
11183                 aarch64_set_vec_u16 (cpu, vd, 0, val1);
11184                 aarch64_set_vec_u16 (cpu, vd + 1, 0, val2);
11185                 aarch64_set_vec_u16 (cpu, vd + 2, 0, val3);
11186                 aarch64_set_vec_u16 (cpu, vd + 3, 0, val4);
11187               }
11188           }
11189           break;
11190
11191         case 2:
11192           {
11193             uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11194             uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11195             uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8);
11196             uint32_t val4 = aarch64_get_mem_u32 (cpu, address + 12);
11197
11198             for (i = 0; i < (full ? 4 : 2); i++)
11199               {
11200                 aarch64_set_vec_u32 (cpu, vd, 0, val1);
11201                 aarch64_set_vec_u32 (cpu, vd + 1, 0, val2);
11202                 aarch64_set_vec_u32 (cpu, vd + 2, 0, val3);
11203                 aarch64_set_vec_u32 (cpu, vd + 3, 0, val4);
11204               }
11205           }
11206           break;
11207
11208         case 3:
11209           {
11210             uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11211             uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11212             uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16);
11213             uint64_t val4 = aarch64_get_mem_u64 (cpu, address + 24);
11214
11215             for (i = 0; i < (full ? 2 : 1); i++)
11216               {
11217                 aarch64_set_vec_u64 (cpu, vd, 0, val1);
11218                 aarch64_set_vec_u64 (cpu, vd + 1, 0, val2);
11219                 aarch64_set_vec_u64 (cpu, vd + 2, 0, val3);
11220                 aarch64_set_vec_u64 (cpu, vd + 3, 0, val4);
11221               }
11222           }
11223           break;
11224
11225         default:
11226           HALT_UNALLOC;
11227         }
11228       break;
11229
11230     default:
11231       HALT_UNALLOC;
11232     }
11233 }
11234
11235 static void
11236 do_vec_load_store (sim_cpu *cpu)
11237 {
11238   /* {LD|ST}<N>   {Vd..Vd+N}, vaddr
11239
11240      instr[31]    = 0
11241      instr[30]    = element selector 0=>half, 1=>all elements
11242      instr[29,25] = 00110
11243      instr[24]    = ?
11244      instr[23]    = 0=>simple, 1=>post
11245      instr[22]    = 0=>store, 1=>load
11246      instr[21]    = 0 (LDn) / small(0)-large(1) selector (LDnR)
11247      instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
11248                     11111 (immediate post inc)
11249      instr[15,12] = elements and destinations.  eg for load:
11250                      0000=>LD4 => load multiple 4-element to
11251                      four consecutive registers
11252                      0100=>LD3 => load multiple 3-element to
11253                      three consecutive registers
11254                      1000=>LD2 => load multiple 2-element to
11255                      two consecutive registers
11256                      0010=>LD1 => load multiple 1-element to
11257                      four consecutive registers
11258                      0110=>LD1 => load multiple 1-element to
11259                      three consecutive registers
11260                      1010=>LD1 => load multiple 1-element to
11261                      two consecutive registers
11262                      0111=>LD1 => load multiple 1-element to
11263                      one register
11264                      1100=>LDR1,LDR2
11265                      1110=>LDR3,LDR4
11266      instr[11,10] = element size 00=> byte(b), 01=> half(h),
11267                                  10=> word(s), 11=> double(d)
11268      instr[9,5]   = Vn, can be SP
11269      instr[4,0]   = Vd  */
11270
11271   int post;
11272   int load;
11273   unsigned vn;
11274   uint64_t address;
11275   int type;
11276
11277   if (INSTR (31, 31) != 0
11278       || INSTR (29, 25) != 0x06)
11279     HALT_NYI;
11280
11281   type = INSTR (15, 12);
11282   if (type != 0xE && type != 0xE && INSTR (21, 21) != 0)
11283     HALT_NYI;
11284
11285   post = INSTR (23, 23);
11286   load = INSTR (22, 22);
11287   vn = INSTR (9, 5);
11288   address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
11289
11290   if (post)
11291     {
11292       unsigned vm = INSTR (20, 16);
11293
11294       if (vm == R31)
11295         {
11296           unsigned sizeof_operation;
11297
11298           switch (type)
11299             {
11300             case 0: sizeof_operation = 32; break;
11301             case 4: sizeof_operation = 24; break;
11302             case 8: sizeof_operation = 16; break;
11303
11304             case 0xC:
11305               sizeof_operation = INSTR (21, 21) ? 2 : 1;
11306               sizeof_operation <<= INSTR (11, 10);
11307               break;
11308
11309             case 0xE:
11310               sizeof_operation = INSTR (21, 21) ? 8 : 4;
11311               sizeof_operation <<= INSTR (11, 10);
11312               break;
11313
11314             case 7:
11315               /* One register, immediate offset variant.  */
11316               sizeof_operation = 8;
11317               break;
11318
11319             case 10:
11320               /* Two registers, immediate offset variant.  */
11321               sizeof_operation = 16;
11322               break;
11323
11324             case 6:
11325               /* Three registers, immediate offset variant.  */
11326               sizeof_operation = 24;
11327               break;
11328
11329             case 2:
11330               /* Four registers, immediate offset variant.  */
11331               sizeof_operation = 32;
11332               break;
11333
11334             default:
11335               HALT_UNALLOC;
11336             }
11337
11338           if (INSTR (30, 30))
11339             sizeof_operation *= 2;
11340
11341           aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
11342         }
11343       else
11344         aarch64_set_reg_u64 (cpu, vn, SP_OK,
11345                              address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
11346     }
11347   else
11348     {
11349       NYI_assert (20, 16, 0);
11350     }
11351
11352   if (load)
11353     {
11354       switch (type)
11355         {
11356         case 0:  LD4 (cpu, address); return;
11357         case 4:  LD3 (cpu, address); return;
11358         case 8:  LD2 (cpu, address); return;
11359         case 2:  LD1_4 (cpu, address); return;
11360         case 6:  LD1_3 (cpu, address); return;
11361         case 10: LD1_2 (cpu, address); return;
11362         case 7:  LD1_1 (cpu, address); return;
11363
11364         case 0xE:
11365         case 0xC: do_vec_LDnR (cpu, address); return;
11366
11367         default:
11368           HALT_NYI;
11369         }
11370     }
11371
11372   /* Stores.  */
11373   switch (type)
11374     {
11375     case 0:  ST4 (cpu, address); return;
11376     case 4:  ST3 (cpu, address); return;
11377     case 8:  ST2 (cpu, address); return;
11378     case 2:  ST1_4 (cpu, address); return;
11379     case 6:  ST1_3 (cpu, address); return;
11380     case 10: ST1_2 (cpu, address); return;
11381     case 7:  ST1_1 (cpu, address); return;
11382     default:
11383       HALT_NYI;
11384     }
11385 }
11386
11387 static void
11388 dexLdSt (sim_cpu *cpu)
11389 {
11390   /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
11391      assert  group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
11392              group == GROUP_LDST_1100 || group == GROUP_LDST_1110
11393      bits [29,28:26] of a LS are the secondary dispatch vector.  */
11394   uint32_t group2 = dispatchLS (aarch64_get_instr (cpu));
11395
11396   switch (group2)
11397     {
11398     case LS_EXCL_000:
11399       dexLoadExclusive (cpu); return;
11400
11401     case LS_LIT_010:
11402     case LS_LIT_011:
11403       dexLoadLiteral (cpu); return;
11404
11405     case LS_OTHER_110:
11406     case LS_OTHER_111:
11407       dexLoadOther (cpu); return;
11408
11409     case LS_ADVSIMD_001:
11410       do_vec_load_store (cpu); return;
11411
11412     case LS_PAIR_100:
11413       dex_load_store_pair_gr (cpu); return;
11414
11415     case LS_PAIR_101:
11416       dex_load_store_pair_fp (cpu); return;
11417
11418     default:
11419       /* Should never reach here.  */
11420       HALT_NYI;
11421     }
11422 }
11423
11424 /* Specific decode and execute for group Data Processing Register.  */
11425
11426 static void
11427 dexLogicalShiftedRegister (sim_cpu *cpu)
11428 {
11429   /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
11430      instr[30,29] = op
11431      instr[28:24] = 01010
11432      instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
11433      instr[21]    = N
11434      instr[20,16] = Rm
11435      instr[15,10] = count : must be 0xxxxx for 32 bit
11436      instr[9,5]   = Rn
11437      instr[4,0]   = Rd  */
11438
11439   uint32_t size      = INSTR (31, 31);
11440   Shift    shiftType = INSTR (23, 22);
11441   uint32_t count     = INSTR (15, 10);
11442
11443   /* 32 bit operations must have count[5] = 0.
11444      or else we have an UNALLOC.  */
11445   if (size == 0 && uimm (count, 5, 5))
11446     HALT_UNALLOC;
11447
11448   /* Dispatch on size:op:N.  */
11449   switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
11450     {
11451     case 0: and32_shift  (cpu, shiftType, count); return;
11452     case 1: bic32_shift  (cpu, shiftType, count); return;
11453     case 2: orr32_shift  (cpu, shiftType, count); return;
11454     case 3: orn32_shift  (cpu, shiftType, count); return;
11455     case 4: eor32_shift  (cpu, shiftType, count); return;
11456     case 5: eon32_shift  (cpu, shiftType, count); return;
11457     case 6: ands32_shift (cpu, shiftType, count); return;
11458     case 7: bics32_shift (cpu, shiftType, count); return;
11459     case 8: and64_shift  (cpu, shiftType, count); return;
11460     case 9: bic64_shift  (cpu, shiftType, count); return;
11461     case 10:orr64_shift  (cpu, shiftType, count); return;
11462     case 11:orn64_shift  (cpu, shiftType, count); return;
11463     case 12:eor64_shift  (cpu, shiftType, count); return;
11464     case 13:eon64_shift  (cpu, shiftType, count); return;
11465     case 14:ands64_shift (cpu, shiftType, count); return;
11466     case 15:bics64_shift (cpu, shiftType, count); return;
11467     }
11468 }
11469
11470 /* 32 bit conditional select.  */
11471 static void
11472 csel32 (sim_cpu *cpu, CondCode cc)
11473 {
11474   unsigned rm = INSTR (20, 16);
11475   unsigned rn = INSTR (9, 5);
11476   unsigned rd = INSTR (4, 0);
11477
11478   aarch64_set_reg_u64 (cpu, rd, NO_SP,
11479                        testConditionCode (cpu, cc)
11480                        ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
11481                        : aarch64_get_reg_u32 (cpu, rm, NO_SP));
11482 }
11483
11484 /* 64 bit conditional select.  */
11485 static void
11486 csel64 (sim_cpu *cpu, CondCode cc)
11487 {
11488   unsigned rm = INSTR (20, 16);
11489   unsigned rn = INSTR (9, 5);
11490   unsigned rd = INSTR (4, 0);
11491
11492   aarch64_set_reg_u64 (cpu, rd, NO_SP,
11493                        testConditionCode (cpu, cc)
11494                        ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
11495                        : aarch64_get_reg_u64 (cpu, rm, NO_SP));
11496 }
11497
11498 /* 32 bit conditional increment.  */
11499 static void
11500 csinc32 (sim_cpu *cpu, CondCode cc)
11501 {
11502   unsigned rm = INSTR (20, 16);
11503   unsigned rn = INSTR (9, 5);
11504   unsigned rd = INSTR (4, 0);
11505
11506   aarch64_set_reg_u64 (cpu, rd, NO_SP,
11507                        testConditionCode (cpu, cc)
11508                        ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
11509                        : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
11510 }
11511
11512 /* 64 bit conditional increment.  */
11513 static void
11514 csinc64 (sim_cpu *cpu, CondCode cc)
11515 {
11516   unsigned rm = INSTR (20, 16);
11517   unsigned rn = INSTR (9, 5);
11518   unsigned rd = INSTR (4, 0);
11519
11520   aarch64_set_reg_u64 (cpu, rd, NO_SP,
11521                        testConditionCode (cpu, cc)
11522                        ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
11523                        : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
11524 }
11525
11526 /* 32 bit conditional invert.  */
11527 static void
11528 csinv32 (sim_cpu *cpu, CondCode cc)
11529 {
11530   unsigned rm = INSTR (20, 16);
11531   unsigned rn = INSTR (9, 5);
11532   unsigned rd = INSTR (4, 0);
11533
11534   aarch64_set_reg_u64 (cpu, rd, NO_SP,
11535                        testConditionCode (cpu, cc)
11536                        ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
11537                        : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
11538 }
11539
11540 /* 64 bit conditional invert.  */
11541 static void
11542 csinv64 (sim_cpu *cpu, CondCode cc)
11543 {
11544   unsigned rm = INSTR (20, 16);
11545   unsigned rn = INSTR (9, 5);
11546   unsigned rd = INSTR (4, 0);
11547
11548   aarch64_set_reg_u64 (cpu, rd, NO_SP,
11549                        testConditionCode (cpu, cc)
11550                        ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
11551                        : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
11552 }
11553
11554 /* 32 bit conditional negate.  */
11555 static void
11556 csneg32 (sim_cpu *cpu, CondCode cc)
11557 {
11558   unsigned rm = INSTR (20, 16);
11559   unsigned rn = INSTR (9, 5);
11560   unsigned rd = INSTR (4, 0);
11561
11562   aarch64_set_reg_u64 (cpu, rd, NO_SP,
11563                        testConditionCode (cpu, cc)
11564                        ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
11565                        : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
11566 }
11567
11568 /* 64 bit conditional negate.  */
11569 static void
11570 csneg64 (sim_cpu *cpu, CondCode cc)
11571 {
11572   unsigned rm = INSTR (20, 16);
11573   unsigned rn = INSTR (9, 5);
11574   unsigned rd = INSTR (4, 0);
11575
11576   aarch64_set_reg_u64 (cpu, rd, NO_SP,
11577                        testConditionCode (cpu, cc)
11578                        ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
11579                        : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
11580 }
11581
11582 static void
11583 dexCondSelect (sim_cpu *cpu)
11584 {
11585   /* instr[28,21] = 11011011
11586      instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
11587      instr[30:11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
11588                             100 ==> CSINV, 101 ==> CSNEG,
11589                             _1_ ==> UNALLOC
11590      instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
11591      instr[15,12] = cond
11592      instr[29] = S : 0 ==> ok, 1 ==> UNALLOC  */
11593
11594   CondCode cc = INSTR (15, 12);
11595   uint32_t S = INSTR (29, 29);
11596   uint32_t op2 = INSTR (11, 10);
11597
11598   if (S == 1)
11599     HALT_UNALLOC;
11600
11601   if (op2 & 0x2)
11602     HALT_UNALLOC;
11603
11604   switch ((INSTR (31, 30) << 1) | op2)
11605     {
11606     case 0: csel32  (cpu, cc); return;
11607     case 1: csinc32 (cpu, cc); return;
11608     case 2: csinv32 (cpu, cc); return;
11609     case 3: csneg32 (cpu, cc); return;
11610     case 4: csel64  (cpu, cc); return;
11611     case 5: csinc64 (cpu, cc); return;
11612     case 6: csinv64 (cpu, cc); return;
11613     case 7: csneg64 (cpu, cc); return;
11614     }
11615 }
11616
11617 /* Some helpers for counting leading 1 or 0 bits.  */
11618
/* Counts the number of leading bits which are the same
   in a 32 bit value in the range 1 to 32.

   Only unsigned arithmetic is used, so the result does not depend
   on how the compiler shifts negative signed values.  */
static uint32_t
leading32 (uint32_t value)
{
  /* When the top bit is set, invert the value so that the run of
     leading one bits becomes a run of leading zero bits.  Either
     way the top bit of X is now clear.  */
  uint32_t x = (value & 0x80000000U) ? ~value : value;
  uint32_t count = 0;

  /* Every bit matched the top bit.  */
  if (x == 0)
    return 32;

  /* Binary search for the highest set bit, moving it to the top.  */
  if ((x >> 16) == 0)
    {
      count += 16;
      x <<= 16;
    }
  if ((x >> 24) == 0)
    {
      count += 8;
      x <<= 8;
    }
  if ((x >> 28) == 0)
    {
      count += 4;
      x <<= 4;
    }
  if ((x >> 30) == 0)
    {
      count += 2;
      x <<= 2;
    }
  if ((x >> 31) == 0)
    count += 1;

  return count;
}
11662
/* Counts the number of leading bits which are the same
   in a 64 bit value in the range 1 to 64.

   Only unsigned arithmetic is used, so the result does not depend
   on how the compiler shifts negative signed values.  */
static uint64_t
leading64 (uint64_t value)
{
  /* When the top bit is set, invert the value so that the run of
     leading one bits becomes a run of leading zero bits.  Either
     way the top bit of X is now clear.  */
  uint64_t x = (value & 0x8000000000000000ULL) ? ~value : value;
  uint64_t count = 0;

  /* Every bit matched the top bit.  */
  if (x == 0)
    return 64;

  /* Binary search for the highest set bit, moving it to the top.  */
  if ((x >> 32) == 0)
    {
      count += 32;
      x <<= 32;
    }
  if ((x >> 48) == 0)
    {
      count += 16;
      x <<= 16;
    }
  if ((x >> 56) == 0)
    {
      count += 8;
      x <<= 8;
    }
  if ((x >> 60) == 0)
    {
      count += 4;
      x <<= 4;
    }
  if ((x >> 62) == 0)
    {
      count += 2;
      x <<= 2;
    }
  if ((x >> 63) == 0)
    count += 1;

  return count;
}
11706
11707 /* Bit operations.  */
11708 /* N.B register args may not be SP.  */
11709
11710 /* 32 bit count leading sign bits.  */
11711 static void
11712 cls32 (sim_cpu *cpu)
11713 {
11714   unsigned rn = INSTR (9, 5);
11715   unsigned rd = INSTR (4, 0);
11716
11717   /* N.B. the result needs to exclude the leading bit.  */
11718   aarch64_set_reg_u64
11719     (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
11720 }
11721
11722 /* 64 bit count leading sign bits.  */
11723 static void
11724 cls64 (sim_cpu *cpu)
11725 {
11726   unsigned rn = INSTR (9, 5);
11727   unsigned rd = INSTR (4, 0);
11728
11729   /* N.B. the result needs to exclude the leading bit.  */
11730   aarch64_set_reg_u64
11731     (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
11732 }
11733
11734 /* 32 bit count leading zero bits.  */
11735 static void
11736 clz32 (sim_cpu *cpu)
11737 {
11738   unsigned rn = INSTR (9, 5);
11739   unsigned rd = INSTR (4, 0);
11740   uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
11741
11742   /* if the sign (top) bit is set then the count is 0.  */
11743   if (pick32 (value, 31, 31))
11744     aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
11745   else
11746     aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
11747 }
11748
11749 /* 64 bit count leading zero bits.  */
11750 static void
11751 clz64 (sim_cpu *cpu)
11752 {
11753   unsigned rn = INSTR (9, 5);
11754   unsigned rd = INSTR (4, 0);
11755   uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
11756
11757   /* if the sign (top) bit is set then the count is 0.  */
11758   if (pick64 (value, 63, 63))
11759     aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
11760   else
11761     aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
11762 }
11763
11764 /* 32 bit reverse bits.  */
11765 static void
11766 rbit32 (sim_cpu *cpu)
11767 {
11768   unsigned rn = INSTR (9, 5);
11769   unsigned rd = INSTR (4, 0);
11770   uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
11771   uint32_t result = 0;
11772   int i;
11773
11774   for (i = 0; i < 32; i++)
11775     {
11776       result <<= 1;
11777       result |= (value & 1);
11778       value >>= 1;
11779     }
11780   aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
11781 }
11782
11783 /* 64 bit reverse bits.  */
11784 static void
11785 rbit64 (sim_cpu *cpu)
11786 {
11787   unsigned rn = INSTR (9, 5);
11788   unsigned rd = INSTR (4, 0);
11789   uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
11790   uint64_t result = 0;
11791   int i;
11792
11793   for (i = 0; i < 64; i++)
11794     {
11795       result <<= 1;
11796       result |= (value & 1UL);
11797       value >>= 1;
11798     }
11799   aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
11800 }
11801
11802 /* 32 bit reverse bytes.  */
11803 static void
11804 rev32 (sim_cpu *cpu)
11805 {
11806   unsigned rn = INSTR (9, 5);
11807   unsigned rd = INSTR (4, 0);
11808   uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
11809   uint32_t result = 0;
11810   int i;
11811
11812   for (i = 0; i < 4; i++)
11813     {
11814       result <<= 8;
11815       result |= (value & 0xff);
11816       value >>= 8;
11817     }
11818   aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
11819 }
11820
11821 /* 64 bit reverse bytes.  */
11822 static void
11823 rev64 (sim_cpu *cpu)
11824 {
11825   unsigned rn = INSTR (9, 5);
11826   unsigned rd = INSTR (4, 0);
11827   uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
11828   uint64_t result = 0;
11829   int i;
11830
11831   for (i = 0; i < 8; i++)
11832     {
11833       result <<= 8;
11834       result |= (value & 0xffULL);
11835       value >>= 8;
11836     }
11837   aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
11838 }
11839
11840 /* 32 bit reverse shorts.  */
11841 /* N.B.this reverses the order of the bytes in each half word.  */
11842 static void
11843 revh32 (sim_cpu *cpu)
11844 {
11845   unsigned rn = INSTR (9, 5);
11846   unsigned rd = INSTR (4, 0);
11847   uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
11848   uint32_t result = 0;
11849   int i;
11850
11851   for (i = 0; i < 2; i++)
11852     {
11853       result <<= 8;
11854       result |= (value & 0x00ff00ff);
11855       value >>= 8;
11856     }
11857   aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
11858 }
11859
11860 /* 64 bit reverse shorts.  */
11861 /* N.B.this reverses the order of the bytes in each half word.  */
11862 static void
11863 revh64 (sim_cpu *cpu)
11864 {
11865   unsigned rn = INSTR (9, 5);
11866   unsigned rd = INSTR (4, 0);
11867   uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
11868   uint64_t result = 0;
11869   int i;
11870
11871   for (i = 0; i < 2; i++)
11872     {
11873       result <<= 8;
11874       result |= (value & 0x00ff00ff00ff00ffULL);
11875       value >>= 8;
11876     }
11877   aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
11878 }
11879
11880 static void
11881 dexDataProc1Source (sim_cpu *cpu)
11882 {
11883   /* instr[30]    = 1
11884      instr[28,21] = 111010110
11885      instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
11886      instr[29]    = S : 0 ==> ok, 1 ==> UNALLOC
11887      instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
11888      instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
11889                              000010 ==> REV, 000011 ==> UNALLOC
11890                              000100 ==> CLZ, 000101 ==> CLS
11891                              ow ==> UNALLOC
11892      instr[9,5]   = rn : may not be SP
11893      instr[4,0]   = rd : may not be SP.  */
11894
11895   uint32_t S = INSTR (29, 29);
11896   uint32_t opcode2 = INSTR (20, 16);
11897   uint32_t opcode = INSTR (15, 10);
11898   uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
11899
11900   if (S == 1)
11901     HALT_UNALLOC;
11902
11903   if (opcode2 != 0)
11904     HALT_UNALLOC;
11905
11906   if (opcode & 0x38)
11907     HALT_UNALLOC;
11908
11909   switch (dispatch)
11910     {
11911     case 0: rbit32 (cpu); return;
11912     case 1: revh32 (cpu); return;
11913     case 2: rev32 (cpu); return;
11914     case 4: clz32 (cpu); return;
11915     case 5: cls32 (cpu); return;
11916     case 8: rbit64 (cpu); return;
11917     case 9: revh64 (cpu); return;
11918     case 10:rev32 (cpu); return;
11919     case 11:rev64 (cpu); return;
11920     case 12:clz64 (cpu); return;
11921     case 13:cls64 (cpu); return;
11922     default: HALT_UNALLOC;
11923     }
11924 }
11925
11926 /* Variable shift.
11927    Shifts by count supplied in register.
   N.B. register args may not be SP.
11929    These all use the shifted auxiliary function for
11930    simplicity and clarity.  Writing the actual shift
11931    inline would avoid a branch and so be faster but
11932    would also necessitate getting signs right.  */
11933
11934 /* 32 bit arithmetic shift right.  */
11935 static void
11936 asrv32 (sim_cpu *cpu)
11937 {
11938   unsigned rm = INSTR (20, 16);
11939   unsigned rn = INSTR (9, 5);
11940   unsigned rd = INSTR (4, 0);
11941
11942   aarch64_set_reg_u64
11943     (cpu, rd, NO_SP,
11944      shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
11945                 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
11946 }
11947
11948 /* 64 bit arithmetic shift right.  */
11949 static void
11950 asrv64 (sim_cpu *cpu)
11951 {
11952   unsigned rm = INSTR (20, 16);
11953   unsigned rn = INSTR (9, 5);
11954   unsigned rd = INSTR (4, 0);
11955
11956   aarch64_set_reg_u64
11957     (cpu, rd, NO_SP,
11958      shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
11959                 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
11960 }
11961
11962 /* 32 bit logical shift left.  */
11963 static void
11964 lslv32 (sim_cpu *cpu)
11965 {
11966   unsigned rm = INSTR (20, 16);
11967   unsigned rn = INSTR (9, 5);
11968   unsigned rd = INSTR (4, 0);
11969
11970   aarch64_set_reg_u64
11971     (cpu, rd, NO_SP,
11972      shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
11973                 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
11974 }
11975
11976 /* 64 bit arithmetic shift left.  */
11977 static void
11978 lslv64 (sim_cpu *cpu)
11979 {
11980   unsigned rm = INSTR (20, 16);
11981   unsigned rn = INSTR (9, 5);
11982   unsigned rd = INSTR (4, 0);
11983
11984   aarch64_set_reg_u64
11985     (cpu, rd, NO_SP,
11986      shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
11987                 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
11988 }
11989
11990 /* 32 bit logical shift right.  */
11991 static void
11992 lsrv32 (sim_cpu *cpu)
11993 {
11994   unsigned rm = INSTR (20, 16);
11995   unsigned rn = INSTR (9, 5);
11996   unsigned rd = INSTR (4, 0);
11997
11998   aarch64_set_reg_u64
11999     (cpu, rd, NO_SP,
12000      shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
12001                 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12002 }
12003
12004 /* 64 bit logical shift right.  */
12005 static void
12006 lsrv64 (sim_cpu *cpu)
12007 {
12008   unsigned rm = INSTR (20, 16);
12009   unsigned rn = INSTR (9, 5);
12010   unsigned rd = INSTR (4, 0);
12011
12012   aarch64_set_reg_u64
12013     (cpu, rd, NO_SP,
12014      shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
12015                 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12016 }
12017
12018 /* 32 bit rotate right.  */
12019 static void
12020 rorv32 (sim_cpu *cpu)
12021 {
12022   unsigned rm = INSTR (20, 16);
12023   unsigned rn = INSTR (9, 5);
12024   unsigned rd = INSTR (4, 0);
12025
12026   aarch64_set_reg_u64
12027     (cpu, rd, NO_SP,
12028      shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
12029                 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12030 }
12031
12032 /* 64 bit rotate right.  */
12033 static void
12034 rorv64 (sim_cpu *cpu)
12035 {
12036   unsigned rm = INSTR (20, 16);
12037   unsigned rn = INSTR (9, 5);
12038   unsigned rd = INSTR (4, 0);
12039
12040   aarch64_set_reg_u64
12041     (cpu, rd, NO_SP,
12042      shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
12043                 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12044 }
12045
12046
12047 /* divide.  */
12048
12049 /* 32 bit signed divide.  */
12050 static void
12051 cpuiv32 (sim_cpu *cpu)
12052 {
12053   unsigned rm = INSTR (20, 16);
12054   unsigned rn = INSTR (9, 5);
12055   unsigned rd = INSTR (4, 0);
12056   /* N.B. the pseudo-code does the divide using 64 bit data.  */
12057   /* TODO : check that this rounds towards zero as required.  */
12058   int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
12059   int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);
12060
12061   aarch64_set_reg_s64 (cpu, rd, NO_SP,
12062                        divisor ? ((int32_t) (dividend / divisor)) : 0);
12063 }
12064
12065 /* 64 bit signed divide.  */
12066 static void
12067 cpuiv64 (sim_cpu *cpu)
12068 {
12069   unsigned rm = INSTR (20, 16);
12070   unsigned rn = INSTR (9, 5);
12071   unsigned rd = INSTR (4, 0);
12072
12073   /* TODO : check that this rounds towards zero as required.  */
12074   int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
12075
12076   aarch64_set_reg_s64
12077     (cpu, rd, NO_SP,
12078      divisor ? (aarch64_get_reg_s64 (cpu, rn, NO_SP) / divisor) : 0);
12079 }
12080
12081 /* 32 bit unsigned divide.  */
12082 static void
12083 udiv32 (sim_cpu *cpu)
12084 {
12085   unsigned rm = INSTR (20, 16);
12086   unsigned rn = INSTR (9, 5);
12087   unsigned rd = INSTR (4, 0);
12088
12089   /* N.B. the pseudo-code does the divide using 64 bit data.  */
12090   uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12091   uint64_t divisor  = aarch64_get_reg_u32 (cpu, rm, NO_SP);
12092
12093   aarch64_set_reg_u64 (cpu, rd, NO_SP,
12094                        divisor ? (uint32_t) (dividend / divisor) : 0);
12095 }
12096
12097 /* 64 bit unsigned divide.  */
12098 static void
12099 udiv64 (sim_cpu *cpu)
12100 {
12101   unsigned rm = INSTR (20, 16);
12102   unsigned rn = INSTR (9, 5);
12103   unsigned rd = INSTR (4, 0);
12104
12105   /* TODO : check that this rounds towards zero as required.  */
12106   uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12107
12108   aarch64_set_reg_u64
12109     (cpu, rd, NO_SP,
12110      divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
12111 }
12112
12113 static void
12114 dexDataProc2Source (sim_cpu *cpu)
12115 {
12116   /* assert instr[30] == 0
12117      instr[28,21] == 11010110
12118      instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12119      instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12120      instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> CPUIV,
12121                              001000 ==> LSLV, 001001 ==> LSRV
12122                              001010 ==> ASRV, 001011 ==> RORV
12123                              ow ==> UNALLOC.  */
12124
12125   uint32_t dispatch;
12126   uint32_t S = INSTR (29, 29);
12127   uint32_t opcode = INSTR (15, 10);
12128
12129   if (S == 1)
12130     HALT_UNALLOC;
12131
12132   if (opcode & 0x34)
12133     HALT_UNALLOC;
12134
12135   dispatch = (  (INSTR (31, 31) << 3)
12136               | (uimm (opcode, 3, 3) << 2)
12137               |  uimm (opcode, 1, 0));
12138   switch (dispatch)
12139     {
12140     case 2:  udiv32 (cpu); return;
12141     case 3:  cpuiv32 (cpu); return;
12142     case 4:  lslv32 (cpu); return;
12143     case 5:  lsrv32 (cpu); return;
12144     case 6:  asrv32 (cpu); return;
12145     case 7:  rorv32 (cpu); return;
12146     case 10: udiv64 (cpu); return;
12147     case 11: cpuiv64 (cpu); return;
12148     case 12: lslv64 (cpu); return;
12149     case 13: lsrv64 (cpu); return;
12150     case 14: asrv64 (cpu); return;
12151     case 15: rorv64 (cpu); return;
12152     default: HALT_UNALLOC;
12153     }
12154 }
12155
12156
12157 /* Multiply.  */
12158
12159 /* 32 bit multiply and add.  */
12160 static void
12161 madd32 (sim_cpu *cpu)
12162 {
12163   unsigned rm = INSTR (20, 16);
12164   unsigned ra = INSTR (14, 10);
12165   unsigned rn = INSTR (9, 5);
12166   unsigned rd = INSTR (4, 0);
12167
12168   aarch64_set_reg_u64 (cpu, rd, NO_SP,
12169                        aarch64_get_reg_u32 (cpu, ra, NO_SP)
12170                        + aarch64_get_reg_u32 (cpu, rn, NO_SP)
12171                        * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12172 }
12173
12174 /* 64 bit multiply and add.  */
12175 static void
12176 madd64 (sim_cpu *cpu)
12177 {
12178   unsigned rm = INSTR (20, 16);
12179   unsigned ra = INSTR (14, 10);
12180   unsigned rn = INSTR (9, 5);
12181   unsigned rd = INSTR (4, 0);
12182
12183   aarch64_set_reg_u64 (cpu, rd, NO_SP,
12184                        aarch64_get_reg_u64 (cpu, ra, NO_SP)
12185                        + aarch64_get_reg_u64 (cpu, rn, NO_SP)
12186                        * aarch64_get_reg_u64 (cpu, rm, NO_SP));
12187 }
12188
12189 /* 32 bit multiply and sub.  */
12190 static void
12191 msub32 (sim_cpu *cpu)
12192 {
12193   unsigned rm = INSTR (20, 16);
12194   unsigned ra = INSTR (14, 10);
12195   unsigned rn = INSTR (9, 5);
12196   unsigned rd = INSTR (4, 0);
12197
12198   aarch64_set_reg_u64 (cpu, rd, NO_SP,
12199                        aarch64_get_reg_u32 (cpu, ra, NO_SP)
12200                        - aarch64_get_reg_u32 (cpu, rn, NO_SP)
12201                        * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12202 }
12203
12204 /* 64 bit multiply and sub.  */
12205 static void
12206 msub64 (sim_cpu *cpu)
12207 {
12208   unsigned rm = INSTR (20, 16);
12209   unsigned ra = INSTR (14, 10);
12210   unsigned rn = INSTR (9, 5);
12211   unsigned rd = INSTR (4, 0);
12212
12213   aarch64_set_reg_u64 (cpu, rd, NO_SP,
12214                        aarch64_get_reg_u64 (cpu, ra, NO_SP)
12215                        - aarch64_get_reg_u64 (cpu, rn, NO_SP)
12216                        * aarch64_get_reg_u64 (cpu, rm, NO_SP));
12217 }
12218
12219 /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit.  */
12220 static void
12221 smaddl (sim_cpu *cpu)
12222 {
12223   unsigned rm = INSTR (20, 16);
12224   unsigned ra = INSTR (14, 10);
12225   unsigned rn = INSTR (9, 5);
12226   unsigned rd = INSTR (4, 0);
12227
12228   /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12229      obtain a 64 bit product.  */
12230   aarch64_set_reg_s64
12231     (cpu, rd, NO_SP,
12232      aarch64_get_reg_s64 (cpu, ra, NO_SP)
12233      + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12234      * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12235 }
12236
12237 /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit.  */
12238 static void
12239 smsubl (sim_cpu *cpu)
12240 {
12241   unsigned rm = INSTR (20, 16);
12242   unsigned ra = INSTR (14, 10);
12243   unsigned rn = INSTR (9, 5);
12244   unsigned rd = INSTR (4, 0);
12245
12246   /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12247      obtain a 64 bit product.  */
12248   aarch64_set_reg_s64
12249     (cpu, rd, NO_SP,
12250      aarch64_get_reg_s64 (cpu, ra, NO_SP)
12251      - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12252      * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12253 }
12254
12255 /* Integer Multiply/Divide.  */
12256
12257 /* First some macros and a helper function.  */
12258 /* Macros to test or access elements of 64 bit words.  */
12259
/* Mask used to access lo 32 bits of 64 bit unsigned int.  */
#define LOW_WORD_MASK ((1ULL << 32) - 1)
/* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int.  */
#define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
/* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int.  */
#define highWordToU64(_value_u64) ((_value_u64) >> 32)

/* Offset of sign bit in 64 bit signed integer.  */
#define SIGN_SHIFT_U64 63
/* The sign bit itself -- also identifies the minimum negative int value.
   N.B. this must be built from a 64 bit constant: unsigned long is
   only 32 bits wide on some hosts.  */
#define SIGN_BIT_U64 (1ULL << SIGN_SHIFT_U64)
/* Return true if a 64 bit signed int presented as an unsigned int is the
   most negative value.  */
#define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
/* Return true (non-zero) if a 64 bit signed int presented as an unsigned
   int has its sign bit set.  */
#define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
/* Return -1L if a 64 bit signed int presented as an unsigned int has its
   sign bit set, and 1L if it does not.  */
#define signOfU64(_value_u64) \
  ((((_value_u64) >> SIGN_SHIFT_U64) & 1) ? -1L : 1L)
/* Clear the sign bit of a 64 bit signed int presented as an unsigned int.
   The argument must be an lvalue; it is modified in place.  */
#define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
12282
/* Multiply two unsigned 64 bit ints and return the hi 64 bits of the
   128 bit product.

   Each operand is split into 32 bit halves and the four partial
   products are combined as in long multiplication:

     value1 * value2 = (hi * hi) << 64
                       + (lo * hi + hi * lo) << 32
                       + (lo * lo).  */

static uint64_t
mul64hi (uint64_t value1, uint64_t value2)
{
  const uint64_t low_mask = 0xffffffffULL;
  uint64_t value1_lo = value1 & low_mask;
  uint64_t value1_hi = value1 >> 32;
  uint64_t value2_lo = value2 & low_mask;
  uint64_t value2_hi = value2 >> 32;

  /* Cross-multiply.  Each partial product fits in 64 bits.  */
  uint64_t xproductlo = value1_lo * value2_lo;
  uint64_t xproductmid1 = value1_lo * value2_hi;
  uint64_t xproductmid2 = value1_hi * value2_lo;
  uint64_t xproducthi = value1_hi * value2_hi;

  /* Sum everything that lands in bits [63,32] of the product: the top
     half of the low product and the bottom halves of both middle
     products.  Three 32 bit quantities cannot overflow 64 bits, and
     whatever spills above bit 31 is the carry into the high word.  */
  uint64_t middle = (xproductlo >> 32)
		    + (xproductmid1 & low_mask)
		    + (xproductmid2 & low_mask);

  /* The high word is the top product, the top halves of both middle
     products and the carry out of the middle column.  */
  return (xproducthi
	  + (xproductmid1 >> 32)
	  + (xproductmid2 >> 32)
	  + (middle >> 32));
}
12329
12330 /* Signed multiply high, source, source2 :
12331    64 bit, dest <-- high 64-bit of result.  */
12332 static void
12333 smulh (sim_cpu *cpu)
12334 {
12335   uint64_t uresult;
12336   int64_t  result;
12337   unsigned rm = INSTR (20, 16);
12338   unsigned rn = INSTR (9, 5);
12339   unsigned rd = INSTR (4, 0);
12340   GReg     ra = INSTR (14, 10);
12341   int64_t  value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12342   int64_t  value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12343   uint64_t uvalue1;
12344   uint64_t uvalue2;
12345   int64_t  signum = 1;
12346
12347   if (ra != R31)
12348     HALT_UNALLOC;
12349
12350   /* Convert to unsigned and use the unsigned mul64hi routine
12351      the fix the sign up afterwards.  */
12352   if (value1 < 0)
12353     {
12354       signum *= -1L;
12355       uvalue1 = -value1;
12356     }
12357   else
12358     {
12359       uvalue1 = value1;
12360     }
12361
12362   if (value2 < 0)
12363     {
12364       signum *= -1L;
12365       uvalue2 = -value2;
12366     }
12367   else
12368     {
12369       uvalue2 = value2;
12370     }
12371
12372   uresult = mul64hi (uvalue1, uvalue2);
12373   result = uresult;
12374   result *= signum;
12375
12376   aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
12377 }
12378
12379 /* Unsigned multiply add long -- source, source2 :
12380    32 bit, source3 : 64 bit.  */
12381 static void
12382 umaddl (sim_cpu *cpu)
12383 {
12384   unsigned rm = INSTR (20, 16);
12385   unsigned ra = INSTR (14, 10);
12386   unsigned rn = INSTR (9, 5);
12387   unsigned rd = INSTR (4, 0);
12388
12389   /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12390      obtain a 64 bit product.  */
12391   aarch64_set_reg_u64
12392     (cpu, rd, NO_SP,
12393      aarch64_get_reg_u64 (cpu, ra, NO_SP)
12394      + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
12395      * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
12396 }
12397
12398 /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit.  */
12399 static void
12400 umsubl (sim_cpu *cpu)
12401 {
12402   unsigned rm = INSTR (20, 16);
12403   unsigned ra = INSTR (14, 10);
12404   unsigned rn = INSTR (9, 5);
12405   unsigned rd = INSTR (4, 0);
12406
12407   /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12408      obtain a 64 bit product.  */
12409   aarch64_set_reg_u64
12410     (cpu, rd, NO_SP,
12411      aarch64_get_reg_u64 (cpu, ra, NO_SP)
12412      - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
12413      * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
12414 }
12415
12416 /* Unsigned multiply high, source, source2 :
12417    64 bit, dest <-- high 64-bit of result.  */
12418 static void
12419 umulh (sim_cpu *cpu)
12420 {
12421   unsigned rm = INSTR (20, 16);
12422   unsigned rn = INSTR (9, 5);
12423   unsigned rd = INSTR (4, 0);
12424   GReg     ra = INSTR (14, 10);
12425
12426   if (ra != R31)
12427     HALT_UNALLOC;
12428
12429   aarch64_set_reg_u64 (cpu, rd, NO_SP,
12430                        mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
12431                                 aarch64_get_reg_u64 (cpu, rm, NO_SP)));
12432 }
12433
12434 static void
12435 dexDataProc3Source (sim_cpu *cpu)
12436 {
12437   /* assert instr[28,24] == 11011.  */
12438   /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
12439      instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
12440      instr[23,21] = op31 : 111 ==> UNALLOC, o2 ==> ok
12441      instr[15] = o0 : 0/1 ==> ok
12442      instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB,     (32/64 bit)
12443                               0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
12444                               0100 ==> SMULH,                   (64 bit only)
12445                               1010 ==> UMADDL, 1011 ==> UNSUBL, (64 bit only)
12446                               1100 ==> UMULH                    (64 bit only)
12447                               ow ==> UNALLOC.  */
12448
12449   uint32_t dispatch;
12450   uint32_t size = INSTR (31, 31);
12451   uint32_t op54 = INSTR (30, 29);
12452   uint32_t op31 = INSTR (23, 21);
12453   uint32_t o0 = INSTR (15, 15);
12454
12455   if (op54 != 0)
12456     HALT_UNALLOC;
12457
12458   if (size == 0)
12459     {
12460       if (op31 != 0)
12461         HALT_UNALLOC;
12462
12463       if (o0 == 0)
12464         madd32 (cpu);
12465       else
12466         msub32 (cpu);
12467       return;
12468     }
12469
12470   dispatch = (op31 << 1) | o0;
12471
12472   switch (dispatch)
12473     {
12474     case 0:  madd64 (cpu); return;
12475     case 1:  msub64 (cpu); return;
12476     case 2:  smaddl (cpu); return;
12477     case 3:  smsubl (cpu); return;
12478     case 4:  smulh (cpu); return;
12479     case 10: umaddl (cpu); return;
12480     case 11: umsubl (cpu); return;
12481     case 12: umulh (cpu); return;
12482     default: HALT_UNALLOC;
12483     }
12484 }
12485
12486 static void
12487 dexDPReg (sim_cpu *cpu)
12488 {
12489   /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
12490      assert  group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
12491      bits [28:24:21] of a DPReg are the secondary dispatch vector.  */
12492   uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
12493
12494   switch (group2)
12495     {
12496     case DPREG_LOG_000:
12497     case DPREG_LOG_001:
12498       dexLogicalShiftedRegister (cpu); return;
12499
12500     case DPREG_ADDSHF_010:
12501       dexAddSubtractShiftedRegister (cpu); return;
12502
12503     case DPREG_ADDEXT_011:
12504       dexAddSubtractExtendedRegister (cpu); return;
12505
12506     case DPREG_ADDCOND_100:
12507       {
12508         /* This set bundles a variety of different operations.  */
12509         /* Check for.  */
12510         /* 1) add/sub w carry.  */
12511         uint32_t mask1 = 0x1FE00000U;
12512         uint32_t val1  = 0x1A000000U;
12513         /* 2) cond compare register/immediate.  */
12514         uint32_t mask2 = 0x1FE00000U;
12515         uint32_t val2  = 0x1A400000U;
12516         /* 3) cond select.  */
12517         uint32_t mask3 = 0x1FE00000U;
12518         uint32_t val3  = 0x1A800000U;
12519         /* 4) data proc 1/2 source.  */
12520         uint32_t mask4 = 0x1FE00000U;
12521         uint32_t val4  = 0x1AC00000U;
12522
12523         if ((aarch64_get_instr (cpu) & mask1) == val1)
12524           dexAddSubtractWithCarry (cpu);
12525
12526         else if ((aarch64_get_instr (cpu) & mask2) == val2)
12527           CondCompare (cpu);
12528
12529         else if ((aarch64_get_instr (cpu) & mask3) == val3)
12530           dexCondSelect (cpu);
12531
12532         else if ((aarch64_get_instr (cpu) & mask4) == val4)
12533           {
12534             /* Bit 30 is clear for data proc 2 source
12535                and set for data proc 1 source.  */
12536             if (aarch64_get_instr (cpu)  & (1U << 30))
12537               dexDataProc1Source (cpu);
12538             else
12539               dexDataProc2Source (cpu);
12540           }
12541
12542         else
12543           /* Should not reach here.  */
12544           HALT_NYI;
12545
12546         return;
12547       }
12548
12549     case DPREG_3SRC_110:
12550       dexDataProc3Source (cpu); return;
12551
12552     case DPREG_UNALLOC_101:
12553       HALT_UNALLOC;
12554
12555     case DPREG_3SRC_111:
12556       dexDataProc3Source (cpu); return;
12557
12558     default:
12559       /* Should never reach here.  */
12560       HALT_NYI;
12561     }
12562 }
12563
12564 /* Unconditional Branch immediate.
12565    Offset is a PC-relative byte offset in the range +/- 128MiB.
12566    The offset is assumed to be raw from the decode i.e. the
12567    simulator is expected to scale them from word offsets to byte.  */
12568
12569 /* Unconditional branch.  */
12570 static void
12571 buc (sim_cpu *cpu, int32_t offset)
12572 {
12573   aarch64_set_next_PC_by_offset (cpu, offset);
12574 }
12575
12576 static unsigned stack_depth = 0;
12577
12578 /* Unconditional branch and link -- writes return PC to LR.  */
12579 static void
12580 bl (sim_cpu *cpu, int32_t offset)
12581 {
12582   aarch64_save_LR (cpu);
12583   aarch64_set_next_PC_by_offset (cpu, offset);
12584
12585   if (TRACE_BRANCH_P (cpu))
12586     {
12587       ++ stack_depth;
12588       TRACE_BRANCH (cpu,
12589                     " %*scall %" PRIx64 " [%s]"
12590                     " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
12591                     stack_depth, " ", aarch64_get_next_PC (cpu),
12592                     aarch64_get_func (aarch64_get_next_PC (cpu)),
12593                     aarch64_get_reg_u64 (cpu, 0, NO_SP),
12594                     aarch64_get_reg_u64 (cpu, 1, NO_SP),
12595                     aarch64_get_reg_u64 (cpu, 2, NO_SP)
12596                     );
12597     }
12598 }
12599
12600 /* Unconditional Branch register.
12601    Branch/return address is in source register.  */
12602
12603 /* Unconditional branch.  */
12604 static void
12605 br (sim_cpu *cpu)
12606 {
12607   unsigned rn = INSTR (9, 5);
12608   aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
12609 }
12610
12611 /* Unconditional branch and link -- writes return PC to LR.  */
12612 static void
12613 blr (sim_cpu *cpu)
12614 {
12615   unsigned rn = INSTR (9, 5);
12616
12617   /* The pseudo code in the spec says we update LR before fetching.
12618      the value from the rn.  */
12619   aarch64_save_LR (cpu);
12620   aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
12621
12622   if (TRACE_BRANCH_P (cpu))
12623     {
12624       ++ stack_depth;
12625       TRACE_BRANCH (cpu,
12626                     " %*scall %" PRIx64 " [%s]"
12627                     " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
12628                     stack_depth, " ", aarch64_get_next_PC (cpu),
12629                     aarch64_get_func (aarch64_get_next_PC (cpu)),
12630                     aarch64_get_reg_u64 (cpu, 0, NO_SP),
12631                     aarch64_get_reg_u64 (cpu, 1, NO_SP),
12632                     aarch64_get_reg_u64 (cpu, 2, NO_SP)
12633                     );
12634     }
12635 }
12636
12637 /* Return -- assembler will default source to LR this is functionally
12638    equivalent to br but, presumably, unlike br it side effects the
12639    branch predictor.  */
12640 static void
12641 ret (sim_cpu *cpu)
12642 {
12643   unsigned rn = INSTR (9, 5);
12644   aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
12645
12646   if (TRACE_BRANCH_P (cpu))
12647     {
12648       TRACE_BRANCH (cpu,
12649                     " %*sreturn [result: %" PRIx64 "]",
12650                     stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
12651       -- stack_depth;
12652     }
12653 }
12654
12655 /* NOP -- we implement this and call it from the decode in case we
12656    want to intercept it later.  */
12657
12658 static void
12659 nop (sim_cpu *cpu)
12660 {
12661 }
12662
12663 /* Data synchronization barrier.  */
12664
12665 static void
12666 dsb (sim_cpu *cpu)
12667 {
12668 }
12669
12670 /* Data memory barrier.  */
12671
12672 static void
12673 dmb (sim_cpu *cpu)
12674 {
12675 }
12676
12677 /* Instruction synchronization barrier.  */
12678
12679 static void
12680 isb (sim_cpu *cpu)
12681 {
12682 }
12683
12684 static void
12685 dexBranchImmediate (sim_cpu *cpu)
12686 {
12687   /* assert instr[30,26] == 00101
12688      instr[31] ==> 0 == B, 1 == BL
12689      instr[25,0] == imm26 branch offset counted in words.  */
12690
12691   uint32_t top = INSTR (31, 31);
12692   /* We have a 26 byte signed word offset which we need to pass to the
12693      execute routine as a signed byte offset.  */
12694   int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
12695
12696   if (top)
12697     bl (cpu, offset);
12698   else
12699     buc (cpu, offset);
12700 }
12701
12702 /* Control Flow.  */
12703
12704 /* Conditional branch
12705
12706    Offset is a PC-relative byte offset in the range +/- 1MiB pos is
12707    a bit position in the range 0 .. 63
12708
12709    cc is a CondCode enum value as pulled out of the decode
12710
12711    N.B. any offset register (source) can only be Xn or Wn.  */
12712
12713 static void
12714 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
12715 {
12716   /* the test returns TRUE if CC is met.  */
12717   if (testConditionCode (cpu, cc))
12718     aarch64_set_next_PC_by_offset (cpu, offset);
12719 }
12720
12721 /* 32 bit branch on register non-zero.  */
12722 static void
12723 cbnz32 (sim_cpu *cpu, int32_t offset)
12724 {
12725   unsigned rt = INSTR (4, 0);
12726
12727   if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
12728     aarch64_set_next_PC_by_offset (cpu, offset);
12729 }
12730
12731 /* 64 bit branch on register zero.  */
12732 static void
12733 cbnz (sim_cpu *cpu, int32_t offset)
12734 {
12735   unsigned rt = INSTR (4, 0);
12736
12737   if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
12738     aarch64_set_next_PC_by_offset (cpu, offset);
12739 }
12740
12741 /* 32 bit branch on register non-zero.  */
12742 static void
12743 cbz32 (sim_cpu *cpu, int32_t offset)
12744 {
12745   unsigned rt = INSTR (4, 0);
12746
12747   if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
12748     aarch64_set_next_PC_by_offset (cpu, offset);
12749 }
12750
12751 /* 64 bit branch on register zero.  */
12752 static void
12753 cbz (sim_cpu *cpu, int32_t offset)
12754 {
12755   unsigned rt = INSTR (4, 0);
12756
12757   if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
12758     aarch64_set_next_PC_by_offset (cpu, offset);
12759 }
12760
12761 /* Branch on register bit test non-zero -- one size fits all.  */
12762 static void
12763 tbnz (sim_cpu *cpu, uint32_t  pos, int32_t offset)
12764 {
12765   unsigned rt = INSTR (4, 0);
12766
12767   if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (1 << pos))
12768     aarch64_set_next_PC_by_offset (cpu, offset);
12769 }
12770
12771 /* branch on register bit test zero -- one size fits all.  */
12772 static void
12773 tbz (sim_cpu *cpu, uint32_t  pos, int32_t offset)
12774 {
12775   unsigned rt = INSTR (4, 0);
12776
12777   if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (1 << pos)))
12778     aarch64_set_next_PC_by_offset (cpu, offset);
12779 }
12780
12781 static void
12782 dexCompareBranchImmediate (sim_cpu *cpu)
12783 {
12784   /* instr[30,25] = 01 1010
12785      instr[31]    = size : 0 ==> 32, 1 ==> 64
12786      instr[24]    = op : 0 ==> CBZ, 1 ==> CBNZ
12787      instr[23,5]  = simm19 branch offset counted in words
12788      instr[4,0]   = rt  */
12789
12790   uint32_t size = INSTR (31, 31);
12791   uint32_t op   = INSTR (24, 24);
12792   int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
12793
12794   if (size == 0)
12795     {
12796       if (op == 0)
12797         cbz32 (cpu, offset);
12798       else
12799         cbnz32 (cpu, offset);
12800     }
12801   else
12802     {
12803       if (op == 0)
12804         cbz (cpu, offset);
12805       else
12806         cbnz (cpu, offset);
12807     }
12808 }
12809
12810 static void
12811 dexTestBranchImmediate (sim_cpu *cpu)
12812 {
12813   /* instr[31]    = b5 : bit 5 of test bit idx
12814      instr[30,25] = 01 1011
12815      instr[24]    = op : 0 ==> TBZ, 1 == TBNZ
12816      instr[23,19] = b40 : bits 4 to 0 of test bit idx
12817      instr[18,5]  = simm14 : signed offset counted in words
12818      instr[4,0]   = uimm5  */
12819
12820   uint32_t pos = ((INSTR (31, 31) << 4)
12821                   | INSTR (23,19));
12822   int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
12823
12824   NYI_assert (30, 25, 0x1b);
12825
12826   if (INSTR (24, 24) == 0)
12827     tbz (cpu, pos, offset);
12828   else
12829     tbnz (cpu, pos, offset);
12830 }
12831
12832 static void
12833 dexCondBranchImmediate (sim_cpu *cpu)
12834 {
12835   /* instr[31,25] = 010 1010
12836      instr[24]    = op1; op => 00 ==> B.cond
12837      instr[23,5]  = simm19 : signed offset counted in words
12838      instr[4]     = op0
12839      instr[3,0]   = cond  */
12840
12841   int32_t offset;
12842   uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
12843
12844   NYI_assert (31, 25, 0x2a);
12845
12846   if (op != 0)
12847     HALT_UNALLOC;
12848
12849   offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
12850
12851   bcc (cpu, offset, INSTR (3, 0));
12852 }
12853
12854 static void
12855 dexBranchRegister (sim_cpu *cpu)
12856 {
12857   /* instr[31,25] = 110 1011
12858      instr[24,21] = op : 0 ==> BR, 1 => BLR, 2 => RET, 3 => ERET, 4 => DRPS
12859      instr[20,16] = op2 : must be 11111
12860      instr[15,10] = op3 : must be 000000
12861      instr[4,0]   = op2 : must be 11111.  */
12862
12863   uint32_t op = INSTR (24, 21);
12864   uint32_t op2 = INSTR (20, 16);
12865   uint32_t op3 = INSTR (15, 10);
12866   uint32_t op4 = INSTR (4, 0);
12867
12868   NYI_assert (31, 25, 0x6b);
12869
12870   if (op2 != 0x1F || op3 != 0 || op4 != 0)
12871     HALT_UNALLOC;
12872
12873   if (op == 0)
12874     br (cpu);
12875
12876   else if (op == 1)
12877     blr (cpu);
12878
12879   else if (op == 2)
12880     ret (cpu);
12881
12882   else
12883     {
12884       /* ERET and DRPS accept 0b11111 for rn = instr [4,0].  */
12885       /* anything else is unallocated.  */
12886       uint32_t rn = INSTR (4, 0);
12887
12888       if (rn != 0x1f)
12889         HALT_UNALLOC;
12890
12891       if (op == 4 || op == 5)
12892         HALT_NYI;
12893
12894       HALT_UNALLOC;
12895     }
12896 }
12897
/* Angel SVC reason codes.  handle_halt reads the reason from register 0
   when it is given the HLT immediate 0xf000 and dispatches on these
   values.  Only HeapInfo, Open, Close, Errno, Clock, GetCmdLine, IsTTY,
   Write and ReportException are serviced there; the remaining reasons
   are reported as unknown and stop the simulation.

   FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
   but this may not be available.  So instead we define the values we need
   here.  */
#define AngelSVC_Reason_Open            0x01
#define AngelSVC_Reason_Close           0x02
#define AngelSVC_Reason_Write           0x05
#define AngelSVC_Reason_Read            0x06
#define AngelSVC_Reason_IsTTY           0x09
#define AngelSVC_Reason_Seek            0x0A
#define AngelSVC_Reason_FLen            0x0C
#define AngelSVC_Reason_Remove          0x0E
#define AngelSVC_Reason_Rename          0x0F
#define AngelSVC_Reason_Clock           0x10
#define AngelSVC_Reason_Time            0x11
#define AngelSVC_Reason_System          0x12
#define AngelSVC_Reason_Errno           0x13
#define AngelSVC_Reason_GetCmdLine      0x15
#define AngelSVC_Reason_HeapInfo        0x16
#define AngelSVC_Reason_ReportException 0x18
#define AngelSVC_Reason_Elapsed         0x30
12918
12919
12920 static void
12921 handle_halt (sim_cpu *cpu, uint32_t val)
12922 {
12923   uint64_t result = 0;
12924
12925   if (val != 0xf000)
12926     {
12927       TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
12928       sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
12929                        sim_stopped, SIM_SIGTRAP);
12930     }
12931
12932   /* We have encountered an Angel SVC call.  See if we can process it.  */
12933   switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
12934     {
12935     case AngelSVC_Reason_HeapInfo:
12936       {
12937         /* Get the values.  */
12938         uint64_t stack_top = aarch64_get_stack_start (cpu);
12939         uint64_t heap_base = aarch64_get_heap_start (cpu);
12940
12941         /* Get the pointer  */
12942         uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
12943         ptr = aarch64_get_mem_u64 (cpu, ptr);
12944
12945         /* Fill in the memory block.  */
12946         /* Start addr of heap.  */
12947         aarch64_set_mem_u64 (cpu, ptr +  0, heap_base);
12948         /* End addr of heap.  */
12949         aarch64_set_mem_u64 (cpu, ptr +  8, stack_top);
12950         /* Lowest stack addr.  */
12951         aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
12952         /* Initial stack addr.  */
12953         aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);
12954
12955         TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
12956       }
12957       break;
12958
12959     case AngelSVC_Reason_Open:
12960       {
12961         /* Get the pointer  */
12962         /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);.  */
12963         /* FIXME: For now we just assume that we will only be asked
12964            to open the standard file descriptors.  */
12965         static int fd = 0;
12966         result = fd ++;
12967
12968         TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
12969       }
12970       break;
12971
12972     case AngelSVC_Reason_Close:
12973       {
12974         uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
12975         TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
12976         result = 0;
12977       }
12978       break;
12979
12980     case AngelSVC_Reason_Errno:
12981       result = 0;
12982       TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
12983       break;
12984
12985     case AngelSVC_Reason_Clock:
12986       result =
12987 #ifdef CLOCKS_PER_SEC
12988         (CLOCKS_PER_SEC >= 100)
12989         ? (clock () / (CLOCKS_PER_SEC / 100))
12990         : ((clock () * 100) / CLOCKS_PER_SEC)
12991 #else
12992         /* Presume unix... clock() returns microseconds.  */
12993         (clock () / 10000)
12994 #endif
12995         ;
12996         TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
12997       break;
12998
12999     case AngelSVC_Reason_GetCmdLine:
13000       {
13001         /* Get the pointer  */
13002         uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13003         ptr = aarch64_get_mem_u64 (cpu, ptr);
13004
13005         /* FIXME: No command line for now.  */
13006         aarch64_set_mem_u64 (cpu, ptr, 0);
13007         TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
13008       }
13009       break;
13010
13011     case AngelSVC_Reason_IsTTY:
13012       result = 1;
13013         TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
13014       break;
13015
13016     case AngelSVC_Reason_Write:
13017       {
13018         /* Get the pointer  */
13019         uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13020         /* Get the write control block.  */
13021         uint64_t fd  = aarch64_get_mem_u64 (cpu, ptr);
13022         uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
13023         uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);
13024
13025         TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
13026                        PRIx64 " on descriptor %" PRIx64,
13027                        len, buf, fd);
13028
13029         if (len > 1280)
13030           {
13031             TRACE_SYSCALL (cpu,
13032                            " AngelSVC: Write: Suspiciously long write: %ld",
13033                            (long) len);
13034             sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13035                              sim_stopped, SIM_SIGBUS);
13036           }
13037         else if (fd == 1)
13038           {
13039             printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
13040           }
13041         else if (fd == 2)
13042           {
13043             TRACE (cpu, 0, "\n");
13044             sim_io_eprintf (CPU_STATE (cpu), "%.*s",
13045                             (int) len, aarch64_get_mem_ptr (cpu, buf));
13046             TRACE (cpu, 0, "\n");
13047           }
13048         else
13049           {
13050             TRACE_SYSCALL (cpu,
13051                            " AngelSVC: Write: Unexpected file handle: %d",
13052                            (int) fd);
13053             sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13054                              sim_stopped, SIM_SIGABRT);
13055           }
13056       }
13057       break;
13058
13059     case AngelSVC_Reason_ReportException:
13060       {
13061         /* Get the pointer  */
13062         uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13063         /*ptr = aarch64_get_mem_u64 (cpu, ptr);.  */
13064         uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
13065         uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);
13066
13067         TRACE_SYSCALL (cpu,
13068                        "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
13069                        type, state);
13070
13071         if (type == 0x20026)
13072           sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13073                            sim_exited, state);
13074         else
13075           sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13076                            sim_stopped, SIM_SIGINT);
13077       }
13078       break;
13079
13080     case AngelSVC_Reason_Read:
13081     case AngelSVC_Reason_FLen:
13082     case AngelSVC_Reason_Seek:
13083     case AngelSVC_Reason_Remove:
13084     case AngelSVC_Reason_Time:
13085     case AngelSVC_Reason_System:
13086     case AngelSVC_Reason_Rename:
13087     case AngelSVC_Reason_Elapsed:
13088     default:
13089       TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
13090                      aarch64_get_reg_u32 (cpu, 0, NO_SP));
13091       sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13092                        sim_stopped, SIM_SIGTRAP);
13093     }
13094
13095   aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
13096 }
13097
13098 static void
13099 dexExcpnGen (sim_cpu *cpu)
13100 {
13101   /* instr[31:24] = 11010100
13102      instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
13103                           010 ==> HLT,       101 ==> DBG GEN EXCPN
13104      instr[20,5]  = imm16
13105      instr[4,2]   = opc2 000 ==> OK, ow ==> UNALLOC
13106      instr[1,0]   = LL : discriminates opc  */
13107
13108   uint32_t opc = INSTR (23, 21);
13109   uint32_t imm16 = INSTR (20, 5);
13110   uint32_t opc2 = INSTR (4, 2);
13111   uint32_t LL;
13112
13113   NYI_assert (31, 24, 0xd4);
13114
13115   if (opc2 != 0)
13116     HALT_UNALLOC;
13117
13118   LL = INSTR (1, 0);
13119
13120   /* We only implement HLT and BRK for now.  */
13121   if (opc == 1 && LL == 0)
13122     {
13123       TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
13124       sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13125                        sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13126     }
13127
13128   if (opc == 2 && LL == 0)
13129     handle_halt (cpu, imm16);
13130
13131   else if (opc == 0 || opc == 5)
13132     HALT_NYI;
13133
13134   else
13135     HALT_UNALLOC;
13136 }
13137
13138 /* Stub for accessing system registers.  */
13139
13140 static uint64_t
13141 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13142             unsigned crm, unsigned op2)
13143 {
13144   if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
13145     /* DCZID_EL0 - the Data Cache Zero ID register.
13146        We do not support DC ZVA at the moment, so
13147        we return a value with the disable bit set.
13148        We implement support for the DCZID register since
13149        it is used by the C library's memset function.  */
13150     return ((uint64_t) 1) << 4;
13151
13152   if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
13153     /* Cache Type Register.  */
13154     return 0x80008000UL;
13155
13156   if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
13157     /* TPIDR_EL0 - thread pointer id.  */
13158     return aarch64_get_thread_id (cpu);
13159
13160   if (op1 == 3 && crm == 4 && op2 == 0)
13161     return aarch64_get_FPCR (cpu);
13162
13163   if (op1 == 3 && crm == 4 && op2 == 1)
13164     return aarch64_get_FPSR (cpu);
13165
13166   else if (op1 == 3 && crm == 2 && op2 == 0)
13167     return aarch64_get_CPSR (cpu);
13168
13169   HALT_NYI;
13170 }
13171
13172 static void
13173 system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13174             unsigned crm, unsigned op2, uint64_t val)
13175 {
13176   if (op1 == 3 && crm == 4 && op2 == 0)
13177     aarch64_set_FPCR (cpu, val);
13178
13179   else if (op1 == 3 && crm == 4 && op2 == 1)
13180     aarch64_set_FPSR (cpu, val);
13181
13182   else if (op1 == 3 && crm == 2 && op2 == 0)
13183     aarch64_set_CPSR (cpu, val);
13184
13185   else
13186     HALT_NYI;
13187 }
13188
13189 static void
13190 do_mrs (sim_cpu *cpu)
13191 {
13192   /* instr[31:20] = 1101 0101 0001 1
13193      instr[19]    = op0
13194      instr[18,16] = op1
13195      instr[15,12] = CRn
13196      instr[11,8]  = CRm
13197      instr[7,5]   = op2
13198      instr[4,0]   = Rt  */
13199   unsigned sys_op0 = INSTR (19, 19) + 2;
13200   unsigned sys_op1 = INSTR (18, 16);
13201   unsigned sys_crn = INSTR (15, 12);
13202   unsigned sys_crm = INSTR (11, 8);
13203   unsigned sys_op2 = INSTR (7, 5);
13204   unsigned rt = INSTR (4, 0);
13205
13206   aarch64_set_reg_u64 (cpu, rt, NO_SP,
13207                        system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2));
13208 }
13209
13210 static void
13211 do_MSR_immediate (sim_cpu *cpu)
13212 {
13213   /* instr[31:19] = 1101 0101 0000 0
13214      instr[18,16] = op1
13215      instr[15,12] = 0100
13216      instr[11,8]  = CRm
13217      instr[7,5]   = op2
13218      instr[4,0]   = 1 1111  */
13219
13220   unsigned op1 = INSTR (18, 16);
13221   /*unsigned crm = INSTR (11, 8);*/
13222   unsigned op2 = INSTR (7, 5);
13223
13224   NYI_assert (31, 19, 0x1AA0);
13225   NYI_assert (15, 12, 0x4);
13226   NYI_assert (4,  0,  0x1F);
13227
13228   if (op1 == 0)
13229     {
13230       if (op2 == 5)
13231         HALT_NYI; /* set SPSel.  */
13232       else
13233         HALT_UNALLOC;
13234     }
13235   else if (op1 == 3)
13236     {
13237       if (op2 == 6)
13238         HALT_NYI; /* set DAIFset.  */
13239       else if (op2 == 7)
13240         HALT_NYI; /* set DAIFclr.  */
13241       else
13242         HALT_UNALLOC;
13243     }
13244   else
13245     HALT_UNALLOC;
13246 }
13247
13248 static void
13249 do_MSR_reg (sim_cpu *cpu)
13250 {
13251   /* instr[31:20] = 1101 0101 0001
13252      instr[19]    = op0
13253      instr[18,16] = op1
13254      instr[15,12] = CRn
13255      instr[11,8]  = CRm
13256      instr[7,5]   = op2
13257      instr[4,0]   = Rt  */
13258
13259   unsigned sys_op0 = INSTR (19, 19) + 2;
13260   unsigned sys_op1 = INSTR (18, 16);
13261   unsigned sys_crn = INSTR (15, 12);
13262   unsigned sys_crm = INSTR (11, 8);
13263   unsigned sys_op2 = INSTR (7, 5);
13264   unsigned rt = INSTR (4, 0);
13265
13266   NYI_assert (31, 20, 0xD51);
13267
13268   system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
13269               aarch64_get_reg_u64 (cpu, rt, NO_SP));
13270 }
13271
static void
do_SYS (sim_cpu *cpu)
{
  /* SYS: system instruction.  Only the fixed opcode bits are checked;
     the operation itself is currently ignored.

     instr[31,19] = 1101 0101 0000 1
     instr[18,16] = op1
     instr[15,12] = CRn
     instr[11,8]  = CRm
     instr[7,5]   = op2
     instr[4,0]   = Rt  */
  NYI_assert (31, 19, 0x1AA1);

  /* FIXME: For now we just silently accept system ops.  */
}
13285
static void
dexSystem (sim_cpu *cpu)
{
  /* Decode and execute a system instruction.

     instr[31:22] = 1101 01010 0
     instr[21]    = L
     instr[20,19] = op0
     instr[18,16] = op1
     instr[15,12] = CRn
     instr[11,8]  = CRm
     instr[7,5]   = op2
     instr[4,0]   = Rt  */

  /* We are interested in HINT, DSB, DMB and ISB.

     HINT has
     L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111.
     It is executed as a NOP when
     CRm != 0000 OR (CRm == 0000 AND (op2 == 000 OR op2 > 101));
     the remaining hints (CRm == 0000, op2 in 001..101) are not
     implemented.

     DSB, DMB, ISB are data synchronization barrier, data memory barrier
     and instruction synchronization barrier, respectively, where

     L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
     op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
     CRm<3:2> ==> domain, CRm<1:0> ==> types,
     domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
              10 ==> InnerShareable, 11 ==> FullSystem
     types :  01 ==> Reads, 10 ==> Writes,
              11 ==> All, 00 ==> All (domain == FullSystem).

     NOTE(review): several cases below end in HALT_NYI or HALT_UNALLOC
     without a break or return, so they rely on those macros not
     returning -- confirm against the macro definitions.  */

  unsigned rt = INSTR (4, 0);

  NYI_assert (31, 22, 0x354);

  /* Dispatch on L:op0:op1:CRn.  */
  switch (INSTR (21, 12))
    {
    /* L = 0, op0 = 00, op1 = 011, CRn = 0010 : HINT.  */
    case 0x032:
      if (rt == 0x1F)
        {
          /* NOP has CRm != 0000 OR.  */
          /*         (CRm == 0000 AND (op2 == 000 OR op2 > 101)).  */
          uint32_t crm = INSTR (11, 8);
          uint32_t op2 = INSTR (7, 5);

          if (crm != 0 || (op2 == 0 || op2 > 5))
            {
              /* Actually call nop method so we can reimplement it later.  */
              nop (cpu);
              return;
            }
        }
      /* Any other hint, or Rt != 11111, is not implemented.  */
      HALT_NYI;

    /* L = 0, op0 = 00, op1 = 011, CRn = 0011 : barriers.  */
    case 0x033:
      {
        uint32_t op2 =  INSTR (7, 5);

        switch (op2)
          {
          /* op2 == 010 is recognised but not implemented.  */
          case 2: HALT_NYI;
          case 4: dsb (cpu); return;
          case 5: dmb (cpu); return;
          case 6: isb (cpu); return;
          default: HALT_UNALLOC;
        }
      }

    /* L = 1, op0 = 11, op1 = 011, CRn = 0000, 0100 or 1101 :
       system register reads.  */
    case 0x3B0:
    case 0x3B4:
    case 0x3BD:
      do_mrs (cpu);
      return;

    /* L = 0, op0 = 01, op1 = 011, CRn = 0111.  */
    case 0x0B7:
      do_SYS (cpu); /* DC is an alias of SYS.  */
      return;

    default:
      /* L = 0 with the top bit of op0 set is a register write to a
         system register; L = 0, op0 = 00 with CRn = 0100 is the
         immediate form.  */
      if (INSTR (21, 20) == 0x1)
        do_MSR_reg (cpu);
      else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
        do_MSR_immediate (cpu);
      else
        HALT_NYI;
      return;
    }
}
13372
13373 static void
13374 dexBr (sim_cpu *cpu)
13375 {
13376   /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13377      assert  group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
13378      bits [31,29] of a BrExSys are the secondary dispatch vector.  */
13379   uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
13380
13381   switch (group2)
13382     {
13383     case BR_IMM_000:
13384       return dexBranchImmediate (cpu);
13385
13386     case BR_IMMCMP_001:
13387       /* Compare has bit 25 clear while test has it set.  */
13388       if (!INSTR (25, 25))
13389         dexCompareBranchImmediate (cpu);
13390       else
13391         dexTestBranchImmediate (cpu);
13392       return;
13393
13394     case BR_IMMCOND_010:
13395       /* This is a conditional branch if bit 25 is clear otherwise
13396          unallocated.  */
13397       if (!INSTR (25, 25))
13398         dexCondBranchImmediate (cpu);
13399       else
13400         HALT_UNALLOC;
13401       return;
13402
13403     case BR_UNALLOC_011:
13404       HALT_UNALLOC;
13405
13406     case BR_IMM_100:
13407       dexBranchImmediate (cpu);
13408       return;
13409
13410     case BR_IMMCMP_101:
13411       /* Compare has bit 25 clear while test has it set.  */
13412       if (!INSTR (25, 25))
13413         dexCompareBranchImmediate (cpu);
13414       else
13415         dexTestBranchImmediate (cpu);
13416       return;
13417
13418     case BR_REG_110:
13419       /* Unconditional branch reg has bit 25 set.  */
13420       if (INSTR (25, 25))
13421         dexBranchRegister (cpu);
13422
13423       /* This includes both Excpn Gen, System and unalloc operations.
13424          We need to decode the Excpn Gen operation BRK so we can plant
13425          debugger entry points.
13426          Excpn Gen operations have instr [24] = 0.
13427          we need to decode at least one of the System operations NOP
13428          which is an alias for HINT #0.
13429          System operations have instr [24,22] = 100.  */
13430       else if (INSTR (24, 24) == 0)
13431         dexExcpnGen (cpu);
13432
13433       else if (INSTR (24, 22) == 4)
13434         dexSystem (cpu);
13435
13436       else
13437         HALT_UNALLOC;
13438
13439       return;
13440
13441     case BR_UNALLOC_111:
13442       HALT_UNALLOC;
13443
13444     default:
13445       /* Should never reach here.  */
13446       HALT_NYI;
13447     }
13448 }
13449
13450 static void
13451 aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
13452 {
13453   /* We need to check if gdb wants an in here.  */
13454   /* checkBreak (cpu);.  */
13455
13456   uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
13457
13458   switch (group)
13459     {
13460     case GROUP_PSEUDO_0000:   dexPseudo (cpu); break;
13461     case GROUP_LDST_0100:     dexLdSt (cpu); break;
13462     case GROUP_DPREG_0101:    dexDPReg (cpu); break;
13463     case GROUP_LDST_0110:     dexLdSt (cpu); break;
13464     case GROUP_ADVSIMD_0111:  dexAdvSIMD0 (cpu); break;
13465     case GROUP_DPIMM_1000:    dexDPImm (cpu); break;
13466     case GROUP_DPIMM_1001:    dexDPImm (cpu); break;
13467     case GROUP_BREXSYS_1010:  dexBr (cpu); break;
13468     case GROUP_BREXSYS_1011:  dexBr (cpu); break;
13469     case GROUP_LDST_1100:     dexLdSt (cpu); break;
13470     case GROUP_DPREG_1101:    dexDPReg (cpu); break;
13471     case GROUP_LDST_1110:     dexLdSt (cpu); break;
13472     case GROUP_ADVSIMD_1111:  dexAdvSIMD1 (cpu); break;
13473
13474     case GROUP_UNALLOC_0001:
13475     case GROUP_UNALLOC_0010:
13476     case GROUP_UNALLOC_0011:
13477       HALT_UNALLOC;
13478
13479     default:
13480       /* Should never reach here.  */
13481       HALT_NYI;
13482     }
13483 }
13484
13485 static bfd_boolean
13486 aarch64_step (sim_cpu *cpu)
13487 {
13488   uint64_t pc = aarch64_get_PC (cpu);
13489
13490   if (pc == TOP_LEVEL_RETURN_PC)
13491     return FALSE;
13492
13493   aarch64_set_next_PC (cpu, pc + 4);
13494   aarch64_get_instr (cpu) = aarch64_get_mem_u32 (cpu, pc);
13495
13496   TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
13497               aarch64_get_instr (cpu));
13498   TRACE_DISASM (cpu, pc);
13499
13500   aarch64_decode_and_execute (cpu, pc);
13501
13502   return TRUE;
13503 }
13504
13505 void
13506 aarch64_run (SIM_DESC sd)
13507 {
13508   sim_cpu *cpu = STATE_CPU (sd, 0);
13509
13510   while (aarch64_step (cpu))
13511     aarch64_update_PC (cpu);
13512
13513   sim_engine_halt (sd, NULL, NULL, aarch64_get_PC (cpu),
13514                    sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13515 }
13516
13517 void
13518 aarch64_init (sim_cpu *cpu, uint64_t pc)
13519 {
13520   uint64_t sp = aarch64_get_stack_start (cpu);
13521
13522   /* Install SP, FP and PC and set LR to -20
13523      so we can detect a top-level return.  */
13524   aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
13525   aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
13526   aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
13527   aarch64_set_next_PC (cpu, pc);
13528   aarch64_update_PC (cpu);
13529   aarch64_init_LIT_table ();
13530 }